Welcome to the Linux Foundation Forum!

How can I resolve the "Transport layer mercury error" when creating a container on a DAOS client?

anonymous2520
anonymous2520 Posts: 2
edited June 10 in Red Hat Enterprise

Hello,

I am encountering an issue when attempting to create a container on my DAOS client. The error messages point to a transport layer mercury error. Below are the command and the errors I'm seeing:

[root@client1 ~]# daos cont create test --label mycont
external ERR # [15688.017525] mercury->msg: [error] /builddir/build/BUILD/mercury-2.1.0rc4/src/na/na_ofi.c:3047
# na_ofi_msg_send(): fi_tsend() failed, rc: -2 (No such file or directory)
external ERR # [15688.018207] mercury->hg: [error] /builddir/build/BUILD/mercury-2.1.0rc4/src/mercury_core.c:2727
# hg_core_forward_na(): Could not post send for input buffer (NA_NOENTRY)
hg ERR src/cart/crt_hg.c:1104 crt_hg_req_send_cb(0x30248c0) [opc=0x1020004 (DAOS) rpcid=0x72d8b86300000000 rank:tag=0:0] RPC failed; rc: DER_HG(-1020): 'Transport layer mercury error'
mgmt ERR src/mgmt/cli_mgmt.c:882 dc_mgmt_pool_find() test: failed to get PS replicas from 1 servers, DER_HG(-1020): 'Transport layer mercury error'
pool ERR src/pool/cli.c:198 dc_pool_choose_svc_rank() 00000000:test: dc_mgmt_pool_find() failed, DER_HG(-1020): 'Transport layer mercury error'
pool ERR src/pool/cli.c:503 dc_pool_connect_internal() 00000000:test: cannot find pool service: DER_HG(-1020): 'Transport layer mercury error'
ERROR: daos: DER_HG(-1020): Transport layer mercury error

System Information:

  • daos version 2.0.3, libdaos 2.0.0
  • mercury version: 2.1.0
  • fi_info: 1.15.1
  • libfabric: 1.15.1
  • libfabric api: 1.15
  • Network fabric: ethernet (enp0s3)
  • OS: CentOS 7.9

Both daos_server and daos_agent are running.
Configurations:
daos_server.yml
DAOS server configuration file.

name: daos_server

access_points: ['192.168.100.9']
port: 10001

transport_config:
  allow_insecure: true
  client_cert_dir: /etc/daos/certs/clients
  ca_cert: /etc/daos/certs/daosCA.crt
  cert: /etc/daos/certs/server.crt
  key: /etc/daos/certs/server.key
provider: ofi+sockets
socket_dir: /var/run/daos_server
nr_hugepages: 4096
control_log_mask: DEBUG
control_log_file: /tmp/daos_server.log
helper_log_file: /tmp/daos_admin.log

engines:
  -
    targets: 8
    nr_xs_helpers: 0
    fabric_iface: enp0s3
    fabric_iface_port: 31316
    log_mask: INFO
    log_file: /tmp/daos_engine_0.log
    env_vars:
      - CRT_TIMEOUT=30
    storage:
      -
        scm_mount: /mnt/daos0
        class: ram
        scm_size: 16

daos_agent.yml
DAOS agent configuration file.

name: daos_server
access_points: ['192.168.100.9']
port: 10001
transport_config:
  allow_insecure: true
  ca_cert: /etc/daos/certs/daosCA.crt
  cert: /etc/daos/certs/agent.crt
  key: /etc/daos/certs/agent.key
log_file: /tmp/daos_agent.log

fabric_ifaces:
  -
    numa_node: 0
    devices:
      -
        iface: enp0s3

daos_control.yml
DAOS manager (dmg) configuration file.

name: daos_server
port: 10001
hostlist: ['192.168.100.9']
transport_config:
  allow_insecure: true
  ca_cert: /etc/daos/certs/daosCA.crt
  cert: /etc/daos/certs/admin.crt
  key: /etc/daos/certs/admin.key

Output of "ip addr show" on the server:
[root@server ~]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: enp0s3: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 08:00:27:c1:40:b8 brd ff:ff:ff:ff:ff:ff
inet 192.168.100.9/24 brd 192.168.100.255 scope global noprefixroute dynamic enp0s3
valid_lft 600sec preferred_lft 600sec
inet6 fe80::ee83:e7ce:45ca:f9b/64 scope link noprefixroute
valid_lft forever preferred_lft forever
3: enp0s8: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether 08:00:27:b4:3a:98 brd ff:ff:ff:ff:ff:ff
inet 192.168.56.105/24 brd 192.168.56.255 scope global noprefixroute dynamic enp0s8
valid_lft 354sec preferred_lft 354sec
inet6 fe80::8a74:f52f:3e83:e609/64 scope link noprefixroute
valid_lft forever preferred_lft forever

Categories

Upcoming Training