On 10/3/15 8:38 AM, Niels de Vos wrote:
> I would suggest to double check executables and libraries with "ldd".
> Maybe there is a component that links against libibverbs too, and
> somehow caused the loading of the old symbol in the process.
>
> When the dynamic linker loads a library, the symbols are stored in a
> table. Upon the next access (checks for previous entries in the table
> are done without version, iiuc), the address of the resolved symbol is
> returned. I've seen symbol collisions before, and this might be similar.
>
Thanks!  This (and the elided examples above) is helpful.


> Can you pass the steps to reproduce this unexpected behaviour? What
> needs to be compiled (and how), which binaries are incorrect after that?
>
bill@simpson:~$ cd rdma/
bill@simpson:~/rdma$ git clone https://github.com/nfs-ganesha/ntirpc.git

bill@simpson:~/rdma$ cat cmake-ntirpc.sh
#!/bin/bash
rm -rf build-ntirpc && mkdir build-ntirpc && cd build-ntirpc && cmake -DCMAKE_BUILD_TYPE=Maintainer -DCMAKE_INSTALL_PREFIX=../install -DUSE_RPC_RDMA=ON -DCMAKE_C_FLAGS="-O0 -g -gdwarf-4" ../ntirpc/
bill@simpson:~/rdma$ . cmake-ntirpc.sh

bill@simpson:~/rdma/build-ntirpc$ make -j4 install
bill@simpson:~/rdma/build-ntirpc$ cd ..

bill@simpson:~/rdma$ cat cmake-ganesha.sh
#!/bin/bash
rm -rf build-ganesha && mkdir build-ganesha && cd build-ganesha && cmake -DCMAKE_BUILD_TYPE=Maintainer -DCMAKE_INSTALL_PREFIX=../install -DUSE_NFS_RDMA=ON -DCMAKE_C_FLAGS="-O0 -g -gdwarf-4" ../nfs-ganesha/src
bill@simpson:~/rdma$ . cmake-ganesha.sh

bill@simpson:~/rdma/build-ganesha$ make -j4 install
bill@simpson:~/rdma/build-ganesha$ cd ../install

bill@simpson:~/rdma/install$ sudo bash
[sudo] password for bill:
[root@simpson install]# cat ../setup.sh
#!/bin/bash
export LD_LIBRARY_PATH=:`pwd`/lib:`pwd`/lib64
echo $LD_LIBRARY_PATH
rm -f var/log/ganesha.log
[root@simpson install]# . ../setup.sh
:/home/bill/rdma/install/lib:/home/bill/rdma/install/lib64
[root@simpson install]# gdb ./bin/ganesha.nfsd
...
(gdb) run -F
...
*** mount from elsewhere ***

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fffc7fff700 (LWP 5662)]
0x00007ffff7bd4bc4 in ibv_alloc_pd () from /lib64/libibverbs.so.1
Missing separate debuginfos, use: dnf debuginfo-install 
gssproxy-0.4.1-2.fc23.x86_64 keyutils-libs-1.5.9-7.fc23.x86_64 
krb5-libs-1.13.2-10.fc23.x86_64 libcom_err-1.42.13-3.fc23.x86_64 
libcxgb3-1.3.1-8.fc23.x86_64 libibverbs-1.1.8-4.fc23.x86_64 
libmlx4-1.0.6-3.fc23.x86_64 libselinux-2.4-3.fc23.x86_64 
nss-mdns-0.10-16.fc23.x86_64 pcre-8.37-4.fc23.x86_64
(gdb) gcore ../dump02
warning: target file /proc/5607/cmdline contained unexpected null characters
Saved corefile ../dump02
(gdb) bt
#0  0x00007ffff7bd4bc4 in ibv_alloc_pd () from /lib64/libibverbs.so.1
#1  0x00007ffff7bd1c07 in ibv_alloc_pd () from /lib64/libibverbs.so.1
#2  0x00007ffff68045dc in rpc_rdma_pd_get (xprt=0x7fffbc0065f0)
     at /home/bill/rdma/ntirpc/src/rpc_rdma.c:260
#3  0x00007ffff6808582 in rpc_rdma_clone (l_xprt=0x7fffbc0008c0,
     cm_id=0x7fffa0000a30) at /home/bill/rdma/ntirpc/src/rpc_rdma.c:1827
#4  0x00007ffff6808d30 in rpc_rdma_accept_timedwait (l_xprt=0x7fffbc0008c0,
     abstime=0x7fffc7ffe520) at /home/bill/rdma/ntirpc/src/rpc_rdma.c:2032
#5  0x00007ffff6808e06 in rpc_rdma_accept_wait (l_xprt=0x7fffbc0008c0,
     msleep=30000) at /home/bill/rdma/ntirpc/src/rpc_rdma.c:2051
#6  0x00007ffff680964e in svc_rdma_ncreate (arg=0x7fffbc0008c0, sendsize=4096,
     recvsize=4096, flags=0) at /home/bill/rdma/ntirpc/src/svc_rdma.c:93
#7  0x00000000004513b4 in nfs_rdma_dispatcher_thread (nullarg=0x0)
     at /home/bill/rdma/nfs-ganesha/src/MainNFSD/nfs_rpc_rdma.c:161
#8  0x00007ffff6c3260a in start_thread (arg=0x7fffc7fff700)
     at pthread_create.c:334
#9  0x00007ffff6509bbd in clone ()
     at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
(gdb) quit
A debugging session is active.

        Inferior 1 [process 5607] will be killed.

Quit anyway? (y or n) y


[root@simpson install]# dnf debuginfo-install libibverbs-1.1.8-4.fc23.x86_64
...

[root@simpson install]# gdb ./bin/ganesha.nfsd ../dump02
...
(gdb) info threads
   Id   Target Id         Frame
   35   Thread 0x7ffff7fd8040 (LWP 6674) 0x00007ffff69d86ad in pthread_join (
     threadid=140736481720064, thread_return=0x0) at pthread_join.c:90
   34   Thread 0x7ffff4f65700 (LWP 6685) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   33   Thread 0x7ffff7ff6700 (LWP 6687) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   32   Thread 0x7fffeccc8700 (LWP 6830) pthread_cond_wait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
   31   Thread 0x7fffec4c7700 (LWP 6831) do_sigwait (sig=0x7fffec4c660c,
     set=<optimized out>) at ../sysdeps/unix/sysv/linux/sigwait.c:64
   30   Thread 0x7fffebcc6700 (LWP 6832) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   29   Thread 0x7fffeb4c5700 (LWP 6833) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   28   Thread 0x7fffeacc4700 (LWP 6834) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   27   Thread 0x7fffea4c3700 (LWP 6835) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   26   Thread 0x7fffe9cc2700 (LWP 6836) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   25   Thread 0x7fffe94c1700 (LWP 6837) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
---Type <return> to continue, or q <return> to quit---
   24   Thread 0x7fffe8cc0700 (LWP 6838) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   23   Thread 0x7fffe84bf700 (LWP 6839) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   22   Thread 0x7fffe7cbe700 (LWP 6840) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   21   Thread 0x7fffe74bd700 (LWP 6841) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   20   Thread 0x7fffe6cbc700 (LWP 6842) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   19   Thread 0x7fffe64bb700 (LWP 6843) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   18   Thread 0x7fffe5cba700 (LWP 6844) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   17   Thread 0x7fffe54b9700 (LWP 6845) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   16   Thread 0x7fffe4cb8700 (LWP 6846) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   15   Thread 0x7fffe44b7700 (LWP 6847) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   14   Thread 0x7fffe3cb6700 (LWP 6848) 0x00007ffff650a1b3 in epoll_wait ()
     at ../sysdeps/unix/syscall-template.S:84
   13   Thread 0x7fffe34b5700 (LWP 6849) 0x00007ffff650a1b3 in epoll_wait ()
---Type <return> to continue, or q <return> to quit---
     at ../sysdeps/unix/syscall-template.S:84
   12   Thread 0x7fffe2cb4700 (LWP 6850) 0x00007ffff69dfaed in write ()
     at ../sysdeps/unix/syscall-template.S:84
   11   Thread 0x7fffe24b3700 (LWP 6851) 0x00007ffff650a1b3 in epoll_wait ()
     at ../sysdeps/unix/syscall-template.S:84
   10   Thread 0x7fffe1cb2700 (LWP 6852) 0x00007ffff650a1b3 in epoll_wait ()
     at ../sysdeps/unix/syscall-template.S:84
   9    Thread 0x7fffe14b1700 (LWP 6853) 0x00007ffff69dfd9d in accept ()
     at ../sysdeps/unix/syscall-template.S:84
   8    Thread 0x7fffe0cb0700 (LWP 6854) __ibv_alloc_pd (
     context=0x7ffff7975f40 <ibv_cmd_attach_mcast>) at src/verbs.c:196
   7    Thread 0x7fffc3fff700 (LWP 6855) pthread_cond_wait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185
   6    Thread 0x7fffc37fe700 (LWP 6856) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   5    Thread 0x7ffff7e156c0 (LWP 6857) 0x00007ffff650a1b3 in epoll_wait ()
     at ../sysdeps/unix/syscall-template.S:84
   4    Thread 0x7fffc2ffd700 (LWP 6858) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
   3    Thread 0x7fffc27bb700 (LWP 6859) 0x00007ffff69dfaed in write ()
     at ../sysdeps/unix/syscall-template.S:84
   2    Thread 0x7fffe001e700 (LWP 6861) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
---Type <return> to continue, or q <return> to quit---
* 1    Thread 0x7fffc1fba700 (LWP 6862) pthread_cond_timedwait@@GLIBC_2.3.2 ()
     at ../sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S:225
(gdb) thread 8
[Switching to thread 8 (Thread 0x7fffe0cb0700 (LWP 6854))]
#0  __ibv_alloc_pd (context=0x7ffff7975f40 <ibv_cmd_attach_mcast>)
     at src/verbs.c:196
196             pd = context->ops.alloc_pd(context);
(gdb) bt
#0  __ibv_alloc_pd (context=0x7ffff7975f40 <ibv_cmd_attach_mcast>)
     at src/verbs.c:196
#1  0x00007ffff7976c07 in __ibv_alloc_pd_1_0 (context=0x7fffc80020d0)
     at src/compat-1_0.c:613
#2  0x00007ffff7bbd5dc in rpc_rdma_pd_get (xprt=0x7fffc80065f0)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/rpc_rdma.c:260
#3  0x00007ffff7bc1582 in rpc_rdma_clone (l_xprt=0x7fffc80008c0,
     cm_id=0x7fffa0000a30)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/rpc_rdma.c:1827
#4  0x00007ffff7bc1d30 in rpc_rdma_accept_timedwait (l_xprt=0x7fffc80008c0,
     abstime=0x7fffe0caf520)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/rpc_rdma.c:2032
#5  0x00007ffff7bc1e06 in rpc_rdma_accept_wait (l_xprt=0x7fffc80008c0,
     msleep=30000)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/rpc_rdma.c:2051
#6  0x00007ffff7bc264e in svc_rdma_ncreate (arg=0x7fffc80008c0,
     sendsize=4096, recvsize=4096, flags=0)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/svc_rdma.c:93
#7  0x00000000004513b4 in nfs_rdma_dispatcher_thread (nullarg=0x0)
     at /home/bill/rdma/nfs-ganesha/src/MainNFSD/nfs_rpc_rdma.c:106
#8  0x00007ffff69d760a in start_thread (arg=0x7fffe0cb0700)
     at pthread_create.c:334
#9  0x00007ffff6509bbd in clone ()
---Type <return> to continue, or q <return> to quit---
     at ../sysdeps/unix/sysv/linux/x86_64/clone.S:109
(gdb) p context
$1 = (struct ibv_context *) 0x7ffff7975f40 <ibv_cmd_attach_mcast>
(gdb) up
#1  0x00007ffff7976c07 in __ibv_alloc_pd_1_0 (context=0x7fffc80020d0)
     at src/compat-1_0.c:613
613             real_pd = ibv_alloc_pd(context->real_context);
(gdb) p context
$2 = (struct ibv_context_1_0 *) 0x7fffc80020d0
(gdb) p *context
$3 = {device = 0x7fffc8001da0, ops = {
     query_device = 0x7fffe00a55f0 <mlx4_query_device>,
     query_port = 0x7fffe00a5680 <mlx4_query_port>,
     alloc_pd = 0x7fffe00a5710 <mlx4_alloc_pd>,
     dealloc_pd = 0x7fffe00a57a0 <mlx4_free_pd>,
     reg_mr = 0x7fffe00a58a0 <mlx4_reg_mr>, dereg_mr = 0x0,
     create_cq = 0x7fffe00a5950 <mlx4_dereg_mr>, poll_cq = 0x0,
     req_notify_cq = 0x0, cq_event = 0x0,
     resize_cq = 0x7fffe00a59a0 <mlx4_create_cq>,
     destroy_cq = 0x7fffe00a2b10 <mlx4_poll_cq>,
     create_srq = 0x7fffe00a3110 <mlx4_arm_cq>,
     modify_srq = 0x7fffe00a3170 <mlx4_cq_event>,
     query_srq = 0x7fffe00a5b50 <mlx4_resize_cq>,
     destroy_srq = 0x7fffe00a5c90 <mlx4_destroy_cq>,
     post_srq_recv = 0x7fffe00a5ce0 <mlx4_create_srq>,
     create_qp = 0x7fffe00a5e90 <mlx4_modify_srq>,
     query_qp = 0x7fffe00a5ed0 <mlx4_query_srq>,
     modify_qp = 0x7fffe00a5f10 <mlx4_destroy_srq>,
     destroy_qp = 0x7fffe00a4ea0 <mlx4_post_srq_recv>,
     post_send = 0x7fffe00a6350 <mlx4_create_qp>,
     post_recv = 0x7fffe00a64d0 <mlx4_query_qp>,
     create_ah = 0x7fffe00a6560 <mlx4_modify_qp>,
     destroy_ah = 0x7fffe00a6700 <mlx4_destroy_qp>,
---Type <return> to continue, or q <return> to quit---
     attach_mcast = 0x7fffe00a3ef0 <mlx4_post_send>,
     detach_mcast = 0x7fffe00a47f0 <mlx4_post_recv>}, cmd_fd = -536188640,
   async_fd = 32767, num_comp_vectors = -536187936,
   real_context = 0x7ffff7975f40 <ibv_cmd_attach_mcast>}
(gdb) up
#2  0x00007ffff7bbd5dc in rpc_rdma_pd_get (xprt=0x7fffc80065f0)
     at /home/bill/rdma/nfs-ganesha/src/libntirpc/src/rpc_rdma.c:260
260                     xprt->pd->pd = ibv_alloc_pd(xprt->cm_id->verbs);
(gdb)


------------------------------------------------------------------------------
_______________________________________________
Nfs-ganesha-devel mailing list
Nfs-ganesha-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs-ganesha-devel

Reply via email to