On 09/04/2017 03:52 AM, Madhu P Punjabi wrote:
Hi All,

We have a customer who has reported a segfault in libntirpc code, when using ganesha 2.3 on CentOS 7.

Looking at the customer's coredump below, it is not clear why a segfault occurred even though the values (and addresses) passed to 'recvmsg' appear to be valid. Could you please look at the coredump below and check whether a segfault is possible even when the input arguments to the function (recvmsg) look fine and the addresses are accessible? Thank you for helping with this.

Following is the coredump:
Core was generated by `/usr/bin/ganesha.nfsd -L /var/log/ganesha.log -f /etc/ganesha/ganesha.conf -N N'.
Program terminated with signal 11, Segmentation fault.
#0 0x00007fcabc3130f4 in clnt_dg_call (clnt=0x7fc550002850, auth=0x7fcabc551340 <auth_none_priv>, proc=3, xargs=0x7fcabc31993c <xdr_pmap>, argsp=0x7fc5e37fccd0, xresults=0x7fcabc3330c8 <xdr_u_short>, resultsp=0x7fc5e37fcd1e, utimeout=...) at /usr/src/debug/nfs-ganesha-2.3.2-ibm42-0.1.1-Source/libntirpc/src/clnt_dg.c:372
372                     ret = recvmsg(cu->cu_fd, &msg, MSG_ERRQUEUE);
Missing separate debuginfos, use: debuginfo-install sssd-client-1.12.2-58.el7_1.18.x86_64
(gdb) p cu->cu_fd
$1 = 67856
(gdb) p msg
$2 = {msg_name = 0x7fc5e37fc600, msg_namelen = 16, msg_iov = 0x7fc5e37fc5f0, msg_iovlen = 1, msg_control = 0x7fc5e2ffefa0, msg_controllen = 256, msg_flags = 0}
(gdb) p *(struct sockaddr_in*)msg.msg_name
$3 = {sin_family = 2, sin_port = 28416, sin_addr = {s_addr = 676717841}, sin_zero = "\000\000\000\000\000\000\000"}
(gdb) p *(msg.msg_iov)
$4 = {iov_base = 0x7fc5e2fff0a0, iov_len = 56}
(gdb) p *(char*)msg.msg_iov->iov_base
$5 = 0 '\000'
(gdb) p *(char*)(msg.msg_control)
$6 = 0 '\000'
(gdb) l -
362
363 iov.iov_base = cbuf + 256;
364 iov.iov_len = outlen;
365 msg.msg_name = (void *)&err_addr;
366 msg.msg_namelen = sizeof(err_addr);
367 msg.msg_iov = &iov;
368 msg.msg_iovlen = 1;
369 msg.msg_flags = 0;
370 msg.msg_control = cbuf;
371 msg.msg_controllen = 256;
(gdb) l -
352 #ifdef IP_RECVERR
353 if (fd.revents & POLLERR) {
354 struct msghdr msg;
355 struct cmsghdr *cmsg;
356 struct sock_extended_err *e;
357 struct sockaddr_in err_addr;
358 struct sockaddr_in *sin = (struct sockaddr_in *)&cu->cu_raddr;
359 struct iovec iov;
360 char *cbuf = (char *)alloca(outlen + 256);
361 int ret;

For reference, the following is the corresponding assembly code, which shows that the crash occurred at address 0x00007fcabc3130f4:
    0x00007fcabc3130dc <+1552>:  mov    -0x48(%rbp),%rax
    0x00007fcabc3130e0 <+1556>:  mov    0x60(%rax),%eax
    0x00007fcabc3130e3 <+1559>:  lea    -0x6d0(%rbp),%rcx
    0x00007fcabc3130ea <+1566>:  mov    $0x2000,%edx
    0x00007fcabc3130ef <+1571>:  mov    %rcx,%rsi
    0x00007fcabc3130f2 <+1574>:  mov    %eax,%edi
=> 0x00007fcabc3130f4 <+1576>:  callq  0x7fcabc30d240 <recvmsg@plt>
    0x00007fcabc3130f9 <+1581>:  mov    %eax,-0x74(%rbp)

(gdb) disassemble 0x7fcabc30d240   <-- this is for recvmsg@plt
Dump of assembler code for function recvmsg@plt:
0x00007fcabc30d240 <+0>: jmpq *0x243442(%rip) # 0x7fcabc550688

The only thing that jumps out at me is this ^^. What is at 0x7fcabc550688? It's not recvmsg() below; that's a different address.


    0x00007fcabc30d246 <+6>:     pushq  $0xce
    0x00007fcabc30d24b <+11>:    jmpq   0x7fcabc30c550
End of assembler dump.

(gdb) disassemble recvmsg
Dump of assembler code for function recvmsg:
0x00007fcabc768680 <+0>: cmpl $0x0,0x20cd39(%rip) # 0x7fcabc9753c0 <__pthread_multiple_threads>
    0x00007fcabc768687 <+7>:     jne    0x7fcabc768699 <recvmsg+25>
    0x00007fcabc768689 <+0>:     mov    $0x2f,%eax
    0x00007fcabc76868e <+5>:     syscall
    0x00007fcabc768690 <+7>:     cmp    $0xfffffffffffff001,%rax
    0x00007fcabc768696 <+13>:    jae    0x7fcabc7686c9 <recvmsg+73>
    0x00007fcabc768698 <+15>:    retq
    0x00007fcabc768699 <+25>:    sub    $0x8,%rsp
0x00007fcabc76869d <+29>: callq 0x7fcabc767e70 <__pthread_enable_asynccancel>
    0x00007fcabc7686a2 <+34>:    mov    %rax,(%rsp)
    0x00007fcabc7686a6 <+38>:    mov    $0x2f,%eax
    0x00007fcabc7686ab <+43>:    syscall
    0x00007fcabc7686ad <+45>:    mov    (%rsp),%rdi
    0x00007fcabc7686b1 <+49>:    mov    %rax,%rdx
0x00007fcabc7686b4 <+52>: callq 0x7fcabc767ed0 <__pthread_disable_asynccancel>
    0x00007fcabc7686b9 <+57>:    mov    %rdx,%rax
    0x00007fcabc7686bc <+60>:    add    $0x8,%rsp
    0x00007fcabc7686c0 <+64>:    cmp    $0xfffffffffffff001,%rax
    0x00007fcabc7686c6 <+70>:    jae    0x7fcabc7686c9 <recvmsg+73>
    0x00007fcabc7686c8 <+72>:    retq
0x00007fcabc7686c9 <+73>: mov 0x2088b8(%rip),%rcx # 0x7fcabc970f88
    0x00007fcabc7686d0 <+80>:    neg    %eax
    0x00007fcabc7686d2 <+82>:    mov    %eax,%fs:(%rcx)
    0x00007fcabc7686d5 <+85>:    or     $0xffffffffffffffff,%rax
    0x00007fcabc7686d9 <+89>:    retq
End of assembler dump.


Daniel

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Nfs-ganesha-devel mailing list
Nfs-ganesha-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs-ganesha-devel

Reply via email to