You might also want to turn on all of the kernel module debugging by echoing 32767 into /proc/sys/pvfs2/debug, and then look at what it generates in dmesg and/or /var/log/messages when the cp crashes.

thanks,
-Phil

Phil Carns wrote:
I don't see anything offensive in the stat output. There are no messages in dmesg or pvfs2-client.log, right?

Two other possible ways to proceed may be to:

1) try the same strace'd cp on a different file system to compare the output and see what system call is supposed to happen next after the fstat's

2) try downloading the source for your version of coreutils (http://ftp.gnu.org/gnu/coreutils/) and compiling it with debugging symbols so that you can actually see the segfault in gdb or valgrind. You can probably just set the CFLAGS env variable to "-g" before running configure in coreutils to get debugging symbols.

Actually, running valgrind on the cp command that you already have might possibly indicate something interesting, even if it can't map it to a particular line number.

-Phil

Bart Taylor wrote:
Here is a full strace -v output:



[root@node1 root]# strace -v cp test.file /mnt/pvfs2/
execve("/bin/cp", ["cp", "test.file", "/mnt/pvfs2/"], [/* 22 vars */]) = 0 uname({sysname="Linux", nodename="node1", release="2.4.21-27.0.2.ELsmp", version="#1 SMP Wed Jan 12 23:35:44 EST 2005", machine="i686"}) = 0
brk(0)                                  = 0x9692000
open("/etc/ld.so.preload", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY)      = 3
fstat64(3, {st_dev=makedev(104, 3), st_ino=229475, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=32, st_size=14525, st_atime=2009/04/07-15:54:03, st_mtime=2009/04/07-13:38:35, st_ctime=2009/04/07-13:38:35}) = 0
old_mmap(NULL, 14525, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb75f5000
close(3)                                = 0
open("/lib/libacl.so.1", O_RDONLY)      = 3
read(3, "\177elf\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\...@\24\0\000"..., 512) = 512 fstat64(3, {st_dev=makedev(104, 3), st_ino=524363, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=40, st_size=19248, st_atime=2009/04/07-15:54:03, st_mtime=2003/01/28-18:42:21, st_ctime=2009/04/07-13:37:22}) = 0
old_mmap(NULL, 22224, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0x89c000
old_mmap(0x8a1000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x4000) = 0x8a1000
close(3)                                = 0
mprotect(0xbfffa000, 4096, PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN) = 0
open("/lib/tls/libc.so.6", O_RDONLY)    = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\200X\1"..., 512) = 512 old_mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb75f4000 fstat64(3, {st_dev=makedev(104, 3), st_ino=14172162, st_mode=S_IFREG|0755, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=3080, st_size=1571692, st_atime=2009/04/07-15:54:03, st_mtime=2004/10/22-04:01:20, st_ctime=2009/04/07-13:37:20}) = 0 old_mmap(NULL, 1275340, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xd4c000 old_mmap(0xe7e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x132000) = 0xe7e000 old_mmap(0xe81000, 9676, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xe81000
close(3)                                = 0
open("/lib/libattr.so.1", O_RDONLY)     = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\320\n\0"..., 512) = 512 fstat64(3, {st_dev=makedev(104, 3), st_ino=524361, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=16, st_size=7148, st_atime=2009/04/07-15:54:03, st_mtime=2003/01/28-18:09:10, st_ctime=2009/04/07-13:37:22}) = 0
old_mmap(NULL, 10124, PROT_READ|PROT_EXEC, MAP_PRIVATE, 3, 0) = 0xb1e000
old_mmap(0xb20000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED, 3, 0x1000) = 0xb20000
close(3)                                = 0
set_thread_area({entry_number:-1 -> 6, base_addr:0xb75f4ae0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0, useable:1}) = 0
munmap(0xb75f5000, 14525)               = 0
brk(0)                                  = 0x9692000
brk(0x96b3000)                          = 0x96b3000
brk(0)                                  = 0x96b3000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
fstat64(3, {st_dev=makedev(104, 3), st_ino=3325956, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=62808, st_size=32148976, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-13:37:19, st_ctime=2009/04/07-13:37:20}) = 0
mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
close(3)                                = 0
geteuid32()                             = 0
lstat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=4096, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0 stat64("/mnt/pvfs2/", {st_dev=makedev(0, 10), st_ino=1048576, st_mode=S_IFDIR|S_ISVTX|0777, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=4096, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0 stat64("test.file", {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0 stat64("/mnt/pvfs2/test.file", {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=5, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:31:17}) = 0
open("test.file", O_RDONLY|O_LARGEFILE) = 3
fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
fstat64(4, {st_dev=makedev(0, 10), st_ino=1048571, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=33554432, st_blocks=8, st_size=0, st_atime=2009/04/07-15:31:17, st_mtime=2009/04/07-15:31:17, st_ctime=2009/04/07-15:55:20}) = 0 fstat64(3, {st_dev=makedev(104, 3), st_ino=294926, st_mode=S_IFREG|0644, st_nlink=1, st_uid=0, st_gid=0, st_blksize=4096, st_blocks=8, st_size=5, st_atime=2009/04/07-15:29:59, st_mtime=2009/04/07-14:45:08, st_ctime=2009/04/07-14:46:32}) = 0
--- SIGSEGV (Segmentation fault) @ 0 (0) ---
+++ killed by SIGSEGV +++
[root@node1 root]#







On Tue, Apr 7, 2009 at 2:53 PM, Phil Carns <[email protected] <mailto:[email protected]>> wrote:

    Hi Bart,

    From your strace output, my guess is that cp is running into
    trouble with the value of one of the fstat() fields, but it's hard to
    say which one.

    Are you able to reproduce this reliably?  Could you run the strace
    again with the -v option to see if it gives a full listing of what
    values were in the stat structs it got before crashing?

    -Phil

    Bart Taylor wrote:

        Hey guys,

        I am running into a problem with a system copy command
        segfaulting on 2.4 kernels. Specifically, I am seeing this show
        up on RHEL3 machines running a patched version of PVFS 2.6.
        Machines running Linux 2.6 kernels do not experience this
        problem.  I believe we may have mentioned this recently but
        hoped it would be fixed by some updates pulled into dcache.
        That, apparently, is not the case.

        The segfault is extremely consistent; it happens every time a cp
        is executed with a PVFS2 file system as the target.  The target
        file is always created with a size of zero, so at least part of
        the command is completing. 'dd' commands execute normally.

        The setup is simple:  1 server node (RHEL4 2.6 kernel) with the
        default interactive genconfig output, and 1 client with a 2.4
        kernel.  Mount the file system, execute a copy onto the file
        system.
        Here is the conf file contents:

        <Defaults>
               UnexpectedRequests 50
               EventLogging none
               LogStamp datetime
               BMIModules bmi_tcp
               FlowModules flowproto_multiqueue
               PerfUpdateInterval 1000
               ServerJobBMITimeoutSecs 30
               ServerJobFlowTimeoutSecs 30
               ClientJobBMITimeoutSecs 300
               ClientJobFlowTimeoutSecs 300
               ClientRetryLimit 5
               ClientRetryDelayMilliSecs 2000
               TCPBindSpecific yes
        </Defaults>

        <Aliases>
               Alias node1 tcp://node1:3334
        </Aliases>

        <Filesystem>
               Name pvfs2-fs
               ID 1227216139
               RootHandle 1048576
               <MetaHandleRanges>
                       Range node1 4-2147483650
               </MetaHandleRanges>
               <DataHandleRanges>
                       Range node1 2147483651-4294967297
               </DataHandleRanges>
               <StorageHints>
                       TroveSyncMeta no
                       TroveSyncData no
                       CoalescingHighWatermark infinity
                       CoalescingLowWatermark 0
                       TroveSyncMetaTimerSecs 5
                       DBCacheSizeBytes 1073741824
               </StorageHints>
        </Filesystem>

        And here is the last bit of an strace on a copy command:

        [root@node1 root]# strace cp test.file /mnt/pvfs2/
        .....
        brk(0)                                  = 0x95ce000
        open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
        fstat64(3, {st_mode=S_IFREG|0644, st_size=32148976, ...}) = 0
        mmap2(NULL, 2097152, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb73f4000
        close(3)                                = 0
        geteuid32()                             = 0
        lstat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
        st_size=4096, ...}) = 0
        stat64("/mnt/pvfs2/", {st_mode=S_IFDIR|S_ISVTX|0777,
        st_size=4096, ...}) = 0
        stat64("test.file", {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
        stat64("/mnt/pvfs2/test.file", {st_mode=S_IFREG|0644, st_size=0,
        ...}) = 0
        open("test.file", O_RDONLY|O_LARGEFILE) = 3
        fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
        open("/mnt/pvfs2/test.file", O_WRONLY|O_TRUNC|O_LARGEFILE) = 4
        fstat64(4, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
        fstat64(3, {st_mode=S_IFREG|0644, st_size=5, ...}) = 0
        --- SIGSEGV (Segmentation fault) @ 0 (0) ---
        +++ killed by SIGSEGV +++


        There is nothing in the client or server logs without turning on
        additional logging.

        Are there any suggestions on what might be causing this? Can I
        provide any additional information that will be helpful for
        debugging?

        Bart.


------------------------------------------------------------------------

        _______________________________________________
        Pvfs2-developers mailing list
        [email protected]
        <mailto:[email protected]>
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers




_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

_______________________________________________
Pvfs2-developers mailing list
[email protected]
http://www.beowulf-underground.org/mailman/listinfo/pvfs2-developers

Reply via email to