Hello again

> > This is what I did:
> 
> This is the counter part of what I wrote "you got broken aufs code",
> right? And I understand you are correct.

Yes, this was the answer of "you got broken aufs code".

> > I will check the memory as well.

The memory check didn't report any error, but I replaced the whole
memory anyway.
Unfortunately, the problem still exists on both machines.

> This is for "there already happened another critical error somewhere
> else, and it corrupted memory", right?
> If something is wrong before aufs, then the kernel should produce a
> message and you will be able to find something in your log file.
> Of course, setting syslog.conf or some debugging configuration may
help
> us.

The kernel or any other process don't log any other problem. The only
thing I see is "mount" which remains blocked.

> > Normally I execute "mount / -o remount,udba=3Dreval". But I played a
> > little bit with some options and when checked the hanging command
> with
> > "ps" I saw "mount -I -n -t aufs -o rw,remount,udba=3Dreval aufs /".
> > Sorry for confusing you.
> 
> What is "-I"? Do you mean "-i"?
> Anyway I'd suggest you to try strace again.

Yes, I mean "-i".

I can reproducte it again and again on two machines and even with
replaced RAM. A new awareness is that sometimes the command is
unblocking again (after a few minutes). But not everytime.

Here I have more recent issue:

ps output:
root     12763 29248  0 09:40 pts/0    00:00:00 strace -o
/tmp/strace.log -f mount / -o remount,udba=reval
root     12765 12763  0 09:40 pts/0    00:00:00 mount / -o
remount,udba=reval
root     12766 12765  0 09:40 pts/0    00:00:00 /sbin/mount.aufs aufs /
-o rw,remount,udba=reval
root     12767 12766  0 09:40 pts/0    00:00:00 mount -i -n -t aufs -o
rw,remount,udba=reval aufs /


strace (this time with "-f" to follow the fork):
12765 execve("/bin/mount", ["mount", "/", "-o", "remount,udba=reval"],
[/* 24 vars */]) = 0
12765 brk(0)                            = 0x8057000
12765 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb771d000
12765 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or
directory)
12765 open("/etc/ld.so.cache", O_RDONLY) = 3
12765 fstat64(3, {st_mode=S_IFREG|0644, st_size=21200, ...}) = 0
12765 mmap2(NULL, 21200, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7717000
12765 close(3)                          = 0
12765 open("/lib/libblkid.so.1", O_RDONLY) = 3
12765 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0
=\0\0004\0\0\0D"..., 512) = 512
12765 fstat64(3, {st_mode=S_IFREG|0755, st_size=96380, ...}) = 0
12765 mmap2(NULL, 99212, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb76fe000
12765 mmap2(0xb7714000, 12288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15) = 0xb7714000
12765 close(3)                          = 0
12765 open("/lib/libuuid.so.1", O_RDONLY) = 3
12765 read(3,
"\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0P\21\0\0004\0\0\0\250"..
., 512) = 512
12765 fstat64(3, {st_mode=S_IFREG|0755, st_size=13872, ...}) = 0
12765 mmap2(NULL, 16604, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb76f9000
12765 mmap2(0xb76fc000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2) = 0xb76fc000
12765 close(3)                          = 0
12765 open("/lib/libc.so.6", O_RDONLY)  = 3
12765 read(3,
"\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\20m\1\0004\0\0\0\24"...
, 512) = 512
12765 fstat64(3, {st_mode=S_IFREG|0755, st_size=1437796, ...}) = 0
12765 mmap2(NULL, 1444136, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb7598000
12765 mmap2(0xb76f3000, 12288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15b) = 0xb76f3000
12765 mmap2(0xb76f6000, 10536, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb76f6000
12765 close(3)                          = 0
12765 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7597000
12765 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb7596000
12765 set_thread_area({entry_number:-1 -> 6, base_addr:0xb7597b60,
limit:1048575, seg_32bit:1, contents:0, read_exec_only:0,
limit_in_pages:1, seg_not_present:0, useable:1}) = 0
12765 mprotect(0xb76f3000, 8192, PROT_READ) = 0
12765 mprotect(0xb76fc000, 4096, PROT_READ) = 0
12765 mprotect(0xb7714000, 8192, PROT_READ) = 0
12765 mprotect(0x8054000, 4096, PROT_READ) = 0
12765 mprotect(0xb773b000, 4096, PROT_READ) = 0
12765 munmap(0xb7717000, 21200)         = 0
12765 brk(0)                            = 0x8057000
12765 brk(0x8078000)                    = 0x8078000
12765 open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
12765 fstat64(3, {st_mode=S_IFREG|0644, st_size=1527552, ...}) = 0
12765 mmap2(NULL, 1527552, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7421000
12765 close(3)                          = 0
12765 umask(022)                        = 022
12765 open("/dev/null", O_RDWR|O_LARGEFILE) = 3
12765 close(3)                          = 0
12765 getuid32()                        = 0
12765 geteuid32()                       = 0
12765 umask(077)                        = 022
12765 open("/etc/fstab", O_RDONLY|O_LARGEFILE) = 3
12765 umask(022)                        = 077
12765 fstat64(3, {st_mode=S_IFREG|0600, st_size=909, ...}) = 0
12765 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb771c000
12765 read(3, "# Static file system information "..., 4096) = 909
12765 read(3, ""..., 4096)              = 0
12765 close(3)                          = 0
12765 munmap(0xb771c000, 4096)          = 0
12765 getcwd("/root"..., 4095)          = 6
12765 readlink("/root/aufs", 0xbff345af, 4096) = -1 ENOENT (No such file
or directory)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda1", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda1", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda5", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 getcwd("/root"..., 4095)          = 6
12765 readlink("/root/aufs", 0xbff345af, 4096) = -1 ENOENT (No such file
or directory)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda1", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda1", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 readlink("/dev", 0xbff345af, 4096) = -1 EINVAL (Invalid argument)
12765 readlink("/dev/sda5", 0xbff345af, 4096) = -1 EINVAL (Invalid
argument)
12765 getcwd("/root"..., 4095)          = 6
12765 readlink("/root/aufs", 0xbff3458f, 4096) = -1 ENOENT (No such file
or directory)
12765 stat64("/sbin/mount.aufs", {st_mode=S_IFREG|0755, st_size=691188,
...}) = 0
12765 clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0xb7597bc8) = 12766
12765 wait4(-1,  <unfinished ...>
12766 getgid32()                        = 0
12766 setgid32(0)                       = 0
12766 getuid32()                        = 0
12766 setuid32(0)                       = 0
12766 execve("/sbin/mount.aufs", ["/sbin/mount.aufs", "aufs", "/", "-o",
"rw,remount,udba=reval"], [/* 20 vars */]) = 0
12766 uname({sys="Linux", node="chgut12fw01", ...}) = 0
12766 brk(0)                            = 0x80f4000
12766 brk(0x80f4cd0)                    = 0x80f4cd0
12766 set_thread_area({entry_number:-1 -> 6, base_addr:0x80f4830,
limit:1048575, seg_32bit:1, contents:0, read_exec_only:0,
limit_in_pages:1, seg_not_present:0, useable:1}) = 0
12766 brk(0x8115cd0)                    = 0x8115cd0
12766 brk(0x8116000)                    = 0x8116000
12766 open(".", O_RDONLY|O_NONBLOCK|O_LARGEFILE|O_DIRECTORY|O_CLOEXEC) =
3
12766 fcntl64(3, F_GETFD)               = 0x1 (flags FD_CLOEXEC)
12766 chdir("/")                        = 0
12766 getcwd("/"..., 4096)              = 2
12766 fchdir(3)                         = 0
12766 close(3)                          = 0
12766 access("/etc/mtab", R_OK|W_OK)    = 0
12766 clone(child_stack=0,
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x80f4898) = 12767
12766 waitpid(12767,  <unfinished ...>
12767 execve("mount", ["mount", "-i", "-n", "-t", "aufs", "-o",
"rw,remount,udba=reval", "aufs", "/"], [/* 20 vars */]) = -1 ENOENT (No
such file or directory)
12767 execve("/bin/mount", ["mount", "-i", "-n", "-t", "aufs", "-o",
"rw,remount,udba=reval", "aufs", "/"], [/* 20 vars */]) = 0
12767 brk(0)                            = 0x8057000
12767 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb786e000
12767 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or
directory)
12767 open("/etc/ld.so.cache", O_RDONLY) = 3
12767 fstat64(3, {st_mode=S_IFREG|0644, st_size=21200, ...}) = 0
12767 mmap2(NULL, 21200, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7868000
12767 close(3)                          = 0
12767 open("/lib/libblkid.so.1", O_RDONLY) = 3
12767 read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0
=\0\0004\0\0\0D"..., 512) = 512
12767 fstat64(3, {st_mode=S_IFREG|0755, st_size=96380, ...}) = 0
12767 mmap2(NULL, 99212, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb784f000
12767 mmap2(0xb7865000, 12288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15) = 0xb7865000
12767 close(3)                          = 0
12767 open("/lib/libuuid.so.1", O_RDONLY) = 3
12767 read(3,
"\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0P\21\0\0004\0\0\0\250"..
., 512) = 512
12767 fstat64(3, {st_mode=S_IFREG|0755, st_size=13872, ...}) = 0
12767 mmap2(NULL, 16604, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0xb784a000
12767 mmap2(0xb784d000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2) = 0xb784d000
12767 close(3)                          = 0
12767 open("/lib/libc.so.6", O_RDONLY)  = 3
12767 read(3,
"\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\20m\1\0004\0\0\0\24"...
, 512) = 512
12767 fstat64(3, {st_mode=S_IFREG|0755, st_size=1437796, ...}) = 0
12767 mmap2(NULL, 1444136, PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0xb76e9000
12767 mmap2(0xb7844000, 12288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x15b) = 0xb7844000
12767 mmap2(0xb7847000, 10536, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0xb7847000
12767 close(3)                          = 0
12767 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb76e8000
12767 mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
-1, 0) = 0xb76e7000
12767 set_thread_area({entry_number:-1 -> 6, base_addr:0xb76e8b60,
limit:1048575, seg_32bit:1, contents:0, read_exec_only:0,
limit_in_pages:1, seg_not_present:0, useable:1}) = 0
12767 mprotect(0xb7844000, 8192, PROT_READ) = 0
12767 mprotect(0xb784d000, 4096, PROT_READ) = 0
12767 mprotect(0xb7865000, 8192, PROT_READ) = 0
12767 mprotect(0x8054000, 4096, PROT_READ) = 0
12767 mprotect(0xb788c000, 4096, PROT_READ) = 0
12767 munmap(0xb7868000, 21200)         = 0
12767 brk(0)                            = 0x8057000
12767 brk(0x8078000)                    = 0x8078000
12767 open("/usr/lib/locale/locale-archive", O_RDONLY|O_LARGEFILE) = 3
12767 fstat64(3, {st_mode=S_IFREG|0644, st_size=1527552, ...}) = 0
12767 mmap2(NULL, 1527552, PROT_READ, MAP_PRIVATE, 3, 0) = 0xb7572000
12767 close(3)                          = 0
12767 umask(022)                        = 022
12767 open("/dev/null", O_RDWR|O_LARGEFILE) = 3
12767 close(3)                          = 0
12767 getuid32()                        = 0
12767 geteuid32()                       = 0
12767 getcwd("/root"..., 4095)          = 6
12767 readlink("/root/aufs", 0xbf89429f, 4096) = -1 ENOENT (No such file
or directory)
12767 rt_sigprocmask(SIG_BLOCK, ~[TRAP SEGV RTMIN RT_1], NULL, 8) = 0
12767 mount("aufs", "/", 0x80580b0, MS_MGC_VAL|MS_REMOUNT, "udba=reval"

And here the w-SysRq Output:
SysRq : Show Blocked State
  task                PC stack   pid father
fsnotify_mark   D f658de90     0   352      2 0x00000000
 f658dea0 00000046 00000002 f658de90 00000000 00000000 35548000 00000000
 005b9fa5 00000000 f6a05380 00000000 f6494810 c14bd380 c14b8000 c14bd380
 f6494810 00000000 00000000 00000000 00000001 00000001 00000001 00000000
Call Trace:
 [<c10292c4>] ? load_balance+0x64/0x530
 [<c134b630>] schedule+0x30/0x50
 [<c134b955>] schedule_timeout+0x125/0x180
 [<c104817e>] ? sched_clock_cpu+0x5e/0x160
 [<c134af3b>] ? __schedule+0x1fb/0x810
 [<c134ab33>] wait_for_common+0x93/0x120
 [<c10286a0>] ? try_to_wake_up+0x1f0/0x1f0
 [<c105f770>] ? call_rcu_bh+0x10/0x10
 [<c134ac62>] wait_for_completion+0x12/0x20
 [<c10405b6>] wait_rcu_gp+0x36/0x40
 [<c10405c0>] ? wait_rcu_gp+0x40/0x40
 [<c105f2df>] synchronize_sched+0x1f/0x30
 [<c104749b>] __synchronize_srcu+0x3b/0xa0
 [<c105f2c0>] ? synchronize_rcu_bh+0x30/0x30
 [<c104751d>] synchronize_srcu+0xd/0x10
 [<c10c8bc4>] fsnotify_mark_destroy+0x64/0x110
 [<c10431a0>] ? wake_up_bit+0x60/0x60
 [<c10c8b60>] ? fsnotify_set_mark_ignored_mask_locked+0x20/0x20
 [<c1042e34>] kthread+0x74/0x80
 [<c1042dc0>] ? kthread_worker_fn+0x100/0x100
 [<c134ddf6>] kernel_thread_helper+0x6/0xd
mount           D d44b7e00     0 12767  12766 0x00000001
 d44b7e10 00000086 00000002 d44b7e00 d44b7da0 c102862b 35348000 00000000
 005b9fa4 f658df90 f6805380 00000000 d4433440 c14bd380 c14b8000 c14bd380
 d4433440 f658df90 c1441380 d44b7de0 c1020dc0 00000000 00000001 00000003
Call Trace:
 [<c102862b>] ? try_to_wake_up+0x17b/0x1f0
 [<c1020dc0>] ? __wake_up_common+0x40/0x70
 [<c10ac1ef>] ? iput+0x2f/0x1a0
 [<c134b630>] schedule+0x30/0x50
 [<f80bbdfd>] au_hn_alloc+0x24d/0x370 [aufs]
 [<c10431a0>] ? wake_up_bit+0x60/0x60
 [<f80bbb8c>] au_hn_free+0x1c/0x40 [aufs]
 [<f80b36cb>] au_hiput+0xb/0x20 [aufs]
 [<f80b380b>] au_iinfo_fin+0x12b/0x1a0 [aufs]
 [<f80a13ab>] au_si_free+0xabb/0xc00 [aufs]
 [<c10ac04c>] destroy_inode+0x2c/0x50
 [<c10ac143>] evict+0xd3/0x150
 [<c10ac27d>] iput+0xbd/0x1a0
 [<c10aa1ef>] d_kill+0x9f/0xf0
 [<c10aa3f0>] shrink_dentry_list+0x1b0/0x1d0
 [<c10aa8ae>] shrink_dcache_sb+0x5e/0x90
 [<c1098b25>] do_remount_sb+0x35/0x160
 [<c1034110>] ? ns_capable+0x20/0x50
 [<c10b1870>] do_mount+0x500/0x6e0
 [<c101b840>] ? mm_fault_error+0x130/0x130
 [<c10b0208>] ? copy_mount_options+0x98/0x110
 [<c10b1ab6>] sys_mount+0x66/0xa0
 [<c134d065>] syscall_call+0x7/0xb

You say, that au_hn_alloc() cannot follow au_hn_free(). But how can it
be, that can reproduce it again and again? Which code is executing the
function "wake_up_bit()"?

Is there everything else I can try?

Thanks for your help.

Best regards
Elmar

Attachment: smime.p7s
Description: S/MIME cryptographic signature

------------------------------------------------------------------------------
Keep Your Developer Skills Current with LearnDevNow!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-d2d

Reply via email to