Hi Guys,

We're trying to run a pure 64-bit environment (the debian etch
amd64 port on both host and guests) but we're having serious 
problems running our server applications on the guests. To my 
untrained eyes, it looks like a process calls clone() and then 
its child and itself get killed off by a SIGSEGV. I've included 
some straces down below. Any insight or direction anyone can offer 
would be much appreciated. A solution that doesn't involve changing 
the host kernel wins full points! :-)

We tested this stuff using host:

  * debian stock 2.6.18-3-amd64 kernel

and guests:

  * 2.6.20 from kernel.org
  * 2.6.18 with debian patches applied

Most of the testing was done using the default configuration
(ARCH=um make defconfig) but statically linked. We tried a few 
other configurations as well but the problem remained.

We usually run UML instances inside chroot jails, but we've
also tested all this stuff in the wild with:
   
  ./vmlinux umid=tuff mem=160M ubda=fs.cow,fs.base \
     ubdb=swapfile eth0=tuntap,ituff con=pts ssl=pts uml_dir=tmp

Startup output looks like:
  Checking that ptrace can change system call numbers...OK
  Checking syscall emulation patch for ptrace...missing
  Checking for tmpfs mount on /dev/shm...OK
  Checking PROT_EXEC mmap in /dev/shm/...OK
  Checking for the skas3 patch in the host:
    - /proc/mm...not found
    - PTRACE_FAULTINFO...not found
    - PTRACE_LDT...not found
  UML running in SKAS0 mode
  Checking that ptrace can change system call numbers...OK
  Checking syscall emulation patch for ptrace...missing

The server applications that we've been testing with (Asterisk and
Apache) are standard debian packages that seem to work fine
on the host platform. We also compiled Asterisk from its
original source on a guest and tried that as well. All this
stuff (configurations, packages, etc.) works fine for us on x86.

I'm including three sample strace outputs:
  
(1) A trace taken from the host when sshd is the only server 
    running on the guest.
(2) A guest trace of Asterisk dying.
(3) A guest trace of Apache running. Unlike Asterisk, Apache 
    doesn't get killed. I reckon this is because it 
    registers a signal handler, but what do I know. :-)

I've got plenty more traces but the other ones tend to be 
very verbose. 

(1) Sample trace of the guest doing nothing much:
...
--- SIGALRM (Alarm clock) @ 0 (0) ---
setitimer(ITIMER_REAL, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={0, 10000}, it_value={0, 10000}}, NULL) 
= 0
rt_sigprocmask(SIG_UNBLOCK, [USR1], [USR1 ALRM WINCH IO], 8) = 0
setitimer(ITIMER_VIRTUAL, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
setitimer(ITIMER_REAL, {it_interval={0, 10000}, it_value={0, 10000}}, NULL) = 0
rt_sigreturn(0)          = -1 EINTR (Interrupted system call)
nanosleep({10, 0}, 0)    = ? ERESTART_RESTARTBLOCK (To be restarted)
--- SIGALRM (Alarm clock) @ 0 (0) ---
...
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(5627, [{WIFSTOPPED(s) && WSTOPSIG(s) == 133}], WSTOPPED, NULL) = 5627
ptrace(PTRACE_GETREGS, 5627, 0, 0x60e9f188) = 0
ptrace(PTRACE_GETFPREGS, 5627, 0, 0x60e9f260) = 0
ptrace(PTRACE_POKEUSER, 5627, 8*ORIG_RAX, 0x27) = 0
ptrace(PTRACE_SYSCALL, 5627, 0, SIG_0) = 0
--- SIGCHLD (Child exited) @ 0 (0) ---
wait4(5627, [{WIFSTOPPED(s) && WSTOPSIG(s) == 133}], WSTOPPED, NULL) = 5627
ptrace(PTRACE_SETREGS, 5627, 0, 0x60e9f188) = 0
ptrace(PTRACE_SETFPREGS, 5627, 0, 0x60e9f260) = 0
ptrace(PTRACE_SYSCALL, 5627, 0, SIG_0) = 0
...


(2) Sample trace of Asterisk's demise: 
1785  clone(child_stack=0, 
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x4001f640) 
= 1786
1785  exit_group(0)            = ?
1786  setsid()                 = 1786
1786  chdir("/")               = 0
1786  open("/dev/null", O_RDWR) = 3
1786  fstat(3, {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 3), ...}) = 0
1786  dup2(3, 0)               = 0
1786  dup2(3, 1)               = 1
1786  dup2(3, 2)               = 2
1786  close(3)                 = 0
1786  unlink("/var/run/asterisk/asterisk.pid") = 0
1786  open("/var/run/asterisk/asterisk.pid", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 3
1786  fstat(3, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
1786  mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) 
= 0x40019000
1786  write(3, "1786\n", 5)    = 5
1786  close(3)                 = 0
1786  munmap(0x40019000, 4096) = 0
1786  mmap(NULL, 266240, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40, 
-1, 0) = 0x40020000
1786  mprotect(0x40020000, 4096, PROT_NONE) = 0
1786  clone(child_stack=0x40060280, 
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID|CLONE_DETACHED,
 parent_tidptr=0x400609f0, tls=0x40060960, child_tidptr=0x400609f0) = 1787
1786  nanosleep({0, 100000},  <unfinished ...>
1787  --- SIGSEGV (Segmentation fault) @ 0 (0) ---


(3) Sample trace of Apache: 
...
945  rt_sigaction(SIGSEGV, {0x43ace0, [], SA_RESTORER|SA_ONESHOT, 0x4113f410}, 
NULL, 8) = 0
945  rt_sigaction(SIGBUS, {0x43ace0, [], SA_RESTORER|SA_ONESHOT, 0x4113f410}, 
NULL, 8) = 0
...
945  select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945  clone(child_stack=0, 
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x40025a60) 
= 961
945  wait4(-1, 0x7f7fc3267c, WNOHANG|WSTOPPED, NULL) = 0
945  select(0, NULL, NULL, NULL, {1, 0} <unfinished ...>
961  rt_sigaction(SIGTERM, {0x446b10, [], SA_RESTORER|SA_INTERRUPT, 
0x4113f410}, {0x444fd0, [], SA_RESTORER, 0x4113f410}, 8) = 0
961  geteuid()                         = 0
961  setgid(33)                        = 0
961  open("/proc/sys/kernel/ngroups_max", O_RDONLY) = 8
961  read(8, "65536\n", 31)            = 6
961  close(8)                          = 0
961  open("/etc/group", O_RDONLY)      = 8
961  fcntl(8, F_GETFD)                 = 0
961  fcntl(8, F_SETFD, FD_CLOEXEC)     = 0
961  lseek(8, 0, SEEK_CUR)             = 0
961  fstat(8, {st_mode=S_IFREG|0644, st_size=485, ...}) = 0
961  mmap(NULL, 485, PROT_READ, MAP_SHARED, 8, 0) = 0x40019000
961  lseek(8, 485, SEEK_SET)           = 485
961  fstat(8, {st_mode=S_IFREG|0644, st_size=485, ...}) = 0
961  munmap(0x40019000, 485)           = 0
961  close(8)                          = 0
961  setgroups(1, [33])                = 0
961  geteuid()                         = 0
961  setuid(33)                        = 0
961  rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV USR2 PIPE SYS 
RTMIN RT_1], NULL, 8) = 0
961  mmap(NULL, 8392704, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|0x40, 
-1, 0) = 0x43981000
961  mprotect(0x43981000, 4096, PROT_NONE) = 0
961  clone(child_stack=0x44181280, 
flags=CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|CLONE_SYSVSEM|CLONE_SETTLS|CLONE_PARENT_SETTID|CLONE_CHILD_CLEARTID|CLONE_DETACHED,
 parent_tidptr=0x441819f0, 
tls=0x44181960, child_tidptr=0x441819f0) = 962
961  rt_sigprocmask(SIG_UNBLOCK, [TERM], NULL, 8) = 0
961  rt_sigaction(SIGTERM, {0x445040, [], SA_RESTORER|SA_INTERRUPT, 
0x4113f410}, {0x446b10, [], SA_RESTORER|SA_INTERRUPT, 0x4113f410}, 8) = 0
961  read(4,  <unfinished ...>
962  --- SIGSEGV (Segmentation fault) @ 0 (0) ---
962  chdir("/etc/apache2")             = 0
962  rt_sigaction(SIGSEGV, {SIG_DFL}, {SIG_DFL}, 8) = 0
962  kill(961, SIGSEGV)                = 0
961  <... read resumed> 0x7f7fc32657, 1) = ? ERESTARTSYS (To be restarted)
961  --- SIGSEGV (Segmentation fault) @ 0 (0) ---
962  +++ killed by SIGSEGV +++
945  <... select resumed> )            = ? ERESTARTNOHAND (To be restarted)
945  --- SIGCHLD (Child exited) @ 0 (0) ---
945  select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945  wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGSEGV}], WNOHANG|WSTOPPED, 
NULL) = 961
945  write(6, "[Tue Feb 20 02:46:59 2007] [notice] child pid 961 exit signal 
Segmentation fault (11)\n", 86) = 86
945  wait4(-1, 0x7f7fc3267c, WNOHANG|WSTOPPED, NULL) = 0
945  select(0, NULL, NULL, NULL, {1, 0}) = 0 (Timeout)
945  clone(child_stack=0, 
flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x40025a60) 
= 963
... -- a la groundhog day 

Even if you can't help, thanks for reading this far!

jez

-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
User-mode-linux-user mailing list
User-mode-linux-user@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/user-mode-linux-user

Reply via email to