Hi, I'm occasionally getting core files from the clone03/clone06 testcases. The testcase itself reports PASS; it is the child process that randomly crashes. It seems to occur more often on single-CPU systems.
For example:
Core was generated by `clone03'.
Program terminated with signal 11, Segmentation fault.
#0 0x0000000000402bfd in tst_print (tcid=0x403d0e "clone03", tnum=1, ttype=2,
tmesg=0x14c6070 "unexpected signal 15 received (pid = 17427).") at
tst_res.c:412
412 {
(gdb) bt
#0 0x0000000000402bfd in tst_print (tcid=0x403d0e "clone03", tnum=1, ttype=2,
tmesg=0x14c6070 "unexpected signal 15 received (pid = 17427).") at
tst_res.c:412
#1 0x00000000004031be in tst_res (ttype=2, fname=<value optimized out>,
arg_fmt=<value optimized out>) at tst_res.c:316
#2 0x0000000000403761 in tst_brk (ttype=2, fname=0x0, func=0x4013d0 <cleanup>,
arg_fmt=<value optimized out>) at tst_res.c:640
#3 0x0000000000403960 in tst_brkm (ttype=2, func=0x4013d0 <cleanup>,
arg_fmt=<value optimized out>) at tst_res.c:698
#4 0x0000000000403b45 in def_handler (sig=15) at tst_sig.c:248
#5 <signal handler called>
#6 0x00000037940db650 in __write_nocancel () at
../sysdeps/unix/syscall-template.S:82
#7 0x000000000040169e in child_fn () at clone03.c:208
#8 0x00000037940e890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115
Dump of assembler code for function tst_print:
0x0000000000402bd0 <+0>: mov %rbx,-0x30(%rsp)
0x0000000000402bd5 <+5>: mov %rbp,-0x28(%rsp)
0x0000000000402bda <+10>: mov %edx,%ebx
0x0000000000402bdc <+12>: mov %r12,-0x20(%rsp)
0x0000000000402be1 <+17>: mov %r13,-0x18(%rsp)
0x0000000000402be6 <+22>: mov %rdi,%r12
0x0000000000402be9 <+25>: mov %r14,-0x10(%rsp)
0x0000000000402bee <+30>: mov %r15,-0x8(%rsp)
0x0000000000402bf3 <+35>: sub $0x2858,%rsp
0x0000000000402bfa <+42>: mov %esi,%r14d
=> 0x0000000000402bfd <+45>: mov %rcx,0x18(%rsp)
(gdb) p $rsp
$1 = (void *) 0x14c3800
(gdb) x/1x $rsp
0x14c3800: Cannot access memory at address 0x14c3800
It looks like the child receives SIGTERM and, while handling SIGTERM, hits SIGSEGV.
I don't know what the source of that SIGTERM is. I looked into the second part,
and it looks like the stack for the child is not large enough.
I modified clone03.c (see attached clone03_poison.patch) to allocate an extra
guard buffer just before the child's stack and fill that buffer with the pattern 0xDE.
Before:
|-------------------------------|
child_stack child_stack+CHILD_STACK_SIZE
After:
|---------------------|-------------------------------|
poison_start child_stack child_stack+CHILD_STACK_SIZE
Now if I start clone03 and kill it I can randomly reproduce the SIGSEGV
(attached clone03_kill.sh).
The backtrace usually looks like:
... (random place)
#5 0x000000000040324e in tst_res (ttype=2, fname=<value optimized out>,
arg_fmt=<value optimized out>) at tst_res.c:316
#6 0x00000000004037f1 in tst_brk (ttype=2, fname=0x0, func=0x401420 <cleanup>,
arg_fmt=<value optimized out>) at tst_res.c:640
#7 0x00000000004039f0 in tst_brkm (ttype=2, func=0x401420 <cleanup>,
arg_fmt=<value optimized out>) at tst_res.c:698
#8 0x0000000000403bd5 in def_handler (sig=13) at tst_sig.c:248
#9 <signal handler called>
#10 0x0000003327cdb650 in __write_nocancel () at
../sysdeps/unix/syscall-template.S:82
#11 0x000000000040172e in child_fn () at clone03.c:212
#12 0x0000003327ce890d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:115
(gdb) p poison_start
$1 = (void *) 0xa02010
(gdb) p child_stack
$2 = (void *) 0xa03010
(gdb) x/16x poison_start
0xa02010: 0xdededede 0xdededede 0xdededede 0xdededede
0xa02020: 0xdededede 0xdededede 0xdededede 0xdededede
0xa02030: 0xdededede 0xdededede 0xdededede 0xdededede
0xa02040: 0xdededede 0xdededede 0xdededede 0xdededede
...
(gdb)
0xa02490: 0xdededede 0xdededede 0xdededede 0xdededede
0xa024a0: 0x00000018 0x00000030 0x00a02800 0x00000000
0xa024b0: 0x00a02740 0x00000000 0xdededede 0xdededede
0xa024c0: 0xdededede 0xdededede 0x27409296 0x00000033
The above shows that the 0xDE pattern has been overwritten, i.e. the child ran past the start of its stack into the guard buffer.
Extending the child stack helps with the second part (the SIGSEGV):
#define CHILD_STACK_SIZE 16384*4
but I have no idea where that first SIGTERM is coming from. Any ideas?
Regards,
Jan
clone03_kill.sh
Description: Bourne shell script
diff --git a/testcases/kernel/syscalls/clone/clone03.c b/testcases/kernel/syscalls/clone/clone03.c
index 24ee8e6..dada00c 100644
--- a/testcases/kernel/syscalls/clone/clone03.c
+++ b/testcases/kernel/syscalls/clone/clone03.c
@@ -87,13 +87,15 @@ static int pfd[2];
char *TCID = "clone03"; /* Test program identifier. */
int TST_TOTAL = 1; /* Total number of test cases. */
+void *poison_start; /* stack for child */
+void *child_stack; /* stack for child */
+#define POISON_SIZE getpagesize()
int main(int ac, char **av)
{
int lc;
char *msg;
- void *child_stack; /* stack for child */
char buff[10];
int child_pid;
@@ -104,10 +106,13 @@ int main(int ac, char **av)
setup();
/* Allocate stack for child */
- if ((child_stack = (void *)malloc(CHILD_STACK_SIZE)) == NULL) {
+ if ((poison_start = (void *)malloc(POISON_SIZE+CHILD_STACK_SIZE)) == NULL) {
tst_brkm(TBROK, cleanup, "Cannot allocate stack for child");
}
+ memset(poison_start, 0xDE, POISON_SIZE);
+ child_stack = poison_start + POISON_SIZE;
+
for (lc = 0; TEST_LOOPING(lc); lc++) {
Tst_count = 0;
@@ -154,7 +159,7 @@ int main(int ac, char **av)
}
- free(child_stack);
+ free(poison_start);
cleanup();
tst_exit();
------------------------------------------------------------------------------ Keep yourself connected to Go Parallel: TUNE You got it built. Now make it sing. Tune shows you how. http://goparallel.sourceforge.net
_______________________________________________ Ltp-list mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/ltp-list
