Hi Roland, After testing on ia64 with a real example of SIGKILL, I made a few fixes, and now it works like this:
Unpatched: [r...@hp-rx6600-03 strace.2]# strace ./k execve("./k", ["./k"], [/* 22 vars */]) = 0 uname({sys="Linux", node="hp-rx6600-03.rhts.bos.redhat.com", ...}) = 0 brk(0) = 0x600000000001c000 brk(0x600000000001cf38) = 0x600000000001cf38 brk(0x6000000000040f38) = 0x6000000000040f38 brk(0x6000000000044000) = 0x6000000000044000 getpid() = 7413 kill(7413, SIGKILLupeek: ptrace(PTRACE_PEEKUSER,7413,2096,0): No such process upeek: ptrace(PTRACE_PEEKUSER,7413,2240,0): No such process Patched: [r...@hp-rx6600-03 strace.2]# ./strace ./k execve("./k", ["./k"], [/* 22 vars */]) = 0 uname({sys="Linux", node="hp-rx6600-03.rhts.bos.redhat.com", ...}) = 0 brk(0) = 0x600000000001c000 brk(0x600000000001cf38) = 0x600000000001cf38 brk(0x6000000000040f38) = 0x6000000000040f38 brk(0x6000000000044000) = 0x6000000000044000 getpid() = 7416 kill(7416, SIGKILL) = ? <unavailable> +++ killed by SIGKILL +++ On x86_64: # strace ./k execve("./k", ["./k"], [/* 55 vars */]) = 0 uname({sys="Linux", node="localhost.localdomain", ...}) = 0 brk(0) = 0x263b000 brk(0x263bf10) = 0x263bf10 arch_prctl(ARCH_SET_FS, 0x263b850) = 0 brk(0x265cf10) = 0x265cf10 brk(0x265d000) = 0x265d000 getpid() = 26619 kill(26619, SIGKILL <unfinished ...> +++ killed by SIGKILL +++ The patch itself is below. 
-- vda diff -d -urpN strace.1/defs.h strace.2/defs.h --- strace.1/defs.h 2008-12-11 15:32:06.000000000 +0100 +++ strace.2/defs.h 2008-12-11 16:04:09.000000000 +0100 @@ -336,6 +336,7 @@ struct tcb { prstatus_t status; /* procfs status structure */ #endif #endif + int ptrace_errno; #ifdef FREEBSD struct procfs_status status; int pfd_reg; @@ -466,6 +467,8 @@ extern void set_overhead P((int)); extern void qualify P((char *)); extern int get_scno P((struct tcb *)); extern long known_scno P((struct tcb *)); +extern long do_ptrace P((int request, struct tcb *tcp, void *addr, void *data)); +extern int ptrace_restart P((int request, struct tcb *tcp, int sig)); extern int trace_syscall P((struct tcb *)); extern int count_syscall P((struct tcb *, struct timeval *)); extern void printxval P((const struct xlat *, int, const char *)); diff -d -urpN strace.1/process.c strace.2/process.c --- strace.1/process.c 2008-12-11 15:32:06.000000000 +0100 +++ strace.2/process.c 2008-12-11 15:40:41.000000000 +0100 @@ -918,10 +918,8 @@ struct tcb *tcp; clearbpt(tcpchild); tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP); - if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) { - perror("resume: ptrace(PTRACE_SYSCALL, ...)"); + if (ptrace_restart(PTRACE_SYSCALL, tcpchild, 0) < 0) return -1; - } if (!qflag) fprintf(stderr, "\ diff -d -urpN strace.1/strace.c strace.2/strace.c --- strace.1/strace.c 2008-12-11 15:32:06.000000000 +0100 +++ strace.2/strace.c 2008-12-11 17:55:01.000000000 +0100 @@ -1358,10 +1358,8 @@ struct tcb *tcp; tcp->parent->nclone_waiting--; #endif - if (ptrace(PTRACE_SYSCALL, tcp->pid, (char *) 1, 0) < 0) { - perror("resume: ptrace(PTRACE_SYSCALL, ...)"); + if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) return -1; - } if (!qflag) fprintf(stderr, "Process %u resumed\n", tcp->pid); @@ -1533,21 +1531,14 @@ int sig; break; } if (WSTOPSIG(status) == SIGSTOP) { - if ((error = ptrace(PTRACE_DETACH, - tcp->pid, (char *) 1, sig)) < 0) { - if (errno != ESRCH) - perror("detach: 
ptrace(PTRACE_DETACH, ...)"); - /* I died trying. */ - } + ptrace_restart(PTRACE_DETACH, tcp, sig); break; } - if ((error = ptrace(PTRACE_CONT, tcp->pid, (char *) 1, - WSTOPSIG(status) == SIGTRAP ? - 0 : WSTOPSIG(status))) < 0) { - if (errno != ESRCH) - perror("detach: ptrace(PTRACE_CONT, ...)"); + error = ptrace_restart(PTRACE_CONT, tcp, + WSTOPSIG(status) == SIGTRAP ? 0 + : WSTOPSIG(status)); + if (error < 0) break; - } } #endif /* LINUX */ @@ -1556,8 +1547,7 @@ int sig; if (sig && kill(tcp->pid, sig) < 0) perror("detach: kill"); sig = 0; - if ((error = ptrace(PTRACE_DETACH, tcp->pid, (char *) 1, sig)) < 0) - perror("detach: ptrace(PTRACE_DETACH, ...)"); + error = ptrace_restart(PTRACE_DETACH, tcp, sig); #endif /* SUNOS4 */ #ifndef USE_PROCFS @@ -2160,17 +2150,16 @@ handle_group_exit(struct tcb *tcp, int s detach(tcp, sig); if (leader != NULL && leader != tcp) leader->flags |= TCB_GROUP_EXITING; - } - else if (ptrace(PTRACE_CONT, tcp->pid, (char *) 1, sig) < 0) { - perror("strace: ptrace(PTRACE_CONT, ...)"); - cleanup(); - return -1; - } - else { - if (leader != NULL) + } else { + if (ptrace_restart(PTRACE_CONT, tcp, sig) < 0) { + cleanup(); + return -1; + } + if (leader != NULL) { leader->flags |= TCB_GROUP_EXITING; - if (leader != NULL && leader != tcp) - droptcb(tcp); + if (leader != tcp) + droptcb(tcp); + } /* The leader will report to us as parent now, and then we'll get to the SIG==-1 case. */ return 0; @@ -2411,9 +2400,7 @@ Process %d attached (waiting for parent) * Hope we are back in control now. 
*/ tcp->flags &= ~(TCB_INSYSCALL | TCB_SIGTRAPPED); - if (ptrace(PTRACE_SYSCALL, - pid, (char *) 1, 0) < 0) { - perror("trace: ptrace(PTRACE_SYSCALL, ...)"); + if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) { cleanup(); return -1; } @@ -2460,9 +2447,7 @@ Process %d attached (waiting for parent) #endif continue; } - if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, - WSTOPSIG(status)) < 0) { - perror("trace: ptrace(PTRACE_SYSCALL, ...)"); + if (ptrace_restart(PTRACE_SYSCALL, tcp, WSTOPSIG(status)) < 0) { cleanup(); return -1; } @@ -2472,7 +2457,7 @@ Process %d attached (waiting for parent) /* we handled the STATUS, we are permitted to interrupt now. */ if (interrupted) return 0; - if (trace_syscall(tcp) < 0) { + if (trace_syscall(tcp) < 0 && !tcp->ptrace_errno) { if (tcp->flags & TCB_ATTACHED) detach(tcp, 0); else { @@ -2492,8 +2477,7 @@ Process %d attached (waiting for parent) #endif if (tcp->flags & TCB_ATTACHED) detach(tcp, 0); - else if (ptrace(PTRACE_CONT, pid, (char *) 1, 0) < 0) { - perror("strace: ptrace(PTRACE_CONT, ...)"); + else if (ptrace_restart(PTRACE_CONT, tcp, 0) < 0) { cleanup(); return -1; } @@ -2505,8 +2489,7 @@ Process %d attached (waiting for parent) continue; } tracing: - if (ptrace(PTRACE_SYSCALL, pid, (char *) 1, 0) < 0) { - perror("trace: ptrace(PTRACE_SYSCALL, ...)"); + if (ptrace_restart(PTRACE_SYSCALL, tcp, 0) < 0) { cleanup(); return -1; } @@ -2554,9 +2537,18 @@ void printleader(tcp) struct tcb *tcp; { - if (tcp_last && (!outfname || followfork < 2 || tcp_last == tcp)) { - tcp_last->flags |= TCB_REPRINT; - tprintf(" <unfinished ...>\n"); + if (tcp_last) { + if (tcp_last->ptrace_errno) { + if (tcp_last->flags & TCB_INSYSCALL) { + tprintf(" <unavailable>)"); + tabto(acolumn); + } + tprintf("= ? 
<unavailable>\n"); + tcp_last->ptrace_errno = 0; + } else if (!outfname || followfork < 2 || tcp_last == tcp) { + tcp_last->flags |= TCB_REPRINT; + tprintf(" <unfinished ...>\n"); + } } curcol = 0; if ((followfork == 1 || pflag_seen > 1) && outfname) diff -d -urpN strace.1/syscall.c strace.2/syscall.c --- strace.1/syscall.c 2008-12-11 15:32:06.000000000 +0100 +++ strace.2/syscall.c 2008-12-11 19:58:32.000000000 +0100 @@ -2278,28 +2278,30 @@ trace_syscall(struct tcb *tcp) { int sys_res; struct timeval tv; - int res; - - /* Measure the exit time as early as possible to avoid errors. */ - if (dtime && (tcp->flags & TCB_INSYSCALL)) - gettimeofday(&tv, NULL); - - res = get_scno(tcp); - if (res != 1) - return res; - - res = syscall_fixup(tcp); - if (res != 1) - return res; + int res, scno_good; if (tcp->flags & TCB_INSYSCALL) { long u_error; - res = get_error(tcp); - if (res != 1) + + /* Measure the exit time as early as possible to avoid errors. */ + if (dtime) + gettimeofday(&tv, NULL); + + scno_good = res = get_scno(tcp); + if (res == 0) + return res; + if (res == 1) + res = syscall_fixup(tcp); + if (res == 0) return res; + if (res == 1) + res = get_error(tcp); + if (res == 0) + return res; + if (res == 1) + internal_syscall(tcp); - internal_syscall(tcp); - if (tcp->scno >= 0 && tcp->scno < nsyscalls && + if (res == 1 && tcp->scno >= 0 && tcp->scno < nsyscalls && !(qual_flags[tcp->scno] & QUAL_TRACE)) { tcp->flags &= ~TCB_INSYSCALL; return 0; @@ -2308,7 +2310,9 @@ trace_syscall(struct tcb *tcp) if (tcp->flags & TCB_REPRINT) { printleader(tcp); tprintf("<... 
"); - if (tcp->scno >= nsyscalls || tcp->scno < 0) + if (scno_good != 1) + tprintf("????"); + else if (tcp->scno >= nsyscalls || tcp->scno < 0) tprintf("syscall_%lu", tcp->scno); else tprintf("%s", sysent[tcp->scno].sys_name); @@ -2318,6 +2322,13 @@ trace_syscall(struct tcb *tcp) if (cflag) return count_syscall(tcp, &tv); + if (res != 1) { + tprintf(") "); + tabto(acolumn); + tcp->flags &= ~TCB_INSYSCALL; + return res; + } + if (tcp->scno >= nsyscalls || tcp->scno < 0 || (qual_flags[tcp->scno] & QUAL_RAW)) sys_res = printargs(tcp); @@ -2420,10 +2431,36 @@ trace_syscall(struct tcb *tcp) } /* Entering system call */ - res = syscall_enter(tcp); - if (res != 1) + scno_good = res = get_scno(tcp); + if (res == 0) + return res; + if (res == 1) + res = syscall_fixup(tcp); + if (res == 0) + return res; + if (res == 1) + res = syscall_enter(tcp); + if (res == 0) return res; + if (res != 1) { + printleader(tcp); + tcp->flags &= ~TCB_REPRINT; + tcp_last = tcp; + if (scno_good != 1) + tprintf("????" /* anti-trigraph gap */ "("); + else if (tcp->scno >= nsyscalls || tcp->scno < 0) + tprintf("syscall_%lu(", tcp->scno); + else + tprintf("%s(", sysent[tcp->scno].sys_name); + /* + * " <unavailable>" will be added later by the code which + * detects ptrace errors. + */ + tcp->flags |= TCB_INSYSCALL; + return res; + } + switch (known_scno(tcp)) { #ifdef SYS_socket_subcall case SYS_socketcall: diff -d -urpN strace.1/util.c strace.2/util.c --- strace.1/util.c 2008-12-11 15:32:06.000000000 +0100 +++ strace.2/util.c 2008-12-11 20:00:17.000000000 +0100 @@ -241,6 +241,61 @@ xlookup(const struct xlat *xlat, int val } /* + * Generic ptrace wrapper which tracks ESRCH errors + * by setting tcp->ptrace_errno to it. + * + * We assume that ESRCH indicates likely process death (SIGKILL?), + * modulo bugs where process somehow ended up not stopped. + * Unfortunately kernel uses ESRCH for that case too. Oh well. + * + * Currently used by upeek() only. 
+ * TODO: use this in all other ptrace() calls while decoding. + */ +long +do_ptrace(int request, struct tcb *tcp, void *addr, void *data) +{ + long l; + + errno = 0; + l = ptrace(request, tcp->pid, addr, data); + /* Non-ESRCH errors might be our invalid reg/mem accesses, + * we do not record them. */ + if (errno == ESRCH) + tcp->ptrace_errno = ESRCH; + return l; +} + +/* + * Used when we want to unblock stopped traced process. + * Should be only used with PTRACE_CONT, PTRACE_DETACH and PTRACE_SYSCALL. + * Returns 0 on success or if error was ESRCH + * (presumably process was killed while we talk to it). + * Otherwise prints error message and returns -1. + */ +int +ptrace_restart(int op, struct tcb *tcp, int sig) +{ + int err; + const char *msg; + + errno = 0; + ptrace(op, tcp->pid, (void *) 1, (void *) (long) sig); + err = errno; + if (!err || err == ESRCH) + return 0; + + tcp->ptrace_errno = err; + msg = "SYSCALL"; + if (op == PTRACE_CONT) + msg = "CONT"; + if (op == PTRACE_DETACH) + msg = "DETACH"; + fprintf(stderr, "strace: ptrace(PTRACE_%s,1,%d): %s\n", + msg, sig, strerror(err)); + return -1; +} + +/* * Print entry in struct xlat table, if there. */ void @@ -1035,11 +1090,13 @@ long *res; } #endif /* SUNOS4_KERNEL_ARCH_KLUDGE */ errno = 0; - val = ptrace(PTRACE_PEEKUSER, tcp->pid, (char *) off, 0); + val = do_ptrace(PTRACE_PEEKUSER, tcp, (char *) off, 0); if (val == -1 && errno) { - char buf[60]; - sprintf(buf,"upeek: ptrace(PTRACE_PEEKUSER,%d,%lu,0)", tcp->pid, off); - perror(buf); + if (errno != ESRCH) { + char buf[60]; + sprintf(buf,"upeek: ptrace(PTRACE_PEEKUSER,%d,%lu,0)", tcp->pid, off); + perror(buf); + } return -1; } *res = val; ------------------------------------------------------------------------------ SF.Net email is Sponsored by MIX09, March 18-20, 2009 in Las Vegas, Nevada. The future of the web can't happen without you. Join us at MIX09 to help pave the way to the Next Web now. 
Learn more and register at http://ad.doubleclick.net/clk;208669438;13503038;i?http://2009.visitmix.com/ _______________________________________________ Strace-devel mailing list Strace-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/strace-devel