diff -x CVS -urpN 2009-01-17/ChangeLog 2009-01-18/ChangeLog
--- 2009-01-17/ChangeLog        2009-01-14 15:29:45.000000000 +0100
+++ 2009-01-18/ChangeLog        2009-01-17 02:52:54.000000000 +0100
@@ -1,3 +1,47 @@
+2009-01-17  Denys Vlasenko  <[email protected]>
+
+       Two cleanups: tcb table expansion failure is not really a survivable
+       event, we do not have any viable way to continue. No wonder most
+       places where that is detected have FIXMEs.
+       It's way simpler to treat as fatal failure, and handle it inside
+       tcb table expansion finctions.
+       Second cleanup: tidy up haphazard locations of a few externs.
+
+       * defs.h: Change return type of expand_tcbtab() to void.
+       Declare change_syscall().
+       * process.c: Change all callsites of alloctcb(), alloc_tcb() and
+       fork_tcb(), removing now-redundant error checks.
+       (fork_tcb): Change return type to void - it can't fail now.
+       * strace.c: Move extern declarations out of function bodies.
+       Change all callsites of alloctcb(), alloc_tcb() and
+        fork_tcb(), removing now-redundant error checks.
+       (expand_tcbtab): Change return type to void - it can't fail now.
+       On failure to expand, print a message, clean up, and exit.
+       (alloc_tcb): On failure to expand, print a message, clean up, and exit.
+       * util.c (setbpt): Remove extern declaration from function body.
+
+2009-01-17  Denys Vlasenko  <[email protected]>
+
+       * defs.h: Update a comment. No code changes.
+       * strace.c (handle_stopped_tcbs): Discard all execve stops
+       and clear TCB_WAITEXECVE bit.
+       * syscall.c (get_scno): Add the code to not mistakenly
+       treat ptrace stop as execve stop (execve stops can be blocked
+       by traced program).
+       Fixes RH#477775 "strace hangs if the target process blocks SIGTRAP".
+
+2009-01-17  Denys Vlasenko  <[email protected]>
+
+       * process.c: Add a comment. No code changes.
+       * strace.c (collect_stopped_tcbs): Stop reversing list of stopped
+       tcp's. I'm not totally convinced it is crucial, but this is surely
+       fits the concept of "least surprise".
+       Do not collect TCB_SUSPENDED tcp's (this is closer to how
+       it was before).
+       (handle_stopped_tcbs): Remove the code to reject TCB_SUSPENDED tcp's,
+       it's done earlier now. In an unobvious way, this was causing
+       SIGSTOPs from freshly attached children to be misinterpreted.
+
 2009-01-14  Denys Vlasenko  <[email protected]>
 
        * linux/bfin/syscallent.h: sys_futex has 6 parameters, not 5.
diff -x CVS -urpN 2009-01-17/defs.h 2009-01-18/defs.h
--- 2009-01-17/defs.h   2009-01-13 19:30:55.000000000 +0100
+++ 2009-01-18/defs.h   2009-01-17 02:52:54.000000000 +0100
@@ -26,7 +26,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *     $Id: defs.h,v 1.94 2009/01/13 18:30:55 vda_linux Exp $
+ *     $Id: defs.h,v 1.96 2009/01/17 01:52:54 vda_linux Exp $
  */
 
 #ifdef HAVE_CONFIG_H
@@ -363,9 +363,17 @@ struct tcb {
 #define TCB_FOLLOWFORK 00400   /* Process should have forks followed */
 #define TCB_REPRINT    01000   /* We should reprint this syscall on exit */
 #ifdef LINUX
-/* x86 does not need TCB_WAITEXECVE.
+/* TCB_WAITEXECVE bit means "ignore next SIGTRAP, it's execve exit stop".
+ * it is not reliable if traced program masks SIGTRAP.
+ *
+ * x86 does not need TCB_WAITEXECVE.
  * It can detect execve's SIGTRAP by looking at eax/rax.
  * See "stray syscall exit: eax = " message in syscall_fixup().
+ *
+ * Note that on newer kernels, we use ptrace options and therefore
+ * can filter out execve stops reliably on any architecture,
+ * without using TCB_WAITEXECVE flag.
+ * I guess we can remove it from the source somewhere around year 2010 :)
  */
 # if defined(ALPHA) || defined(SPARC) || defined(SPARC64) || defined(POWERPC) 
|| defined(IA64) || defined(HPPA) || defined(SH) || defined(SH64) || 
defined(S390) || defined(S390X) || defined(ARM) || defined(MIPS) || 
defined(BFIN)
 #  define TCB_WAITEXECVE 02000 /* ignore SIGTRAP after exceve */
@@ -473,7 +481,7 @@ extern const char *xlookup P((const stru
 extern struct tcb *alloc_tcb P((int, int));
 extern struct tcb *pid2tcb P((int));
 extern void droptcb P((struct tcb *));
-extern int expand_tcbtab P((void));
+extern void expand_tcbtab P((void));
 
 #define alloctcb(pid)  alloc_tcb((pid), 1)
 
@@ -527,6 +535,7 @@ extern void tprint_iov P((struct tcb *, 
 extern void tprint_open_modes P((struct tcb *, mode_t));
 extern int is_restart_error P((struct tcb *));
 
+extern int change_syscall P((struct tcb *, int));
 #ifdef LINUX
 extern int internal_clone P((struct tcb *));
 #endif
diff -x CVS -urpN 2009-01-17/process.c 2009-01-18/process.c
--- 2009-01-17/process.c        2009-01-13 19:30:55.000000000 +0100
+++ 2009-01-18/process.c        2009-01-17 02:52:54.000000000 +0100
@@ -34,7 +34,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *     $Id: process.c,v 1.123 2009/01/13 18:30:55 vda_linux Exp $
+ *     $Id: process.c,v 1.125 2009/01/17 01:52:54 vda_linux Exp $
  */
 
 #include "defs.h"
@@ -487,25 +487,19 @@ struct tcb *tcp;
 /* TCP is creating a child we want to follow.
    If there will be space in tcbtab for it, set TCB_FOLLOWFORK and return 0.
    If not, clear TCB_FOLLOWFORK, print an error, and return 1.  */
-static int
+static void
 fork_tcb(struct tcb *tcp)
 {
-       if (nprocs == tcbtabsize) {
-               if (expand_tcbtab()) {
-                       tcp->flags &= ~TCB_FOLLOWFORK;
-                       return 1;
-               }
-       }
+       if (nprocs == tcbtabsize)
+               expand_tcbtab();
 
        tcp->flags |= TCB_FOLLOWFORK;
-       return 0;
 }
 
 #ifdef USE_PROCFS
 
 int
-sys_fork(tcp)
-struct tcb *tcp;
+sys_fork(struct tcb *tcp)
 {
        if (exiting(tcp) && !syserror(tcp)) {
                if (getrval2(tcp)) {
@@ -551,12 +545,10 @@ struct tcb *tcp;
                        return 0;
                if (!followfork)
                        return 0;
-               if (fork_tcb(tcp))
-                       return 0;
+               fork_tcb(tcp);
                if (syserror(tcp))
                        return 0;
-               if ((tcpchild = alloctcb(tcp->u_rval)) == NULL)
-                       return 0;
+               tcpchild = alloctcb(tcp->u_rval);
                if (proc_open(tcpchild, 2) < 0)
                        droptcb(tcpchild);
        }
@@ -706,9 +698,7 @@ struct tcb *tcp;
 }
 
 int
-change_syscall(tcp, new)
-struct tcb *tcp;
-int new;
+change_syscall(struct tcb *tcp, int new)
 {
 #if defined(LINUX)
 #if defined(I386)
@@ -758,9 +748,12 @@ int new;
 #elif defined(IA64)
        if (ia32) {
                switch (new) {
-                     case 2: break;    /* x86 SYS_fork */
-                     case SYS_clone:   new = 120; break;
-                     default:
+               case 2:
+                       break;  /* x86 SYS_fork */
+               case SYS_clone:
+                       new = 120;
+                       break;
+               default:
                        fprintf(stderr, "%s: unexpected syscall %d\n",
                                __FUNCTION__, new);
                        return -1;
@@ -892,8 +885,7 @@ struct tcb *tcp;
        if (entering(tcp)) {
                if (!followfork)
                        return 0;
-               if (fork_tcb(tcp))
-                       return 0;
+               fork_tcb(tcp);
                if (setbpt(tcp) < 0)
                        return 0;
        } else {
@@ -924,12 +916,8 @@ struct tcb *tcp;
                }
                else
 #endif
-               if (fork_tcb(tcp) || (tcpchild = alloctcb(pid)) == NULL) {
-                       if (bpt)
-                               clearbpt(tcp);
-                       kill(pid, SIGKILL); /* XXX */
-                       return 0;
-               }
+               fork_tcb(tcp);
+               tcpchild = alloctcb(pid);
 
 #ifndef CLONE_PTRACE
                /* Attach to the new child */
@@ -963,6 +951,9 @@ struct tcb *tcp;
                                clearbpt(tcpchild);
 
                        tcpchild->flags &= ~(TCB_SUSPENDED|TCB_STARTUP);
+                       /* TCB_SUSPENDED tasks are not collected by waitpid
+                        * loop, and left stopped. Restart it:
+                        */
                        if (ptrace_restart(PTRACE_SYSCALL, tcpchild, 0) < 0)
                                return -1;
 
@@ -1039,8 +1030,7 @@ struct tcb *tcp;
        if (entering(tcp)) {
                if (!followfork || dont_follow)
                        return 0;
-               if (fork_tcb(tcp))
-                       return 0;
+               fork_tcb(tcp);
                if (setbpt(tcp) < 0)
                        return 0;
        }
@@ -1056,10 +1046,8 @@ struct tcb *tcp;
                        return 0;
 
                pid = tcp->u_rval;
-               if (fork_tcb(tcp) || (tcpchild = alloctcb(pid)) == NULL) {
-                       kill(pid, SIGKILL); /* XXX */
-                       return 0;
-               }
+               fork_tcb(tcp);
+               tcpchild = alloctcb(pid);
 #ifdef LINUX
 #ifdef HPPA
                /* The child must have run before it can be attached. */
diff -x CVS -urpN 2009-01-17/strace.c 2009-01-18/strace.c
--- 2009-01-17/strace.c 2009-01-13 19:30:55.000000000 +0100
+++ 2009-01-18/strace.c 2009-01-17 02:52:54.000000000 +0100
@@ -27,7 +27,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *     $Id: strace.c,v 1.99 2009/01/13 18:30:55 vda_linux Exp $
+ *     $Id: strace.c,v 1.102 2009/01/17 01:52:54 vda_linux Exp $
  */
 
 #include "defs.h"
@@ -78,6 +78,8 @@
 #endif
 #endif
 extern char **environ;
+extern int optind;
+extern char *optarg;
 
 
 int debug = 0, followfork = 0;
@@ -436,13 +438,7 @@ startup_attach(void)
                                                   (char *) 1, 0) < 0)
                                                ++nerr;
                                        else if (tid != tcbtab[tcbi]->pid) {
-                                               if (nprocs == tcbtabsize &&
-                                                   expand_tcbtab())
-                                                       tcp = NULL;
-                                               else
-                                                       tcp = alloctcb(tid);
-                                               if (tcp == NULL)
-                                                       exit(1);
+                                               tcp = alloctcb(tid);
                                                tcp->flags |= 
TCB_ATTACHED|TCB_CLONE_THREAD|TCB_CLONE_DETACHED|TCB_FOLLOWFORK;
                                                tcbtab[tcbi]->nchildren++;
                                                tcbtab[tcbi]->nclone_threads++;
@@ -675,10 +671,6 @@ startup_child (char **argv)
 
        /* We are the tracer.  */
        tcp = alloctcb(daemonized_tracer ? getppid() : pid);
-       if (tcp == NULL) {
-               cleanup();
-               exit(1);
-       }
        if (daemonized_tracer) {
                /* We want subsequent startup_attach() to attach to it.  */
                tcp->flags |= TCB_ATTACHED;
@@ -695,8 +687,6 @@ startup_child (char **argv)
 int
 main(int argc, char *argv[])
 {
-       extern int optind;
-       extern char *optarg;
        struct tcb *tcp;
        int c, pid = 0;
        int optF = 0;
@@ -708,11 +698,11 @@ main(int argc, char *argv[])
 
        /* Allocate the initial tcbtab.  */
        tcbtabsize = argc;      /* Surely enough for all -p args.  */
-       if ((tcbtab = calloc (tcbtabsize, sizeof tcbtab[0])) == NULL) {
+       if ((tcbtab = calloc(tcbtabsize, sizeof tcbtab[0])) == NULL) {
                fprintf(stderr, "%s: out of memory\n", progname);
                exit(1);
        }
-       if ((tcbtab[0] = calloc (tcbtabsize, sizeof tcbtab[0][0])) == NULL) {
+       if ((tcbtab[0] = calloc(tcbtabsize, sizeof tcbtab[0][0])) == NULL) {
                fprintf(stderr, "%s: out of memory\n", progname);
                exit(1);
        }
@@ -807,11 +797,7 @@ main(int argc, char *argv[])
                                fprintf(stderr, "%s: I'm sorry, I can't let you 
do that, Dave.\n", progname);
                                break;
                        }
-                       if ((tcp = alloc_tcb(pid, 0)) == NULL) {
-                               fprintf(stderr, "%s: out of memory\n",
-                                       progname);
-                               exit(1);
-                       }
+                       tcp = alloc_tcb(pid, 0);
                        tcp->flags |= TCB_ATTACHED;
                        pflag_seen++;
                        break;
@@ -979,8 +965,8 @@ main(int argc, char *argv[])
        exit(exit_code);
 }
 
-int
-expand_tcbtab()
+void
+expand_tcbtab(void)
 {
        /* Allocate some more TCBs and expand the table.
           We don't want to relocate the TCBs because our
@@ -993,27 +979,26 @@ expand_tcbtab()
                                                    sizeof *newtcbs);
        int i;
        if (newtab == NULL || newtcbs == NULL) {
-               if (newtab != NULL)
-                       free(newtab);
                fprintf(stderr, "%s: expand_tcbtab: out of memory\n",
                        progname);
-               return 1;
+               cleanup();
+               exit(1);
        }
        for (i = tcbtabsize; i < 2 * tcbtabsize; ++i)
                newtab[i] = &newtcbs[i - tcbtabsize];
        tcbtabsize *= 2;
        tcbtab = newtab;
-
-       return 0;
 }
 
-
 struct tcb *
 alloc_tcb(int pid, int command_options_parsed)
 {
        int i;
        struct tcb *tcp;
 
+       if (nprocs == tcbtabsize)
+               expand_tcbtab();
+
        for (i = 0; i < tcbtabsize; i++) {
                tcp = tcbtab[i];
                if ((tcp->flags & TCB_INUSE) == 0) {
@@ -1036,15 +1021,14 @@ alloc_tcb(int pid, int command_options_p
                        return tcp;
                }
        }
-       fprintf(stderr, "%s: alloc_tcb: tcb table full\n", progname);
-       return NULL;
+       fprintf(stderr, "%s: bug in alloc_tcb\n", progname);
+       cleanup();
+       exit(1);
 }
 
 #ifdef USE_PROCFS
 int
-proc_open(tcp, attaching)
-struct tcb *tcp;
-int attaching;
+proc_open(struct tcb *tcp, int attaching)
 {
        char proc[32];
        long arg;
@@ -2279,7 +2263,8 @@ collect_stopped_tcbs(void)
        struct rusage ru;
        struct rusage* ru_ptr = cflag ? &ru : NULL;
        int wnohang = 0;
-       struct tcb *found_tcps = NULL;
+       struct tcb *found_tcps;
+       struct tcb **nextp = &found_tcps;
 #ifdef __WALL
        int wait4_options = __WALL;
 #endif
@@ -2368,15 +2353,7 @@ collect_stopped_tcbs(void)
                                   will we have the association of parent and
                                   child so that we know how to do clearbpt
                                   in the child.  */
-                               if (nprocs == tcbtabsize &&
-                                   expand_tcbtab())
-                                       tcp = NULL;
-                               else
-                                       tcp = alloctcb(pid);
-                               if (tcp == NULL) {
-                                       kill(pid, SIGKILL); /* XXX */
-                                       return NULL;
-                               }
+                               tcp = alloctcb(pid);
                                tcp->flags |= TCB_ATTACHED | TCB_SUSPENDED;
                                if (!qflag)
                                        fprintf(stderr, "\
@@ -2401,10 +2378,28 @@ Process %d attached (waiting for parent)
                        tcp->stime = ru.ru_stime;
 #endif
                }
+               if (tcp->flags & TCB_SUSPENDED) {
+                       /*
+                        * Apparently, doing any ptrace() call on a stopped
+                        * process, provokes the kernel to report the process
+                        * status again on a subsequent wait(), even if the
+                        * process has not been actually restarted.
+                        * Since we have inspected the arguments of suspended
+                        * processes we end up here testing for this case.
+                        */
+                       continue;
+               }
+
                tcp->wait_status = status;
 #ifdef LINUX
-               tcp->next_need_service = found_tcps;
-               found_tcps = tcp;
+               /* It is important to not invert the order of tasks
+                * to process. For one, alloc_tcb() above picks newly forked
+                * threads in some order, processing of them and their parent
+                * should be in the same order, otherwise bad things happen
+                * (misinterpreted SIGSTOPs and such).
+                */
+               *nextp = tcp;
+               nextp = &tcp->next_need_service;
                wnohang = WNOHANG;
 #endif
 #ifdef SUNOS4
@@ -2413,9 +2408,10 @@ Process %d attached (waiting for parent)
                 */
                break;
 #endif
-       } /* while (1) - collecting all stopped/exited tracees */
+       }
 
-       return tcp;
+       *nextp = NULL;
+       return found_tcps;
 }
 
 static int
@@ -2425,18 +2421,6 @@ handle_stopped_tcbs(struct tcb *tcp)
                int pid;
                int status;
 
-               if (tcp->flags & TCB_SUSPENDED) {
-                       /*
-                        * Apparently, doing any ptrace() call on a stopped
-                        * process, provokes the kernel to report the process
-                        * status again on a subsequent wait(), even if the
-                        * process has not been actually restarted.
-                        * Since we have inspected the arguments of suspended
-                        * processes we end up here testing for this case.
-                        */
-                       continue;
-               }
-
                outf = tcp->outf;
                status = tcp->wait_status;
                pid = tcp->pid;
@@ -2568,12 +2552,17 @@ handle_stopped_tcbs(struct tcb *tcp)
                         * but be paranoid about it.
                         */
                        if (((unsigned)status >> 16) == PTRACE_EVENT_EXEC) {
-                               /* It's post-exec ptrace stop.  */
-                               /* Set WSTOPSIG(status) = (SIGTRAP | 0x80).  */
-                               status |= 0x8000;
+                               /* It's post-exec ptrace stop. Ignore it,
+                                * we will get syscall exit ptrace stop later.
+                                */
+#ifdef TCB_WAITEXECVE
+                               tcp->flags &= ~TCB_WAITEXECVE;
+#endif
+                               goto tracing;
                        } else {
                                /* Take a better look...  */
                                siginfo_t si;
+                               si.si_signo = 0;
                                ptrace(PTRACE_GETSIGINFO, pid, (void*) 0, 
(void*) &si);
                                /*
                                 * Check some fields to make sure we see
diff -x CVS -urpN 2009-01-17/syscall.c 2009-01-18/syscall.c
--- 2009-01-17/syscall.c        2009-01-06 22:45:06.000000000 +0100
+++ 2009-01-18/syscall.c        2009-01-17 02:06:18.000000000 +0100
@@ -30,7 +30,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *     $Id: syscall.c,v 1.106 2009/01/06 21:45:06 vda_linux Exp $
+ *     $Id: syscall.c,v 1.107 2009/01/17 01:06:18 vda_linux Exp $
  */
 
 #include "defs.h"
@@ -976,26 +976,47 @@ struct tcb *tcp;
        }
 #elif defined(IA64)
 #      define IA64_PSR_IS      ((long)1 << 34)
-       if (upeek (tcp, PT_CR_IPSR, &psr) >= 0)
+       if (upeek(tcp, PT_CR_IPSR, &psr) >= 0)
                ia32 = (psr & IA64_PSR_IS) != 0;
        if (!(tcp->flags & TCB_INSYSCALL)) {
                if (ia32) {
                        if (upeek(tcp, PT_R1, &scno) < 0)       /* orig eax */
                                return -1;
                } else {
-                       if (upeek (tcp, PT_R15, &scno) < 0)
+                       if (upeek(tcp, PT_R15, &scno) < 0)
                                return -1;
                }
                /* Check if we return from execve. */
                if (tcp->flags & TCB_WAITEXECVE) {
+#if defined PTRACE_GETSIGINFO
+                       siginfo_t si;
+
+                       tcp->flags &= ~TCB_WAITEXECVE;
+                       /* If SIGTRAP is masked, execve's magic SIGTRAP
+                        * is not delivered. We end up here on a subsequent
+                        * ptrace stop instead. Luckily, we can check
+                        * for the type of this SIGTRAP. execve's magic one
+                        * has 0 (SI_USER) in si.si_code, ptrace stop has 5.
+                        * (I don't know why 5).
+                        */
+                       si.si_code = SI_USER;
+                       /* If PTRACE_GETSIGINFO fails, we assume it's
+                        * magic SIGTRAP. Moot anyway, PTRACE_GETSIGINFO
+                        * doesn't fail.
+                        */
+                       ptrace(PTRACE_GETSIGINFO, pid, (void*) 0, (void*) &si);
+                       if (si.si_code == SI_USER)
+                               return 0;
+#else
                        tcp->flags &= ~TCB_WAITEXECVE;
                        return 0;
+#endif
                }
        } else {
                /* syscall in progress */
-               if (upeek (tcp, PT_R8, &r8) < 0)
+               if (upeek(tcp, PT_R8, &r8) < 0)
                        return -1;
-               if (upeek (tcp, PT_R10, &r10) < 0)
+               if (upeek(tcp, PT_R10, &r10) < 0)
                        return -1;
        }
 #elif defined (ARM)
diff -x CVS -urpN 2009-01-17/util.c 2009-01-18/util.c
--- 2009-01-17/util.c   2009-01-13 19:30:55.000000000 +0100
+++ 2009-01-18/util.c   2009-01-17 02:52:54.000000000 +0100
@@ -30,7 +30,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
- *     $Id: util.c,v 1.86 2009/01/13 18:30:55 vda_linux Exp $
+ *     $Id: util.c,v 1.87 2009/01/17 01:52:54 vda_linux Exp $
  */
 
 #include "defs.h"
@@ -1593,11 +1593,9 @@ set_arg1 (struct tcb *tcp, void *cookie,
 #endif
 
 int
-setbpt(tcp)
-struct tcb *tcp;
+setbpt(struct tcb *tcp)
 {
        static int clone_scno[SUPPORTED_PERSONALITIES] = { SYS_clone };
-       extern int change_syscall(struct tcb *, int);
        arg_setup_state state;
 
        if (tcp->flags & TCB_BPTSET) {



------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Strace-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/strace-devel

Reply via email to