Hi, Attached little program many_looping_threads.c starts N threads, and exits (terminating them all) as soon as they are all started. Each thread runs infinite loop with getuid(). N is given and a 1st command line parameter.
Ran standalone, it finishes ok, even with large number of threads (500). Currently, strace -f fails miserably starting approximately with 5 threads. After a few threads created, strace is flooded with syscall entry/exit notifications from these threads, and the main thread (which wants to create more threads) does not get a chance for its syscall start/stop notifications to be delivered! This patch fixes it. Run tested. The gist of the patch is that we don't wait(2) for the *first* process to stop/exit, we wait for them all (calling wait(2) in a loop, with WNOHANG). Only when we got all such processes, we process them and restart them. This ensures that one or a few fast stopping/starting/stopping threads can't usurp strace's attention. Slower threads will always get a chance to do at least some progress. The patch needs some comment removal and re-indentation before it can be applied to strace cvs, but otherwise seems to be ready. I'd vote for subsequent patch to split trace() function into "collect stopped tasks" and "process collected tasks" parts, without changing the logic. -- vda
diff -d -urpN strace.6/defs.h strace.7/defs.h --- strace.6/defs.h 2009-01-06 22:45:06.000000000 +0100 +++ strace.7/defs.h 2009-01-07 17:36:33.000000000 +0100 @@ -298,8 +298,11 @@ extern int mp_ioctl (int f, int c, void /* Trace Control Block */ struct tcb { - short flags; /* See below for TCB_ values */ + int flags; /* See below for TCB_ values */ int pid; /* Process Id of this entry */ + int wait_status; /* Status from last wait() */ //vda + struct tcb *next_need_service; + /* Linked list of tracees found by wait()s */ long scno; /* System call number */ int u_nargs; /* System call arguments */ long u_arg[MAX_ARGS]; /* System call arguments */ diff -d -urpN strace.6/strace.c strace.7/strace.c --- strace.6/strace.c 2009-01-06 22:45:06.000000000 +0100 +++ strace.7/strace.c 2009-01-07 17:39:40.000000000 +0100 @@ -2277,8 +2277,11 @@ trace() struct tcb *tcp; #ifdef LINUX struct rusage ru; + struct rusage* ru_ptr = cflag ? &ru : NULL; + int wnohang; + struct tcb *tcps_need_service; #ifdef __WALL - static int wait4_options = __WALL; + int wait4_options = __WALL; #endif #endif /* LINUX */ @@ -2287,37 +2290,49 @@ trace() return 0; if (interactive) sigprocmask(SIG_SETMASK, &empty_set, NULL); + + wnohang = 0; //vda + tcps_need_service = NULL; + while (1) { #ifdef LINUX #ifdef __WALL - pid = wait4(-1, &status, wait4_options, cflag ? &ru : NULL); + pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr); if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) { /* this kernel does not support __WALL */ wait4_options &= ~__WALL; errno = 0; - pid = wait4(-1, &status, wait4_options, - cflag ? &ru : NULL); + pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr); } if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) { /* most likely a "cloned" process */ - pid = wait4(-1, &status, __WCLONE, - cflag ? &ru : NULL); - if (pid == -1) { - fprintf(stderr, "strace: clone wait4 " + pid = wait4(-1, &status, __WCLONE | wnohang, ru_ptr); + if (pid < 0 && errno != ECHILD) { + fprintf(stderr, "strace: wait4(WCLONE) " "failed: %s\n", strerror(errno)); } } -#else - pid = wait4(-1, &status, 0, cflag ? &ru : NULL); -#endif /* __WALL */ +#else /* !__WALL */ + pid = wait4(-1, &status, wnohang, ru_ptr); +#endif #endif /* LINUX */ #ifdef SUNOS4 pid = wait(&status); +//probably: pid = waitpid(-1, &status, wnohang); #endif /* SUNOS4 */ wait_errno = errno; if (interactive) sigprocmask(SIG_BLOCK, &blocked_set, NULL); - if (pid == -1) { + if (pid == 0 && wnohang) { + /* We had at least one successful + * wait() before. We waited + * with WNOHANG second time. + * Stop collecting more tracees, + * process what we already have. + */ + break; + } + if (pid == -1) { switch (wait_errno) { case EINTR: continue; @@ -2388,14 +2403,22 @@ Process %d attached (waiting for parent) exit(1); } } - /* set current output file */ - outf = tcp->outf; + if (cflag) { #ifdef LINUX tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime); tcp->stime = ru.ru_stime; #endif /* !LINUX */ } + tcp->wait_status = status; //vda + tcp->next_need_service = tcps_need_service; + tcps_need_service = tcp; + wnohang = WNOHANG; + } /* while (1) - collecting all stopped/exited tracees */ //vda + + while (tcps_need_service) { //vda + tcp = tcps_need_service; + tcps_need_service = tcp->next_need_service; //vda if (tcp->flags & TCB_SUSPENDED) { /* @@ -2408,6 +2431,11 @@ Process %d attached (waiting for parent) */ continue; } + + outf = tcp->outf; //vda + status = tcp->wait_status; + pid = tcp->pid; //vda + if (WIFSIGNALED(status)) { if (pid == strace_child) exit_code = 0x100 | WTERMSIG(status); @@ -2682,6 +2710,7 @@ Process %d attached (waiting for parent) cleanup(); return -1; } + } /* while (tcps_need_service) - processing all stopped/exited tcps */ //vda } return 0; }
#include <stdio.h> #include <pthread.h> #include <unistd.h> #include <sys/types.h> #include <signal.h> #include <stdlib.h> static int thd_no; static void *sub_thd(void *c) { fprintf(stderr, "sub-thread %d created\n", ++thd_no); for (;;) getuid(); return NULL; } int main(int argc, char *argv[]) { int i; pthread_t *thd; int num_threads = 1; if (argv[1]) num_threads = atoi(argv[1]); thd = malloc(num_threads * sizeof(thd[0])); fprintf(stderr, "test start, num_threads:%d...\n", num_threads); for (i = 0; i < num_threads; i++) { pthread_create(&thd[i], NULL, sub_thd, NULL); fprintf(stderr, "after pthread_create\n"); } /* Exit. This kills all threads */ return 0; }