Hi,

Attached little program many_looping_threads.c
starts N threads, and exits (terminating them all)
as soon as they are all started.
Each thread runs infinite loop with getuid().
N is given and a 1st command line parameter.

Ran standalone, it finishes ok, even with large
number of threads (500).

Currently, strace -f fails miserably starting approximately
with 5 threads. After a few threads created, strace
is flooded with syscall entry/exit notifications
from these threads, and the main thread (which wants
to create more threads) does not get a chance for its
syscall start/stop notifications to be delivered!

This patch fixes it. Run tested.

The gist of the patch is that we don't wait(2) for the *first* process
to stop/exit, we wait for them all (calling wait(2) in a loop, with
WNOHANG). Only when we got all such processes, we process them and
restart them.

This ensures that one or a few fast stopping/starting/stopping threads
can't usurp strace's attention. Slower threads will always get a chance
to do at least some progress.

The patch needs some comment removal and re-indentation before it can be
applied to strace cvs, but otherwise seems to be ready.

I'd vote for subsequent patch to split trace() function into "collect
stopped tasks" and "process collected tasks" parts, without changing
the logic.
--
vda

diff -d -urpN strace.6/defs.h strace.7/defs.h
--- strace.6/defs.h	2009-01-06 22:45:06.000000000 +0100
+++ strace.7/defs.h	2009-01-07 17:36:33.000000000 +0100
@@ -298,8 +298,11 @@ extern int mp_ioctl (int f, int c, void 
 
 /* Trace Control Block */
 struct tcb {
-	short flags;		/* See below for TCB_ values */
+	int flags;		/* See below for TCB_ values */
 	int pid;		/* Process Id of this entry */
+	int wait_status;	/* Status from last wait() */  //vda
+	struct tcb *next_need_service;
+				/* Linked list of tracees found by wait()s */
 	long scno;		/* System call number */
 	int u_nargs;		/* System call arguments */
 	long u_arg[MAX_ARGS];	/* System call arguments */
diff -d -urpN strace.6/strace.c strace.7/strace.c
--- strace.6/strace.c	2009-01-06 22:45:06.000000000 +0100
+++ strace.7/strace.c	2009-01-07 17:39:40.000000000 +0100
@@ -2277,8 +2277,11 @@ trace()
 	struct tcb *tcp;
 #ifdef LINUX
 	struct rusage ru;
+	struct rusage* ru_ptr = cflag ? &ru : NULL;
+	int wnohang;
+	struct tcb *tcps_need_service;
 #ifdef __WALL
-	static int wait4_options = __WALL;
+	int wait4_options = __WALL;
 #endif
 #endif /* LINUX */
 
@@ -2287,37 +2290,49 @@ trace()
 			return 0;
 		if (interactive)
 			sigprocmask(SIG_SETMASK, &empty_set, NULL);
+
+		wnohang = 0; //vda
+		tcps_need_service = NULL;
+		while (1) {
 #ifdef LINUX
 #ifdef __WALL
-		pid = wait4(-1, &status, wait4_options, cflag ? &ru : NULL);
+		pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr);
 		if (pid < 0 && (wait4_options & __WALL) && errno == EINVAL) {
 			/* this kernel does not support __WALL */
 			wait4_options &= ~__WALL;
 			errno = 0;
-			pid = wait4(-1, &status, wait4_options,
-					cflag ? &ru : NULL);
+			pid = wait4(-1, &status, wait4_options | wnohang, ru_ptr);
 		}
 		if (pid < 0 && !(wait4_options & __WALL) && errno == ECHILD) {
 			/* most likely a "cloned" process */
-			pid = wait4(-1, &status, __WCLONE,
-					cflag ? &ru : NULL);
-			if (pid == -1) {
-				fprintf(stderr, "strace: clone wait4 "
+			pid = wait4(-1, &status, __WCLONE | wnohang, ru_ptr);
+			if (pid < 0 && errno != ECHILD) {
+				fprintf(stderr, "strace: wait4(WCLONE) "
 						"failed: %s\n", strerror(errno));
 			}
 		}
-#else
-		pid = wait4(-1, &status, 0, cflag ? &ru : NULL);
-#endif /* __WALL */
+#else /* !__WALL */
+		pid = wait4(-1, &status, wnohang, ru_ptr);
+#endif
 #endif /* LINUX */
 #ifdef SUNOS4
 		pid = wait(&status);
+//probably:	pid = waitpid(-1, &status, wnohang);
 #endif /* SUNOS4 */
 		wait_errno = errno;
 		if (interactive)
 			sigprocmask(SIG_BLOCK, &blocked_set, NULL);
 
-		if (pid == -1) {
+		if (pid == 0 && wnohang) {
+			/* We had at least one successful
+			 * wait() before. We waited
+			 * with WNOHANG second time.
+			 * Stop collecting more tracees,
+			 * process what we already have.
+			 */
+			break;
+		}
+ 		if (pid == -1) {
 			switch (wait_errno) {
 			case EINTR:
 				continue;
@@ -2388,14 +2403,22 @@ Process %d attached (waiting for parent)
 				exit(1);
 			}
 		}
-		/* set current output file */
-		outf = tcp->outf;
+
 		if (cflag) {
 #ifdef LINUX
 			tv_sub(&tcp->dtime, &ru.ru_stime, &tcp->stime);
 			tcp->stime = ru.ru_stime;
 #endif /* !LINUX */
 		}
+		tcp->wait_status = status; //vda
+		tcp->next_need_service = tcps_need_service;
+		tcps_need_service = tcp;
+		wnohang = WNOHANG;
+		} /* while (1) - collecting all stopped/exited tracees */  //vda
+
+		while (tcps_need_service) {  //vda
+			tcp = tcps_need_service;
+			tcps_need_service = tcp->next_need_service; //vda
 
 		if (tcp->flags & TCB_SUSPENDED) {
 			/*
@@ -2408,6 +2431,11 @@ Process %d attached (waiting for parent)
 			 */
 			continue;
 		}
+
+		outf = tcp->outf;  //vda
+		status = tcp->wait_status;
+		pid = tcp->pid;  //vda
+
 		if (WIFSIGNALED(status)) {
 			if (pid == strace_child)
 				exit_code = 0x100 | WTERMSIG(status);
@@ -2682,6 +2710,7 @@ Process %d attached (waiting for parent)
 			cleanup();
 			return -1;
 		}
+	} /* while (tcps_need_service) - processing all stopped/exited tcps */ //vda
 	}
 	return 0;
 }
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <stdlib.h>

static int thd_no;

static void *sub_thd(void *c)
{
	fprintf(stderr, "sub-thread %d created\n", ++thd_no);
	for (;;)
		getuid();
	return NULL;
}

int main(int argc, char *argv[])
{
	int i;
	pthread_t *thd;
	int num_threads = 1;

	if (argv[1])
		num_threads = atoi(argv[1]);

	thd = malloc(num_threads * sizeof(thd[0]));
	fprintf(stderr, "test start, num_threads:%d...\n", num_threads);
	for (i = 0; i < num_threads; i++) {
		pthread_create(&thd[i], NULL, sub_thd, NULL);
		fprintf(stderr, "after pthread_create\n");
	}
	/* Exit. This kills all threads */
	return 0;
}

Reply via email to