http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53292
--- Comment #8 from Jakub Jelinek <jakub at gcc dot gnu.org> 2012-05-09 15:01:24 UTC --- Just try equivalent pthread program and you'll note the same behavior. #include <pthread.h> #include <stdlib.h> double *p; int c; void *tf (void *x) { int i, s = ((long) x) * c, e = s + c; for (i = s; i < e; i++) p[i] = 1.0; return NULL; } int main (int argc, char **argv) { int n = atoi (argv[1]), i; int sz = atoi (argv[2]); if (n > 32 || n < 1 || sz < 128 || (sz % n) != 0) return 1; p = malloc (sz * sizeof (double)); if (p == NULL) return 1; c = sz / n; pthread_t t[32]; for (i = 1; i < n; i++) pthread_create (&t[i], NULL, tf, (void *)(long) i); tf ((void *) 0L); for (i = 1; i < n; i++) pthread_join (t[i], NULL); return 0; }