Folks, While investigating an issue that started at http://www.open-mpi.org/community/lists/users/2014/10/25562.php I found that it is mandatory to compile with -D_REENTRANT on Solaris (10 and 11). (Otherwise errno is not thread-specific, and the pmix thread silently misinterprets EAGAIN or EWOULDBLOCK, which leads to random behaviour that generally terminates the application.)
This is a bug / unexpected side effect introduced by me in commit b1c4daa9567c7647318b9b673698c2251264f22e. On a RedHat 6-like server, this is not necessary. On AIX and/or FreeBSD, it might be necessary to compile with -D_THREAD_SAFE in order to get correct behaviour. I wrote the simple attached program in order to check for correct behaviour with/without -D_REENTRANT or -D_THREAD_SAFE. One option is to automatically test this in config/opal_config_pthreads.m4; another option is to hardcode this for the required OSes. Paul, since you have access to many platforms, could you please run this test with and without -D_REENTRANT / -D_THREAD_SAFE and tell me where the program produces incorrect behaviour (output is KO...) without the flag? Thanks in advance, Gilles
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Thread body used to probe whether errno is private to each thread.
 *
 * Returns:
 *   (void *)-1  if this thread observes errno == 1, the value main() stored
 *               in its own errno before creating the thread;
 *   (void *)-2  if errno is non-zero after a zero-length read on fd 0;
 *   NULL        otherwise, after storing 2 into this thread's errno so that
 *               main() can check whether the write leaked into its errno.
 */
static void *fn(void *arg)
{
    (void)arg; /* unused */

    /* Seeing main()'s value here means errno is shared between threads. */
    if (errno == 1) {
        return (void *)-1;
    }

    /* Zero-length read; the check below expects errno to still be 0.
     * NOTE(review): this assumes a new thread starts with errno == 0 and
     * that fd 0 is open -- confirm on each target platform. */
    (void)read(0, NULL, 0);
    if (errno != 0) {
        return (void *)-2;
    }

    /* Marker value that main() looks for in its own errno. */
    errno = 2;
    return NULL;
}

/*
 * Runs fn() in a second thread and reports whether errno behaved as a
 * per-thread variable.
 *
 * Exit status / stderr output:
 *   0  "OK"           no errno leakage detected in either direction
 *   1                 pthread_create failed
 *   2                 pthread_join failed
 *   3  "KO: error 3"  the thread returned a non-NULL status (-1 or -2)
 *   4  "KO: error 4"  main's errno holds the value 2 written by the thread
 */
int main(int argc, char *argv[])
{
    pthread_t t;
    void *s = NULL;
    int rc;

    (void)argc; /* unused */
    (void)argv; /* unused */

    errno = 1;

    /* pthread_* functions return an error number (0 on success) and do not
     * set errno, so test the return code and decode it with strerror(). */
    rc = pthread_create(&t, NULL, fn, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create : %s\n", strerror(rc));
        return 1;
    }

    rc = pthread_join(t, &s);
    if (rc != 0) {
        fprintf(stderr, "pthread_join : %s\n", strerror(rc));
        return 2;
    }

    if (NULL != s) {
        fprintf(stderr, "KO: error 3 (%ld)\n", (long)s);
        return 3;
    } else if (2 == errno) {
        fprintf(stderr, "KO: error 4 (%ld)\n", (long)s);
        return 4;
    } else {
        fprintf(stderr, "OK\n");
        return 0;
    }
}