Hi! Vincent reported that on some machines affinity stopped working. Apparently sysconf (_SC_NPROCESSORS_CONF) is sometimes smaller than the minimum number of logical CPUs the kernel allows for the sched_getaffinity syscall :(, and I don't see any way to query that magic number.
So, with this patch, if pthread_getaffinity_np fails with EINVAL, libgomp retries with a bigger mask (if the starting cpusetsize was smaller than sizeof (cpu_set_t), it first retries with that size, i.e. 1024 CPUs, and after that it doubles the size on every retry). After a successful pthread_getaffinity_np call it scans the affinity mask to see whether a smaller size would suffice for the places array and for the pthread_setaffinity_np syscalls. gomp_get_cpuset_size is then the larger of the two sizes; it is used for pthread_getaffinity_np and is the size of the gomp_cpusetp bitmask. gomp_cpuset_size is equal to that or smaller, and is what is passed to pthread_setaffinity_np. For example, a kernel configured with 32 present CPUs and up to 96 hotplug CPUs could have a gomp_get_cpuset_size of 128 bytes (1024 CPUs) or perhaps 16 bytes (128 CPUs), while gomp_cpuset_size would be 8 bytes (up to 64 CPUs). 2013-10-07 Jakub Jelinek <ja...@redhat.com> PR libgomp/58642 * config/linux/proc.c: Include errno.h. (gomp_get_cpuset_size): New variable. (gomp_cpuset_popcount): Add cpusetsize argument, use it instead of gomp_cpuset_size. (gomp_init_num_threads): If CPU_ALLOC_SIZE is defined and pthread_getaffinity_np returned EINVAL, increase gomp_cpuset_size and retry. After successful pthread_getaffinity_np copy gomp_cpuset_size to gomp_get_cpuset_size and try to find out minimum gomp_cpuset_size that covers all the CPUs set in gomp_cpusetp. (get_num_procs): Pass gomp_get_cpuset_size rather than gomp_cpuset_size to pthread_getaffinity_np, adjust gomp_cpuset_popcount caller. * config/linux/proc.h (gomp_cpuset_popcount): Add cpusetsize argument. * config/linux/affinity.c (gomp_affinity_finalize_place_list, gomp_affinity_init_level): Adjust gomp_cpuset_popcount callers. * testsuite/libgomp.c/affinity-1.c (pthread_getaffinity_np): Set contig_cpucount from the first successful pthread_getaffinity_np call, rather than just first call. 
--- libgomp/config/linux/proc.c.jj 2013-10-04 09:03:01.000000000 +0200 +++ libgomp/config/linux/proc.c 2013-10-07 09:31:00.350979942 +0200 @@ -30,6 +30,7 @@ #endif #include "libgomp.h" #include "proc.h" +#include <errno.h> #include <stdlib.h> #include <unistd.h> #ifdef HAVE_GETLOADAVG @@ -40,17 +41,18 @@ #ifdef HAVE_PTHREAD_AFFINITY_NP unsigned long gomp_cpuset_size; +static unsigned long gomp_get_cpuset_size; cpu_set_t *gomp_cpusetp; unsigned long -gomp_cpuset_popcount (cpu_set_t *cpusetp) +gomp_cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp) { #ifdef CPU_COUNT_S /* glibc 2.7 and above provide a macro for this. */ - return CPU_COUNT_S (gomp_cpuset_size, cpusetp); + return CPU_COUNT_S (cpusetsize, cpusetp); #else #ifdef CPU_COUNT - if (gomp_cpuset_size == sizeof (cpu_set_t)) + if (cpusetsize == sizeof (cpu_set_t)) /* glibc 2.6 and above provide a macro for this. */ return CPU_COUNT (cpusetp); #endif @@ -59,7 +61,7 @@ gomp_cpuset_popcount (cpu_set_t *cpusetp extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int) ? 1 : -1]; - for (i = 0; i < gomp_cpuset_size / sizeof (cpusetp->__bits[0]); i++) + for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++) { unsigned long int mask = cpusetp->__bits[i]; if (mask == 0) @@ -86,24 +88,55 @@ gomp_init_num_threads (void) #endif gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size); - if (pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, - gomp_cpusetp) == 0) + do { - /* Count only the CPUs this process can use. */ - gomp_global_icv.nthreads_var = gomp_cpuset_popcount (gomp_cpusetp); - if (gomp_global_icv.nthreads_var == 0) + int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + gomp_cpusetp); + if (ret == 0) { - gomp_global_icv.nthreads_var = 1; - free (gomp_cpusetp); - gomp_cpusetp = NULL; + unsigned long i; + /* Count only the CPUs this process can use. 
*/ + gomp_global_icv.nthreads_var + = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp); + if (gomp_global_icv.nthreads_var == 0) + break; + gomp_get_cpuset_size = gomp_cpuset_size; +#ifdef CPU_ALLOC_SIZE + for (i = gomp_cpuset_size * 8; i; i--) + if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp)) + break; + gomp_cpuset_size = CPU_ALLOC_SIZE (i); +#endif + return; } - return; - } - else - { - free (gomp_cpusetp); - gomp_cpusetp = NULL; + if (ret != EINVAL) + break; +#ifdef CPU_ALLOC_SIZE + if (gomp_cpuset_size < sizeof (cpu_set_t)) + gomp_cpuset_size = sizeof (cpu_set_t); + else + gomp_cpuset_size = gomp_cpuset_size * 2; + if (gomp_cpuset_size < 8 * sizeof (cpu_set_t)) + gomp_cpusetp + = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size); + else + { + /* Avoid gomp_fatal if too large memory allocation would be + requested, e.g. kernel returning EINVAL all the time. */ + void *p = realloc (gomp_cpusetp, gomp_cpuset_size); + if (p == NULL) + break; + gomp_cpusetp = (cpu_set_t *) p; + } +#else + break; +#endif } + while (1); + gomp_cpuset_size = 0; + gomp_global_icv.nthreads_var = 1; + free (gomp_cpusetp); + gomp_cpusetp = NULL; #endif #ifdef _SC_NPROCESSORS_ONLN gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN); @@ -118,10 +151,10 @@ get_num_procs (void) { /* Count only the CPUs this process can use. */ if (gomp_cpusetp - && pthread_getaffinity_np (pthread_self (), gomp_cpuset_size, + && pthread_getaffinity_np (pthread_self (), gomp_get_cpuset_size, gomp_cpusetp) == 0) { - int ret = gomp_cpuset_popcount (gomp_cpusetp); + int ret = gomp_cpuset_popcount (gomp_get_cpuset_size, gomp_cpusetp); return ret != 0 ? 
ret : 1; } } --- libgomp/config/linux/proc.h.jj 2013-10-01 13:48:00.000000000 +0200 +++ libgomp/config/linux/proc.h 2013-10-07 09:09:08.576141237 +0200 @@ -30,7 +30,8 @@ #ifdef HAVE_PTHREAD_AFFINITY_NP extern unsigned long gomp_cpuset_size attribute_hidden; extern cpu_set_t *gomp_cpusetp attribute_hidden; -extern unsigned long gomp_cpuset_popcount (cpu_set_t *) attribute_hidden; +extern unsigned long gomp_cpuset_popcount (unsigned long, cpu_set_t *) + attribute_hidden; #endif #endif /* GOMP_PROC_H */ --- libgomp/config/linux/affinity.c.jj 2013-10-04 09:27:56.000000000 +0200 +++ libgomp/config/linux/affinity.c 2013-10-07 09:30:52.523021443 +0200 @@ -193,7 +193,7 @@ gomp_affinity_finalize_place_list (bool bool nonempty = false; #ifdef CPU_AND_S CPU_AND_S (gomp_cpuset_size, cpusetp, cpusetp, gomp_cpusetp); - nonempty = gomp_cpuset_popcount (cpusetp) != 0; + nonempty = gomp_cpuset_popcount (gomp_cpuset_size, cpusetp) != 0; #else unsigned long k, max = gomp_cpuset_size / sizeof (cpusetp->__bits[0]); for (k = 0; k < max; k++) @@ -228,7 +228,8 @@ gomp_affinity_init_level (int level, uns if (gomp_cpusetp) { - unsigned long maxcount = gomp_cpuset_popcount (gomp_cpusetp); + unsigned long maxcount + = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp); if (count > maxcount) count = maxcount; } --- libgomp/testsuite/libgomp.c/affinity-1.c.jj 2013-10-04 15:58:13.000000000 +0200 +++ libgomp/testsuite/libgomp.c/affinity-1.c 2013-10-07 09:31:53.884695701 +0200 @@ -76,23 +76,26 @@ int pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) { int ret; + unsigned long i, max; if (orig_getaffinity_np == NULL) { - unsigned long i, max; orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) dlsym (RTLD_NEXT, "pthread_getaffinity_np"); if (orig_getaffinity_np == NULL) exit (0); - ret = orig_getaffinity_np (thread, cpusetsize, cpuset); - if (ret != 0) - return ret; + } + ret = orig_getaffinity_np (thread, cpusetsize, cpuset); + if (ret != 0) + return 
ret; + if (contig_cpucount == 0) + { max = 8 * cpusetsize; for (i = 0; i < max; i++) if (!CPU_ISSET_S (i, cpusetsize, cpuset)) break; contig_cpucount = i; } - return orig_getaffinity_np (thread, cpusetsize, cpuset); + return ret; } #endif Jakub