Hi!

Vincent reported that on some machines affinity stopped working.
Apparently sysconf (_SC_NPROCESSORS_CONF) is sometimes smaller than
the minimum number of logical CPUs the kernel accepts for the
sched_getaffinity syscall :(, and I don't see any way to query that
magic number.

So, with this patch, if pthread_getaffinity_np fails with EINVAL, we
retry with a bigger mask (if the starting cpusetsize was smaller than
sizeof (cpu_set_t), the first retry uses that size (1024 CPUs); after
that the size is always doubled).  After a successful
pthread_getaffinity_np we look through the affinity mask to see whether
a smaller value could be used for the places array and for the
pthread_setaffinity_np calls.  gomp_get_cpuset_size is then the larger
of the two sizes: it is used for pthread_getaffinity_np and is the size
of the gomp_cpusetp bitmask.  gomp_cpuset_size is equal to that or
smaller, and is what is passed to pthread_setaffinity_np.  For example,
a kernel configured with 32 present CPUs and up to 96 hotplug CPUs could
have gomp_get_cpuset_size 128 (1024 CPUs) or perhaps 16 (128 CPUs),
while gomp_cpuset_size is 8 (up to 64 CPUs).

2013-10-07  Jakub Jelinek  <ja...@redhat.com>

        PR libgomp/58642
        * config/linux/proc.c: Include errno.h.
        (gomp_get_cpuset_size): New variable.
        (gomp_cpuset_popcount): Add cpusetsize argument, use it instead of
        gomp_cpuset_size.
        (gomp_init_num_threads): If CPU_ALLOC_SIZE is defined and
        pthread_getaffinity_np returned EINVAL, increase gomp_cpuset_size
        and retry.  After successful pthread_getaffinity_np copy
        gomp_cpuset_size to gomp_get_cpuset_size and try to find out
        minimum gomp_cpuset_size that covers all the CPUs set in gomp_cpusetp.
        (get_num_procs): Pass gomp_get_cpuset_size rather than gomp_cpuset_size
        to pthread_getaffinity_np, adjust gomp_cpuset_popcount caller.
        * config/linux/proc.h (gomp_cpuset_popcount): Add cpusetsize argument.
        * config/linux/affinity.c (gomp_affinity_finalize_place_list,
        gomp_affinity_init_level): Adjust gomp_cpuset_popcount callers.
        * testsuite/libgomp.c/affinity-1.c (pthread_getaffinity_np): Set
        contig_cpucount from the first successful pthread_getaffinity_np
        call, rather than just the first call.

--- libgomp/config/linux/proc.c.jj      2013-10-04 09:03:01.000000000 +0200
+++ libgomp/config/linux/proc.c 2013-10-07 09:31:00.350979942 +0200
@@ -30,6 +30,7 @@
 #endif
 #include "libgomp.h"
 #include "proc.h"
+#include <errno.h>
 #include <stdlib.h>
 #include <unistd.h>
 #ifdef HAVE_GETLOADAVG
@@ -40,17 +41,18 @@
 
 #ifdef HAVE_PTHREAD_AFFINITY_NP
 unsigned long gomp_cpuset_size;
+static unsigned long gomp_get_cpuset_size;
 cpu_set_t *gomp_cpusetp;
 
 unsigned long
-gomp_cpuset_popcount (cpu_set_t *cpusetp)
+gomp_cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
 {
 #ifdef CPU_COUNT_S
   /* glibc 2.7 and above provide a macro for this.  */
-  return CPU_COUNT_S (gomp_cpuset_size, cpusetp);
+  return CPU_COUNT_S (cpusetsize, cpusetp);
 #else
 #ifdef CPU_COUNT
-  if (gomp_cpuset_size == sizeof (cpu_set_t))
+  if (cpusetsize == sizeof (cpu_set_t))
     /* glibc 2.6 and above provide a macro for this.  */
     return CPU_COUNT (cpusetp);
 #endif
@@ -59,7 +61,7 @@ gomp_cpuset_popcount (cpu_set_t *cpusetp
   extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)
                   ? 1 : -1];
 
-  for (i = 0; i < gomp_cpuset_size / sizeof (cpusetp->__bits[0]); i++)
+  for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
     {
       unsigned long int mask = cpusetp->__bits[i];
       if (mask == 0)
@@ -86,24 +88,55 @@ gomp_init_num_threads (void)
 #endif
 
   gomp_cpusetp = (cpu_set_t *) gomp_malloc (gomp_cpuset_size);
-  if (pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
-                             gomp_cpusetp) == 0)
+  do
     {
-      /* Count only the CPUs this process can use.  */
-      gomp_global_icv.nthreads_var = gomp_cpuset_popcount (gomp_cpusetp);
-      if (gomp_global_icv.nthreads_var == 0)
+      int ret = pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+                                       gomp_cpusetp);
+      if (ret == 0)
        {
-         gomp_global_icv.nthreads_var = 1;
-         free (gomp_cpusetp);
-         gomp_cpusetp = NULL;
+         unsigned long i;
+         /* Count only the CPUs this process can use.  */
+         gomp_global_icv.nthreads_var
+           = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
+         if (gomp_global_icv.nthreads_var == 0)
+           break;
+         gomp_get_cpuset_size = gomp_cpuset_size;
+#ifdef CPU_ALLOC_SIZE
+         for (i = gomp_cpuset_size * 8; i; i--)
+           if (CPU_ISSET_S (i - 1, gomp_cpuset_size, gomp_cpusetp))
+             break;
+         gomp_cpuset_size = CPU_ALLOC_SIZE (i);
+#endif
+         return;
        }
-      return;
-    }
-  else
-    {
-      free (gomp_cpusetp);
-      gomp_cpusetp = NULL;
+      if (ret != EINVAL)
+       break;
+#ifdef CPU_ALLOC_SIZE
+      if (gomp_cpuset_size < sizeof (cpu_set_t))
+       gomp_cpuset_size = sizeof (cpu_set_t);
+      else
+       gomp_cpuset_size = gomp_cpuset_size * 2;
+      if (gomp_cpuset_size < 8 * sizeof (cpu_set_t))
+       gomp_cpusetp
+         = (cpu_set_t *) gomp_realloc (gomp_cpusetp, gomp_cpuset_size);
+      else
+       {
+         /* Avoid gomp_fatal if too large memory allocation would be
+            requested, e.g. kernel returning EINVAL all the time.  */
+         void *p = realloc (gomp_cpusetp, gomp_cpuset_size);
+         if (p == NULL)
+           break;
+         gomp_cpusetp = (cpu_set_t *) p;
+       }
+#else
+      break;
+#endif
     }
+  while (1);
+  gomp_cpuset_size = 0;
+  gomp_global_icv.nthreads_var = 1;
+  free (gomp_cpusetp);
+  gomp_cpusetp = NULL;
 #endif
 #ifdef _SC_NPROCESSORS_ONLN
   gomp_global_icv.nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
@@ -118,10 +151,10 @@ get_num_procs (void)
     {
       /* Count only the CPUs this process can use.  */
       if (gomp_cpusetp
-         && pthread_getaffinity_np (pthread_self (), gomp_cpuset_size,
+         && pthread_getaffinity_np (pthread_self (), gomp_get_cpuset_size,
                                     gomp_cpusetp) == 0)
        {
-         int ret = gomp_cpuset_popcount (gomp_cpusetp);
+         int ret = gomp_cpuset_popcount (gomp_get_cpuset_size, gomp_cpusetp);
          return ret != 0 ? ret : 1;
        }
     }
--- libgomp/config/linux/proc.h.jj      2013-10-01 13:48:00.000000000 +0200
+++ libgomp/config/linux/proc.h 2013-10-07 09:09:08.576141237 +0200
@@ -30,7 +30,8 @@
 #ifdef HAVE_PTHREAD_AFFINITY_NP
 extern unsigned long gomp_cpuset_size attribute_hidden;
 extern cpu_set_t *gomp_cpusetp attribute_hidden;
-extern unsigned long gomp_cpuset_popcount (cpu_set_t *) attribute_hidden;
+extern unsigned long gomp_cpuset_popcount (unsigned long, cpu_set_t *)
+     attribute_hidden;
 #endif
 
 #endif /* GOMP_PROC_H */
--- libgomp/config/linux/affinity.c.jj  2013-10-04 09:27:56.000000000 +0200
+++ libgomp/config/linux/affinity.c     2013-10-07 09:30:52.523021443 +0200
@@ -193,7 +193,7 @@ gomp_affinity_finalize_place_list (bool
       bool nonempty = false;
 #ifdef CPU_AND_S
       CPU_AND_S (gomp_cpuset_size, cpusetp, cpusetp, gomp_cpusetp);
-      nonempty = gomp_cpuset_popcount (cpusetp) != 0;
+      nonempty = gomp_cpuset_popcount (gomp_cpuset_size, cpusetp) != 0;
 #else
       unsigned long k, max = gomp_cpuset_size / sizeof (cpusetp->__bits[0]);
       for (k = 0; k < max; k++)
@@ -228,7 +228,8 @@ gomp_affinity_init_level (int level, uns
 
   if (gomp_cpusetp)
     {
-      unsigned long maxcount = gomp_cpuset_popcount (gomp_cpusetp);
+      unsigned long maxcount
+       = gomp_cpuset_popcount (gomp_cpuset_size, gomp_cpusetp);
       if (count > maxcount)
        count = maxcount;
     }
--- libgomp/testsuite/libgomp.c/affinity-1.c.jj 2013-10-04 15:58:13.000000000 +0200
+++ libgomp/testsuite/libgomp.c/affinity-1.c    2013-10-07 09:31:53.884695701 +0200
@@ -76,23 +76,26 @@ int
 pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
 {
   int ret;
+  unsigned long i, max;
   if (orig_getaffinity_np == NULL)
     {
-      unsigned long i, max;
       orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
                            dlsym (RTLD_NEXT, "pthread_getaffinity_np");
       if (orig_getaffinity_np == NULL)
        exit (0);
-      ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
-      if (ret != 0)
-       return ret;
+    }
+  ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
+  if (ret != 0)
+    return ret;
+  if (contig_cpucount == 0)
+    {
       max = 8 * cpusetsize;
       for (i = 0; i < max; i++)
        if (!CPU_ISSET_S (i, cpusetsize, cpuset))
          break;
       contig_cpucount = i;
     }
-  return orig_getaffinity_np (thread, cpusetsize, cpuset);
+  return ret;
 }
 #endif
 


        Jakub

Reply via email to