odp_cpu_count() returns the number of online CPUs, but when running in
a control group access may be restricted to only a subset of these.
This combined with a lack of error handling in odph_linux_pthread_create()
means a number of the applications create more threads than there are
cores available to them, leading to deadlocks in odp_barrier_wait().

Signed-off-by: Stuart Haslam <[email protected]>
---
 helper/include/odp/helper/linux.h        |  4 +-
 platform/linux-generic/odp_linux.c       | 63 +++++++++++++++++---------------
 platform/linux-generic/odp_system_info.c | 16 +++++---
 3 files changed, 47 insertions(+), 36 deletions(-)

diff --git a/helper/include/odp/helper/linux.h 
b/helper/include/odp/helper/linux.h
index 146e26c..44ee787 100644
--- a/helper/include/odp/helper/linux.h
+++ b/helper/include/odp/helper/linux.h
@@ -70,8 +70,10 @@ int odph_linux_cpumask_default(odp_cpumask_t *mask, int num);
  * @param mask          CPU mask
  * @param start_routine Thread start function
  * @param arg           Thread argument
+ *
+ * @return Number of threads created
  */
-void odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
+int odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
                               const odp_cpumask_t *mask,
                               void *(*start_routine) (void *), void *arg);
 
diff --git a/platform/linux-generic/odp_linux.c 
b/platform/linux-generic/odp_linux.c
index 6865ab1..5a0d456 100644
--- a/platform/linux-generic/odp_linux.c
+++ b/platform/linux-generic/odp_linux.c
@@ -23,42 +23,38 @@
 #include <odp/system_info.h>
 #include <odp_debug_internal.h>
 
-int odph_linux_cpumask_default(odp_cpumask_t *mask, int num_in)
+
+int odph_linux_cpumask_default(odp_cpumask_t *mask, int num)
 {
-       int i;
-       int first_cpu = 1;
-       int num = num_in;
-       int cpu_count;
+       int ret, cpu, i;
+       cpu_set_t cpuset;
 
-       cpu_count = odp_cpu_count();
+       ret = pthread_getaffinity_np(pthread_self(),
+                                    sizeof(cpu_set_t), &cpuset);
+       if (ret != 0)
+               ODP_ABORT("failed to read CPU affinity value\n");
+
+       odp_cpumask_zero(mask);
 
        /*
         * If no user supplied number or it's too large, then attempt
         * to use all CPUs
         */
-       if (0 == num)
-               num = cpu_count;
-       if (cpu_count < num)
-               num = cpu_count;
-
-       /*
-        * Always force "first_cpu" to a valid CPU
-        */
-       if (first_cpu >= cpu_count)
-               first_cpu = cpu_count - 1;
-
-       /* Build the mask */
-       odp_cpumask_zero(mask);
-       for (i = 0; i < num; i++) {
-               int cpu;
-
-               cpu = (first_cpu + i) % cpu_count;
-               odp_cpumask_set(mask, cpu);
+       if (0 == num || CPU_SETSIZE < num)
+               num = CPU_COUNT(&cpuset);
+
+       /* build the mask, allocating down from highest numbered CPU */
+       for (cpu = 0, i = CPU_SETSIZE-1; i >= 0 && cpu < num; --i) {
+               if (CPU_ISSET(i, &cpuset)) {
+                       odp_cpumask_set(mask, i);
+                       cpu++;
+               }
        }
 
-       return num;
+       return cpu;
 }
 
+
 static void *odp_run_start_routine(void *arg)
 {
        odp_start_args_t *start_args = arg;
@@ -80,7 +76,7 @@ static void *odp_run_start_routine(void *arg)
 }
 
 
-void odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
+int odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
                               const odp_cpumask_t *mask_in,
                               void *(*start_routine) (void *), void *arg)
 {
@@ -89,6 +85,7 @@ void odph_linux_pthread_create(odph_linux_pthread_t 
*thread_tbl,
        odp_cpumask_t mask;
        int cpu_count;
        int cpu;
+       int ret;
 
        odp_cpumask_copy(&mask, mask_in);
        num = odp_cpumask_count(&mask);
@@ -98,8 +95,9 @@ void odph_linux_pthread_create(odph_linux_pthread_t 
*thread_tbl,
        cpu_count = odp_cpu_count();
 
        if (num < 1 || num > cpu_count) {
-               ODP_ERR("Bad num\n");
-               return;
+               ODP_ERR("Invalid number of threads: %d (%d cores available)\n",
+                       num, cpu_count);
+               return 0;
        }
 
        cpu = odp_cpumask_first(&mask);
@@ -123,11 +121,18 @@ void odph_linux_pthread_create(odph_linux_pthread_t 
*thread_tbl,
                thread_tbl[i].start_args->start_routine = start_routine;
                thread_tbl[i].start_args->arg           = arg;
 
-               pthread_create(&thread_tbl[i].thread, &thread_tbl[i].attr,
+               ret = pthread_create(&thread_tbl[i].thread, &thread_tbl[i].attr,
                               odp_run_start_routine, thread_tbl[i].start_args);
+               if (ret != 0) {
+                       ODP_ERR("Failed to start thread on cpu #%d\n", cpu);
+                       free(thread_tbl[i].start_args);
+                       break;
+               }
 
                cpu = odp_cpumask_next(&mask, cpu);
        }
+
+       return i;
 }
 
 
diff --git a/platform/linux-generic/odp_system_info.c 
b/platform/linux-generic/odp_system_info.c
index 6b6c723..0aaaeda 100644
--- a/platform/linux-generic/odp_system_info.c
+++ b/platform/linux-generic/odp_system_info.c
@@ -4,11 +4,14 @@
  * SPDX-License-Identifier:     BSD-3-Clause
  */
 
+#define _GNU_SOURCE
 #include <odp/system_info.h>
 #include <odp_internal.h>
 #include <odp_debug_internal.h>
 #include <odp/align.h>
 #include <odp/cpu.h>
+#include <pthread.h>
+#include <sched.h>
 #include <string.h>
 #include <stdio.h>
 
@@ -46,20 +49,21 @@ static odp_system_info_t odp_system_info;
 
 
 /*
- * Report the number of online CPU's
+ * Report the number of CPUs in the affinity mask of the main thread
  */
 static int sysconf_cpu_count(void)
 {
-       long ret;
+       cpu_set_t cpuset;
+       int ret;
 
-       ret = sysconf(_SC_NPROCESSORS_ONLN);
-       if (ret < 0)
+       ret = pthread_getaffinity_np(pthread_self(),
+                                    sizeof(cpuset), &cpuset);
+       if (ret != 0)
                return 0;
 
-       return (int)ret;
+       return CPU_COUNT(&cpuset);
 }
 
-
 #if defined __x86_64__ || defined __i386__ || defined __OCTEON__ || \
 defined __powerpc__
 /*
-- 
2.1.1

_______________________________________________
lng-odp mailing list
[email protected]
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to