On Wed, 30 Mar 2016 17:09:54 -0500
"Gary S. Robertson" <[email protected]> wrote:

> sched_getaffinity() and pthread_getaffinity_np() do not return
> an accurate mask of all CPUs in the machine when the kernel
> is compiled with NO_HZ_FULL support.
> 
> See Linaro BUG 2027 for details.
>       https://bugs.linaro.org/show_bug.cgi?id=2027
> 
> This code replaces the 'getaffinity' based CPU discovery logic
> -and- removes any exclusivity between default control and worker
> cpumasks, based on an assumption that external cpumask specifications
> will segregate CPUs if needed.
> 
> The results of these changes which address BUG 2027 are:
> (1) all CPUs known to the kernel at boot time are considered
>     for use by ODP regardless of the default CPU affinity masks
>     set by the kernel scheduler,
> (2) the default control and worker cpumasks are given reasonable
>     values based on the set of installed CPUs
> 
> Also - this code:
> (a) adds control and worker cpumasks to the linux-generic global data
> (b) adds logic to odp_cpumask.c to initialize these masks
> (c) calls the new cpumask initialization logic from odp_init_global()
> (d) reduces odp_cpumask_default_control() and
>     odp_cpumask_default_worker() to use the content of these new
>     cpumasks without modification.
> These changes provide prerequisite infrastructure for pending changes
> which will allow ODP to accept cpumasks passed in
> from external entities such as a provisioning service.
> 
> Signed-off-by: Gary S. Robertson <[email protected]>
> ---
>  platform/linux-generic/include/odp_internal.h |   7 ++
>  platform/linux-generic/odp_cpumask.c          | 126 
> ++++++++++++++++++++++++++
>  platform/linux-generic/odp_cpumask_task.c     |  70 +++++++++-----
>  platform/linux-generic/odp_init.c             |   9 +-
>  platform/linux-generic/odp_system_info.c      |  13 +--
>  5 files changed, 187 insertions(+), 38 deletions(-)
> 
> diff --git a/platform/linux-generic/include/odp_internal.h 
> b/platform/linux-generic/include/odp_internal.h
> index 679d9d2..567b1fb 100644
> --- a/platform/linux-generic/include/odp_internal.h
> +++ b/platform/linux-generic/include/odp_internal.h
> @@ -18,6 +18,7 @@ extern "C" {
>  #endif
>  
>  #include <odp/api/init.h>
> +#include <odp/api/cpumask.h>
>  #include <odp/api/thread.h>
>  #include <stdio.h>
>  
> @@ -40,6 +41,9 @@ struct odp_global_data_s {
>       odp_log_func_t log_fn;
>       odp_abort_func_t abort_fn;
>       odp_system_info_t system_info;
> +     odp_cpumask_t control_cpus;
> +     odp_cpumask_t worker_cpus;
> +     int num_cpus_installed;
>  };
>  
>  enum init_stage {
> @@ -65,6 +69,9 @@ extern struct odp_global_data_s odp_global_data;
>  int _odp_term_global(enum init_stage stage);
>  int _odp_term_local(enum init_stage stage);
>  
> +int odp_cpumask_init_global(void);
> +int odp_cpumask_term_global(void);
> +
>  int odp_system_info_init(void);
>  int odp_system_info_term(void);
>  
> diff --git a/platform/linux-generic/odp_cpumask.c 
> b/platform/linux-generic/odp_cpumask.c
> index 4249f1d..320ca8e 100644
> --- a/platform/linux-generic/odp_cpumask.c
> +++ b/platform/linux-generic/odp_cpumask.c
> @@ -15,6 +15,10 @@
>  #include <stdlib.h>
>  #include <string.h>
>  
> +#include <dirent.h>
> +#include <errno.h>
> +#include <sys/types.h>
> +
>  /** @internal Compile time assert */
>  _ODP_STATIC_ASSERT(CPU_SETSIZE >= ODP_CPUMASK_SIZE,
>                  "ODP_CPUMASK_SIZE__SIZE_ERROR");
> @@ -208,3 +212,125 @@ int odp_cpumask_next(const odp_cpumask_t *mask, int cpu)
>                       return cpu;
>       return -1;
>  }
> +
> +/*
> + * This function obtains system information specifying which cpus are
> + * available at boot time. These data are then used to produce cpumasks of
> + * configured CPUs without concern over isolation support.
> + */
> +static int get_installed_cpus(void)
> +{
> +     char *numptr;
> +     char *endptr;
> +     long int cpu_idnum;
> +     DIR  *d;
> +     struct dirent *dir;
> +
> +     /* Clear the global cpumasks for control and worker CPUs */
> +     odp_cpumask_zero(&odp_global_data.control_cpus);
> +     odp_cpumask_zero(&odp_global_data.worker_cpus);
> +
> +     /*
> +      * Scan the /sysfs pseudo-filesystem for CPU info directories.
> +      * There should be one subdirectory for each installed logical CPU
> +      */
> +     d = opendir("/sys/devices/system/cpu");
> +     if (d) {
> +             while ((dir = readdir(d)) != NULL) {
> +                     cpu_idnum = CPU_SETSIZE;
> +
> +                     /*
> +                      * If the current directory entry doesn't represent
> +                      * a CPU info subdirectory then skip to the next entry.
> +                      */
> +                     if (dir->d_type == DT_DIR) {
> +                             if (!strncmp(dir->d_name, "cpu", 3)) {
> +                                     /*
> +                                      * Directory name starts with "cpu"...
> +                                      * Try to extract a CPU ID number
> +                                      * from the remainder of the dirname.
> +                                      */
> +                                     errno = 0;
> +                                     numptr = dir->d_name;
> +                                     numptr += 3;
> +                                     cpu_idnum = strtol(numptr, &endptr,
> +                                                        10);
> +                                     if (errno || (endptr == numptr))
> +                                             continue;
> +                             } else {
> +                                     continue;
> +                             }
> +                     } else {
> +                             continue;
> +                     }
> +                     /*
> +                      * If we get here the current directory entry specifies
> +                      * a CPU info subdir for the CPU indexed by cpu_idnum.
> +                      */
> +
> +                     /* Track number of logical CPUs discovered */
> +                     if (odp_global_data.num_cpus_installed <
> +                         (int)(cpu_idnum + 1))
> +                             odp_global_data.num_cpus_installed =
> +                                             (int)(cpu_idnum + 1);
> +
> +                     /* Add the CPU to our default cpumasks */
> +                     odp_cpumask_set(&odp_global_data.control_cpus,
> +                                     (int)cpu_idnum);
> +                     odp_cpumask_set(&odp_global_data.worker_cpus,
> +                                     (int)cpu_idnum);
> +             }
> +             closedir(d);
> +             return 0;
> +     } else {
> +             return -1;
> +     }
> +}
> +
> +/*
> + * This function creates reasonable default cpumasks for control and worker
> + * tasks from the set of CPUs available at boot time.
> + */
> +int odp_cpumask_init_global(void)
> +{
> +     odp_cpumask_t *control_mask = &odp_global_data.control_cpus;
> +     odp_cpumask_t *worker_mask = &odp_global_data.worker_cpus;
> +     int i;
> +     int retval = -1;
> +
> +     if (!get_installed_cpus()) {
> +             /* CPU 0 is only used for workers on uniprocessor systems */
> +             if (odp_global_data.num_cpus_installed > 1)
> +                     odp_cpumask_clr(worker_mask, 0);
> +             /*
> +              * If only one or two CPUs installed, use CPU 0 for control.
> +              * Otherwise leave it for the kernel and start with CPU 1.
> +              */
> +             if (odp_global_data.num_cpus_installed < 3) {
> +                     /*
> +                      * If only two CPUs, use CPU 0 for control and
> +                      * use CPU 1 for workers.
> +                      */
> +                     odp_cpumask_clr(control_mask, 1);
> +             } else {
> +                     /*
> +                      * If three or more CPUs, reserve CPU 0 for kernel,
> +                      * reserve CPU 1 for control, and
> +                      * reserve remaining CPUs for workers
> +                      */
> +                     odp_cpumask_clr(control_mask, 0);
> +                     odp_cpumask_clr(worker_mask, 1);
> +                     for (i = 2; i < CPU_SETSIZE; i++) {
> +                             if (odp_cpumask_isset(worker_mask, i))
> +                                     odp_cpumask_clr(control_mask, i);
> +                     }
> +             }
> +             retval = 0;
> +     }
> +     return retval;
> +}
> +
> +int odp_cpumask_term_global(void)
> +{
> +     return 0;
> +}
> diff --git a/platform/linux-generic/odp_cpumask_task.c 
> b/platform/linux-generic/odp_cpumask_task.c
> index dbedff2..10885ce 100644
> --- a/platform/linux-generic/odp_cpumask_task.c
> +++ b/platform/linux-generic/odp_cpumask_task.c
> @@ -14,53 +14,75 @@
>  
>  int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
>  {
> -     int ret, cpu, i;
> -     cpu_set_t cpuset;
> -
> -     ret = pthread_getaffinity_np(pthread_self(),
> -                                  sizeof(cpu_set_t), &cpuset);
> -     if (ret != 0)
> -             ODP_ABORT("failed to read CPU affinity value\n");
> -
> -     odp_cpumask_zero(mask);
> +     odp_cpumask_t overlap;
> +     int cpu, i;
>  
>       /*
>        * If no user supplied number or it's too large, then attempt
>        * to use all CPUs
>        */
> -     if (0 == num || CPU_SETSIZE < num)
> -             num = CPU_COUNT(&cpuset);
> +     cpu = odp_cpumask_count(&odp_global_data.worker_cpus);
> +     if (0 == num || cpu < num)
> +             num = cpu;
>  
>       /* build the mask, allocating down from highest numbered CPU */
> +     odp_cpumask_zero(mask);
>       for (cpu = 0, i = CPU_SETSIZE - 1; i >= 0 && cpu < num; --i) {
> -             if (CPU_ISSET(i, &cpuset)) {
> +             if (odp_cpumask_isset(&odp_global_data.worker_cpus, i)) {
>                       odp_cpumask_set(mask, i);
>                       cpu++;
>               }
>       }
>  
> -     if (odp_cpumask_isset(mask, 0))
> -             ODP_DBG("\n\tCPU0 will be used for both control and worker 
> threads,\n"
> -                     "\tthis will likely have a performance impact on the 
> worker thread.\n");
> +     odp_cpumask_and(&overlap, mask, &odp_global_data.control_cpus);
> +     if (odp_cpumask_count(&overlap))
> +             ODP_DBG("\n\tWorker CPUs overlap with control CPUs...\n"
> +                     "\tthis will likely have a performance impact on the 
> worker threads.\n");
>  
>       return cpu;
>  }
>  
> -int odp_cpumask_default_control(odp_cpumask_t *mask, int num ODP_UNUSED)
> +int odp_cpumask_default_control(odp_cpumask_t *mask, int num)
>  {
> +     odp_cpumask_t overlap;
> +     int cpu, i;
> +
> +     /*
> +      * If no user supplied number then default to one control CPU.
> +      */
> +     if (0 == num) {
> +             num = 1;
> +     } else {
> +             /*
> +              * If user supplied number is too large, then attempt
> +              * to use all installed control CPUs
> +              */
> +             cpu = odp_cpumask_count(&odp_global_data.control_cpus);
> +             if (cpu < num)
> +                     num = cpu;
> +     }
> +
> +     /* build the mask, allocating upwards from lowest numbered CPU */
>       odp_cpumask_zero(mask);
> -     /* By default all control threads on CPU 0 */
> -     odp_cpumask_set(mask, 0);
> -     return 1;
> +     for (cpu = 0, i = 0; i < CPU_SETSIZE && cpu < num; i++) {
> +             if (odp_cpumask_isset(&odp_global_data.control_cpus, i)) {
> +                     odp_cpumask_set(mask, i);
> +                     cpu++;
> +             }
> +     }
> +
> +     odp_cpumask_and(&overlap, mask, &odp_global_data.worker_cpus);
> +     if (odp_cpumask_count(&overlap))
> +             ODP_DBG("\n\tControl CPUs overlap with worker CPUs...\n"
> +                     "\tthis will likely have a performance impact on the 
> worker threads.\n");
> +
> +     return cpu;
>  }
>  
>  int odp_cpumask_all_available(odp_cpumask_t *mask)
>  {
> -     odp_cpumask_t mask_work, mask_ctrl;
> -
> -     odp_cpumask_default_worker(&mask_work, 0);
> -     odp_cpumask_default_control(&mask_ctrl, 0);
> -     odp_cpumask_or(mask, &mask_work, &mask_ctrl);
> +     odp_cpumask_or(mask, &odp_global_data.worker_cpus,
> +                    &odp_global_data.control_cpus);
>  
>       return odp_cpumask_count(mask);
>  }
> diff --git a/platform/linux-generic/odp_init.c 
> b/platform/linux-generic/odp_init.c
> index f30c310..d1b86ad 100644
> --- a/platform/linux-generic/odp_init.c
> +++ b/platform/linux-generic/odp_init.c
> @@ -3,11 +3,9 @@
>   *
>   * SPDX-License-Identifier:     BSD-3-Clause
>   */
> -
>  #include <odp/api/init.h>
> -#include <odp_internal.h>
> -#include <odp/api/debug.h>
>  #include <odp_debug_internal.h>
> +#include <odp/api/debug.h>
>  
>  struct odp_global_data_s odp_global_data;
>  
> @@ -26,6 +24,11 @@ int odp_init_global(odp_instance_t *instance,
>                       odp_global_data.abort_fn = params->abort_fn;
>       }
>  
> +     if (odp_cpumask_init_global()) {
> +             ODP_ERR("ODP cpumask init failed.\n");
> +             goto init_failed;
> +     }

For consistency, you should add a new init_stage value here and a matching odp_cpumask_term_global() call in odp_term_global()

> +
>       if (odp_time_init_global()) {
>               ODP_ERR("ODP time init failed.\n");
>               goto init_failed;
> diff --git a/platform/linux-generic/odp_system_info.c 
> b/platform/linux-generic/odp_system_info.c
> index 395b274..1ecf18a 100644
> --- a/platform/linux-generic/odp_system_info.c
> +++ b/platform/linux-generic/odp_system_info.c
> @@ -30,21 +30,12 @@
>  
>  #define HUGE_PAGE_DIR "/sys/kernel/mm/hugepages"
>  
> -
>  /*
> - * Report the number of CPUs in the affinity mask of the main thread
> + * Report the number of logical CPUs detected at boot time
>   */
>  static int sysconf_cpu_count(void)
>  {
> -     cpu_set_t cpuset;
> -     int ret;
> -
> -     ret = pthread_getaffinity_np(pthread_self(),
> -                                  sizeof(cpuset), &cpuset);
> -     if (ret != 0)
> -             return 0;
> -
> -     return CPU_COUNT(&cpuset);
> +     return odp_global_data.num_cpus_installed;
>  }
>  
>  #if defined __x86_64__ || defined __i386__ || defined __OCTEON__ || \
> -- 
> 1.9.1
> 


-- 
Bill Fischofer <[email protected]>
_______________________________________________
lng-odp mailing list
[email protected]
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to