On Wed, 30 Mar 2016 17:09:54 -0500 "Gary S. Robertson" <[email protected]> wrote:
> sched_getaffinity() and pthread_getaffinity_np() do not return
> an accurate mask of all CPUs in the machine when the kernel
> is compiled with NO_HZ_FULL support.
>
> See Linaro BUG 2027 for details.
> https://bugs.linaro.org/show_bug.cgi?id=2027
>
> This code replaces the 'getaffinity' based CPU discovery logic
> -and- removes any exclusivity between default control and worker
> cpumasks, based on an assumption that external cpumask specifications
> will segregate CPUs if needed.
>
> The results of these changes, which address BUG 2027, are:
> (1) all CPUs known to the kernel at boot time are considered
>     for use by ODP regardless of the default CPU affinity masks
>     set by the kernel scheduler,
> (2) the default control and worker cpumasks are given reasonable
>     values based on the set of installed CPUs
>
> Also - this code:
> (a) adds control and worker cpumasks to the linux-generic global data
> (b) adds logic to odp_cpumask.c to initialize these masks
> (c) calls the new cpumask initialization logic from odp_init_global()
> (d) reduces odp_cpumask_default_control() and
>     odp_cpumask_default_worker() to use the content of these new
>     cpumasks without modification.
>
> These changes provide prerequisite infrastructure for pending changes
> which will allow ODP to accept cpumasks passed in
> from external entities such as a provisioning service.
>
> Signed-off-by: Gary S. Robertson <[email protected]>
> ---
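The underlying discrepancy is easy to demonstrate before applying the patch: on a kernel booted with nohz_full= isolation, the calling thread's affinity mask and the sysfs CPU directories report different counts. The standalone sketch below prints both numbers; the helper name sysfs_cpu_count() is illustrative only and not part of the patch:

#define _GNU_SOURCE
#include <dirent.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int sysfs_cpu_count(void)
{
	DIR *d = opendir("/sys/devices/system/cpu");
	struct dirent *dir;
	char *endptr;
	int count = 0;

	if (!d)
		return -1;
	while ((dir = readdir(d)) != NULL) {
		/* Count only "cpuN" entries; skip "cpufreq", "cpuidle", etc. */
		if (strncmp(dir->d_name, "cpu", 3))
			continue;
		strtol(dir->d_name + 3, &endptr, 10);
		if (endptr != dir->d_name + 3 && *endptr == '\0')
			count++;
	}
	closedir(d);
	return count;
}

int main(void)
{
	cpu_set_t set;

	if (sched_getaffinity(0, sizeof(set), &set) == 0)
		printf("affinity mask: %d CPUs\n", CPU_COUNT(&set));
	printf("sysfs entries: %d CPUs\n", sysfs_cpu_count());
	return 0;
}

On a kernel without CPU isolation the two counts should agree; with NO_HZ_FULL isolation in effect the affinity count is typically smaller, which is exactly what breaks the old getaffinity-based discovery.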
>  platform/linux-generic/include/odp_internal.h |   7 ++
>  platform/linux-generic/odp_cpumask.c          | 126 ++++++++++++++++++++++++++
>  platform/linux-generic/odp_cpumask_task.c     |  70 +++++++++-----
>  platform/linux-generic/odp_init.c             |   9 +-
>  platform/linux-generic/odp_system_info.c      |  13 +--
>  5 files changed, 187 insertions(+), 38 deletions(-)
>
> diff --git a/platform/linux-generic/include/odp_internal.h b/platform/linux-generic/include/odp_internal.h
> index 679d9d2..567b1fb 100644
> --- a/platform/linux-generic/include/odp_internal.h
> +++ b/platform/linux-generic/include/odp_internal.h
> @@ -18,6 +18,7 @@ extern "C" {
>  #endif
>
>  #include <odp/api/init.h>
> +#include <odp/api/cpumask.h>
>  #include <odp/api/thread.h>
>  #include <stdio.h>
>
> @@ -40,6 +41,9 @@ struct odp_global_data_s {
>  	odp_log_func_t log_fn;
>  	odp_abort_func_t abort_fn;
>  	odp_system_info_t system_info;
> +	odp_cpumask_t control_cpus;
> +	odp_cpumask_t worker_cpus;
> +	int num_cpus_installed;
>  };
>
>  enum init_stage {
> @@ -65,6 +69,9 @@ extern struct odp_global_data_s odp_global_data;
>  int _odp_term_global(enum init_stage stage);
>  int _odp_term_local(enum init_stage stage);
>
> +int odp_cpumask_init_global(void);
> +int odp_cpumask_term_global(void);
> +
>  int odp_system_info_init(void);
>  int odp_system_info_term(void);
>
> diff --git a/platform/linux-generic/odp_cpumask.c b/platform/linux-generic/odp_cpumask.c
> index 4249f1d..320ca8e 100644
> --- a/platform/linux-generic/odp_cpumask.c
> +++ b/platform/linux-generic/odp_cpumask.c
> @@ -15,6 +15,10 @@
>  #include <stdlib.h>
>  #include <string.h>
>
> +#include <dirent.h>
> +#include <errno.h>
> +#include <sys/types.h>
> +
>  /** @internal Compile time assert */
>  _ODP_STATIC_ASSERT(CPU_SETSIZE >= ODP_CPUMASK_SIZE,
>  		   "ODP_CPUMASK_SIZE__SIZE_ERROR");
> @@ -208,3 +212,125 @@ int odp_cpumask_next(const odp_cpumask_t *mask, int cpu)
>  			return cpu;
>  	return -1;
>  }
> +
> +/*
> + * This function obtains system information specifying which CPUs are
> + * available at boot time. These data are then used to produce cpumasks of
> + * configured CPUs without concern over isolation support.
> + */
> +static int get_installed_cpus(void)
> +{
> +	char *numptr;
> +	char *endptr;
> +	long int cpu_idnum;
> +	DIR *d;
> +	struct dirent *dir;
> +
> +	/* Clear the global cpumasks for control and worker CPUs */
> +	odp_cpumask_zero(&odp_global_data.control_cpus);
> +	odp_cpumask_zero(&odp_global_data.worker_cpus);
> +
> +	/*
> +	 * Scan the /sysfs pseudo-filesystem for CPU info directories.
> +	 * There should be one subdirectory for each installed logical CPU
> +	 */
> +	d = opendir("/sys/devices/system/cpu");
> +	if (d) {
> +		while ((dir = readdir(d)) != NULL) {
> +			cpu_idnum = CPU_SETSIZE;
> +
> +			/*
> +			 * If the current directory entry doesn't represent
> +			 * a CPU info subdirectory then skip to the next entry.
> +			 */
> +			if (dir->d_type == DT_DIR) {
> +				if (!strncmp(dir->d_name, "cpu", 3)) {
> +					/*
> +					 * Directory name starts with "cpu"...
> +					 * Try to extract a CPU ID number
> +					 * from the remainder of the dirname.
> +					 */
> +					errno = 0;
> +					numptr = dir->d_name;
> +					numptr += 3;
> +					cpu_idnum = strtol(numptr, &endptr,
> +							   10);
> +					if (errno || (endptr == numptr))
> +						continue;
> +				} else {
> +					continue;
> +				}
> +			} else {
> +				continue;
> +			}
> +			/*
> +			 * If we get here the current directory entry specifies
> +			 * a CPU info subdir for the CPU indexed by cpu_idnum.
> +			 */
> +
> +			/* Track number of logical CPUs discovered */
> +			if (odp_global_data.num_cpus_installed <
> +			    (int)(cpu_idnum + 1))
> +				odp_global_data.num_cpus_installed =
> +					(int)(cpu_idnum + 1);
> +
> +			/* Add the CPU to our default cpumasks */
> +			odp_cpumask_set(&odp_global_data.control_cpus,
> +					(int)cpu_idnum);
> +			odp_cpumask_set(&odp_global_data.worker_cpus,
> +					(int)cpu_idnum);
> +		}
> +		closedir(d);
> +		return 0;
> +	} else {
> +		return -1;
> +	}
> +}
> +
> +/*
> + * This function creates reasonable default cpumasks for control and worker
> + * tasks from the set of CPUs available at boot time.
> + */
> +int odp_cpumask_init_global(void)
> +{
> +	odp_cpumask_t *control_mask = &odp_global_data.control_cpus;
> +	odp_cpumask_t *worker_mask = &odp_global_data.worker_cpus;
> +	int i;
> +	int retval = -1;
> +
> +	if (!get_installed_cpus()) {
> +		/* CPU 0 is only used for workers on uniprocessor systems */
> +		if (odp_global_data.num_cpus_installed > 1)
> +			odp_cpumask_clr(worker_mask, 0);
> +		/*
> +		 * If only one or two CPUs installed, use CPU 0 for control.
> +		 * Otherwise leave it for the kernel and start with CPU 1.
> +		 */
> +		if (odp_global_data.num_cpus_installed < 3) {
> +			/*
> +			 * If only two CPUs, use CPU 0 for control and
> +			 * use CPU 1 for workers.
> +			 */
> +			odp_cpumask_clr(control_mask, 1);
> +		} else {
> +			/*
> +			 * If three or more CPUs, reserve CPU 0 for kernel,
> +			 * reserve CPU 1 for control, and
> +			 * reserve remaining CPUs for workers
> +			 */
> +			odp_cpumask_clr(control_mask, 0);
> +			odp_cpumask_clr(worker_mask, 1);
> +			for (i = 2; i < CPU_SETSIZE; i++) {
> +				if (odp_cpumask_isset(worker_mask, i))
> +					odp_cpumask_clr(control_mask, i);
> +			}
> +		}
> +		retval = 0;
> +	}
> +	return retval;
> +}
> +
> +int odp_cpumask_term_global(void)
> +{
> +	return 0;
> +}
> diff --git a/platform/linux-generic/odp_cpumask_task.c b/platform/linux-generic/odp_cpumask_task.c
> index dbedff2..10885ce 100644
> --- a/platform/linux-generic/odp_cpumask_task.c
> +++ b/platform/linux-generic/odp_cpumask_task.c
> @@ -14,53 +14,75 @@
>
>  int odp_cpumask_default_worker(odp_cpumask_t *mask, int num)
>  {
> -	int ret, cpu, i;
> -	cpu_set_t cpuset;
> -
> -	ret = pthread_getaffinity_np(pthread_self(),
> -				     sizeof(cpu_set_t), &cpuset);
> -	if (ret != 0)
> -		ODP_ABORT("failed to read CPU affinity value\n");
> -
> -	odp_cpumask_zero(mask);
> +	odp_cpumask_t overlap;
> +	int cpu, i;
>
>  	/*
>  	 * If no user supplied number or it's too large, then attempt
>  	 * to use all CPUs
>  	 */
> -	if (0 == num || CPU_SETSIZE < num)
> -		num = CPU_COUNT(&cpuset);
> +	cpu = odp_cpumask_count(&odp_global_data.worker_cpus);
> +	if (0 == num || cpu < num)
> +		num = cpu;
>
>  	/* build the mask, allocating down from highest numbered CPU */
> +	odp_cpumask_zero(mask);
>  	for (cpu = 0, i = CPU_SETSIZE - 1; i >= 0 && cpu < num; --i) {
> -		if (CPU_ISSET(i, &cpuset)) {
> +		if (odp_cpumask_isset(&odp_global_data.worker_cpus, i)) {
>  			odp_cpumask_set(mask, i);
>  			cpu++;
>  		}
>  	}
>
> -	if (odp_cpumask_isset(mask, 0))
> -		ODP_DBG("\n\tCPU0 will be used for both control and worker threads,\n"
> -			"\tthis will likely have a performance impact on the worker thread.\n");
> +	odp_cpumask_and(&overlap, mask, &odp_global_data.control_cpus);
> +	if (odp_cpumask_count(&overlap))
> +		ODP_DBG("\n\tWorker CPUs overlap with control CPUs...\n"
> +			"\tthis will likely have a performance impact on the worker threads.\n");
>
>  	return cpu;
>  }
>
> -int odp_cpumask_default_control(odp_cpumask_t *mask, int num ODP_UNUSED)
> +int odp_cpumask_default_control(odp_cpumask_t *mask, int num)
>  {
> +	odp_cpumask_t overlap;
> +	int cpu, i;
> +
> +	/*
> +	 * If no user supplied number then default to one control CPU.
> +	 */
> +	if (0 == num) {
> +		num = 1;
> +	} else {
> +		/*
> +		 * If user supplied number is too large, then attempt
> +		 * to use all installed control CPUs
> +		 */
> +		cpu = odp_cpumask_count(&odp_global_data.control_cpus);
> +		if (cpu < num)
> +			num = cpu;
> +	}
> +
> +	/* build the mask, allocating upwards from lowest numbered CPU */
>  	odp_cpumask_zero(mask);
> -	/* By default all control threads on CPU 0 */
> -	odp_cpumask_set(mask, 0);
> -	return 1;
> +	for (cpu = 0, i = 0; i < CPU_SETSIZE && cpu < num; i++) {
> +		if (odp_cpumask_isset(&odp_global_data.control_cpus, i)) {
> +			odp_cpumask_set(mask, i);
> +			cpu++;
> +		}
> +	}
> +
> +	odp_cpumask_and(&overlap, mask, &odp_global_data.worker_cpus);
> +	if (odp_cpumask_count(&overlap))
> +		ODP_DBG("\n\tControl CPUs overlap with worker CPUs...\n"
> +			"\tthis will likely have a performance impact on the worker threads.\n");
> +
> +	return cpu;
>  }
>
>  int odp_cpumask_all_available(odp_cpumask_t *mask)
>  {
> -	odp_cpumask_t mask_work, mask_ctrl;
> -
> -	odp_cpumask_default_worker(&mask_work, 0);
> -	odp_cpumask_default_control(&mask_ctrl, 0);
> -	odp_cpumask_or(mask, &mask_work, &mask_ctrl);
> +	odp_cpumask_or(mask, &odp_global_data.worker_cpus,
> +		       &odp_global_data.control_cpus);
>
>  	return odp_cpumask_count(mask);
>  }
> diff --git a/platform/linux-generic/odp_init.c b/platform/linux-generic/odp_init.c
> index f30c310..d1b86ad 100644
> --- a/platform/linux-generic/odp_init.c
> +++ b/platform/linux-generic/odp_init.c
> @@ -3,11 +3,9 @@
>   *
>   * SPDX-License-Identifier: BSD-3-Clause
>   */
> -
>  #include <odp/api/init.h>
> -#include <odp_internal.h>
> -#include <odp/api/debug.h>
>  #include <odp_debug_internal.h>
> +#include <odp/api/debug.h>
>
>  struct odp_global_data_s odp_global_data;
>
> @@ -26,6 +24,11 @@ int odp_init_global(odp_instance_t *instance,
>  		odp_global_data.abort_fn = params->abort_fn;
>  	}
>
> +	if (odp_cpumask_init_global()) {
> +		ODP_ERR("ODP cpumask init failed.\n");
> +		goto init_failed;
> +	}

For consistency, should add an init_stage here and in odp_term_global(). A rough sketch of what that might look like is below.
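(Sketch only, to illustrate the suggestion above; the stage name CPUMASK_INIT, its position in the enum, and the variable rc are assumptions, the existing stages and cases are elided, and none of this is compile-tested against the tree.)

/* odp_internal.h: add a stage for the new subsystem */
enum init_stage {
	NO_INIT,      /* No init stages completed */
	CPUMASK_INIT, /* Proposed: cpumask discovery completed */
	/* ... existing stages ... */
	ALL_INIT      /* All init stages completed */
};

/* odp_init.c: give the new stage a fall-through arm in _odp_term_global() */
int _odp_term_global(enum init_stage stage)
{
	int rc = 0;

	switch (stage) {
	case ALL_INIT:
		/* ... existing cases fall through to here ... */
	case CPUMASK_INIT:
		if (odp_cpumask_term_global()) {
			ODP_ERR("ODP cpumask term failed.\n");
			rc = -1;
		}
		/* Fall through */
	case NO_INIT:
		break;
	}

	return rc;
}

odp_init_global() would then record the stage after each successful step (e.g. set it to CPUMASK_INIT once odp_cpumask_init_global() returns 0) so the error path unwinds exactly what was initialized.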
> +
>  	if (odp_time_init_global()) {
>  		ODP_ERR("ODP time init failed.\n");
>  		goto init_failed;
> diff --git a/platform/linux-generic/odp_system_info.c b/platform/linux-generic/odp_system_info.c
> index 395b274..1ecf18a 100644
> --- a/platform/linux-generic/odp_system_info.c
> +++ b/platform/linux-generic/odp_system_info.c
> @@ -30,21 +30,12 @@
>
>  #define HUGE_PAGE_DIR "/sys/kernel/mm/hugepages"
>
> -
>  /*
> - * Report the number of CPUs in the affinity mask of the main thread
> + * Report the number of logical CPUs detected at boot time
>   */
>  static int sysconf_cpu_count(void)
>  {
> -	cpu_set_t cpuset;
> -	int ret;
> -
> -	ret = pthread_getaffinity_np(pthread_self(),
> -				     sizeof(cpuset), &cpuset);
> -	if (ret != 0)
> -		return 0;
> -
> -	return CPU_COUNT(&cpuset);
> +	return odp_global_data.num_cpus_installed;
>  }
>
>  #if defined __x86_64__ || defined __i386__ || defined __OCTEON__ || \
> --
> 1.9.1

--
Bill Fischofer <[email protected]>
