Hi,

Couple of generic things first:

* After Ola's core->cpu rename patch, odp_coremask is in practice the last 
thing referring to "core" instead of "cpu". So maybe we should rename 
"coremask" to "cpumask" or "cpuset". Which would look better odp_cpumask_set() 
vs. odp_cpuset_set()? E.g. odp_cpu_set_set() would not work well. May be 
odp_cpumask_xxx() works the best.

* About build problems you experienced. Would it help if odp_coremask_t is 
defined in odp_platform.h? Also e.g.  odp_linux.c is already including 
<sched.h> like this ...

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>

* The first param in each function should be the (destination) coremask. The 
current param order in the API matches with linux cpuset, but not with other 
ODP APIs where the target object is (usually) the first param. E.g.

void odp_coremask_set(odp_coremask_t *mask, int core)
void odp_coremask_clr(odp_coremask_t *mask, int core)
...



> -----Original Message-----
> From: [email protected] [mailto:lng-odp-
> [email protected]] On Behalf Of ext Robbie King
> Sent: Saturday, January 10, 2015 5:57 PM
> To: [email protected]
> Subject: [lng-odp] [RFCv1 1/2] Convert linux thread/proc helpers to use
> core mask
> 
> Signed-off-by: Robbie King <[email protected]>
> ---
>  helper/include/odph_linux.h                       |  34 +++--
>  platform/linux-generic/include/api/odp_coremask.h | 128 ++++++++++++-----
> -
>  platform/linux-generic/odp_coremask.c             | 157 +++++++++++------
> -----
>  platform/linux-generic/odp_linux.c                |  95 +++++++++----
>  4 files changed, 257 insertions(+), 157 deletions(-)
> 
> diff --git a/helper/include/odph_linux.h b/helper/include/odph_linux.h
> index 4ed8bbe..fc7b88e 100644
> --- a/helper/include/odph_linux.h
> +++ b/helper/include/odph_linux.h
> @@ -26,6 +26,8 @@ extern "C" {
>  #include <pthread.h>
>  #include <sys/types.h>
> 
> +#include <odp.h>
> +
>  /** Linux pthread state information */
>  typedef struct {
>       pthread_t      thread; /**< Pthread ID */
> @@ -43,21 +45,34 @@ typedef struct {
> 
> 
>  /**
> + * Creates and pthread/process coremask
> + *
> + * @todo This code should move to somewhere test/example specific
> + *       as opposed to a helper routine.
> + *
> + * Creates coremask based on starting CPU and a count, actual
> + * values returned
> + *
> + * @param num           Number of threads to create, zero for all
> available
> + * @param first_cpu     First physical CPU
> + * @param mask          Coremask to initialize
> + * @return Actual values for "num" and "first_cpu" used to create mask
> + */
> +void odph_linux_coremask_create(int *num, int *first_cpu, odp_coremask_t
> *mask);

As you pointed, this could be still useful feature, but I'd modify it into this 
(part of proper API).

/**
 * Default coremask for a number of threads
 * 
 * @param mask[out]  Coremask to initialize
 * @param num        Number of threads to create, zero for all available cores
 *
 * @return Number of cores in mask, or zero on failure
 */
int odp_linux_coremask_default(odp_coremask_t *mask, int num);


User can then find out the "first core" by calling odp_coremask_first(mask) (== 
odp_coremask_ffs()). This mask may not be always optimal configuration in a 
system, but it's the "default" == easy way to get started without user input.


> +
> +/**
>   * Creates and launches pthreads
>   *
> - * Creates, pins and launches num threads to separate CPU's starting from
> - * first_cpu.
> + * Creates, pins and launches threads to separate CPU's based on the
> coremask.
>   *
>   * @param thread_tbl    Thread table
> - * @param num           Number of threads to create
> - * @param first_cpu     First physical CPU
> + * @param mask          Coremask
>   * @param start_routine Thread start function
>   * @param arg           Thread argument
>   */
>  void odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
> -                           int num, int first_cpu,
> -                           void *(*start_routine) (void *), void *arg);
> -
> +                             odp_coremask_t *mask,
> +                             void *(*start_routine) (void *), void *arg);
> 
>  /**
>   * Waits pthreads to exit
> @@ -91,14 +106,13 @@ int odph_linux_process_fork(odph_linux_process_t
> *proc, int cpu);
>   * Forks and sets CPU affinity for child processes
>   *
>   * @param proc_tbl      Process state info table (for output)
> - * @param num           Number of processes to create
> - * @param first_cpu     Destination CPU for the first process
> + * @param mask          Coremask of processes to create
>   *
>   * @return On success: 1 for the parent, 0 for the child
>   *         On failure: -1 for the parent, -2 for the child
>   */
>  int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
> -                           int num, int first_cpu);
> +                           odp_coremask_t *mask);
> 
> 
>  /**
> diff --git a/platform/linux-generic/include/api/odp_coremask.h
> b/platform/linux-generic/include/api/odp_coremask.h
> index d4172fa..1f8b7b9 100644
> --- a/platform/linux-generic/include/api/odp_coremask.h
> +++ b/platform/linux-generic/include/api/odp_coremask.h
> @@ -18,7 +18,8 @@
>  extern "C" {
>  #endif
> 
> -
> +#include <string.h>
> +#include <sched.h>
> 
>  #include <odp_std_types.h>
> 
> @@ -27,21 +28,15 @@ extern "C" {
>   *  @{
>   */
> 
> -/** @internal */
> -#define ODP_COREMASK_SIZE_U64  1
> -
>  /**
>   * Core mask
>   *
>   * Don't access directly, use access functions.
>   */
>  typedef struct odp_coremask_t {
> -     uint64_t _u64[ODP_COREMASK_SIZE_U64]; /**< @private Mask*/
> -
> +     cpu_set_t set; /**< @private Mask*/
>  } odp_coremask_t;
> 
> -
> -
>  /**
>   * Add core mask bits from a string
>   *
> @@ -64,36 +59,13 @@ void odp_coremask_from_str(const char *str,
> odp_coremask_t *mask);
>   */
>  void odp_coremask_to_str(char *str, int len, const odp_coremask_t *mask);
> 
> -
> -/**
> - * Add core mask bits from a u64 array
> - *
> - * In the array core #0 is located at the least significant bit
> - * of the first word (u64[0] = 0x1).
> - *
> - * Examples
> - * core 0:  u64[0] = 0x1
> - * core 1:  u64[0] = 0x2
> - * ...
> - * core 63: u64[0] = 0x8000 0000 0000 0000
> - * core 64: u64[0] = 0x0, u64[1] = 0x1
> - * core 65: u64[0] = 0x0, u64[1] = 0x2
> - *
> - * @param u64    An array of u64 bit words
> - * @param num    Number of u64 words in the array
> - * @param mask   Core mask to modify
> - *
> - * @note Supports currently only core indexes upto 63
> - */
> -void odp_coremask_from_u64(const uint64_t *u64, int num, odp_coremask_t
> *mask);
> -
>  /**
>   * Clear entire mask
>   * @param mask       Core mask to flush with zero value
>   */
>  static inline void odp_coremask_zero(odp_coremask_t *mask)
>  {
> -     mask->_u64[0] = 0;
> +     CPU_ZERO(&mask->set);
>  }
> 
>  /**
> @@ -101,14 +73,20 @@ static inline void odp_coremask_zero(odp_coremask_t
> *mask)
>   * @param core  Core number
>   * @param mask  add core number in core mask
>   */
> -void odp_coremask_set(int core, odp_coremask_t *mask);
> +static inline void odp_coremask_set(int core, odp_coremask_t *mask)
> +{
> +     CPU_SET(core, &mask->set);
> +}
> 
>  /**
>   * Remove core from mask
>   * @param core  Core number
>   * @param mask  clear core number from core mask
>   */
> -void odp_coremask_clr(int core, odp_coremask_t *mask);
> +static inline void odp_coremask_clr(int core, odp_coremask_t *mask)
> +{
> +     CPU_CLR(core, &mask->set);
> +}
> 
>  /**
>   * Test if core is a member of mask
> @@ -116,16 +94,20 @@ void odp_coremask_clr(int core, odp_coremask_t
> *mask);
>   * @param mask  Core mask to check if core num set or not
>   * @return      non-zero if set otherwise 0
>   */
> -int odp_coremask_isset(int core, const odp_coremask_t *mask);
> +static inline int odp_coremask_isset(int core, const odp_coremask_t
> *mask)
> +{
> +     return CPU_ISSET(core, &mask->set);
> +}
> 
>  /**
>   * Count number of cores in mask
>   * @param mask  Core mask
>   * @return coremask count
>   */
> -int odp_coremask_count(const odp_coremask_t *mask);
> -
> -
> +static inline int odp_coremask_count(const odp_coremask_t *mask)
> +{
> +     return CPU_COUNT(&mask->set);
> +}
> 
>  /**
>   * Logical AND over two source masks.
> @@ -137,7 +119,7 @@ int odp_coremask_count(const odp_coremask_t *mask);
>  static inline void odp_coremask_and(odp_coremask_t *dest, odp_coremask_t
> *src1,
>                                   odp_coremask_t *src2)
>  {
> -     dest->_u64[0] = src1->_u64[0] & src2->_u64[0];
> +     CPU_AND(&dest->set, &src1->set, &src2->set);
>  }
> 
>  /**
> @@ -150,7 +132,7 @@ static inline void odp_coremask_and(odp_coremask_t
> *dest, odp_coremask_t *src1,
>  static inline void odp_coremask_or(odp_coremask_t *dest, odp_coremask_t
> *src1,
>                                  odp_coremask_t *src2)
>  {
> -     dest->_u64[0] = src1->_u64[0] | src2->_u64[0];
> +     CPU_OR(&dest->set, &src1->set, &src2->set);
>  }
> 
>  /**
> @@ -163,7 +145,7 @@ static inline void odp_coremask_or(odp_coremask_t
> *dest, odp_coremask_t *src1,
>  static inline void odp_coremask_xor(odp_coremask_t *dest, odp_coremask_t
> *src1,
>                                   odp_coremask_t *src2)
>  {
> -     dest->_u64[0] = src1->_u64[0] ^ src2->_u64[0];
> +     CPU_XOR(&dest->set, &src1->set, &src2->set);
>  }
> 
>  /**
> @@ -172,7 +154,69 @@ static inline void odp_coremask_xor(odp_coremask_t
> *dest, odp_coremask_t *src1,
>  static inline int odp_coremask_equal(odp_coremask_t *mask1,
>                                    odp_coremask_t *mask2)
>  {
> -     return (mask1->_u64[0] == mask2->_u64[0]);
> +     return CPU_EQUAL(&mask1->set, &mask2->set);
> +}
> +
> +/**
> + * Copy a core mask
> + */
> +static inline void odp_coremask_copy(odp_coremask_t *dest, odp_coremask_t
> *src)
> +{
> +     memcpy(&dest->set, &src->set, sizeof(src->set));
> +}
> +
> +/**
> + * Find first bit set in mask
> + *
> + * @return core else -1 if no bits set in coremask
> + */
> +static inline int odp_coremask_ffs(const odp_coremask_t *mask)

These are good additions. I'd rename this to odp_coremask_first() ...

> +{
> +     int core;
> +
> +     for (core = 0; core < CPU_SETSIZE; core++)
> +             if (odp_coremask_isset(core, mask))
> +                     return core;
> +     return -1;
> +}
> +
> +/**
> + * Find last bit set in mask
> + *
> + * @return core else -1 if no bits set in coremask
> + */
> +static inline int odp_coremask_fls(const odp_coremask_t *mask)

... and this odp_coremask_last().

> +{
> +     int core;
> +
> +     for (core = CPU_SETSIZE - 1; core >= 0; core--)
> +             if (odp_coremask_isset(core, mask))
> +                     return core;
> +     return -1;
> +}
> +
> +/**
> + * Find next core in mask
> + *
> + * Finds the next core (using find first set) in the source core mask,
> + * sets that core in the destination mask and clears it in the source
> mask.
> + *
> + * @param src         Coremask to find next core in
> + * @param dst         Coremask to move the found core to
> + * @return core found else -1
> + */
> +static inline
> +int odp_coremask_next_core(odp_coremask_t *dst, odp_coremask_t *src)
> +{
> +     int cpu = -1;
> +
> +     odp_coremask_zero(dst);
> +     cpu = odp_coremask_ffs(src);
> +     if (cpu >= 0) {
> +             odp_coremask_clr(cpu, src);
> +             odp_coremask_set(cpu, dst);
> +     }
> +     return cpu;
>  }


Again good addition, but could this be renamed and defined like this:


/**
* Find next core in mask
*
* @param mask   Coremask to find next core in
* @param core   Current core id
*
* @return Next core id found, else -1
*/
int odp_coremask_next(odp_coremask_t *mask, int core);


An user would iterate through a mask like this:

int core = odp_coremask_first(&mask);

do {
  /* do something with 'core' */
  
} while ((core = odp_coremask_next(&mask, core)) >= 0);


> 
>  /**
> diff --git a/platform/linux-generic/odp_coremask.c b/platform/linux-
> generic/odp_coremask.c
> index 54cd333..d987ee7 100644
> --- a/platform/linux-generic/odp_coremask.c
> +++ b/platform/linux-generic/odp_coremask.c
> @@ -10,100 +10,99 @@
>  #include <stdlib.h>
>  #include <string.h>
> 
> -#define MAX_CORE_NUM 64
> -
> -
> -void odp_coremask_from_str(const char *str, odp_coremask_t *mask)
> +void odp_coremask_from_str(const char *str_in, odp_coremask_t *mask)
>  {
> -     uint64_t mask_u64;
> -
> -     if (strlen(str) > 18) {
> -             /* more than 64 bits */
> -             return;
> +     cpu_set_t cpuset;
> +     const char *str = str_in;
> +     const char *p;
> +     int core = 0;
> +     int len = strlen(str);
> +
> +     CPU_ZERO(&cpuset);
> +     odp_coremask_zero(mask);
> +
> +     /* Strip leading "0x"/"0X" if present and verify length */
> +     if ((len >= 2) && ((str[1] == 'x') || (str[1] == 'X'))) {
> +             str += 2;
> +             len -= 2;
>       }
> +     if (!len)
> +             return;
> 
> -     mask_u64 = strtoull(str, NULL, 16);
> +     /* Walk string from LSB setting core bits */
> +     for (p = str + len - 1; (len > 0) && (core < CPU_SETSIZE); p--, len-
> -) {
> +             char c = *p;
> +             int value;
> +             int idx;
> +
> +             /* Convert hex nibble, abort when invalid value found */
> +             if ((c >= '0') && (c <= '9'))
> +                     value = c - '0';
> +             else if ((c >= 'A') && (c <= 'F'))
> +                     value = c - 'A' + 10;
> +             else if ((c >= 'a') && (c <= 'f'))
> +                     value = c - 'a' + 10;
> +             else
> +                     return;
> +
> +             /* Walk converted nibble and set bits in mask */
> +             for (idx = 0; idx < 4; idx++, core++)
> +                     if (value & (1 << idx))
> +                             CPU_SET(core, &cpuset);
> +     }
> 
> -     odp_coremask_from_u64(&mask_u64, 1, mask);
> +     /* Copy the computed mask */
> +     memcpy(&mask->set, &cpuset, sizeof(cpuset));
>  }
> 
> -
>  void odp_coremask_to_str(char *str, int len, const odp_coremask_t *mask)
>  {
> -     int ret;
> -
> -     ret = snprintf(str, len, "0x%"PRIx64"", mask->_u64[0]);
> -
> -     if (ret >= 0 && ret < len) {
> -             /* force trailing zero */
> -             str[len-1] = '\0';
> -     }
> -}
> -
> -
> -void odp_coremask_from_u64(const uint64_t *u64, int num, odp_coremask_t
> *mask)
> -{
> -     int i;
> -
> -     if (num > ODP_COREMASK_SIZE_U64) {
> -             /* force max size */
> -             num = ODP_COREMASK_SIZE_U64;
> -     }
> -
> -     for (i = 0; i < num; i++)
> -             mask->_u64[0] |= u64[i];
> -}
> +     char *p = str;
> +     int core = odp_coremask_fls(mask);
> +     int nibbles;
> +     int value;
> 
> -void odp_coremask_set(int core, odp_coremask_t *mask)
> -{
> -     /* should not be more than 63
> -      * core no. should be from 0..63= 64bit
> -      */
> -     if (core >= MAX_CORE_NUM) {
> -             ODP_ERR("invalid core count\n");
> +     /* Quickly handle bad string length or empty mask */
> +     if (len <= 0)
> +             return;
> +     *str = 0;
> +     if (core < 0) {
> +             if (len >= 4)
> +                     strcpy(str, "0x0");
>               return;
>       }
> 
> -     mask->_u64[0] |=  (1ULL << core);
> -}
> +     /* Compute number nibbles in coremask that have bits set */
> +     nibbles = (core / 4) + 1;
> 
> -void odp_coremask_clr(int core, odp_coremask_t *mask)
> -{
> -     /* should not be more than 63
> -      * core no. should be from 0..63= 64bit
> -      */
> -     if (core >= MAX_CORE_NUM) {
> -             ODP_ERR("invalid core count\n");
> +     /* Verify minimum space (account for "0x" and termination) */
> +     if (len < (3 + nibbles))
>               return;
> -     }
> -
> -     mask->_u64[0] &= ~(1ULL << core);
> -}
> 
> +     /* Prefix */
> +     *p++ = '0';
> +     *p++ = 'x';
> 
> -int odp_coremask_isset(int core, const odp_coremask_t *mask)
> -{
> -     /* should not be more than 63
> -      * core no. should be from 0..63= 64bit
> +     /*
> +      * Now we can scan the cores down to zero and
> +      * build the string one nibble at a time
>        */
> -     if (core >= MAX_CORE_NUM) {
> -             ODP_ERR("invalid core count\n");
> -             return -1;
> -     }
> -
> -     return (mask->_u64[0] >> core) & 1;
> -}
> -
> -int odp_coremask_count(const odp_coremask_t *mask)
> -{
> -     uint64_t coremask = mask->_u64[0];
> -     int cnt = 0;
> -
> -     while (coremask != 0) {
> -             coremask >>= 1;
> -             if (coremask & 1)
> -                     cnt++;
> -     }
> -
> -     return cnt;
> +     value = 0;
> +     do {
> +             /* Set bit to go into the current nibble */
> +             if (CPU_ISSET(core, &mask->set))
> +                     value |= 1 << (core % 4);
> +
> +             /* If we are on a nibble boundary flush value to string */
> +             if (0 == (core % 4)) {
> +                     if (value < 0xA)
> +                             *p++ = '0' + value;
> +                     else
> +                             *p++ = 'A' + value - 0xA;
> +                     value = 0;
> +             }
> +     } while (core--);
> +
> +     /* Terminate the string */
> +     *p++ = 0;
>  }
> diff --git a/platform/linux-generic/odp_linux.c b/platform/linux-
> generic/odp_linux.c
> index 79192cf..1b343b4 100644
> --- a/platform/linux-generic/odp_linux.c
> +++ b/platform/linux-generic/odp_linux.c
> @@ -25,6 +25,7 @@
>  #include <odp_system_info.h>
>  #include <odp_debug_internal.h>
> 
> +#define MAX_WORKERS 32
> 
>  typedef struct {
>       void *(*start_routine) (void *);
> @@ -33,6 +34,40 @@ typedef struct {
>  } odp_start_args_t;
> 
> 
> +void odph_linux_coremask_create(int *num, int *first_cpu, odp_coremask_t
> *mask)
> +{
> +     int i;
> +     int cpu_count;
> +
> +     cpu_count = odp_sys_cpu_count();
> +
> +     /*
> +      * If no user supplied number or it's too large, then attempt
> +      * to use all CPUs, however use no more than MAX_WORKERS.
> +      */
> +     if (0 == *num)
> +             *num = cpu_count;
> +     if (cpu_count < *num)
> +             *num = cpu_count;
> +     if (MAX_WORKERS < *num)
> +             *num = MAX_WORKERS;
> +
> +     /*
> +      * Always force "first_cpu" to a valid core
> +      */
> +     if (*first_cpu >= cpu_count)
> +             *first_cpu = cpu_count - 1;
> +
> +     /* Build the mask */
> +     odp_coremask_zero(mask);
> +     for (i = 0; i < *num; i++) {
> +             int cpu;
> +
> +             cpu = (*first_cpu + i) % cpu_count;
> +             odp_coremask_set(cpu, mask);
> +     }
> +}
> +
>  static void *odp_run_start_routine(void *arg)
>  {
>       odp_start_args_t *start_args = arg;
> @@ -55,33 +90,39 @@ static void *odp_run_start_routine(void *arg)
>  }
> 
> 
> -void odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl, int num,
> -                            int first_cpu,
> +void odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
> +                            odp_coremask_t *mask_in,
>                              void *(*start_routine) (void *), void *arg)
>  {
>       int i;
> -     cpu_set_t cpu_set;
> +     int num;
> +     odp_coremask_t mask;
>       odp_start_args_t *start_args;
>       int cpu_count;
>       int cpu;
> 
> -     cpu_count = odp_sys_cpu_count();
> +     odp_coremask_copy(&mask, mask_in);
> +     num = odp_coremask_count(&mask);
> 
> -     assert((first_cpu >= 0) && (first_cpu < cpu_count));
> -     assert((num >= 0) && (num <= cpu_count));
> +     memset(thread_tbl, 0, num * sizeof(odph_linux_process_t));


BUG?? odph_linux_** process ** _t



-Petri



_______________________________________________
lng-odp mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to