Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler

2017-06-21 Thread Ola Liljedahl

On 20/06/2017, 15:58, "Savolainen, Petri (Nokia - FI/Espoo)"
 wrote:

>> --- a/platform/linux-generic/include/odp_config_internal.h
>> +++ b/platform/linux-generic/include/odp_config_internal.h
>> @@ -7,9 +7,7 @@
>>  #ifndef ODP_CONFIG_INTERNAL_H_
>>  #define ODP_CONFIG_INTERNAL_H_
>> 
>> -#ifdef __cplusplus
>> -extern "C" {
>> -#endif
>> +#include 
>
>Why these configs need global visibility? This file should contain
>general configuration options.
>
>> 
>>  /*
>>   * Maximum number of pools
>> @@ -22,6 +20,13 @@ extern "C" {
>>  #define ODP_CONFIG_QUEUES 1024
>> 
>>  /*
>> + * Maximum queue depth. Maximum number of elements that can be stored
>>in
>> a
>> + * queue. This value is used only when the size is not explicitly
>> provided
>> + * during queue creation.
>> + */
>> +#define CONFIG_QUEUE_SIZE 4096
>> +
>> +/*
>>   * Maximum number of ordered locks per queue
>>   */
>>  #define CONFIG_QUEUE_MAX_ORD_LOCKS 4
>> @@ -120,7 +125,7 @@ extern "C" {
>>   *
>>   * This the the number of separate SHM areas that can be reserved
>> concurrently
>>   */
>> -#define ODPDRV_CONFIG_SHM_BLOCKS 48
>> +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS
>
>
>Is this change necessary? Increases driver memory usage for no reason?
The scalable scheduler should have anything to do with the drivers and
their shmem use. So I think this change is unnecessary and should be
reverted.

>
>
>> +
>> +#endif  /* ODP_SCHEDULE_SCALABLE_H */
>> diff --git 
>>a/platform/linux-generic/include/odp_schedule_scalable_config.h
>> b/platform/linux-generic/include/odp_schedule_scalable_config.h
>> new file mode 100644
>> index ..febf379b
>> --- /dev/null
>> +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h
>> @@ -0,0 +1,55 @@
>> +/* Copyright (c) 2017, ARM Limited
>> + * All rights reserved.
>> + *
>> + * SPDX-License-Identifier: BSD-3-Clause
>> + */
>> +
>> +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_
>> +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_
>> +
>> +/*
>> + * Default scaling factor for the scheduler group
>> + *
>> + * This scaling factor is used when the application creates a scheduler
>> + * group with no worker threads.
>> + */
>> +#define CONFIG_DEFAULT_XFACTOR 4
>> +
>> +/*
>> + * Default weight (in events) for WRR in scalable scheduler
>> + *
>> + * This controls the per-queue weight for WRR between queues of the
>>same
>> + * priority in the scalable scheduler
>> + * A higher value improves throughput while a lower value increases
>> fairness
>> + * and thus likely decreases latency
>> + *
>> + * If WRR is undesired, set the value to ~0 which will use the largest
>> possible
>> + * weight
>> + *
>> + * Note: an API for specifying this on a per-queue basis would be
>>useful
>> but is
>> + * not yet available
>> + */
>> +#define CONFIG_WRR_WEIGHT 64
>> +
>> +/*
>> + * Split queue producer/consumer metadata into separate cache lines.
>> + * This is beneficial on e.g. Cortex-A57 but not so much on A53.
>> + */
>> +#define CONFIG_SPLIT_PRODCONS
>> +
>> +/*
>> + * Use locks to protect queue (ring buffer) and scheduler state updates
>> + * On x86, this decreases overhead noticeably.
>> + */
>> +#ifndef __ARM_ARCH
>> +#define CONFIG_QSCHST_LOCK
>> +/* Keep all ring buffer/qschst data together when using locks */
>> +#undef CONFIG_SPLIT_PRODCONS
>> +#endif
>> +
>> +/*
>> + * Maximum number of ordered locks per queue.
>> + */
>> +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2
>
>
>There's already CONFIG_QUEUE_MAX_ORD_LOCKS 4, in the general config file.
>Should not add the same define twice (with different value).
Yes it is unnecessary for the scalable scheduler to use a separate define
for this.

>
>
>
>> +
>> +#endif  /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */
>> diff --git a/platform/linux-
>> generic/include/odp_schedule_scalable_ordered.h b/platform/linux-
>> generic/include/odp_schedule_scalable_ordered.h
>> new file mode 100644
>> index ..9f3acf7a
>> --- /dev/null
>> +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h
>> @@ -0,0 +1,132 @@
>> +/* Copyright (c) 2017, ARM Limited
>> + * All rights reserved.
>> + *
>> + * SPDX-License-Identifier: BSD-3-Clause
>> + */
>> +
>> +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H
>> +#define ODP_SCHEDULE_SCALABLE_ORDERED_H
>> +
>> +#include 
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include <_ishmpool_internal.h>
>
>> +
>> +/* Number of reorder contects in the reorder window.
>> + * Should be at least one per CPU.
>> + */
>> +#define RWIN_SIZE 32
>> +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power
>> of 2");
>> +
>> +#define NUM_OLOCKS 2
>
>Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE or something
>different with similar name ?
This is the define that is actually usedÅ 
There is an obvious need of a cleanup here.

>
>
>> diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux-
>> generic/odp_queue_if.c
>> index c91f00eb..d7471dfc 100644
>> --- 

Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler

2017-06-20 Thread Savolainen, Petri (Nokia - FI/Espoo)
> --- a/platform/linux-generic/include/odp_config_internal.h
> +++ b/platform/linux-generic/include/odp_config_internal.h
> @@ -7,9 +7,7 @@
>  #ifndef ODP_CONFIG_INTERNAL_H_
>  #define ODP_CONFIG_INTERNAL_H_
> 
> -#ifdef __cplusplus
> -extern "C" {
> -#endif
> +#include 

Why these configs need global visibility? This file should contain general 
configuration options. 

> 
>  /*
>   * Maximum number of pools
> @@ -22,6 +20,13 @@ extern "C" {
>  #define ODP_CONFIG_QUEUES 1024
> 
>  /*
> + * Maximum queue depth. Maximum number of elements that can be stored in
> a
> + * queue. This value is used only when the size is not explicitly
> provided
> + * during queue creation.
> + */
> +#define CONFIG_QUEUE_SIZE 4096
> +
> +/*
>   * Maximum number of ordered locks per queue
>   */
>  #define CONFIG_QUEUE_MAX_ORD_LOCKS 4
> @@ -120,7 +125,7 @@ extern "C" {
>   *
>   * This the the number of separate SHM areas that can be reserved
> concurrently
>   */
> -#define ODPDRV_CONFIG_SHM_BLOCKS 48
> +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS


Is this change necessary? Increases driver memory usage for no reason?


> +
> +#endif  /* ODP_SCHEDULE_SCALABLE_H */
> diff --git a/platform/linux-generic/include/odp_schedule_scalable_config.h
> b/platform/linux-generic/include/odp_schedule_scalable_config.h
> new file mode 100644
> index ..febf379b
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h
> @@ -0,0 +1,55 @@
> +/* Copyright (c) 2017, ARM Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier: BSD-3-Clause
> + */
> +
> +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_
> +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_
> +
> +/*
> + * Default scaling factor for the scheduler group
> + *
> + * This scaling factor is used when the application creates a scheduler
> + * group with no worker threads.
> + */
> +#define CONFIG_DEFAULT_XFACTOR 4
> +
> +/*
> + * Default weight (in events) for WRR in scalable scheduler
> + *
> + * This controls the per-queue weight for WRR between queues of the same
> + * priority in the scalable scheduler
> + * A higher value improves throughput while a lower value increases
> fairness
> + * and thus likely decreases latency
> + *
> + * If WRR is undesired, set the value to ~0 which will use the largest
> possible
> + * weight
> + *
> + * Note: an API for specifying this on a per-queue basis would be useful
> but is
> + * not yet available
> + */
> +#define CONFIG_WRR_WEIGHT 64
> +
> +/*
> + * Split queue producer/consumer metadata into separate cache lines.
> + * This is beneficial on e.g. Cortex-A57 but not so much on A53.
> + */
> +#define CONFIG_SPLIT_PRODCONS
> +
> +/*
> + * Use locks to protect queue (ring buffer) and scheduler state updates
> + * On x86, this decreases overhead noticeably.
> + */
> +#ifndef __ARM_ARCH
> +#define CONFIG_QSCHST_LOCK
> +/* Keep all ring buffer/qschst data together when using locks */
> +#undef CONFIG_SPLIT_PRODCONS
> +#endif
> +
> +/*
> + * Maximum number of ordered locks per queue.
> + */
> +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2


There's already CONFIG_QUEUE_MAX_ORD_LOCKS 4, in the general config file. 
Should not add the same define twice (with different value).



> +
> +#endif  /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */
> diff --git a/platform/linux-
> generic/include/odp_schedule_scalable_ordered.h b/platform/linux-
> generic/include/odp_schedule_scalable_ordered.h
> new file mode 100644
> index ..9f3acf7a
> --- /dev/null
> +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h
> @@ -0,0 +1,132 @@
> +/* Copyright (c) 2017, ARM Limited
> + * All rights reserved.
> + *
> + * SPDX-License-Identifier: BSD-3-Clause
> + */
> +
> +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H
> +#define ODP_SCHEDULE_SCALABLE_ORDERED_H
> +
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include <_ishmpool_internal.h>

> +
> +/* Number of reorder contects in the reorder window.
> + * Should be at least one per CPU.
> + */
> +#define RWIN_SIZE 32
> +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power
> of 2");
> +
> +#define NUM_OLOCKS 2

Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE or something different 
with similar name ?


> diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux-
> generic/odp_queue_if.c
> index c91f00eb..d7471dfc 100644
> --- a/platform/linux-generic/odp_queue_if.c
> +++ b/platform/linux-generic/odp_queue_if.c
> @@ -6,11 +6,19 @@
> 
>  #include 
> 
> +extern const queue_api_t queue_scalable_api;
> +extern const queue_fn_t queue_scalable_fn;
> +
>  extern const queue_api_t queue_default_api;
>  extern const queue_fn_t queue_default_fn;
> 
> +#ifdef ODP_SCHEDULE_SCALABLE
> +const queue_api_t *queue_api = _scalable_api;
> +const queue_fn_t *queue_fn = _scalable_fn;
> +#else
>  const queue_api_t *queue_api = _default_api;
>  const queue_fn_t *queue_fn = _default_fn;
> +#endif
> 
>  odp_queue_t odp_queue_create(const char *name,