Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler
On 20/06/2017, 15:58, "Savolainen, Petri (Nokia - FI/Espoo)"wrote: >> --- a/platform/linux-generic/include/odp_config_internal.h >> +++ b/platform/linux-generic/include/odp_config_internal.h >> @@ -7,9 +7,7 @@ >> #ifndef ODP_CONFIG_INTERNAL_H_ >> #define ODP_CONFIG_INTERNAL_H_ >> >> -#ifdef __cplusplus >> -extern "C" { >> -#endif >> +#include > >Why these configs need global visibility? This file should contain >general configuration options. > >> >> /* >> * Maximum number of pools >> @@ -22,6 +20,13 @@ extern "C" { >> #define ODP_CONFIG_QUEUES 1024 >> >> /* >> + * Maximum queue depth. Maximum number of elements that can be stored >>in >> a >> + * queue. This value is used only when the size is not explicitly >> provided >> + * during queue creation. >> + */ >> +#define CONFIG_QUEUE_SIZE 4096 >> + >> +/* >> * Maximum number of ordered locks per queue >> */ >> #define CONFIG_QUEUE_MAX_ORD_LOCKS 4 >> @@ -120,7 +125,7 @@ extern "C" { >> * >> * This the the number of separate SHM areas that can be reserved >> concurrently >> */ >> -#define ODPDRV_CONFIG_SHM_BLOCKS 48 >> +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS > > >Is this change necessary? Increases driver memory usage for no reason? The scalable scheduler should have anything to do with the drivers and their shmem use. So I think this change is unnecessary and should be reverted. > > >> + >> +#endif /* ODP_SCHEDULE_SCALABLE_H */ >> diff --git >>a/platform/linux-generic/include/odp_schedule_scalable_config.h >> b/platform/linux-generic/include/odp_schedule_scalable_config.h >> new file mode 100644 >> index ..febf379b >> --- /dev/null >> +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h >> @@ -0,0 +1,55 @@ >> +/* Copyright (c) 2017, ARM Limited >> + * All rights reserved. >> + * >> + * SPDX-License-Identifier: BSD-3-Clause >> + */ >> + >> +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_ >> +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_ >> + >> +/* >> + * Default scaling factor for the scheduler group >> + * >> + * This scaling factor is used when the application creates a scheduler >> + * group with no worker threads. >> + */ >> +#define CONFIG_DEFAULT_XFACTOR 4 >> + >> +/* >> + * Default weight (in events) for WRR in scalable scheduler >> + * >> + * This controls the per-queue weight for WRR between queues of the >>same >> + * priority in the scalable scheduler >> + * A higher value improves throughput while a lower value increases >> fairness >> + * and thus likely decreases latency >> + * >> + * If WRR is undesired, set the value to ~0 which will use the largest >> possible >> + * weight >> + * >> + * Note: an API for specifying this on a per-queue basis would be >>useful >> but is >> + * not yet available >> + */ >> +#define CONFIG_WRR_WEIGHT 64 >> + >> +/* >> + * Split queue producer/consumer metadata into separate cache lines. >> + * This is beneficial on e.g. Cortex-A57 but not so much on A53. >> + */ >> +#define CONFIG_SPLIT_PRODCONS >> + >> +/* >> + * Use locks to protect queue (ring buffer) and scheduler state updates >> + * On x86, this decreases overhead noticeably. >> + */ >> +#ifndef __ARM_ARCH >> +#define CONFIG_QSCHST_LOCK >> +/* Keep all ring buffer/qschst data together when using locks */ >> +#undef CONFIG_SPLIT_PRODCONS >> +#endif >> + >> +/* >> + * Maximum number of ordered locks per queue. >> + */ >> +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2 > > >There's already CONFIG_QUEUE_MAX_ORD_LOCKS 4, in the general config file. >Should not add the same define twice (with different value). Yes it is unnecessary for the scalable scheduler to use a separate define for this. > > > >> + >> +#endif /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */ >> diff --git a/platform/linux- >> generic/include/odp_schedule_scalable_ordered.h b/platform/linux- >> generic/include/odp_schedule_scalable_ordered.h >> new file mode 100644 >> index ..9f3acf7a >> --- /dev/null >> +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h >> @@ -0,0 +1,132 @@ >> +/* Copyright (c) 2017, ARM Limited >> + * All rights reserved. >> + * >> + * SPDX-License-Identifier: BSD-3-Clause >> + */ >> + >> +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H >> +#define ODP_SCHEDULE_SCALABLE_ORDERED_H >> + >> +#include >> + >> +#include >> +#include >> +#include >> +#include <_ishmpool_internal.h> > >> + >> +/* Number of reorder contects in the reorder window. >> + * Should be at least one per CPU. >> + */ >> +#define RWIN_SIZE 32 >> +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power >> of 2"); >> + >> +#define NUM_OLOCKS 2 > >Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE or something >different with similar name ? This is the define that is actually usedÅ There is an obvious need of a cleanup here. > > >> diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux- >> generic/odp_queue_if.c >> index c91f00eb..d7471dfc 100644 >> ---
Re: [lng-odp] [API-NEXT PATCH v9 5/6] linux-gen: sched scalable: add scalable scheduler
> --- a/platform/linux-generic/include/odp_config_internal.h > +++ b/platform/linux-generic/include/odp_config_internal.h > @@ -7,9 +7,7 @@ > #ifndef ODP_CONFIG_INTERNAL_H_ > #define ODP_CONFIG_INTERNAL_H_ > > -#ifdef __cplusplus > -extern "C" { > -#endif > +#include Why these configs need global visibility? This file should contain general configuration options. > > /* > * Maximum number of pools > @@ -22,6 +20,13 @@ extern "C" { > #define ODP_CONFIG_QUEUES 1024 > > /* > + * Maximum queue depth. Maximum number of elements that can be stored in > a > + * queue. This value is used only when the size is not explicitly > provided > + * during queue creation. > + */ > +#define CONFIG_QUEUE_SIZE 4096 > + > +/* > * Maximum number of ordered locks per queue > */ > #define CONFIG_QUEUE_MAX_ORD_LOCKS 4 > @@ -120,7 +125,7 @@ extern "C" { > * > * This the the number of separate SHM areas that can be reserved > concurrently > */ > -#define ODPDRV_CONFIG_SHM_BLOCKS 48 > +#define ODPDRV_CONFIG_SHM_BLOCKS ODP_CONFIG_SHM_BLOCKS Is this change necessary? Increases driver memory usage for no reason? > + > +#endif /* ODP_SCHEDULE_SCALABLE_H */ > diff --git a/platform/linux-generic/include/odp_schedule_scalable_config.h > b/platform/linux-generic/include/odp_schedule_scalable_config.h > new file mode 100644 > index ..febf379b > --- /dev/null > +++ b/platform/linux-generic/include/odp_schedule_scalable_config.h > @@ -0,0 +1,55 @@ > +/* Copyright (c) 2017, ARM Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_SCHEDULE_SCALABLE_CONFIG_H_ > +#define ODP_SCHEDULE_SCALABLE_CONFIG_H_ > + > +/* > + * Default scaling factor for the scheduler group > + * > + * This scaling factor is used when the application creates a scheduler > + * group with no worker threads. > + */ > +#define CONFIG_DEFAULT_XFACTOR 4 > + > +/* > + * Default weight (in events) for WRR in scalable scheduler > + * > + * This controls the per-queue weight for WRR between queues of the same > + * priority in the scalable scheduler > + * A higher value improves throughput while a lower value increases > fairness > + * and thus likely decreases latency > + * > + * If WRR is undesired, set the value to ~0 which will use the largest > possible > + * weight > + * > + * Note: an API for specifying this on a per-queue basis would be useful > but is > + * not yet available > + */ > +#define CONFIG_WRR_WEIGHT 64 > + > +/* > + * Split queue producer/consumer metadata into separate cache lines. > + * This is beneficial on e.g. Cortex-A57 but not so much on A53. > + */ > +#define CONFIG_SPLIT_PRODCONS > + > +/* > + * Use locks to protect queue (ring buffer) and scheduler state updates > + * On x86, this decreases overhead noticeably. > + */ > +#ifndef __ARM_ARCH > +#define CONFIG_QSCHST_LOCK > +/* Keep all ring buffer/qschst data together when using locks */ > +#undef CONFIG_SPLIT_PRODCONS > +#endif > + > +/* > + * Maximum number of ordered locks per queue. > + */ > +#define CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE 2 There's already CONFIG_QUEUE_MAX_ORD_LOCKS 4, in the general config file. Should not add the same define twice (with different value). > + > +#endif /* ODP_SCHEDULE_SCALABLE_CONFIG_H_ */ > diff --git a/platform/linux- > generic/include/odp_schedule_scalable_ordered.h b/platform/linux- > generic/include/odp_schedule_scalable_ordered.h > new file mode 100644 > index ..9f3acf7a > --- /dev/null > +++ b/platform/linux-generic/include/odp_schedule_scalable_ordered.h > @@ -0,0 +1,132 @@ > +/* Copyright (c) 2017, ARM Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_SCHEDULE_SCALABLE_ORDERED_H > +#define ODP_SCHEDULE_SCALABLE_ORDERED_H > + > +#include > + > +#include > +#include > +#include > +#include <_ishmpool_internal.h> > + > +/* Number of reorder contects in the reorder window. > + * Should be at least one per CPU. > + */ > +#define RWIN_SIZE 32 > +ODP_STATIC_ASSERT(CHECK_IS_POWER2(RWIN_SIZE), "RWIN_SIZE is not a power > of 2"); > + > +#define NUM_OLOCKS 2 Is this the same as CONFIG_MAX_ORDERED_LOCKS_PER_QUEUE or something different with similar name ? > diff --git a/platform/linux-generic/odp_queue_if.c b/platform/linux- > generic/odp_queue_if.c > index c91f00eb..d7471dfc 100644 > --- a/platform/linux-generic/odp_queue_if.c > +++ b/platform/linux-generic/odp_queue_if.c > @@ -6,11 +6,19 @@ > > #include > > +extern const queue_api_t queue_scalable_api; > +extern const queue_fn_t queue_scalable_fn; > + > extern const queue_api_t queue_default_api; > extern const queue_fn_t queue_default_fn; > > +#ifdef ODP_SCHEDULE_SCALABLE > +const queue_api_t *queue_api = _scalable_api; > +const queue_fn_t *queue_fn = _scalable_fn; > +#else > const queue_api_t *queue_api = _default_api; > const queue_fn_t *queue_fn = _default_fn; > +#endif > > odp_queue_t odp_queue_create(const char *name,