On Thu, 2018-12-20 at 19:33 +0800, Gavin Hu wrote:
> From: Joyce Kong <joyce.k...@arm.com>
>
> Run ring perf test on all available cores to really verify MPMC
> operations. The old way of running on a pair of cores is not enough
> for MPMC rings. We used this test case for ring optimization and it
> was really helpful for measuring the ring performance in multi-core
> environment.
>
> Suggested-by: Gavin Hu <gavin...@arm.com>
> Signed-off-by: Joyce Kong <joyce.k...@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thak...@arm.com>
> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com>
> Reviewed-by: Gavin Hu <gavin...@arm.com>
> ---
>  test/test/test_ring_perf.c | 82 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 80 insertions(+), 2 deletions(-)
>
> diff --git a/test/test/test_ring_perf.c b/test/test/test_ring_perf.c
> index ebb3939..819d119 100644
> --- a/test/test/test_ring_perf.c
> +++ b/test/test/test_ring_perf.c
> @@ -20,12 +20,17 @@
>   * * Empty ring dequeue
>   * * Enqueue/dequeue of bursts in 1 threads
>   * * Enqueue/dequeue of bursts in 2 threads
> + * * Enqueue/dequeue of bursts in all available threads
>   */
>
>  #define RING_NAME "RING_PERF"
>  #define RING_SIZE 4096
>  #define MAX_BURST 32
>
> +#ifndef ARRAY_SIZE
> +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

Use RTE_DIM instead.
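For instance (untested sketch, not from the patch; the helper function is
just illustrative, but RTE_DIM() comes from rte_common.h and expands to
the same sizeof division as ARRAY_SIZE):

	#include <stdint.h>
	#include <rte_common.h>	/* RTE_DIM() */
	#include <rte_lcore.h>	/* RTE_MAX_LCORE */

	static uint64_t queue_count[RTE_MAX_LCORE];

	/* sum the per-lcore counters, sizing the loop via RTE_DIM() */
	static uint64_t
	sum_queue_counts(void)
	{
		uint64_t total = 0;
		unsigned int i;

		for (i = 0; i < RTE_DIM(queue_count); i++)
			total += queue_count[i];
		return total;
	}

That way the test does not need to carry its own ARRAY_SIZE fallback.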
> +#endif
> +
>  /*
>   * the sizes to enqueue and dequeue in testing
>   * (marked volatile so they won't be seen as compile-time constants)
> @@ -248,9 +253,78 @@ run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r,
>  	}
>  }
>
> +static rte_atomic32_t synchro;
> +static uint64_t queue_count[RTE_MAX_LCORE] = {0};

Do we need the explicit {0} for this static global variable? It is
zero-initialized by default anyway.

> +
> +#define TIME_MS 100
> +
> +static int
> +load_loop_fn(void *p)
> +{
> +	uint64_t time_diff = 0;
> +	uint64_t begin = 0;
> +	uint64_t hz = rte_get_timer_hz();
> +	uint64_t lcount = 0;
> +	const unsigned int lcore = rte_lcore_id();
> +	struct thread_params *params = p;
> +	void *burst[MAX_BURST] = {0};
> +
> +	/* wait synchro for slaves */
> +	if (lcore != rte_get_master_lcore())
> +		while (rte_atomic32_read(&synchro) == 0)
> +			rte_pause();
> +
> +	begin = rte_get_timer_cycles();
> +	while (time_diff < hz * TIME_MS / 1000) {
> +		rte_ring_mp_enqueue_bulk(params->r, burst, params->size, NULL);
> +		rte_ring_mc_dequeue_bulk(params->r, burst, params->size, NULL);
> +		lcount++;
> +		time_diff = rte_get_timer_cycles() - begin;
> +	}
> +	queue_count[lcore] = lcount;
> +	return 0;
> +}
> +
> +static int
> +run_on_all_cores(struct rte_ring *r)
> +{
> +	uint64_t total = 0;
> +	struct thread_params param = {0};

Try to use memset here. Some versions of clang complain about the {0}
initializer semantics.
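For example (sketch only):

	#include <string.h>

	struct thread_params param;

	/* zero-fill explicitly instead of relying on {0} */
	memset(&param, 0, sizeof(param));

memset() is accepted by every compiler version and sidesteps the
missing-field-initializers warning that {0} can trigger (e.g. clang's
-Wmissing-field-initializers).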