On Tue, Dec 16, 2014 at 10:15:20AM +0000, Mario Torrecillas Rodriguez wrote:
> This is a first stab at synchroniser tests, which contains contributions
> from Yan and Barry. I reviewed the code, fixed a couple of functional
> issues, merged both into a common test suite and addressed most of the
> coding style issues.
> It can be applied on top of the latest commit; however, it depends on the
> ticketlock_try_lock patch, so that needs to be applied too before
> compiling (or you can comment out the try lock calls in
> odp_synchronizers.c).
>
> We still need more tests, especially for atomics, since the ones we have
> at the moment target functionality in the absence of other threads.
>
> Mario.
>
> On 16/12/2014 09:44, "Mario Torrecillas Rodriguez"
> <[email protected]> wrote:
>
> >(This code contribution is provided under the terms of agreement
> >LES-LTM-21309)
> >
> >First set of synchronizer tests. This patch includes tests for locks,
> >barriers and atomics ("sunny day" tests only).
> >Most of this code comes from Barry Spinney and Yan Songming.
> >
> >Please note that this code depends on ticketlock_trylock.
> >
> >Signed-off-by: Mario Torrecillas Rodriguez
> ><[email protected]>
> >---
> > test/validation/Makefile.am               |    7 +-
> > test/validation/common/odp_common.c       |  241 ++
> > test/validation/common/odp_common.h       |   28 +
> > test/validation/common/odp_cunit_common.c |    5 +
> > test/validation/common/odp_cunit_common.h |    1 +
> > test/validation/odp_synchronizers.c       | 1149 +++++++++++++++++++++++++++++
> > 6 files changed, 1430 insertions(+), 1 deletion(-)
> > create mode 100644 test/validation/common/odp_common.c
> > create mode 100644 test/validation/common/odp_common.h
> > create mode 100644 test/validation/odp_synchronizers.c
> >
> >diff --git a/test/validation/Makefile.am b/test/validation/Makefile.am
> >index d0b5426..38bd47c 100644
> >--- a/test/validation/Makefile.am
> >+++ b/test/validation/Makefile.am
> >@@ -6,7 +6,7 @@ AM_LDFLAGS += -static
> > if ODP_CUNIT_ENABLED
> > TESTS = ${bin_PROGRAMS}
> > check_PROGRAMS = ${bin_PROGRAMS}
> >-bin_PROGRAMS = odp_init odp_queue odp_crypto odp_shm odp_schedule
> >+bin_PROGRAMS = odp_init odp_queue odp_crypto odp_shm odp_schedule odp_synchronizers
> > odp_init_LDFLAGS = $(AM_LDFLAGS)
> > odp_queue_LDFLAGS = $(AM_LDFLAGS)
> > odp_crypto_CFLAGS = $(AM_CFLAGS) -I$(srcdir)/crypto
> >@@ -15,6 +15,8 @@ odp_shm_CFLAGS = $(AM_CFLAGS)
> > odp_shm_LDFLAGS = $(AM_LDFLAGS)
> > odp_schedule_CFLAGS = $(AM_CFLAGS)
> > odp_schedule_LDFLAGS = $(AM_LDFLAGS)
> >+odp_synchronizers_LDFLAGS = $(AM_LDFLAGS)
> >+odp_synchronizers_LDFLAGS = $(AM_LDFLAGS)
Duplicate odp_synchronizers_LDFLAGS line; one of the two should be dropped.

> > endif
> >
> > dist_odp_init_SOURCES = odp_init.c
> >@@ -29,3 +31,6 @@ dist_odp_schedule_SOURCES = odp_schedule.c common/odp_cunit_common.c
> > #For Linux generic the unimplemented crypto API functions break the
> > #regression TODO: https://bugs.linaro.org/show_bug.cgi?id=975
> > XFAIL_TESTS=odp_crypto
> >+dist_odp_synchronizers_SOURCES = odp_synchronizers.c \
> >+		common/odp_cunit_common.c \
> >+		common/odp_common.c
> >diff --git a/test/validation/common/odp_common.c b/test/validation/common/odp_common.c
> >new file mode 100644
> >index 0000000..1b55569
> >--- /dev/null
> >+++ b/test/validation/common/odp_common.c
> >@@ -0,0 +1,241 @@
> >+/* Copyright (c) 2013, Linaro Limited
> >+ * All rights reserved.
> >+ *
> >+ * SPDX-License-Identifier: BSD-3-Clause
> >+ */
> >+
> >+/**
> >+ * @file
> >+ *
> >+ * ODP test application common
> >+ */
> >+
> >+#if defined(__x86_64) || defined(__x86_64__)
> >+#define X86
> >+#else
> >+#define TILERA
> >+#endif

Broken logic: !X86 != TILERA. A non-x86 build could just as well be ARM, so the fallback should test for a Tilera-specific macro and fail on unknown architectures.

> >+
> >+
> >+#ifdef TILERA
> >+#include <tmc/cpus.h>
> >+#include <assert.h>
> >+#endif
> >+
> >+#define _GNU_SOURCE
> >+#define __USE_GNU
> >+#include <sched.h>
> >+
> >+#include <string.h>
> >+#include <unistd.h>
> >+#include <pthread.h>
> >+#include <odp.h>
> >+#include <odp_common.h>
> >+#include <odp_cunit_common.h>
> >+#include <test_debug.h>
> >+
> >+/* Globals */
> >+static pthread_t thread_tbl[MAX_WORKERS]; /**< worker threads table*/
> >+static int num_workers;                   /**< number of workers */
> >+
> >+typedef struct {
> >+	pthrd_arg user_arg;
> >+	thread_fcn_ptr_t user_fcn;
> >+	uint32_t thread_num;
> >+	int32_t cpu;
> >+} thread_args_t;
> >+
> >+static thread_args_t g_thread_args[MAX_WORKERS];
> >+
> >+#ifdef TILERA
> >+
> >+static cpu_set_t dataplane_cpus;
> >+static cpu_set_t other_cpus;
> >+static cpu_set_t total_cpus;
> >+static uint32_t num_dataplane_cpus;
> >+static uint32_t total_num_cpus;
> >+
> >+static void get_all_cpus(uint32_t num_threads, odp_coremask_t *all_cpus)
> >+{
> >+	uint32_t num_other_cpus, idx, cpu;
> >+
> >+	tmc_cpus_get_dataplane_cpus(&dataplane_cpus);
> >+	tmc_cpus_get_online_cpus(&other_cpus);
> >+	tmc_cpus_remove_cpus(&other_cpus, &dataplane_cpus);
> >+	tmc_cpus_clear(&total_cpus);
> >+	tmc_cpus_add_cpus(&total_cpus, &dataplane_cpus);
> >+	tmc_cpus_add_cpus(&total_cpus, &other_cpus);
> >+
> >+	num_dataplane_cpus = tmc_cpus_count(&dataplane_cpus);
> >+	num_other_cpus = tmc_cpus_count(&other_cpus);
> >+	total_num_cpus = tmc_cpus_count(&total_cpus);
> >+	assert(total_num_cpus == num_dataplane_cpus + num_other_cpus);
> >+
> >+	/* Convert tmc cpu set representation to the odp_coremask type */
> >+	odp_coremask_zero(all_cpus);
> >+	for (idx = 0; idx < total_num_cpus; idx++) {
> >+		cpu = tmc_cpus_find_nth_cpu(&total_cpus, idx);
> >+		if ((0 <= cpu) && (cpu <= 63))
> >+			odp_coremask_set(cpu, all_cpus);
> >+	}
> >+}
> >+
> >+static int get_nth_cpu(uint32_t thread_idx, odp_coremask_t *all_cpus)
> >+{
> >+	if (thread_idx < num_dataplane_cpus)
> >+		return tmc_cpus_find_nth_cpu(&dataplane_cpus, thread_idx);
> >+	else if (thread_idx < total_num_cpus)
> >+		return tmc_cpus_find_nth_cpu(&other_cpus,
> >+					     thread_idx - num_dataplane_cpus);
> >+	else
> >+		return -1;
> >+}
> >+

Move this implementation into the platform code and use the ODP coremask API to abstract it.

> >+static void set_thread_affinity(pthread_t pthread, int cpu)
> >+{
> >+	int rc;
> >+
> >+	rc = tmc_cpus_set_my_cpu(cpu);
> >+	if (rc != 0) {
> >+		printf("tmc_cpus_set_my_cpu failed. Exiting\n");
> >+		_exit(-1);
> >+	}
> >+}
> >+
> >+#else
> >+
> >+static void get_all_cpus(uint32_t num_threads __attribute__((__unused__)),
> >+			 odp_coremask_t *all_cpus)
> >+{
> >+	uint32_t core_count, cpu;
> >+
> >+	core_count = odp_sys_core_count();
> >+	odp_coremask_zero(all_cpus);
> >+	for (cpu = 0; cpu < core_count; cpu++)
> >+		odp_coremask_set(cpu, all_cpus);
> >+}
> >+
> >+static int get_nth_cpu(int thread_idx, odp_coremask_t *all_cpus)
> >+{
> >+	if (odp_coremask_isset(thread_idx, all_cpus))
> >+		return thread_idx;
> >+	else
> >+		return -1;
> >+}
> >+
> >+static void set_thread_affinity(pthread_t pthread, int cpu)
> >+{
> >+	cpu_set_t cpu_set;
> >+	int rc;
> >+
> >+	CPU_ZERO(&cpu_set);
> >+	CPU_SET(cpu, &cpu_set);
> >+	rc = pthread_setaffinity_np(pthread, sizeof(cpu_set_t), &cpu_set);
> >+	if (rc != 0) {
> >+		printf("pthread_setaffinity_np failed. Exiting\n");
> >+		_exit(-1);
> >+	}
> >+}
> >+#endif

odph_linux_pthread_create already has the means to launch a thread on a specific cpu; use that infrastructure. It is difficult to set thread affinity to a specific cpu at run time in every run-time environment.

> >+
> >+static void *thread_start_fcn(void *arg)
> >+{
> >+	thread_args_t *thread_args;
> >+	uint32_t thread_idx;
> >+	int32_t cpu;
> >+
> >+	thread_args = (thread_args_t *)arg;
> >+	thread_idx = thread_args->thread_num - 1;
> >+	cpu = thread_args->cpu;
> >+
> >+	if (0 <= cpu)
> >+		set_thread_affinity(thread_tbl[thread_idx], cpu);
> >+
> >+	return thread_args->user_fcn(&thread_args->user_arg);

Too much abstraction. IMO the existing odp_cunit_thread_create can replace this by mapping the "first_core" argument of odph_linux_pthread_create.

> >+}
> >+
> >+static int internal_execute_parallel(thread_fcn_ptr_t thread_fcn,
> >+				     uint32_t num_threads)
> >+{
> >+	odp_coremask_t all_cpus;
> >+	thread_args_t *thread_args;
> >+	uint32_t thread_num, thread_idx;
> >+	int rc, cpu;
> >+
> >+	if (MAX_WORKERS < num_threads) {
> >+		printf("Num of threads is too large. exiting\n");
> >+		_exit(-1);
> >+	}
> >+
> >+	memset(thread_tbl, 0, sizeof(thread_tbl));
> >+	get_all_cpus(num_threads, &all_cpus);
> >+
> >+	for (thread_num = 1; thread_num <= num_threads; thread_num++) {
> >+		thread_idx = thread_num - 1;
> >+		cpu = get_nth_cpu(thread_idx, &all_cpus);
> >+		if (cpu < 0) {
> >+			printf("%s error. No CPU available\n",
> >+			       __func__);
> >+			_exit(-1);
> >+		}
> >+
> >+		thread_args = &g_thread_args[thread_idx];
> >+		thread_args->thread_num = thread_num;
> >+		thread_args->cpu = cpu;
> >+		thread_args->user_fcn = thread_fcn;
> >+		thread_args->user_arg.numthrds = thread_num;
> >+
> >+		rc = pthread_create(&thread_tbl[thread_idx], NULL,
> >+				    thread_start_fcn, thread_args);
> >+		if (rc != 0) {
> >+			printf("%s error. pthread_create failed. exiting\n",
> >+			       __func__);
> >+			_exit(-1);
> >+		}
> >+	}

Same as above; a sketch of the suggested rework follows.
> >+
> >+	return 0;
> >+}
> >+
> >+/** Test init globals and call odp_init_global() */
> >+int odp_test_global_init(void)
> >+{
> >+	memset(thread_tbl, 0, sizeof(thread_tbl));
> >+
> >+	if (odp_init_global(NULL, NULL)) {
> >+		LOG_ERR("ODP global init failed.\n");
> >+		return -1;
> >+	}
> >+
> >+	num_workers = odp_sys_core_count();
> >+
> >+	/* force to max core count */
> >+	if (num_workers > MAX_WORKERS)
> >+		num_workers = MAX_WORKERS;
> >+
> >+	return 0;
> >+}
> >+
> >+int odp_test_thread_join_all(uint32_t num_threads)
> >+{
> >+	uint32_t thread_idx;
> >+
> >+	/* Wait for other threads to exit */
> >+	for (thread_idx = 0; thread_idx < num_threads; thread_idx++)
> >+		pthread_join(thread_tbl[thread_idx], NULL);
> >+
> >+	return 0;
> >+}

Extend the existing odp_cunit_thread_exit to accept num_threads. Please don't create duplicate abstractions for each test case in the common directory.

> >+
> >+void odp_test_thread_yield(void)
> >+{
> >+	sched_yield();
> >+}

Is it really required? Yielding has little meaning in a data plane run-time environment.

> >+
> >+int odp_execute_parallel(thread_fcn_ptr_t thread_fcn,
> >+			 uint32_t num_threads)
> >+{
> >+	internal_execute_parallel(thread_fcn, num_threads);
> >+	odp_test_thread_join_all(num_threads);
> >+	return 0;
> >+}
> >diff --git a/test/validation/common/odp_common.h b/test/validation/common/odp_common.h
> >new file mode 100644
> >index 0000000..5357432
> >--- /dev/null
> >+++ b/test/validation/common/odp_common.h
> >@@ -0,0 +1,28 @@
> >+/* Copyright (c) 2013, Linaro Limited
> >+ * All rights reserved.
> >+ *
> >+ * SPDX-License-Identifier: BSD-3-Clause
> >+ */
> >+
> >+/**
> >+ * @file
> >+ *
> >+ * ODP test application common headers
> >+ */
> >+
> >+#ifndef ODP_COMMON_H
> >+#define ODP_COMMON_H
> >+
> >+typedef void * (*thread_fcn_ptr_t) (void *arg);
> >+
> >+void odp_print_system_info(void);
> >+int odp_test_global_init(void);
> >+
> >+int odp_test_thread_join_all(uint32_t num_threads);
> >+
> >+void odp_test_thread_yield(void);
> >+
> >+int odp_execute_parallel(thread_fcn_ptr_t thread_fcn,
> >+			 uint32_t num_threads);
> >+

I guess all of the above functions can be abstracted in the existing odp_cunit_common.h; there is no need to create a new header file.

> >+#endif /* ODP_COMMON_H */
> >diff --git a/test/validation/common/odp_cunit_common.c b/test/validation/common/odp_cunit_common.c
> >index 950bd18..cf4f8d9 100644
> >--- a/test/validation/common/odp_cunit_common.c
> >+++ b/test/validation/common/odp_cunit_common.c
> >@@ -36,6 +36,11 @@ int odp_cunit_thread_exit(pthrd_arg *arg)
> > 	return 0;
> > }
> >
> >+void odp_cunit_thread_yield(void)
> >+{
> >+	sched_yield();
> >+}

Is it really required? As above, yielding is not very meaningful in a data plane run-time environment.

> >+
> > __attribute__((__weak__)) int tests_global_init(void)
> > {
> > 	return 0;
> >diff --git a/test/validation/common/odp_cunit_common.h b/test/validation/common/odp_cunit_common.h
> >index f967ca2..2d954e3 100644
> >--- a/test/validation/common/odp_cunit_common.h
> >+++ b/test/validation/common/odp_cunit_common.h
> >@@ -40,6 +40,7 @@ typedef struct {
> > /** create thread fro start_routine function */
> > extern int odp_cunit_thread_create(void *func_ptr(void *), pthrd_arg *arg);
> > extern int odp_cunit_thread_exit(pthrd_arg *);
> >+extern void odp_cunit_thread_yield(void);
> > /**
> > * Global tests initialization.
> > *
> >diff --git a/test/validation/odp_synchronizers.c b/test/validation/odp_synchronizers.c
> >new file mode 100644
> >index 0000000..65ac714
> >--- /dev/null
> >+++ b/test/validation/odp_synchronizers.c
> >@@ -0,0 +1,1149 @@
> >+/* Copyright (c) 2014, Linaro Limited
> >+ * All rights reserved.
> >+ *
> >+ * SPDX-License-Identifier: BSD-3-Clause
> >+ */
> >+
> >+#include <string.h>
> >+#include <unistd.h>
> >+#include <stdlib.h>
> >+#include <stdio.h>
> >+#include <malloc.h>
> >+#include <stdint.h>
> >+#include <odp.h>
> >+#include <CUnit/Basic.h>
> >+#include <odp_common.h>
> >+#include <odp_cunit_common.h>
> >+
> >+#define VERBOSE			0
> >+#define MAX_ITERATIONS		1000
> >+#define BARRIER_ITERATIONS	64
> >+
> >+#define SLOW_BARRIER_DELAY	400
> >+#define BASE_DELAY		6
> >+#define MIN_DELAY		1
> >+
> >+#define NUM_TEST_BARRIERS	BARRIER_ITERATIONS
> >+#define NUM_RESYNC_BARRIERS	100
> >+
> >+#define ADD_SUB_CNT		5
> >+
> >+#define CNT			10
> >+#define U32_INIT_VAL		(1UL << 10)
> >+#define U64_INIT_VAL		(1ULL << 33)
> >+
> >+#define GLOBAL_SHM_NAME		"GlobalLockTest"
> >+
> >+static odp_atomic_u32_t a32u;
> >+static odp_atomic_u64_t a64u;
> >+
> >+typedef volatile uint32_t volatile_u32_t, *ptr_volatile_u32_t;
> >+typedef volatile uint64_t volatile_u64_t, *ptr_volatile_u64_t;
> >+
> >+typedef struct {
> >+	odp_atomic_u32_t wait_cnt;
> >+} custom_barrier_t;
> >+
> >+typedef struct {
> >+	/* Global variables */
> >+	uint32_t g_num_threads;
> >+	uint32_t g_iterations;
> >+	uint32_t g_verbose;
> >+	uint32_t g_max_num_cores;
> >+	uint32_t one_thread_per_core;
> >+
> >+	odp_barrier_t test_barriers[NUM_TEST_BARRIERS];
> >+	custom_barrier_t custom_barrier1;
> >+	custom_barrier_t custom_barrier2;
> >+	volatile_u32_t slow_thread_num;
> >+	volatile_u32_t barrier_cnt1;
> >+	volatile_u32_t barrier_cnt2;
> >+	odp_barrier_t global_barrier;
> >+
> >+	/* Used to periodically resync within the lock functional tests */
> >+	odp_barrier_t barrier_array[NUM_RESYNC_BARRIERS];
> >+
> >+	/* Locks */
> >+	odp_spinlock_t global_spinlock;
> >+	odp_ticketlock_t global_ticketlock;
> >+	odp_rwlock_t global_rwlock;
> >+
> >+	volatile_u32_t global_lock_owner;
> >+} global_shared_mem_t;
> >+
> >+/* Per-thread memory */
> >+typedef struct {
> >+	global_shared_mem_t *global_mem;
> >+
> >+	uint32_t one_thread_per_core;
> >+
> >+	int thread_id;
> >+	int thread_core;
> >+
> >+	odp_spinlock_t per_thread_spinlock;
> >+	odp_ticketlock_t per_thread_ticketlock;
> >+	odp_rwlock_t per_thread_rwlock;
> >+
> >+	volatile_u64_t delay_counter;
> >+} per_thread_mem_t;
> >+
> >+#define THREAD_YIELD(thread_mem_ptr) {}
> >+
> >+static odp_shm_t global_shm;
> >+static global_shared_mem_t *global_mem;
> >+
> >+/*
> >+* Delay a consistent amount of time. Ideally the amount of CPU time taken
> >+* is linearly proportional to "iterations". The goal is to try to do some
> >+* work that the compiler optimizer won't optimize away, and also to
> >+* minimize loads and stores (at least to different memory addresses)
> >+* so as to not affect or be affected by caching issues. This does NOT have to
> >+* correlate to a specific number of cpu cycles or be consistent across
> >+* CPU architectures.
> >+*/
> >+static void thread_delay(per_thread_mem_t *per_thread_mem, uint32_t iterations)
> >+{
> >+	ptr_volatile_u64_t counter_ptr;
> >+	uint32_t cnt;
> >+
> >+	counter_ptr = &per_thread_mem->delay_counter;
> >+	if (BASE_DELAY < iterations)
> >+		THREAD_YIELD(per_thread_mem);
> >+
> >+	for (cnt = 1; cnt <= iterations; cnt++)
> >+		(*counter_ptr)++;
> >+
> >+	THREAD_YIELD(per_thread_mem);

What is the significance/expected behavior of THREAD_YIELD? As defined above, it expands to an empty block.

> >+}
> >+
> >+/* Initialise per-thread memory */
> >+static per_thread_mem_t *thread_init(void)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	odp_shm_t global_shm;
> >+	uint32_t per_thread_mem_len;
> >+
> >+	if (odp_init_local() != 0) {
> >+		printf("Error initializing thread\n");
> >+		exit(0);
> >+	}
> >+
> >+	per_thread_mem_len = sizeof(per_thread_mem_t);
> >+	per_thread_mem = malloc(per_thread_mem_len);
> >+	memset(per_thread_mem, 0, per_thread_mem_len);
> >+
> >+	per_thread_mem->delay_counter = 1;
> >+
> >+	per_thread_mem->thread_id = odp_thread_id();
> >+	per_thread_mem->thread_core = odp_thread_core();
> >+
> >+	global_shm = odp_shm_lookup(GLOBAL_SHM_NAME);
> >+	global_mem = odp_shm_addr(global_shm);
> >+
> >+	per_thread_mem->global_mem = global_mem;
> >+	per_thread_mem->one_thread_per_core = global_mem->one_thread_per_core;
> >+
> >+	return per_thread_mem;
> >+}
> >+
> >+/* Free per-thread memory */
> >+static void thread_finalize(per_thread_mem_t *per_thread_mem)
> >+{
> >+	free(per_thread_mem);
> >+}
> >+
> >+/* Custom barrier used to validate ODP barrier */
> >+static void custom_barrier_init(custom_barrier_t *custom_barrier,
> >+				uint32_t num_threads)
> >+{
> >+	odp_atomic_store_u32(&custom_barrier->wait_cnt, num_threads);
> >+}
> >+
> >+static void custom_barrier_wait(custom_barrier_t *custom_barrier)
> >+{
> >+	volatile_u64_t counter = 1;
> >+	uint32_t delay_cnt, wait_cnt;
> >+
> >+	odp_atomic_sub_u32(&custom_barrier->wait_cnt, 1);
> >+
> >+	wait_cnt = 1;
> >+	while (wait_cnt != 0) {
> >+		for (delay_cnt = 1; delay_cnt <= 10; delay_cnt++)
> >+			counter++;
> >+
> >+		wait_cnt = odp_atomic_load_u32(&custom_barrier->wait_cnt);
> >+	}

The odp_atomic_* operations use the RELAXED memory model. Add proper ACQUIRE/RELEASE semantics; see the existing odp_barrier_wait implementation. A sketch of what that means follows.
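For illustration only (plain GCC/Clang builtins on an ordinary struct, not
the ODP API): a counting barrier with the explicit ordering asked for above
would look roughly like this.

	/* Sketch: release on arrival publishes this thread's prior
	 * stores; acquire on the spinning load keeps later loads from
	 * being reordered ahead of the wait. */
	typedef struct {
		uint32_t wait_cnt;
	} my_barrier_t;

	static void my_barrier_wait(my_barrier_t *b)
	{
		__atomic_sub_fetch(&b->wait_cnt, 1, __ATOMIC_RELEASE);

		while (__atomic_load_n(&b->wait_cnt, __ATOMIC_ACQUIRE) != 0)
			; /* spin */
	}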
> >+}
> >+
> >+static uint32_t barrier_test(uint32_t thread_num,
> >+			     per_thread_mem_t *per_thread_mem,
> >+			     uint8_t no_barrier_test)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	uint32_t barrier_errs, iterations, cnt, i_am_slow_thread;
> >+	uint32_t slow_thread_num, next_slow_thread, num_threads;
> >+	uint32_t lock_owner_delay, barrier_cnt1, barrier_cnt2;
> >+
> >+	global_mem = per_thread_mem->global_mem;
> >+	num_threads = global_mem->g_num_threads;
> >+	iterations = BARRIER_ITERATIONS;
> >+
> >+	barrier_errs = 0;
> >+	lock_owner_delay = SLOW_BARRIER_DELAY;
> >+
> >+	for (cnt = 1; cnt < iterations; cnt++) {
> >+		/* Wait here until all of the threads reach this point */
> >+		custom_barrier_wait(&global_mem->custom_barrier1);
> >+
> >+		barrier_cnt1 = global_mem->barrier_cnt1;
> >+		barrier_cnt2 = global_mem->barrier_cnt2;
> >+
> >+		if ((barrier_cnt1 != cnt) || (barrier_cnt2 != cnt)) {
> >+			printf("thread_num=%u got bad barrier_cnts of \
> >+			       %u %u cnt=%u\n",
> >+			       thread_num, barrier_cnt1, barrier_cnt2, cnt);
> >+			barrier_errs++;
> >+		}
> >+
> >+		/* Wait here until all of the threads reach this point */
> >+		custom_barrier_wait(&global_mem->custom_barrier2);
> >+
> >+		slow_thread_num = global_mem->slow_thread_num;
> >+		i_am_slow_thread = thread_num == slow_thread_num;
> >+		next_slow_thread = slow_thread_num + 1;
> >+		if (num_threads < next_slow_thread)
> >+			next_slow_thread = 1;
> >+
> >+		/*
> >+		 * Now run the test, which involves having all but one thread
> >+		 * immediately calling odp_barrier_wait(), and one thread wait a
> >+		 * moderate amount of time and then calling odp_barrier_wait().
> >+		 * The test fails if any of the first group of threads
> >+		 * has not waited for the "slow" thread. The "slow" thread is
> >+		 * responsible for re-initializing the barrier for next trial.
> >+		 */
> >+		if (i_am_slow_thread) {
> >+			thread_delay(per_thread_mem, lock_owner_delay);
> >+			lock_owner_delay += BASE_DELAY;
> >+			if ((global_mem->barrier_cnt1 != cnt) ||
> >+			    (global_mem->barrier_cnt2 != cnt) ||
> >+			    (global_mem->slow_thread_num
> >+			     != slow_thread_num))
> >+				barrier_errs++;
> >+		}
> >+
> >+		if (no_barrier_test == 0)
> >+			odp_barrier_wait(&global_mem->test_barriers[cnt]);
> >+
> >+		global_mem->barrier_cnt1 = cnt + 1;
> >+		odp_sync_stores();
> >+
> >+		if (i_am_slow_thread) {
> >+			custom_barrier_init(&global_mem->custom_barrier1,
> >+					    num_threads);
> >+			custom_barrier_init(&global_mem->custom_barrier2,
> >+					    num_threads);
> >+			global_mem->slow_thread_num = next_slow_thread;
> >+			global_mem->barrier_cnt2 = cnt + 1;
> >+			odp_sync_stores();
> >+		} else {
> >+			while (global_mem->barrier_cnt2 != (cnt + 1))
> >+				thread_delay(per_thread_mem, BASE_DELAY);
> >+		}
> >+	}
> >+
> >+	if ((global_mem->g_verbose) && (barrier_errs != 0))
> >+		printf("\nThread %u (id=%d core=%d) had %u barrier_errs in \
> >+		       %u iterations\n", thread_num,
> >+		       per_thread_mem->thread_id,
> >+		       per_thread_mem->thread_core, barrier_errs, iterations);
> >+
> >+	return barrier_errs;
> >+}
> >+
> >+static void *no_barrier_functional_test(void *arg)
> >+{
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, barrier_errs;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+
> >+	barrier_errs = barrier_test(thread_num, per_thread_mem, 1);
> >+
> >+	/*
> >+	 * Note that the following CU_ASSERT MAY appear incorrect, but for the
> >+	 * no_barrier test it should see barrier_errs or else there is something
> >+	 * wrong with the test methodology or the ODP thread implementation.
> >+	 * So this test PASSES only if it sees barrier_errs!
> >+	 */
> >+	CU_ASSERT(barrier_errs != 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void *barrier_functional_test(void *arg)
> >+{
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, barrier_errs;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+
> >+	barrier_errs = barrier_test(thread_num, per_thread_mem, 0);
> >+	CU_ASSERT(barrier_errs == 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void spinlock_api_test(odp_spinlock_t *spinlock)
> >+{
> >+	odp_spinlock_init(spinlock);
> >+	CU_ASSERT(odp_spinlock_is_locked(spinlock) == 0);
> >+
> >+	odp_spinlock_lock(spinlock);
> >+	CU_ASSERT(odp_spinlock_is_locked(spinlock) == 1);
> >+
> >+	odp_spinlock_unlock(spinlock);
> >+	CU_ASSERT(odp_spinlock_is_locked(spinlock) == 0);
> >+
> >+	CU_ASSERT(odp_spinlock_trylock(spinlock) == 1);
> >+
> >+	CU_ASSERT(odp_spinlock_is_locked(spinlock) == 1);
> >+
> >+	odp_spinlock_unlock(spinlock);
> >+	CU_ASSERT(odp_spinlock_is_locked(spinlock) == 0);
> >+}
> >+
> >+static void *spinlock_api_tests(void *arg __attribute__((__unused__)))
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	odp_spinlock_t local_spin_lock;
> >+
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	spinlock_api_test(&local_spin_lock);
> >+	spinlock_api_test(&per_thread_mem->per_thread_spinlock);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void ticketlock_api_test(odp_ticketlock_t *ticketlock)
> >+{
> >+	odp_ticketlock_init(ticketlock);
> >+	CU_ASSERT(odp_ticketlock_is_locked(ticketlock) == 0);
> >+
> >+	odp_ticketlock_lock(ticketlock);
> >+	CU_ASSERT(odp_ticketlock_is_locked(ticketlock) == 1);
> >+
> >+	odp_ticketlock_unlock(ticketlock);
> >+	CU_ASSERT(odp_ticketlock_is_locked(ticketlock) == 0);
> >+
> >+	CU_ASSERT(odp_ticketlock_trylock(ticketlock) == 1);
> >+	CU_ASSERT(odp_ticketlock_trylock(ticketlock) == 0);
> >+	CU_ASSERT(odp_ticketlock_is_locked(ticketlock) == 1);
> >+
> >+	odp_ticketlock_unlock(ticketlock);
> >+	CU_ASSERT(odp_ticketlock_is_locked(ticketlock) == 0);
> >+}
> >+
> >+static void *ticketlock_api_tests(void *arg __attribute__((__unused__)))
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	odp_ticketlock_t local_ticket_lock;
> >+
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	ticketlock_api_test(&local_ticket_lock);
> >+	ticketlock_api_test(&per_thread_mem->per_thread_ticketlock);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void rwlock_api_test(odp_rwlock_t *rw_lock)
> >+{
> >+	odp_rwlock_init(rw_lock);
> >+	/* CU_ASSERT(odp_rwlock_is_locked(rw_lock) == 0); */
> >+
> >+	odp_rwlock_read_lock(rw_lock);
> >+	odp_rwlock_read_unlock(rw_lock);
> >+
> >+	odp_rwlock_write_lock(rw_lock);
> >+	/* CU_ASSERT(odp_rwlock_is_locked(rw_lock) == 1); */
> >+
> >+	odp_rwlock_write_unlock(rw_lock);
> >+	/* CU_ASSERT(odp_rwlock_is_locked(rw_lock) == 0); */
> >+}
> >+
> >+static void *rwlock_api_tests(void *arg __attribute__((__unused__)))
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	odp_rwlock_t local_rwlock;
> >+
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	rwlock_api_test(&local_rwlock);
> >+	rwlock_api_test(&per_thread_mem->per_thread_rwlock);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void *no_lock_functional_test(void *arg)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, resync_cnt, resync_idx, iterations, cnt;
> >+	uint32_t sync_failures, current_errs, lock_owner_delay;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+	iterations = global_mem->g_iterations;
> >+
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	sync_failures = 0;
> >+	current_errs = 0;
> >+	resync_idx = 0;
> >+	resync_cnt = iterations / NUM_RESYNC_BARRIERS;
> >+	lock_owner_delay = BASE_DELAY;
> >+
> >+	for (cnt = 1; cnt <= iterations; cnt++) {
> >+		global_mem->global_lock_owner = thread_num;
> >+		odp_sync_stores();
> >+		thread_delay(per_thread_mem, lock_owner_delay);
> >+
> >+		if (global_mem->global_lock_owner != thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		global_mem->global_lock_owner = 0;
> >+		odp_sync_stores();
> >+		thread_delay(per_thread_mem, MIN_DELAY);
> >+
> >+		if (global_mem->global_lock_owner == thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		if (current_errs == 0)
> >+			lock_owner_delay++;
> >+
> >+		/* Wait a small amount of time and rerun the test */
> >+		thread_delay(per_thread_mem, BASE_DELAY);
> >+
> >+		/* Try to resync all of the threads to increase contention */
> >+		if ((resync_idx < NUM_RESYNC_BARRIERS) &&
> >+		    ((cnt % resync_cnt) == (resync_cnt - 1)))
> >+			odp_barrier_wait(&global_mem->barrier_array[resync_idx++]);
> >+	}
> >+
> >+	if (global_mem->g_verbose)
> >+		printf("\nThread %u (id=%d core=%d) had %u sync_failures in \
> >+		       %u iterations\n", thread_num,
> >+		       per_thread_mem->thread_id,
> >+		       per_thread_mem->thread_core,
> >+		       sync_failures, iterations);
> >+
> >+	/* Note that the following CU_ASSERT MAY appear incorrect, but for the
> >+	 * no_lock test it should see sync_failures or else there is something
> >+	 * wrong with the test methodology or the ODP thread implementation.
> >+	 * So this test PASSES only if it sees sync_failures
> >+	 */
> >+	CU_ASSERT(sync_failures != 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void *spinlock_functional_test(void *arg)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, resync_cnt, resync_idx, iterations, cnt;
> >+	uint32_t sync_failures, is_locked_errs, current_errs;
> >+	uint32_t lock_owner_delay;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+	iterations = global_mem->g_iterations;
> >+
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	sync_failures = 0;
> >+	is_locked_errs = 0;
> >+	current_errs = 0;
> >+	resync_idx = 0;
> >+	resync_cnt = iterations / NUM_RESYNC_BARRIERS;
> >+	lock_owner_delay = BASE_DELAY;
> >+
> >+	for (cnt = 1; cnt <= iterations; cnt++) {
> >+		/* Acquire the shared global lock */
> >+		odp_spinlock_lock(&global_mem->global_spinlock);
> >+
> >+		/* Make sure we have the lock AND didn't previously own it */
> >+		if (odp_spinlock_is_locked(&global_mem->global_spinlock) != 1)
> >+			is_locked_errs++;
> >+
> >+		if (global_mem->global_lock_owner != 0) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Now set the global_lock_owner to be us, wait a while, and
> >+		 * then we see if anyone else has snuck in and changed the
> >+		 * global_lock_owner to be themselves
> >+		 */
> >+		global_mem->global_lock_owner = thread_num;
> >+		odp_sync_stores();
> >+		thread_delay(per_thread_mem, lock_owner_delay);
> >+		if (global_mem->global_lock_owner != thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Release shared lock, and make sure we no longer have it */
> >+		global_mem->global_lock_owner = 0;
> >+		odp_sync_stores();
> >+		odp_spinlock_unlock(&global_mem->global_spinlock);
> >+		if (global_mem->global_lock_owner == thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		if (current_errs == 0)
> >+			lock_owner_delay++;
> >+
> >+		/* Wait a small amount of time and rerun the test */
> >+		thread_delay(per_thread_mem, BASE_DELAY);
> >+
> >+		/* Try to resync all of the threads to increase contention */
> >+		if ((resync_idx < NUM_RESYNC_BARRIERS) &&
> >+		    ((cnt % resync_cnt) == (resync_cnt - 1)))
> >+			odp_barrier_wait(&global_mem->barrier_array[resync_idx++]);
> >+	}
> >+
> >+	if ((global_mem->g_verbose) &&
> >+	    ((sync_failures != 0) || (is_locked_errs != 0)))
> >+		printf("\nThread %u (id=%d core=%d) had %u sync_failures and \
> >+		       %u is_locked_errs in %u iterations\n", thread_num,
> >+		       per_thread_mem->thread_id, per_thread_mem->thread_core,
> >+		       sync_failures, is_locked_errs, iterations);
> >+
> >+	CU_ASSERT(sync_failures == 0);
> >+	CU_ASSERT(is_locked_errs == 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void *ticketlock_functional_test(void *arg)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, resync_cnt, resync_idx, iterations, cnt;
> >+	uint32_t sync_failures, is_locked_errs, current_errs;
> >+	uint32_t lock_owner_delay;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+	iterations = global_mem->g_iterations;
> >+
> >+	/* Wait here until all of the threads have also reached this point */
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	sync_failures = 0;
> >+	is_locked_errs = 0;
> >+	current_errs = 0;
> >+	resync_idx = 0;
> >+	resync_cnt = iterations / NUM_RESYNC_BARRIERS;
> >+	lock_owner_delay = BASE_DELAY;
> >+
> >+	for (cnt = 1; cnt <= iterations; cnt++) {
> >+		/* Acquire the shared global lock */
> >+		odp_ticketlock_lock(&global_mem->global_ticketlock);
> >+
> >+		/* Make sure we have the lock AND didn't previously own it */
> >+		if (odp_ticketlock_is_locked(&global_mem->global_ticketlock)
> >+		    != 1)
> >+			is_locked_errs++;
> >+
> >+		if (global_mem->global_lock_owner != 0) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Now set the global_lock_owner to be us, wait a while, and
> >+		 * then we see if anyone else has snuck in and changed the
> >+		 * global_lock_owner to be themselves
> >+		 */
> >+		global_mem->global_lock_owner = thread_num;
> >+		odp_sync_stores();
> >+		thread_delay(per_thread_mem, lock_owner_delay);
> >+		if (global_mem->global_lock_owner != thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Release shared lock, and make sure we no longer have it */
> >+		global_mem->global_lock_owner = 0;
> >+		odp_sync_stores();
> >+		odp_ticketlock_unlock(&global_mem->global_ticketlock);
> >+		if (global_mem->global_lock_owner == thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		if (current_errs == 0)
> >+			lock_owner_delay++;
> >+
> >+		/* Wait a small amount of time and then rerun the test */
> >+		thread_delay(per_thread_mem, BASE_DELAY);
> >+
> >+		/* Try to resync all of the threads to increase contention */
> >+		if ((resync_idx < NUM_RESYNC_BARRIERS) &&
> >+		    ((cnt % resync_cnt) == (resync_cnt - 1)))
> >+			odp_barrier_wait(&global_mem->barrier_array[resync_idx++]);
> >+	}
> >+
> >+	if ((global_mem->g_verbose) &&
> >+	    ((sync_failures != 0) || (is_locked_errs != 0)))
> >+		printf("\nThread %u (id=%d core=%d) had %u sync_failures and \
> >+		       %u is_locked_errs in %u iterations\n", thread_num,
> >+		       per_thread_mem->thread_id, per_thread_mem->thread_core,
> >+		       sync_failures, is_locked_errs, iterations);
> >+
> >+	CU_ASSERT(sync_failures == 0);
> >+	CU_ASSERT(is_locked_errs == 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void *rwlock_functional_test(void *arg)
> >+{
> >+	global_shared_mem_t *global_mem;
> >+	per_thread_mem_t *per_thread_mem;
> >+	pthrd_arg *thread_arg;
> >+	uint32_t thread_num, resync_cnt, resync_idx, iterations, cnt;
> >+	uint32_t sync_failures, current_errs, lock_owner_delay;
> >+
> >+	thread_arg = (pthrd_arg *)arg;
> >+	thread_num = thread_arg->numthrds;
> >+	per_thread_mem = thread_init();
> >+	global_mem = per_thread_mem->global_mem;
> >+	iterations = global_mem->g_iterations;
> >+
> >+	/* Wait here until all of the threads have also reached this point */
> >+	odp_barrier_wait(&global_mem->global_barrier);
> >+
> >+	sync_failures = 0;
> >+	current_errs = 0;
> >+	resync_idx = 0;
> >+	resync_cnt = iterations / NUM_RESYNC_BARRIERS;
> >+	lock_owner_delay = BASE_DELAY;
> >+
> >+	for (cnt = 1; cnt <= iterations; cnt++) {
> >+		/* Acquire the shared global lock */
> >+		odp_rwlock_write_lock(&global_mem->global_rwlock);
> >+
> >+		/* Make sure we have lock now AND didn't previously own it */
> >+		if (global_mem->global_lock_owner != 0) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Now set the global_lock_owner to be us, wait a while, and
> >+		 * then we see if anyone else has snuck in and changed the
> >+		 * global_lock_owner to be themselves
> >+		 */
> >+		global_mem->global_lock_owner = thread_num;
> >+		odp_sync_stores();
> >+		thread_delay(per_thread_mem, lock_owner_delay);
> >+		if (global_mem->global_lock_owner != thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		/* Release shared lock, and make sure we no longer have it */
> >+		global_mem->global_lock_owner = 0;
> >+		odp_sync_stores();
> >+		odp_rwlock_write_unlock(&global_mem->global_rwlock);
> >+		if (global_mem->global_lock_owner == thread_num) {
> >+			current_errs++;
> >+			sync_failures++;
> >+		}
> >+
> >+		if (current_errs == 0)
> >+			lock_owner_delay++;
> >+
> >+		/* Wait a small amount of time and then rerun the test */
> >+		thread_delay(per_thread_mem, BASE_DELAY);
> >+
> >+		/* Try to resync all of the threads to increase contention */
> >+		if ((resync_idx < NUM_RESYNC_BARRIERS) &&
> >+		    ((cnt % resync_cnt) == (resync_cnt - 1)))
> >+			odp_barrier_wait(&global_mem->barrier_array[resync_idx++]);
> >+	}
> >+
> >+	if ((global_mem->g_verbose) && (sync_failures != 0))
> >+		printf("\nThread %u (id=%d core=%d) had %u sync_failures in \
> >+		       %u iterations\n", thread_num,
> >+		       per_thread_mem->thread_id,
> >+		       per_thread_mem->thread_core,
> >+		       sync_failures, iterations);
> >+
> >+	CU_ASSERT(sync_failures == 0);
> >+
> >+	thread_finalize(per_thread_mem);
> >+
> >+	return NULL;
> >+}
> >+
> >+static void barrier_test_init(void)
> >+{
> >+	uint32_t num_threads, idx;
> >+
> >+	num_threads = global_mem->g_num_threads;
> >+
> >+	for (idx = 0; idx < NUM_TEST_BARRIERS; idx++)
> >+		odp_barrier_init(&global_mem->test_barriers[idx], num_threads);
> >+
> >+	custom_barrier_init(&global_mem->custom_barrier1, num_threads);
> >+	custom_barrier_init(&global_mem->custom_barrier2, num_threads);
> >+
> >+	global_mem->slow_thread_num = 1;
> >+	global_mem->barrier_cnt1 = 1;
> >+	global_mem->barrier_cnt2 = 1;
> >+}
> >+
> >+static void test_atomic_inc_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_inc_u32(&a32u);
> >+}
> >+
> >+static void test_atomic_inc_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_inc_u64(&a64u);
> >+}
> >+
> >+static void test_atomic_dec_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_dec_u32(&a32u);
> >+}
> >+
> >+static void test_atomic_dec_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_dec_u64(&a64u);
> >+}
> >+
> >+static void test_atomic_fetch_inc_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_inc_u32(&a32u);
> >+}
> >+
> >+static void test_atomic_fetch_inc_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_inc_u64(&a64u);
> >+}
> >+
> >+static void test_atomic_fetch_dec_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_dec_u32(&a32u);
> >+}
> >+
> >+static void test_atomic_fetch_dec_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_dec_u64(&a64u);
> >+}
> >+
> >+static void test_atomic_add_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_add_u32(&a32u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_add_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_add_u64(&a64u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_sub_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_sub_u32(&a32u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_sub_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_sub_u64(&a64u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_fetch_add_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_add_u32(&a32u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_fetch_add_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_add_u64(&a64u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_fetch_sub_u32(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_sub_u32(&a32u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_fetch_sub_64(void)
> >+{
> >+	int i;
> >+
> >+	for (i = 0; i < CNT; i++)
> >+		odp_atomic_fetch_sub_u64(&a64u, ADD_SUB_CNT);
> >+}
> >+
> >+static void test_atomic_inc_dec_u32(void)
> >+{
> >+	test_atomic_inc_u32();
> >+	test_atomic_dec_u32();
> >+}
> >+
> >+static void test_atomic_inc_dec_64(void)
> >+{
> >+	test_atomic_inc_64();
> >+	test_atomic_dec_64();
> >+}
> >+
> >+static void test_atomic_fetch_inc_dec_u32(void)
> >+{
> >+	test_atomic_fetch_inc_u32();
> >+	test_atomic_fetch_dec_u32();
> >+}
> >+
> >+static void test_atomic_fetch_inc_dec_64(void)
> >+{
> >+	test_atomic_fetch_inc_64();
> >+	test_atomic_fetch_dec_64();
> >+}
> >+
> >+static void test_atomic_add_sub_u32(void)
> >+{
> >+	test_atomic_add_u32();
> >+	test_atomic_sub_u32();
> >+}
> >+
> >+
> >+static void test_atomic_add_sub_64(void)
> >+{
> >+	test_atomic_add_64();
> >+	test_atomic_sub_64();
> >+}
> >+
> >+static void test_atomic_fetch_add_sub_u32(void)
> >+{
> >+	test_atomic_fetch_add_u32();
> >+	test_atomic_fetch_sub_u32();
> >+}
> >+
> >+static void test_atomic_fetch_add_sub_64(void)
> >+{
> >+	test_atomic_fetch_add_64();
> >+	test_atomic_fetch_sub_64();
> >+}
> >+
> >+static void test_atomic_init(void)
> >+{
> >+	odp_atomic_init_u32(&a32u, 0);
> >+	odp_atomic_init_u64(&a64u, 0);
> >+}
> >+
> >+static void test_atomic_store(void)
> >+{
> >+	odp_atomic_store_u32(&a32u, U32_INIT_VAL);
> >+	odp_atomic_store_u64(&a64u, U64_INIT_VAL);
> >+}
> >+
> >+static void test_atomic_validate(void)
> >+{
> >+	CU_ASSERT(U32_INIT_VAL == odp_atomic_load_u32(&a32u));
> >+	CU_ASSERT(U64_INIT_VAL == odp_atomic_load_u64(&a64u));
> >+}
> >+
> >+/* Barrier tests */
> >+static void test_no_barrier_functional(void)
> >+{
> >+	barrier_test_init();
> >+	odp_execute_parallel(no_barrier_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static void test_barrier_functionality(void)
> >+{
> >+	barrier_test_init();
> >+	odp_execute_parallel(barrier_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static CU_TestInfo test_odp_barrier[] = {
> >+{"no_barrier_functional", test_no_barrier_functional },
> >+{"odp_barrier_functionality", test_barrier_functionality },
> >+CU_TEST_INFO_NULL
> >+};
> >+
> >+/* Thread-unsafe tests */
> >+static void test_no_lock_functionality(void)
> >+{
> >+	odp_execute_parallel(no_lock_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static CU_TestInfo test_no_locking[] = {
> >+{"test_no_lock_functionality", test_no_lock_functionality },
> >+CU_TEST_INFO_NULL
> >+};
> >+
> >+/* Spin lock tests */
> >+static void test_spinlock_api(void)
> >+{
> >+	odp_execute_parallel(spinlock_api_tests, global_mem->g_num_threads);
> >+}
> >+
> >+static void test_spinlock_functionality(void)
> >+{
> >+	odp_spinlock_init(&global_mem->global_spinlock);
> >+	odp_execute_parallel(spinlock_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static CU_TestInfo test_odp_spinlock[] = {
> >+{"odp_spinlock_api", test_spinlock_api},
> >+{"odp_spinlock_functionality", test_spinlock_functionality },
> >+CU_TEST_INFO_NULL
> >+};
> >+
> >+/* Ticket lock tests */
> >+static void test_ticketlock_api(void)
> >+{
> >+	odp_execute_parallel(ticketlock_api_tests, global_mem->g_num_threads);
> >+}
> >+
> >+static void test_ticketlock_functionality(void)
> >+{
> >+	odp_ticketlock_init(&global_mem->global_ticketlock);
> >+	odp_execute_parallel(ticketlock_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static CU_TestInfo test_odp_ticketlock[] = {
> >+{"odp_ticketlock_api", test_ticketlock_api},
> >+{"odp_ticketlock_functionality", test_ticketlock_functionality },
> >+CU_TEST_INFO_NULL
> >+};
> >+
> >+/* RW lock tests */
> >+static void test_rwlock_api(void)
> >+{
> >+	odp_execute_parallel(rwlock_api_tests, global_mem->g_num_threads);
> >+}
> >+
> >+static void test_rwlock_functionality(void)
> >+{
> >+	odp_rwlock_init(&global_mem->global_rwlock);
> >+	odp_execute_parallel(rwlock_functional_test,
> >+			     global_mem->g_num_threads);
> >+}
> >+
> >+static CU_TestInfo test_odp_rwlock[] = {
> >+{"odp_rwlock_api", test_rwlock_api},
> >+{"odp_rwlock_functionality", test_rwlock_functionality},
> >+CU_TEST_INFO_NULL
> >+};
> >+
> >+
> >+static int init_locks(void)
> >+{
> >+	uint32_t num_threads, idx;
> >+
> >+	num_threads = global_mem->g_num_threads;
> >+	odp_barrier_init(&global_mem->global_barrier, num_threads);
> >+	for (idx = 0; idx < NUM_RESYNC_BARRIERS; idx++)
> >+		odp_barrier_init(&global_mem->barrier_array[idx], num_threads);
> >+
> >+	return 0;
> >+}
> >+
> >+int tests_global_init(void)
> >+{
> >+	uint32_t core_count, max_threads, one_thread_per_core;
> >+	int ret = 0;
> >+
> >+	global_shm = odp_shm_reserve(GLOBAL_SHM_NAME,
> >+				     sizeof(global_shared_mem_t), 64,
> >+				     ODP_SHM_SW_ONLY | ODP_SHM_PROC);
> >+	global_mem = odp_shm_addr(global_shm);
> >+	memset(global_mem, 0, sizeof(global_shared_mem_t));
> >+
> >+	global_mem->g_num_threads = MAX_WORKERS / 2;
> >+	global_mem->g_iterations = MAX_ITERATIONS;
> >+	global_mem->g_verbose = VERBOSE;
> >+
> >+	core_count = odp_sys_core_count();
> >+
> >+	one_thread_per_core = 1;
> >+	if (MAX_WORKERS < core_count)
> >+		max_threads = MAX_WORKERS;
> >+	else
> >+		max_threads = core_count;
> >+
> >+	if (max_threads < global_mem->g_num_threads) {
> >+		printf("Requested num of threads is too large\n");
> >+		printf("reducing from %u to %u\n", global_mem->g_num_threads,
> >+		       max_threads);
> >+		global_mem->g_num_threads = max_threads;
> >+	}
> >+
> >+	global_mem->one_thread_per_core = one_thread_per_core;
> >+
> >+	printf("Num of threads used = %u\n", global_mem->g_num_threads);
> >+
> >+	return ret;
> >+}
> >+
> >+/* Atomic tests */
> >+static void test_atomic_basic(void)
> >+{
> >+	test_atomic_init();
> >+	test_atomic_store();
> >+	test_atomic_validate();
> >+}
> >+
> >+static void test_atomic_inc_dec(void)
> >+{
> >+	test_atomic_store();
> >+	test_atomic_inc_dec_u32();
> >+	test_atomic_inc_dec_64();
> >+	test_atomic_validate();
> >+}
> >+
> >+static void test_atomic_add_sub(void)
> >+{
> >+	test_atomic_store();
> >+	test_atomic_add_sub_u32();
> >+	test_atomic_add_sub_64();
> >+	test_atomic_validate();
> >+}
> >+
> >+static void test_atomic_fetch_inc_dec(void)
> >+{
> >+	test_atomic_store();
> >+	test_atomic_fetch_inc_dec_u32();
> >+	test_atomic_fetch_inc_dec_64();
> >+	test_atomic_validate();
> >+}
> >+
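On the cover letter's point that the atomic tests only run single-threaded:
a contended variant could reuse helpers already defined in this file, along
these lines (sketch only, not part of the patch):

	/* Hypothetical: N threads each do CNT increments on a32u; the
	 * final value checks atomicity under contention. */
	static void *atomic_inc_thread(void *arg __attribute__((__unused__)))
	{
		per_thread_mem_t *per_thread_mem = thread_init();

		test_atomic_inc_u32();	/* CNT increments */
		thread_finalize(per_thread_mem);
		return NULL;
	}

	static void test_atomic_contended_inc(void)
	{
		uint32_t num_threads = global_mem->g_num_threads;

		odp_atomic_store_u32(&a32u, 0);
		odp_execute_parallel(atomic_inc_thread, num_threads);
		CU_ASSERT(odp_atomic_load_u32(&a32u) == num_threads * CNT);
	}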
> >+static void test_atomic_fetch_add_sub(void)
> >+{
> >+	test_atomic_store();
> >+	test_atomic_fetch_add_sub_u32();
> >+	test_atomic_fetch_add_sub_64();
> >+	test_atomic_validate();
> >+}
> >+
> >+CU_TestInfo test_odp_atomic[] = {
> >+{"test_odp_atomic_basic", test_atomic_basic},
> >+{"test_odp_atomic_inc_dec", test_atomic_inc_dec},
> >+{"test_odp_atomic_add_sub", test_atomic_add_sub},
> >+{"test_odp_atomic_fetch_inc_dec", test_atomic_fetch_inc_dec},
> >+{"test_odp_atomic_fetch_add_sub", test_atomic_fetch_add_sub},
> >+CU_TEST_INFO_NULL,
> >+};
> >+
> >+CU_SuiteInfo odp_testsuites[] = {
> >+{"odp_barrier", NULL, NULL, NULL, NULL, test_odp_barrier},
> >+{"odp_nolocking", init_locks, NULL, NULL, NULL, test_no_locking},
> >+{"odp_spinlock", init_locks, NULL, NULL, NULL, test_odp_spinlock},
> >+{"odp_ticketlock", init_locks, NULL, NULL, NULL, test_odp_ticketlock},
> >+{"odp_rwlock", init_locks, NULL, NULL, NULL, test_odp_rwlock},
> >+{"odp_atomic", NULL, NULL, NULL, NULL, test_odp_atomic},
> >+CU_SUITE_INFO_NULL
> >+};
> >--
> >1.9.1
> >
> >
>
> _______________________________________________
> lng-odp mailing list
> [email protected]
> http://lists.linaro.org/mailman/listinfo/lng-odp

_______________________________________________
lng-odp mailing list
[email protected]
http://lists.linaro.org/mailman/listinfo/lng-odp
