* Paul E. McKenney ([email protected]) wrote: > On Mon, Jun 06, 2011 at 01:45:59PM -0400, Mathieu Desnoyers wrote: > > I played a bit with the call_rcu() implementation alongside with my > > rbtree tests, and noticed the following: > > > > If I use per-cpu call_rcu threads with URCU_CALL_RCU_RT flag, with one > > updater thread only for my rbtree (no reader), I get 38365 updates/s. > > If I add cpu affinity to these per-cpu call_rcu threads (I have prepared > > a patch that does this), it jumps to 54219 updates/s. So it looks like > > keeping per-cpu affinity for the call_rcu thread is a good thing. > > Makes sense to me! > > Main comment is that API.h should also be updated.
I updated API.txt. What is API.h? Mathieu > > Thanx, Paul > > > Signed-off-by: Mathieu Desnoyers <[email protected]> > > --- > > API.txt | 7 +++++-- > > tests/rcutorture.h | 4 ++-- > > urcu-call-rcu-impl.h | 51 > > ++++++++++++++++++++++++++++++++++++++++++++------- > > urcu-call-rcu.h | 3 ++- > > urcu-qsbr.c | 1 + > > urcu.c | 1 + > > 6 files changed, 55 insertions(+), 12 deletions(-) > > > > Index: userspace-rcu/API.txt > > =================================================================== > > --- userspace-rcu.orig/API.txt > > +++ userspace-rcu/API.txt > > @@ -59,12 +59,15 @@ void call_rcu(struct rcu_head *head, > > > > call_rcu(&p->rcu, func); > > > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags); > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags, > > + int cpu_affinity); > > > > Returns a handle that can be passed to the following > > primitives. The "flags" argument can be zero, or can be > > URCU_CALL_RCU_RT if the worker threads associated with the > > - new helper thread are to get real-time response. > > + new helper thread are to get real-time response. The argument > > + "cpu_affinity" specifies a cpu on which the call_rcu thread should > > + be affined to. It is ignored if negative. 
> > > > struct call_rcu_data *get_default_call_rcu_data(void); > > > > Index: userspace-rcu/tests/rcutorture.h > > =================================================================== > > --- userspace-rcu.orig/tests/rcutorture.h > > +++ userspace-rcu/tests/rcutorture.h > > @@ -156,7 +156,7 @@ void *rcu_update_perf_test(void *arg) > > if ((random() & 0xf00) == 0) { > > struct call_rcu_data *crdp; > > > > - crdp = create_call_rcu_data(0); > > + crdp = create_call_rcu_data(0, -1); > > if (crdp != NULL) { > > fprintf(stderr, > > "Using per-thread call_rcu() worker.\n"); > > @@ -385,7 +385,7 @@ void *rcu_fake_update_stress_test(void * > > if ((random() & 0xf00) == 0) { > > struct call_rcu_data *crdp; > > > > - crdp = create_call_rcu_data(0); > > + crdp = create_call_rcu_data(0, -1); > > if (crdp != NULL) { > > fprintf(stderr, > > "Using per-thread call_rcu() worker.\n"); > > Index: userspace-rcu/urcu-call-rcu-impl.h > > =================================================================== > > --- userspace-rcu.orig/urcu-call-rcu-impl.h > > +++ userspace-rcu/urcu-call-rcu-impl.h > > @@ -20,6 +20,7 @@ > > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > > 02110-1301 USA > > */ > > > > +#define _GNU_SOURCE > > #include <stdio.h> > > #include <pthread.h> > > #include <signal.h> > > @@ -31,6 +32,7 @@ > > #include <sys/time.h> > > #include <syscall.h> > > #include <unistd.h> > > +#include <sched.h> > > > > #include "config.h" > > #include "urcu/wfqueue.h" > > @@ -47,6 +49,7 @@ struct call_rcu_data { > > pthread_cond_t cond; > > unsigned long qlen; > > pthread_t tid; > > + int cpu_affinity; > > struct cds_list_head list; > > } __attribute__((aligned(CAA_CACHE_LINE_SIZE))); > > > > @@ -146,6 +149,31 @@ static void call_rcu_unlock(pthread_mute > > } > > } > > > > +#if HAVE_SCHED_SETAFFINITY > > +static > > +int set_thread_cpu_affinity(struct call_rcu_data *crdp) > > +{ > > + cpu_set_t mask; > > + > > + if (crdp->cpu_affinity < 0) > > + return 0; > > + > > + 
CPU_ZERO(&mask); > > + CPU_SET(crdp->cpu_affinity, &mask); > > +#if SCHED_SETAFFINITY_ARGS == 2 > > + return sched_setaffinity(0, &mask); > > +#else > > + return sched_setaffinity(0, sizeof(mask), &mask); > > +#endif > > +} > > +#else > > +static > > +int set_thread_cpu_affinity(struct call_rcu_data *crdp) > > +{ > > + return 0; > > +} > > +#endif > > + > > /* This is the code run by each call_rcu thread. */ > > > > static void *call_rcu_thread(void *arg) > > @@ -156,6 +184,11 @@ static void *call_rcu_thread(void *arg) > > struct call_rcu_data *crdp = (struct call_rcu_data *)arg; > > struct rcu_head *rhp; > > > > + if (set_thread_cpu_affinity(crdp) != 0) { > > + perror("pthread_setaffinity_np"); > > + exit(-1); > > + } > > + > > thread_call_rcu_data = crdp; > > for (;;) { > > if (&crdp->cbs.head != _CMM_LOAD_SHARED(crdp->cbs.tail)) { > > @@ -214,7 +247,8 @@ static void *call_rcu_thread(void *arg) > > */ > > > > static void call_rcu_data_init(struct call_rcu_data **crdpp, > > - unsigned long flags) > > + unsigned long flags, > > + int cpu_affinity) > > { > > struct call_rcu_data *crdp; > > > > @@ -236,6 +270,7 @@ static void call_rcu_data_init(struct ca > > } > > crdp->flags = flags | URCU_CALL_RCU_RUNNING; > > cds_list_add(&crdp->list, &call_rcu_data_list); > > + crdp->cpu_affinity = cpu_affinity; > > cmm_smp_mb(); /* Structure initialized before pointer is planted. */ > > *crdpp = crdp; > > if (pthread_create(&crdp->tid, NULL, call_rcu_thread, crdp) != 0) { > > @@ -280,20 +315,22 @@ pthread_t get_call_rcu_thread(struct cal > > * Create a call_rcu_data structure (with thread) and return a pointer. 
> > */ > > > > -static struct call_rcu_data *__create_call_rcu_data(unsigned long flags) > > +static struct call_rcu_data *__create_call_rcu_data(unsigned long flags, > > + int cpu_affinity) > > { > > struct call_rcu_data *crdp; > > > > - call_rcu_data_init(&crdp, flags); > > + call_rcu_data_init(&crdp, flags, cpu_affinity); > > return crdp; > > } > > > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags) > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags, > > + int cpu_affinity) > > { > > struct call_rcu_data *crdp; > > > > call_rcu_lock(&call_rcu_mutex); > > - crdp = __create_call_rcu_data(flags); > > + crdp = __create_call_rcu_data(flags, cpu_affinity); > > call_rcu_unlock(&call_rcu_mutex); > > return crdp; > > } > > @@ -346,7 +383,7 @@ struct call_rcu_data *get_default_call_r > > call_rcu_unlock(&call_rcu_mutex); > > return default_call_rcu_data; > > } > > - call_rcu_data_init(&default_call_rcu_data, 0); > > + call_rcu_data_init(&default_call_rcu_data, 0, -1); > > call_rcu_unlock(&call_rcu_mutex); > > return default_call_rcu_data; > > } > > @@ -434,7 +471,7 @@ int create_all_cpu_call_rcu_data(unsigne > > call_rcu_unlock(&call_rcu_mutex); > > continue; > > } > > - crdp = __create_call_rcu_data(flags); > > + crdp = __create_call_rcu_data(flags, i); > > if (crdp == NULL) { > > call_rcu_unlock(&call_rcu_mutex); > > errno = ENOMEM; > > Index: userspace-rcu/urcu-call-rcu.h > > =================================================================== > > --- userspace-rcu.orig/urcu-call-rcu.h > > +++ userspace-rcu/urcu-call-rcu.h > > @@ -64,7 +64,8 @@ struct rcu_head { > > */ > > struct call_rcu_data *get_cpu_call_rcu_data(int cpu); > > pthread_t get_call_rcu_thread(struct call_rcu_data *crdp); > > -struct call_rcu_data *create_call_rcu_data(unsigned long flags); > > +struct call_rcu_data *create_call_rcu_data(unsigned long flags, > > + int cpu_affinity); > > int set_cpu_call_rcu_data(int cpu, struct call_rcu_data *crdp); > > struct 
call_rcu_data *get_default_call_rcu_data(void); > > struct call_rcu_data *get_call_rcu_data(void); > > Index: userspace-rcu/urcu-qsbr.c > > =================================================================== > > --- userspace-rcu.orig/urcu-qsbr.c > > +++ userspace-rcu/urcu-qsbr.c > > @@ -23,6 +23,7 @@ > > * IBM's contributions to this file may be relicensed under LGPLv2 or > > later. > > */ > > > > +#define _GNU_SOURCE > > #include <stdio.h> > > #include <pthread.h> > > #include <signal.h> > > Index: userspace-rcu/urcu.c > > =================================================================== > > --- userspace-rcu.orig/urcu.c > > +++ userspace-rcu/urcu.c > > @@ -24,6 +24,7 @@ > > */ > > > > #define _BSD_SOURCE > > +#define _GNU_SOURCE > > #include <stdio.h> > > #include <pthread.h> > > #include <signal.h> > > -- Mathieu Desnoyers Operating System Efficiency R&D Consultant EfficiOS Inc. http://www.efficios.com _______________________________________________ ltt-dev mailing list [email protected] http://lists.casi.polymtl.ca/cgi-bin/mailman/listinfo/ltt-dev
