* Lai Jiangshan ([email protected]) wrote: > urcu-qsbr's read-side quiescence is much longer than normal urcu's ==> > synchronize_rcu() is much slower ==> > rcu_gp_ctr is updated much less often ==> > the whole urcu-qsbr will not be slowed down by false sharing of rcu_gp_ctr.
Hi Lai, Sorry for the late reply, I must have been very busy when you sent this patch. It's indeed a very good idea to make qsbr more similar to other URCU flavors. I'm merging it right away. Thanks! Mathieu > > But this patch makes sense to keep the code of urcu-qsbr like normal urcu, > better readability and maintenance. > > Test: (4*6 CPUs) > Before patch: > [root@localhost userspace-rcu]# ./tests/test_urcu_qsbr 20 1 20 > SUMMARY ./tests/test_urcu_qsbr testdur 20 nr_readers 20 rdur 0 > wdur 0 nr_writers 1 wdelay 0 nr_reads 65498297587 nr_writes > 2000665 nr_ops 65500298252 > [root@localhost userspace-rcu]# ./tests/test_urcu_qsbr 20 1 20 > SUMMARY ./tests/test_urcu_qsbr testdur 20 nr_readers 20 rdur 0 > wdur 0 nr_writers 1 wdelay 0 nr_reads 67218079467 nr_writes > 1981593 nr_ops 67220061060 > > After patch > ./tests/test_urcu_qsbr 20 1 20 > SUMMARY ./tests/test_urcu_qsbr testdur 20 nr_readers 20 rdur 0 > wdur 0 nr_writers 1 wdelay 0 nr_reads 67473798999 nr_writes > 1999151 nr_ops 67475798150 > [root@localhost userspace-rcu]# ./tests/test_urcu_qsbr 20 1 20 > SUMMARY ./tests/test_urcu_qsbr testdur 20 nr_readers 20 rdur 0 > wdur 0 nr_writers 1 wdelay 0 nr_reads 67065521397 nr_writes > 1993956 nr_ops 67067515353 > > > Signed-off-by: Lai Jiangshan <[email protected]> > -- > diff --git a/urcu-qsbr.c b/urcu-qsbr.c > index 3c2c65d..995c564 100644 > --- a/urcu-qsbr.c > +++ b/urcu-qsbr.c > @@ -53,13 +53,7 @@ > void __attribute__((destructor)) rcu_exit(void); > > static pthread_mutex_t rcu_gp_lock = PTHREAD_MUTEX_INITIALIZER; > - > -int32_t rcu_gp_futex; > - > -/* > - * Global grace period counter. > - */ > -unsigned long rcu_gp_ctr = RCU_GP_ONLINE; > +struct urcu_gp rcu_gp = { .ctr = RCU_GP_ONLINE }; > > /* > * Active attempts to check for reader Q.S. before calling futex(). 
> @@ -118,8 +112,8 @@ static void wait_gp(void) > { > /* Read reader_gp before read futex */ > cmm_smp_rmb(); > - if (uatomic_read(&rcu_gp_futex) == -1) > - futex_noasync(&rcu_gp_futex, FUTEX_WAIT, -1, > + if (uatomic_read(&rcu_gp.futex) == -1) > + futex_noasync(&rcu_gp.futex, FUTEX_WAIT, -1, > NULL, NULL, 0); > } > > @@ -133,12 +127,12 @@ static void wait_for_readers(struct cds_list_head > *input_readers, > /* > * Wait for each thread URCU_TLS(rcu_reader).ctr to either > * indicate quiescence (offline), or for them to observe the > - * current rcu_gp_ctr value. > + * current rcu_gp.ctr value. > */ > for (;;) { > wait_loops++; > if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) { > - uatomic_set(&rcu_gp_futex, -1); > + uatomic_set(&rcu_gp.futex, -1); > /* > * Write futex before write waiting (the other side > * reads them in the opposite order). > @@ -177,7 +171,7 @@ static void wait_for_readers(struct cds_list_head > *input_readers, > if (wait_loops >= RCU_QS_ACTIVE_ATTEMPTS) { > /* Read reader_gp before write futex */ > cmm_smp_mb(); > - uatomic_set(&rcu_gp_futex, 0); > + uatomic_set(&rcu_gp.futex, 0); > } > break; > } else { > @@ -253,11 +247,11 @@ void synchronize_rcu(void) > > /* > * Must finish waiting for quiescent state for original parity > - * before committing next rcu_gp_ctr update to memory. Failure > + * before committing next rcu_gp.ctr update to memory. Failure > * to do so could result in the writer waiting forever while new > * readers are always accessing data (no progress). Enforce > * compiler-order of load URCU_TLS(rcu_reader).ctr before store > - * to rcu_gp_ctr. > + * to rcu_gp.ctr. 
> */ > cmm_barrier(); > > @@ -269,13 +263,13 @@ void synchronize_rcu(void) > cmm_smp_mb(); > > /* Switch parity: 0 -> 1, 1 -> 0 */ > - CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); > + CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr ^ RCU_GP_CTR); > > /* > - * Must commit rcu_gp_ctr update to memory before waiting for > + * Must commit rcu_gp.ctr update to memory before waiting for > * quiescent state. Failure to do so could result in the writer > * waiting forever while new readers are always accessing data > - * (no progress). Enforce compiler-order of store to rcu_gp_ctr > + * (no progress). Enforce compiler-order of store to rcu_gp.ctr > * before load URCU_TLS(rcu_reader).ctr. > */ > cmm_barrier(); > @@ -353,13 +347,13 @@ void synchronize_rcu(void) > goto out; > > /* Increment current G.P. */ > - CMM_STORE_SHARED(rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); > + CMM_STORE_SHARED(rcu_gp.ctr, rcu_gp.ctr + RCU_GP_CTR); > > /* > - * Must commit rcu_gp_ctr update to memory before waiting for > + * Must commit rcu_gp.ctr update to memory before waiting for > * quiescent state. Failure to do so could result in the writer > * waiting forever while new readers are always accessing data > - * (no progress). Enforce compiler-order of store to rcu_gp_ctr > + * (no progress). Enforce compiler-order of store to rcu_gp.ctr > * before load URCU_TLS(rcu_reader).ctr. > */ > cmm_barrier(); > diff --git a/urcu/static/urcu-qsbr.h b/urcu/static/urcu-qsbr.h > index f6e5580..2b9bece 100644 > --- a/urcu/static/urcu-qsbr.h > +++ b/urcu/static/urcu-qsbr.h > @@ -119,12 +119,18 @@ static inline void rcu_debug_yield_init(void) > #define RCU_GP_ONLINE (1UL << 0) > #define RCU_GP_CTR (1UL << 1) > > -/* > - * Global quiescent period counter with low-order bits unused. > - * Using a int rather than a char to eliminate false register dependencies > - * causing stalls on some architectures. 
> - */ > -extern unsigned long rcu_gp_ctr; > +struct urcu_gp { > + /* > + * Global quiescent period counter with low-order bits unused. > + * Using a int rather than a char to eliminate false register > + * dependencies causing stalls on some architectures. > + */ > + unsigned long ctr; > + > + int32_t futex; > +} __attribute__((aligned(CAA_CACHE_LINE_SIZE))); > + > +extern struct urcu_gp rcu_gp; > > struct rcu_reader { > /* Data used by both reader and synchronize_rcu() */ > @@ -137,8 +143,6 @@ struct rcu_reader { > > extern DECLARE_URCU_TLS(struct rcu_reader, rcu_reader); > > -extern int32_t rcu_gp_futex; > - > /* > * Wake-up waiting synchronize_rcu(). Called from many concurrent threads. > */ > @@ -147,10 +151,10 @@ static inline void wake_up_gp(void) > if (caa_unlikely(_CMM_LOAD_SHARED(URCU_TLS(rcu_reader).waiting))) { > _CMM_STORE_SHARED(URCU_TLS(rcu_reader).waiting, 0); > cmm_smp_mb(); > - if (uatomic_read(&rcu_gp_futex) != -1) > + if (uatomic_read(&rcu_gp.futex) != -1) > return; > - uatomic_set(&rcu_gp_futex, 0); > - futex_noasync(&rcu_gp_futex, FUTEX_WAKE, 1, > + uatomic_set(&rcu_gp.futex, 0); > + futex_noasync(&rcu_gp.futex, FUTEX_WAKE, 1, > NULL, NULL, 0); > } > } > @@ -162,7 +166,7 @@ static inline enum rcu_state rcu_reader_state(unsigned > long *ctr) > v = CMM_LOAD_SHARED(*ctr); > if (!v) > return RCU_READER_INACTIVE; > - if (v == rcu_gp_ctr) > + if (v == rcu_gp.ctr) > return RCU_READER_ACTIVE_CURRENT; > return RCU_READER_ACTIVE_OLD; > } > @@ -216,7 +220,7 @@ static inline void > _rcu_quiescent_state_update_and_wakeup(unsigned long gp_ctr) > * to be invoked directly from non-LGPL code. > * > * We skip the memory barriers and gp store if our local ctr already > - * matches the global rcu_gp_ctr value: this is OK because a prior > + * matches the global rcu_gp.ctr value: this is OK because a prior > * _rcu_quiescent_state() or _rcu_thread_online() already updated it > * within our thread, so we have no quiescent state to report. 
> */ > @@ -224,7 +228,7 @@ static inline void _rcu_quiescent_state(void) > { > unsigned long gp_ctr; > > - if ((gp_ctr = CMM_LOAD_SHARED(rcu_gp_ctr)) == URCU_TLS(rcu_reader).ctr) > + if ((gp_ctr = CMM_LOAD_SHARED(rcu_gp.ctr)) == URCU_TLS(rcu_reader).ctr) > return; > _rcu_quiescent_state_update_and_wakeup(gp_ctr); > } > @@ -257,7 +261,7 @@ static inline void _rcu_thread_offline(void) > static inline void _rcu_thread_online(void) > { > cmm_barrier(); /* Ensure the compiler does not reorder us with mutex */ > - _CMM_STORE_SHARED(URCU_TLS(rcu_reader).ctr, > CMM_LOAD_SHARED(rcu_gp_ctr)); > + _CMM_STORE_SHARED(URCU_TLS(rcu_reader).ctr, > CMM_LOAD_SHARED(rcu_gp.ctr)); > cmm_smp_mb(); > } > -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com _______________________________________________ lttng-dev mailing list [email protected] http://lists.lttng.org/cgi-bin/mailman/listinfo/lttng-dev
