On Mon, Nov 14, 2016 at 10:30:21AM -0800, Paul E. McKenney wrote: > Currently, IPIs are used to force other CPUs to invalidate their TLBs > in response to a kernel virtual-memory mapping change. This works, but > degrades both battery lifetime (for idle CPUs) and real-time response > (for nohz_full CPUs), and in addition results in unnecessary IPIs due to > the fact that CPUs executing in usermode are unaffected by stale kernel > mappings. It would be better to cause a CPU executing in usermode to > wait until it is entering kernel mode to do the flush, first to avoid > interrupting usermode tasks and second to handle multiple flush requests > with a single flush in the case of a long-running user task. > > This commit therefore reserves a bit at the bottom of the ->dynticks > counter, which is checked upon exit from extended quiescent states. > If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is > invoked, which, if not supplied, is an empty single-pass do-while loop. > If this bottom bit is set on -entry- to an extended quiescent state, > then a WARN_ON_ONCE() triggers. > > This bottom bit may be set using a new rcu_eqs_special_set() function, > which returns true if the bit was set, or false if the CPU turned > out to not be in an extended quiescent state. Please note that this > function refuses to set the bit for a non-nohz_full CPU when that CPU > is executing in usermode because usermode execution is tracked by RCU > as a dyntick-idle extended quiescent state only for nohz_full CPUs. > > Reported-by: Andy Lutomirski <[email protected]> > Signed-off-by: Paul E. 
McKenney <[email protected]> > --- > include/linux/rcutiny.h | 5 +++ > kernel/rcu/tree.c | 81 > +++++++++++++++++++++++++++++++++++++------------ > kernel/rcu/tree.h | 1 + > 3 files changed, 67 insertions(+), 20 deletions(-) > > diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h > index 4f9b2fa2173d..7232d199a81c 100644 > --- a/include/linux/rcutiny.h > +++ b/include/linux/rcutiny.h > @@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks > *rdtp) > return 0; > } > > +static inline bool rcu_eqs_special_set(int cpu) > +{ > + return false; /* Never flag non-existent other CPUs! */ > +} > + > static inline unsigned long get_state_synchronize_rcu(void) > { > return 0; > diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c > index c2b2f5b591b7..2c399db6df6e 100644 > --- a/kernel/rcu/tree.c > +++ b/kernel/rcu/tree.c > @@ -269,9 +269,19 @@ void rcu_bh_qs(void) > > static DEFINE_PER_CPU(int, rcu_sched_qs_mask); > > +/* > + * Steal a bit from the bottom of ->dynticks for idle entry/exit > + * control. Initially this is for TLB flushing. > + */ > +#define RCU_DYNTICK_CTRL_MASK 0x1 > +#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1) > +#ifndef rcu_eqs_special_exit > +#define rcu_eqs_special_exit() do { } while (0) > +#endif > + > static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { > .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, > - .dynticks = ATOMIC_INIT(1), > + .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), > #ifdef CONFIG_NO_HZ_FULL_SYSIDLE > .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE, > .dynticks_idle = ATOMIC_INIT(1), > @@ -285,17 +295,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, > rcu_dynticks) = { > static void rcu_dynticks_eqs_enter(void) > { > struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); > + int seq; > > /* > - * CPUs seeing atomic_inc() must see prior RCU read-side critical > - * sections, and we also must force ordering with the next idle > - * sojourn. 
> + * CPUs seeing atomic_inc_return() must see prior RCU read-side > + * critical sections, and we also must force ordering with the > + * next idle sojourn. > */ > - smp_mb__before_atomic(); /* See above. */ > - atomic_inc(&rdtp->dynticks); > - smp_mb__after_atomic(); /* See above. */ > + seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); > + /* Better be in an extended quiescent state! */ > + WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && > + (seq & RCU_DYNTICK_CTRL_CTR)); > + /* Better not have special action (TLB flush) pending! */
Ah.. you did the clean-up here ;-)
Never mind my previous comment on patch #3 ;-)
Regards,
Boqun
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> - atomic_read(&rdtp->dynticks) & 0x1);
> + (seq & RCU_DYNTICK_CTRL_MASK));
> }
>
> /*
> @@ -305,17 +318,22 @@ static void rcu_dynticks_eqs_enter(void)
> static void rcu_dynticks_eqs_exit(void)
> {
> struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> + int seq;
>
> /*
> - * CPUs seeing atomic_inc() must see prior idle sojourns,
> + * CPUs seeing atomic_inc_return() must see prior idle sojourns,
> * and we also must force ordering with the next RCU read-side
> * critical section.
> */
> - smp_mb__before_atomic(); /* See above. */
> - atomic_inc(&rdtp->dynticks);
> - smp_mb__after_atomic(); /* See above. */
> + seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
> - !(atomic_read(&rdtp->dynticks) & 0x1));
> + !(seq & RCU_DYNTICK_CTRL_CTR));
> + if (seq & RCU_DYNTICK_CTRL_MASK) {
> + rcu_eqs_special_exit();
> + /* Prefer duplicate flushes to losing a flush. */
> + smp_mb__before_atomic(); /* NMI safety. */
> + atomic_and(~RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
> + }
> }
>
> /*
> @@ -332,9 +350,9 @@ static void rcu_dynticks_eqs_online(void)
> {
> struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
>
> - if (atomic_read(&rdtp->dynticks) & 0x1)
> + if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
> return;
> - atomic_add(0x1, &rdtp->dynticks);
> + atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
> }
>
> /*
> @@ -346,7 +364,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
> {
> struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
>
> - return !(atomic_read(&rdtp->dynticks) & 0x1);
> + return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
> }
>
> /*
> @@ -357,7 +375,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
> {
> int snap = atomic_add_return(0, &rdtp->dynticks);
>
> - return snap;
> + return snap & ~RCU_DYNTICK_CTRL_MASK;
> }
>
> /*
> @@ -366,7 +384,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
> */
> static bool rcu_dynticks_in_eqs(int snap)
> {
> - return !(snap & 0x1);
> + return !(snap & RCU_DYNTICK_CTRL_CTR);
> }
>
> /*
> @@ -386,10 +404,33 @@ static bool rcu_dynticks_in_eqs_since(struct
> rcu_dynticks *rdtp, int snap)
> static void rcu_dynticks_momentary_idle(void)
> {
> struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
> - int special = atomic_add_return(2, &rdtp->dynticks);
> + int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
> + &rdtp->dynticks);
>
> /* It is illegal to call this from idle state. */
> - WARN_ON_ONCE(!(special & 0x1));
> + WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
> +}
> +
> +/*
> + * Set the special (bottom) bit of the specified CPU so that it
> + * will take special action (such as flushing its TLB) on the
> + * next exit from an extended quiescent state. Returns true if
> + * the bit was successfully set, or false if the CPU was not in
> + * an extended quiescent state.
> + */
> +bool rcu_eqs_special_set(int cpu)
> +{
> + int old;
> + int new;
> + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
> +
> + do {
> + old = atomic_read(&rdtp->dynticks);
> + if (old & RCU_DYNTICK_CTRL_CTR)
> + return false;
> + new = old | RCU_DYNTICK_CTRL_MASK;
> + } while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
> + return true;
> }
>
> DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
> diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
> index 3b953dcf6afc..7dcdd59d894c 100644
> --- a/kernel/rcu/tree.h
> +++ b/kernel/rcu/tree.h
> @@ -596,6 +596,7 @@ extern struct rcu_state rcu_preempt_state;
> #endif /* #ifdef CONFIG_PREEMPT_RCU */
>
> int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
> +bool rcu_eqs_special_set(int cpu);
>
> #ifdef CONFIG_RCU_BOOST
> DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
> --
> 2.5.2
>
signature.asc
Description: PGP signature

