* Thomas Gleixner <[email protected]> wrote:

> From: Thomas Gleixner <[email protected]>
> 
> After a lengthy discussion [1] it turned out that RCU does not need a full
> rcu_irq_enter/exit() when RCU is already watching. All it needs if
> NOHZ_FULL is active is to check whether the tick needs to be restarted.
> 
> This makes it possible to avoid a separate variant for the pagefault
> handler, which cannot invoke rcu_irq_enter() on a kernel pagefault which
> might sleep.
> 
> The cond_rcu argument is only temporary and will be removed once the
> existing users of idtentry_enter/exit() have been cleaned up. After that
> the code can be significantly simplified.
> 
> Signed-off-by: Thomas Gleixner <[email protected]>
> Link: [1] https://lkml.kernel.org/r/[email protected]
> ---
> V9: Reworked to the new RCU mode
> ---
>  arch/x86/entry/common.c         |   84 ++++++++++++++++++++++++++++++----------
>  arch/x86/include/asm/idtentry.h |   14 +++++-
>  2 files changed, 77 insertions(+), 21 deletions(-)
> 
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -512,8 +512,10 @@ SYSCALL_DEFINE0(ni_syscall)
>  }
>  
>  /**
> - * idtentry_enter - Handle state tracking on idtentry
> + * idtentry_enter_cond_rcu - Handle state tracking on idtentry with conditional
> + *                        RCU handling
>   * @regs:    Pointer to pt_regs of interrupted context
> + * @cond_rcu:        Invoke rcu_irq_enter() only if RCU is not watching
>   *
>   * Invokes:
>   *  - lockdep irqflag state tracking as low level ASM entry disabled
> @@ -521,40 +523,81 @@ SYSCALL_DEFINE0(ni_syscall)
>   *
>   *  - Context tracking if the exception hit user mode.
>   *
> - *  - RCU notification if the exception hit kernel mode.
> - *
>   *  - The hardirq tracer to keep the state consistent as low level ASM
>   *    entry disabled interrupts.
> + *
> + * For kernel mode entries RCU handling is done conditionally. If RCU is
> + * watching then the only RCU requirement is to check whether the tick has
> + * to be restarted. If RCU is not watching then rcu_irq_enter() has to be
> + * invoked on entry and rcu_irq_exit() on exit.
> + *
> + * Avoiding the rcu_irq_enter/exit() calls is an optimization but also
> + * solves the problem of kernel mode pagefaults which can schedule, which
> + * is not possible after invoking rcu_irq_enter() without undoing it.
> + *
> + * For user mode entries enter_from_user_mode() must be invoked to
> + * establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
> + * would not be possible.
> + *
> + * Returns: True if RCU has been adjusted on a kernel entry
> + *       False otherwise
> + *
> + * The return value must be fed into the rcu_exit argument of
> + * idtentry_exit_cond_rcu().
>   */
> -void noinstr idtentry_enter(struct pt_regs *regs)
> +bool noinstr idtentry_enter_cond_rcu(struct pt_regs *regs, bool cond_rcu)
>  {
>       if (user_mode(regs)) {
>               enter_from_user_mode();
>       } else {
> -             lockdep_hardirqs_off(CALLER_ADDR0);
> -             rcu_irq_enter();
> -             instrumentation_begin();
> -             trace_hardirqs_off_prepare();
> -             instrumentation_end();
> +             if (!cond_rcu || !__rcu_is_watching()) {
> +                     /*
> +                      * If RCU is not watching then the same careful
> +                      * sequence vs. lockdep and tracing is required
> +                      * as in enter_from_user_mode().
> +                      *
> +                      * This only happens for IRQs that hit the idle
> +                      * loop, i.e. if idle is not using MWAIT.
> +                      */
> +                     lockdep_hardirqs_off(CALLER_ADDR0);
> +                     rcu_irq_enter();
> +                     instrumentation_begin();
> +                     trace_hardirqs_off_prepare();
> +                     instrumentation_end();
> +                     return true;
> +             } else {
> +                     /*
> +                      * If RCU is watching then RCU only wants to check
> +                      * whether it needs to restart the tick in NOHZ
> +                      * mode.
> +                      */
> +                     instrumentation_begin();
> +                     rcu_irq_enter_check_tick();
> +                     /* Use the combo lockdep/tracing function */
> +                     trace_hardirqs_off();
> +                     instrumentation_end();
> +             }
>       }
> +     return false;
>  }

Any objections to the simplified/flattened control flow below?

Thanks,

        Ingo

 common.c |   59 +++++++++++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 28 deletions(-)
Index: tip/arch/x86/entry/common.c
===================================================================
--- tip.orig/arch/x86/entry/common.c
+++ tip/arch/x86/entry/common.c
@@ -549,35 +549,38 @@ bool noinstr idtentry_enter_cond_rcu(str
 {
        if (user_mode(regs)) {
                enter_from_user_mode();
-       } else {
-               if (!cond_rcu || !__rcu_is_watching()) {
-                       /*
-                        * If RCU is not watching then the same careful
-                        * sequence vs. lockdep and tracing is required
-                        * as in enter_from_user_mode().
-                        *
-                        * This only happens for IRQs that hit the idle
-                        * loop, i.e. if idle is not using MWAIT.
-                        */
-                       lockdep_hardirqs_off(CALLER_ADDR0);
-                       rcu_irq_enter();
-                       instrumentation_begin();
-                       trace_hardirqs_off_prepare();
-                       instrumentation_end();
-                       return true;
-               } else {
-                       /*
-                        * If RCU is watching then RCU only wants to check
-                        * whether it needs to restart the tick in NOHZ
-                        * mode.
-                        */
-                       instrumentation_begin();
-                       rcu_irq_enter_check_tick();
-                       /* Use the combo lockdep/tracing function */
-                       trace_hardirqs_off();
-                       instrumentation_end();
-               }
+               return false;
        }
+
+       if (!cond_rcu || !__rcu_is_watching()) {
+               /*
+                * If RCU is not watching then the same careful
+                * sequence vs. lockdep and tracing is required
+                * as in enter_from_user_mode().
+                *
+                * This only happens for IRQs that hit the idle
+                * loop, i.e. if idle is not using MWAIT.
+                */
+               lockdep_hardirqs_off(CALLER_ADDR0);
+               rcu_irq_enter();
+               instrumentation_begin();
+               trace_hardirqs_off_prepare();
+               instrumentation_end();
+
+               return true;
+       }
+
+       /*
+        * If RCU is watching then RCU only wants to check
+        * whether it needs to restart the tick in NOHZ
+        * mode.
+        */
+       instrumentation_begin();
+       rcu_irq_enter_check_tick();
+       /* Use the combo lockdep/tracing function */
+       trace_hardirqs_off();
+       instrumentation_end();
+
        return false;
 }
 

Reply via email to