On Mon, Nov 23, 2020 at 04:17PM +0100, Marco Elver wrote:
> On Mon, 23 Nov 2020 at 14:55, Peter Zijlstra <[email protected]> wrote:
> > On Mon, Nov 23, 2020 at 02:23:00PM +0100, Marco Elver wrote:
> > > When enabling KCSAN for kernel/sched (remove KCSAN_SANITIZE := n from
> > > kernel/sched/Makefile), with CONFIG_DEBUG_PREEMPT=y, we can observe
> > > recursion due to:
> > >
> > >       check_access() [via instrumentation]
> > >         kcsan_setup_watchpoint()
> > >           reset_kcsan_skip()
> > >             kcsan_prandom_u32_max()
> > >               get_cpu_var()
> > >                 preempt_disable()
> > >                   preempt_count_add() [in kernel/sched/core.c]
> > >                     check_access() [via instrumentation]
> > >
> > > Avoid this by rewriting kcsan_prandom_u32_max() to only use safe
> > > versions of preempt_disable() and preempt_enable() that do not call into
> > > scheduler code.
> > >
> > > Note, while this currently does not affect an unmodified kernel, it'd be
> > > good to keep a KCSAN kernel working when KCSAN_SANITIZE := n is removed
> > > from kernel/sched/Makefile to permit testing scheduler code with KCSAN
> > > if desired.
> > >
> > > Fixes: cd290ec24633 ("kcsan: Use tracing-safe version of prandom")
> > > Signed-off-by: Marco Elver <[email protected]>
> > > ---
> > > v2:
> > > * Update comment to also point out preempt_enable().
> > > ---
> > >  kernel/kcsan/core.c | 15 ++++++++++++---
> > >  1 file changed, 12 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> > > index 3994a217bde7..10513f3e2349 100644
> > > --- a/kernel/kcsan/core.c
> > > +++ b/kernel/kcsan/core.c
> > > @@ -284,10 +284,19 @@ should_watch(const volatile void *ptr, size_t size, 
> > > int type, struct kcsan_ctx *
> > >   */
> > >  static u32 kcsan_prandom_u32_max(u32 ep_ro)
> > >  {
> > > -     struct rnd_state *state = &get_cpu_var(kcsan_rand_state);
> > > -     const u32 res = prandom_u32_state(state);
> > > +     struct rnd_state *state;
> > > +     u32 res;
> > > +
> > > +     /*
> > > +      * Avoid recursion with scheduler by using non-tracing versions of
> > > +      * preempt_disable() and preempt_enable() that do not call into
> > > +      * scheduler code.
> > > +      */
> > > +     preempt_disable_notrace();
> > > +     state = raw_cpu_ptr(&kcsan_rand_state);
> > > +     res = prandom_u32_state(state);
> > > +     preempt_enable_no_resched_notrace();
> >
> > This is a preemption bug. Does preempt_enable_notrace() not work?
> 
> No it didn't, because we end up calling preempt_schedule_notrace(),
> which again might end in recursion.
> 
> Normally we could surround this by
> kcsan_disable_current/kcsan_enable_current(), but that doesn't work
> because we have this sequence:
> 
>      reset_kcsan_skip();
>      if (!kcsan_is_enabled())
>          ...
> 
> to avoid underflowing the skip counter if KCSAN is disabled. That
> could be solved by writing to the skip-counter twice: once with a
> non-random value, and if KCSAN is enabled with a random value. Would
> that be better?

See below for concrete alternative that works.

> And I'd like to avoid adding __no_kcsan to scheduler functions.
> 
> Any recommendation?

Let me know what you prefer.

Thanks,
-- Marco

------ >8 ------

diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 10513f3e2349..c8eadef3f42a 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -266,8 +266,8 @@ should_watch(const volatile void *ptr, size_t size, int 
type, struct kcsan_ctx *
                return false;
 
        /*
-        * NOTE: If we get here, kcsan_skip must always be reset in slow path
-        * via reset_kcsan_skip() to avoid underflow.
+        * Note: If we get here, kcsan_skip must always be reset in slow path to
+        * avoid underflow.
         */
 
        /* this operation should be watched */
@@ -288,27 +288,19 @@ static u32 kcsan_prandom_u32_max(u32 ep_ro)
        u32 res;
 
        /*
-        * Avoid recursion with scheduler by using non-tracing versions of
-        * preempt_disable() and preempt_enable() that do not call into
-        * scheduler code.
+        * Avoid recursion with scheduler by disabling KCSAN because
+        * preempt_enable_notrace() will still call into scheduler code.
         */
+       kcsan_disable_current();
        preempt_disable_notrace();
        state = raw_cpu_ptr(&kcsan_rand_state);
        res = prandom_u32_state(state);
-       preempt_enable_no_resched_notrace();
+       preempt_enable_notrace();
+       kcsan_enable_current_nowarn();
 
        return (u32)(((u64) res * ep_ro) >> 32);
 }
 
-static inline void reset_kcsan_skip(void)
-{
-       long skip_count = kcsan_skip_watch -
-                         (IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
-                                  kcsan_prandom_u32_max(kcsan_skip_watch) :
-                                  0);
-       this_cpu_write(kcsan_skip, skip_count);
-}
-
 static __always_inline bool kcsan_is_enabled(void)
 {
        return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
@@ -430,10 +422,16 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t 
size, int type)
         * Always reset kcsan_skip counter in slow-path to avoid underflow; see
         * should_watch().
         */
-       reset_kcsan_skip();
-
-       if (!kcsan_is_enabled())
+       if (likely(kcsan_is_enabled())) {
+               long skip_count = kcsan_skip_watch -
+                                 
(IS_ENABLED(CONFIG_KCSAN_SKIP_WATCH_RANDOMIZE) ?
+                                          
kcsan_prandom_u32_max(kcsan_skip_watch) :
+                                          0);
+               this_cpu_write(kcsan_skip, skip_count);
+       } else {
+               this_cpu_write(kcsan_skip, kcsan_skip_watch);
                goto out;
+       }
 
        /*
         * Special atomic rules: unlikely to be true, so we check them here in
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 5fc9c9b70862..21fb5a5662b5 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -7,12 +7,6 @@ endif
 # that is not a function of syscall inputs. E.g. involuntary context switches.
 KCOV_INSTRUMENT := n
 
-# There are numerous data races here, however, most of them are due to plain 
accesses.
-# This would make it even harder for syzbot to find reproducers, because these
-# bugs trigger without specific input. Disable by default, but should re-enable
-# eventually.
-KCSAN_SANITIZE := n
-
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <[email protected]>, the -fno-omit-frame-pointer 
is
 # needed for x86 only.  Why this used to be enabled for all architectures is 
beyond

Reply via email to