Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Wed, Jul 07, 2021 at 10:47:06AM +0300, Anatoly Pugachev wrote: > On Tue, Jul 6, 2021 at 3:00 PM Mark Rutland wrote: > > On Tue, Jul 06, 2021 at 02:51:06PM +0300, Anatoly Pugachev wrote: > > > On Tue, Jul 6, 2021 at 12:11 PM Mark Rutland wrote: > > > > Fixes: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") > > > > Signed-off-by: Mark Rutland > > > > Reported-by: Anatoly Pugachev > > > > Cc: "David S. Miller" > > > > Cc: Peter Zijlstra > > > > --- > > > > arch/sparc/include/asm/cmpxchg_64.h | 2 +- > > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > > > diff --git a/arch/sparc/include/asm/cmpxchg_64.h > > > > b/arch/sparc/include/asm/cmpxchg_64.h > > > > index 8c39a9981187..12d00a42c0a3 100644 > > > > --- a/arch/sparc/include/asm/cmpxchg_64.h > > > > +++ b/arch/sparc/include/asm/cmpxchg_64.h > > > > @@ -201,7 +201,7 @@ static inline unsigned long > > > > __cmpxchg_local(volatile void *ptr, > > > > #define arch_cmpxchg64_local(ptr, o, n) > > > > \ > > > >({ > > > > \ > > > > BUILD_BUG_ON(sizeof(*(ptr)) != 8); > > > > \ > > > > - cmpxchg_local((ptr), (o), (n)); > > > > \ > > > > + arch_cmpxchg_local((ptr), (o), (n)); > > > > \ > > > >}) > > > > #define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), > > > > (o), (n)) > > > > > > > > > Mark, thanks, fixed... > > > tested on git kernel 5.13.0-11788-g79160a603bdb-dirty (dirty - cause > > > patch has been applied). > > > > Great! Thanks for confirming. > > > > Peter, are you happy to pick that (full commit in last mail), or should > > I send a new copy? > > It would be nice if patch could hit the kernel before v5.14-rc1 Absolutely; I'll resend this on it's own so that it's easier for folk to pick, and I'll poke people about picking it. Mark.
Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Tue, Jul 6, 2021 at 3:00 PM Mark Rutland wrote: > On Tue, Jul 06, 2021 at 02:51:06PM +0300, Anatoly Pugachev wrote: > > On Tue, Jul 6, 2021 at 12:11 PM Mark Rutland wrote: > > > Fixes: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") > > > Signed-off-by: Mark Rutland > > > Reported-by: Anatoly Pugachev > > > Cc: "David S. Miller" > > > Cc: Peter Zijlstra > > > --- > > > arch/sparc/include/asm/cmpxchg_64.h | 2 +- > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > diff --git a/arch/sparc/include/asm/cmpxchg_64.h > > > b/arch/sparc/include/asm/cmpxchg_64.h > > > index 8c39a9981187..12d00a42c0a3 100644 > > > --- a/arch/sparc/include/asm/cmpxchg_64.h > > > +++ b/arch/sparc/include/asm/cmpxchg_64.h > > > @@ -201,7 +201,7 @@ static inline unsigned long __cmpxchg_local(volatile > > > void *ptr, > > > #define arch_cmpxchg64_local(ptr, o, n) > > > \ > > >({ \ > > > BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ > > > - cmpxchg_local((ptr), (o), (n)); \ > > > + arch_cmpxchg_local((ptr), (o), (n)); > > > \ > > >}) > > > #define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), (o), > > > (n)) > > > > > > Mark, thanks, fixed... > > tested on git kernel 5.13.0-11788-g79160a603bdb-dirty (dirty - cause > > patch has been applied). > > Great! Thanks for confirming. > > Peter, are you happy to pick that (full commit in last mail), or should > I send a new copy? It would be nice if patch could hit the kernel before v5.14-rc1 Thanks.
Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Tue, Jul 06, 2021 at 02:51:06PM +0300, Anatoly Pugachev wrote: > On Tue, Jul 6, 2021 at 12:11 PM Mark Rutland wrote: > > Fixes: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") > > Signed-off-by: Mark Rutland > > Reported-by: Anatoly Pugachev > > Cc: "David S. Miller" > > Cc: Peter Zijlstra > > --- > > arch/sparc/include/asm/cmpxchg_64.h | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/arch/sparc/include/asm/cmpxchg_64.h > > b/arch/sparc/include/asm/cmpxchg_64.h > > index 8c39a9981187..12d00a42c0a3 100644 > > --- a/arch/sparc/include/asm/cmpxchg_64.h > > +++ b/arch/sparc/include/asm/cmpxchg_64.h > > @@ -201,7 +201,7 @@ static inline unsigned long __cmpxchg_local(volatile > > void *ptr, > > #define arch_cmpxchg64_local(ptr, o, n) > > \ > >({ \ > > BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ > > - cmpxchg_local((ptr), (o), (n)); \ > > + arch_cmpxchg_local((ptr), (o), (n)); > > \ > >}) > > #define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), (o), > > (n)) > > > Mark, thanks, fixed... > tested on git kernel 5.13.0-11788-g79160a603bdb-dirty (dirty - cause > patch has been applied). Great! Thanks for confirming. Peter, are you happy to pick that (full commit in last mail), or should I send a new copy? Thanks, Mark.
Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Tue, Jul 6, 2021 at 12:11 PM Mark Rutland wrote: > Fixes: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") > Signed-off-by: Mark Rutland > Reported-by: Anatoly Pugachev > Cc: "David S. Miller" > Cc: Peter Zijlstra > --- > arch/sparc/include/asm/cmpxchg_64.h | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/sparc/include/asm/cmpxchg_64.h > b/arch/sparc/include/asm/cmpxchg_64.h > index 8c39a9981187..12d00a42c0a3 100644 > --- a/arch/sparc/include/asm/cmpxchg_64.h > +++ b/arch/sparc/include/asm/cmpxchg_64.h > @@ -201,7 +201,7 @@ static inline unsigned long __cmpxchg_local(volatile void > *ptr, > #define arch_cmpxchg64_local(ptr, o, n) > \ >({ \ > BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ > - cmpxchg_local((ptr), (o), (n)); \ > + arch_cmpxchg_local((ptr), (o), (n)); > \ >}) > #define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), (o), (n)) Mark, thanks, fixed... tested on git kernel 5.13.0-11788-g79160a603bdb-dirty (dirty - cause patch has been applied).
Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Mon, Jul 05, 2021 at 08:56:54PM +0100, Mark Rutland wrote: > On Mon, Jul 05, 2021 at 06:16:49PM +0300, Anatoly Pugachev wrote: > > Hello! > > Hi Anatoly, > > > latest sparc64 git kernel produces the following OOPS on running stress-ng > > as : > > > > $ stress-ng -v --mmap 1 -t 30s > > > > kernel OOPS (console logs): > > > > [ 27.276719] Unable to handle kernel NULL pointer dereference > > [ 27.276782] tsk->{mm,active_mm}->context = 03cb > > [ 27.276818] tsk->{mm,active_mm}->pgd = fff83a2a > > [ 27.276853] \|/ \|/ > > [ 27.276853] "@'/ .. \`@" > > [ 27.276853] /_| \__/ |_\ > > [ 27.276853] \__U_/ > > [ 27.276927] stress-ng(928): Oops [#1] > > I can reproduce this under QEMU; following your bisection (and working > around the missing ifdeferry that breaks bisection), I can confirm that > the first broken commit is: > > ff5b4f1ed580 ("locking/atomic: sparc: move to ARCH_ATOMIC") > > Sorry about this. > > > Can someone please look at this commit ids? > > From digging into this, I can't spot an obvious bug in the commit above. Looking again with fresh eyes, there is a trivial bug after all. Could you give the patch below a spin? It works for me locally under QEMU. Sorry again about this! Thanks, Mark >8 >From afb683b2ce749dca426d27f05af3ea08455a52d7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 6 Jul 2021 09:55:56 +0100 Subject: [PATCH] locking/atomic: sparc: fix arch_cmpxchg64_local() Anatoly reports that since commit: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") ... it's possible to reliably trigger an oops by running: stress-ng -v --mmap 1 -t 30s ... which results in a NULL pointer dereference in __split_huge_pmd_locked(). The underlying problem is that commit ff5b4f1ed580c59d left arch_cmpxchg64_local() defined in terms of cmpxchg_local() rather than arch_cmpxchg_local(). In we wrap these with macros which use identically-named variables. When cmpxchg_local() nests inside cmpxchg64_local(), this casues it to use an unitialized variable as the pointer, which can be NULL. This can also be seen in pmdp_establish(), where the compiler can generate the pointer with a `clr` instruction: 0360 : 360: 9d e3 bf 50 save %sp, -176, %sp 364: fa 5e 80 00 ldx [ %i2 ], %i5 368: 82 10 00 1b mov %i3, %g1 36c: 84 10 20 00 clr %g2 370: c3 f0 90 1d casx [ %g2 ], %i5, %g1 374: 80 a7 40 01 cmp %i5, %g1 378: 32 6f ff fc bne,a %xcc, 368 37c: fa 5e 80 00 ldx [ %i2 ], %i5 380: d0 5e 20 40 ldx [ %i0 + 0x40 ], %o0 384: 96 10 00 1b mov %i3, %o3 388: 94 10 00 1d mov %i5, %o2 38c: 92 10 00 19 mov %i1, %o1 390: 7f ff ff 84 call 1a0 <__set_pmd_acct> 394: b0 10 00 1d mov %i5, %i0 398: 81 cf e0 08 return %i7 + 8 39c: 01 00 00 00 nop This patch fixes the problem by defining arch_cmpxchg64_local() in terms of arch_cmpxchg_local(), avoiding potential shadowing, and resulting in working cmpxchg64_local() and variants, e.g. 0360 : 360: 9d e3 bf 50 save %sp, -176, %sp 364: fa 5e 80 00 ldx [ %i2 ], %i5 368: 82 10 00 1b mov %i3, %g1 36c: c3 f6 90 1d casx [ %i2 ], %i5, %g1 370: 80 a7 40 01 cmp %i5, %g1 374: 32 6f ff fd bne,a %xcc, 368 378: fa 5e 80 00 ldx [ %i2 ], %i5 37c: d0 5e 20 40 ldx [ %i0 + 0x40 ], %o0 380: 96 10 00 1b mov %i3, %o3 384: 94 10 00 1d mov %i5, %o2 388: 92 10 00 19 mov %i1, %o1 38c: 7f ff ff 85 call 1a0 <__set_pmd_acct> 390: b0 10 00 1d mov %i5, %i0 394: 81 cf e0 08 return %i7 + 8 398: 01 00 00 00 nop 39c: 01 00 00 00 nop Fixes: ff5b4f1ed580c59d ("locking/atomic: sparc: move to ARCH_ATOMIC") Signed-off-by: Mark Rutland Reported-by: Anatoly Pugachev Cc: "David S. Miller" Cc: Peter Zijlstra --- arch/sparc/include/asm/cmpxchg_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index 8c39a9981187..12d00a42c0a3 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -201,7 +201,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, #define arch_cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ - cmpxchg_local((ptr), (o), (n)); \ + arch_cmpxchg_local((ptr), (o), (n)); \ }) #define arch_cmpxchg64(ptr, o, n) arch_cmpxchg64_local((ptr), (o), (n)) -- 2.11.0
Re: [sparc64] locking/atomic, kernel OOPS on running stress-ng
On Mon, Jul 05, 2021 at 06:16:49PM +0300, Anatoly Pugachev wrote: > Hello! Hi Anatoly, > latest sparc64 git kernel produces the following OOPS on running stress-ng as > : > > $ stress-ng -v --mmap 1 -t 30s > > kernel OOPS (console logs): > > [ 27.276719] Unable to handle kernel NULL pointer dereference > [ 27.276782] tsk->{mm,active_mm}->context = 03cb > [ 27.276818] tsk->{mm,active_mm}->pgd = fff83a2a > [ 27.276853] \|/ \|/ > [ 27.276853] "@'/ .. \`@" > [ 27.276853] /_| \__/ |_\ > [ 27.276853] \__U_/ > [ 27.276927] stress-ng(928): Oops [#1] I can reproduce this under QEMU; following your bisection (and working around the missing ifdeferry that breaks bisection), I can confirm that the first broken commit is: ff5b4f1ed580 ("locking/atomic: sparc: move to ARCH_ATOMIC") Sorry about this. > Can someone please look at this commit ids? >From digging into this, I can't spot an obvious bug in the commit above. It looks like this happens when some of the xchg/cmpxchg variants are wrapped by , but I can't immediately explain why. This might be a latent bug that's being tickled by the structure of the wrappers, or some subtlety with the typecasting that happens in the wrappers. Starting with: ff5b4f1ed580 ("locking/atomic: sparc: move to ARCH_ATOMIC") ... and atop that, cherry-picking: bccf1ec369ac ("locking/atomics: atomic-instrumented: simplify ifdeffery") ... the below hack seems to make the stress-ng run pass without issue, even after running for multiple minutes (when it would usually fail in a few seconds). In case this is a codegen issue, I'm using the kernel.org GCC 10.3.0 cross toolchain. Thanks, Mark. >8 >From 9a77ebd7005a9d4492686c45207642eeb4d13a8c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jul 2021 20:38:06 +0100 Subject: [PATCH] HACK: disable instrumentation of xchg/cmpxchg Signed-off-by: Mark Rutland --- include/asm-generic/atomic-instrumented.h | 86 ++- scripts/atomic/gen-atomic-instrumented.sh | 10 2 files changed, 95 insertions(+), 1 deletion(-) diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index bc45af52c93b..7d0c38091c82 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -1177,90 +1177,139 @@ atomic64_dec_if_positive(atomic64_t *v) return arch_atomic64_dec_if_positive(v); } +#if 0 #define xchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg(__ai_ptr, __VA_ARGS__); \ }) +#else +#define xchg arch_xchg +#endif +#if 0 #define xchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_acquire(__ai_ptr, __VA_ARGS__); \ }) +#else +#define xchg_acquire arch_xchg_acquire +#endif +#if 0 #define xchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_release(__ai_ptr, __VA_ARGS__); \ }) +#else +#define xchg_release arch_xchg_release +#endif +#if 0 #define xchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_xchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) +#else +#define xchg_relaxed arch_xchg_relaxed +#endif +#if 0 #define cmpxchg(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg(__ai_ptr, __VA_ARGS__); \ }) +#else +#define cmpxchgarch_cmpxchg +#endif +#if 0 #define cmpxchg_acquire(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_acquire(__ai_ptr, __VA_ARGS__); \ }) +#else +#define cmpxchg_acquirearch_cmpxchg_acquire +#endif +#if 0 #define cmpxchg_release(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_release(__ai_ptr, __VA_ARGS__); \ }) +#else +#define cmpxchg_releasearch_cmpxchg_release +#endif +#if 0 #define cmpxchg_relaxed(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg_relaxed(__ai_ptr, __VA_ARGS__); \ }) +#else +#define cmpxchg_relaxedarch_cmpxchg_relaxed +#endif +#if 0 #define cmpxchg64(ptr, ...) \ ({ \ typeof(ptr) __ai_ptr = (ptr); \ instrument_atomic_write(__ai_ptr, sizeof(*__ai_ptr)); \ arch_cmpxchg64(__ai_ptr, __VA_ARGS__); \ }) +#else +#define cmpxchg64 arch_cmpxchg64 +#endif +#if 0 #define cmpxchg64_acquire(ptr, ...) \ ({ \ typeof(ptr)