On Wed, Nov 16, 2011 at 02:11:27PM +1100, Benjamin Herrenschmidt wrote:
> The Documentation/memory-barriers.txt document requires that atomic
> operations that return a value act as a memory barrier both before
> and after the actual atomic operation.
> 
> Our current implementation doesn't guarantee this. More specifically,
> while a load following the isync cannot be issued before stwcx. has
> completed, that completion doesn't architecturally mean that the
> result of stwcx. is visible to other processors (or any previous stores
> for that matter) (typically, the other processors' L1 caches can still
> hold the old value).
> 
> This has caused an actual crash in RCU torture testing on Power 7
> 
> This fixes it by changing those atomic ops to use new macros instead
> of RELEASE/ACQUIRE barriers, called ATOMIC_ENTRY and ATOMIC_EXIT barriers,
> which are then defined respectively to lwsync and sync.
> 
> I haven't had a chance to measure the performance impact (or rather
> what I measured with kernel compiles is in the noise, I yet have to
> find a more precise benchmark)
> 
> Signed-off-by: Benjamin Herrenschmidt <[email protected]>

Acked-by: Paul E. McKenney <[email protected]>

> ---
> 
> diff --git a/arch/powerpc/include/asm/atomic.h 
> b/arch/powerpc/include/asm/atomic.h
> index e2a4c26..02e41b5 100644
> --- a/arch/powerpc/include/asm/atomic.h
> +++ b/arch/powerpc/include/asm/atomic.h
> @@ -49,13 +49,13 @@ static __inline__ int atomic_add_return(int a, atomic_t 
> *v)
>       int t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%2         # atomic_add_return\n\
>       add     %0,%1,%0\n"
>       PPC405_ERR77(0,%2)
>  "    stwcx.  %0,0,%2 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (a), "r" (&v->counter)
>       : "cc", "memory");
> @@ -85,13 +85,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t 
> *v)
>       int t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%2         # atomic_sub_return\n\
>       subf    %0,%1,%0\n"
>       PPC405_ERR77(0,%2)
>  "    stwcx.  %0,0,%2 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (a), "r" (&v->counter)
>       : "cc", "memory");
> @@ -119,13 +119,13 @@ static __inline__ int atomic_inc_return(atomic_t *v)
>       int t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%1         # atomic_inc_return\n\
>       addic   %0,%0,1\n"
>       PPC405_ERR77(0,%1)
>  "    stwcx.  %0,0,%1 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (&v->counter)
>       : "cc", "xer", "memory");
> @@ -163,13 +163,13 @@ static __inline__ int atomic_dec_return(atomic_t *v)
>       int t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%1         # atomic_dec_return\n\
>       addic   %0,%0,-1\n"
>       PPC405_ERR77(0,%1)
>  "    stwcx.  %0,0,%1\n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (&v->counter)
>       : "cc", "xer", "memory");
> @@ -194,7 +194,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, 
> int a, int u)
>       int t;
> 
>       __asm__ __volatile__ (
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%1         # __atomic_add_unless\n\
>       cmpw    0,%0,%3 \n\
>       beq-    2f \n\
> @@ -202,7 +202,7 @@ static __inline__ int __atomic_add_unless(atomic_t *v, 
> int a, int u)
>       PPC405_ERR77(0,%2)
>  "    stwcx.  %0,0,%1 \n\
>       bne-    1b \n"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>  "    subf    %0,%2,%0 \n\
>  2:"
>       : "=&r" (t)
> @@ -226,7 +226,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
>       int t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  lwarx   %0,0,%1         # atomic_dec_if_positive\n\
>       cmpwi   %0,1\n\
>       addi    %0,%0,-1\n\
> @@ -234,7 +234,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
>       PPC405_ERR77(0,%1)
>  "    stwcx.  %0,0,%1\n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       "\n\
>  2:"  : "=&b" (t)
>       : "r" (&v->counter)
> @@ -285,12 +285,12 @@ static __inline__ long atomic64_add_return(long a, 
> atomic64_t *v)
>       long t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%2         # atomic64_add_return\n\
>       add     %0,%1,%0\n\
>       stdcx.  %0,0,%2 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (a), "r" (&v->counter)
>       : "cc", "memory");
> @@ -319,12 +319,12 @@ static __inline__ long atomic64_sub_return(long a, 
> atomic64_t *v)
>       long t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%2         # atomic64_sub_return\n\
>       subf    %0,%1,%0\n\
>       stdcx.  %0,0,%2 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (a), "r" (&v->counter)
>       : "cc", "memory");
> @@ -351,12 +351,12 @@ static __inline__ long atomic64_inc_return(atomic64_t 
> *v)
>       long t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%1         # atomic64_inc_return\n\
>       addic   %0,%0,1\n\
>       stdcx.  %0,0,%1 \n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (&v->counter)
>       : "cc", "xer", "memory");
> @@ -393,12 +393,12 @@ static __inline__ long atomic64_dec_return(atomic64_t 
> *v)
>       long t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%1         # atomic64_dec_return\n\
>       addic   %0,%0,-1\n\
>       stdcx.  %0,0,%1\n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       : "=&r" (t)
>       : "r" (&v->counter)
>       : "cc", "xer", "memory");
> @@ -418,13 +418,13 @@ static __inline__ long 
> atomic64_dec_if_positive(atomic64_t *v)
>       long t;
> 
>       __asm__ __volatile__(
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%1         # atomic64_dec_if_positive\n\
>       addic.  %0,%0,-1\n\
>       blt-    2f\n\
>       stdcx.  %0,0,%1\n\
>       bne-    1b"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>       "\n\
>  2:"  : "=&r" (t)
>       : "r" (&v->counter)
> @@ -450,14 +450,14 @@ static __inline__ int atomic64_add_unless(atomic64_t 
> *v, long a, long u)
>       long t;
> 
>       __asm__ __volatile__ (
> -     PPC_RELEASE_BARRIER
> +     PPC_ATOMIC_ENTRY_BARRIER
>  "1:  ldarx   %0,0,%1         # __atomic_add_unless\n\
>       cmpd    0,%0,%3 \n\
>       beq-    2f \n\
>       add     %0,%2,%0 \n"
>  "    stdcx.  %0,0,%1 \n\
>       bne-    1b \n"
> -     PPC_ACQUIRE_BARRIER
> +     PPC_ATOMIC_EXIT_BARRIER
>  "    subf    %0,%2,%0 \n\
>  2:"
>       : "=&r" (t)
> diff --git a/arch/powerpc/include/asm/bitops.h 
> b/arch/powerpc/include/asm/bitops.h
> index e137afc..efdc926 100644
> --- a/arch/powerpc/include/asm/bitops.h
> +++ b/arch/powerpc/include/asm/bitops.h
> @@ -124,14 +124,14 @@ static __inline__ unsigned long fn(                     
> \
>       return (old & mask);                            \
>  }
> 
> -DEFINE_TESTOP(test_and_set_bits, or, PPC_RELEASE_BARRIER,
> -           PPC_ACQUIRE_BARRIER, 0)
> +DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
> +           PPC_ATOMIC_EXIT_BARRIER, 0)
>  DEFINE_TESTOP(test_and_set_bits_lock, or, "",
>             PPC_ACQUIRE_BARRIER, 1)
> -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_RELEASE_BARRIER,
> -           PPC_ACQUIRE_BARRIER, 0)
> -DEFINE_TESTOP(test_and_change_bits, xor, PPC_RELEASE_BARRIER,
> -           PPC_ACQUIRE_BARRIER, 0)
> +DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
> +           PPC_ATOMIC_EXIT_BARRIER, 0)
> +DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
> +           PPC_ATOMIC_EXIT_BARRIER, 0)
> 
>  static __inline__ int test_and_set_bit(unsigned long nr,
>                                      volatile unsigned long *addr)
> diff --git a/arch/powerpc/include/asm/futex.h 
> b/arch/powerpc/include/asm/futex.h
> index c94e4a3..2a9cf84 100644
> --- a/arch/powerpc/include/asm/futex.h
> +++ b/arch/powerpc/include/asm/futex.h
> @@ -11,12 +11,13 @@
> 
>  #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
>    __asm__ __volatile ( \
> -     PPC_RELEASE_BARRIER \
> +     PPC_ATOMIC_ENTRY_BARRIER \
>  "1:  lwarx   %0,0,%2\n" \
>       insn \
>       PPC405_ERR77(0, %2) \
>  "2:  stwcx.  %1,0,%2\n" \
>       "bne-   1b\n" \
> +     PPC_ATOMIC_EXIT_BARRIER \
>       "li     %1,0\n" \
>  "3:  .section .fixup,\"ax\"\n" \
>  "4:  li      %1,%3\n" \
> @@ -92,14 +93,14 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user 
> *uaddr,
>               return -EFAULT;
> 
>          __asm__ __volatile__ (
> -        PPC_RELEASE_BARRIER
> +        PPC_ATOMIC_ENTRY_BARRIER
>  "1:     lwarx   %1,0,%3         # futex_atomic_cmpxchg_inatomic\n\
>          cmpw    0,%1,%4\n\
>          bne-    3f\n"
>          PPC405_ERR77(0,%3)
>  "2:     stwcx.  %5,0,%3\n\
>          bne-    1b\n"
> -        PPC_ACQUIRE_BARRIER
> +        PPC_ATOMIC_EXIT_BARRIER
>  "3:  .section .fixup,\"ax\"\n\
>  4:   li      %0,%6\n\
>       b       3b\n\
> diff --git a/arch/powerpc/include/asm/synch.h 
> b/arch/powerpc/include/asm/synch.h
> index d7cab44..24fc618 100644
> --- a/arch/powerpc/include/asm/synch.h
> +++ b/arch/powerpc/include/asm/synch.h
> @@ -41,11 +41,15 @@ static inline void isync(void)
>       START_LWSYNC_SECTION(97);                       \
>       isync;                                          \
>       MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
> -#define PPC_ACQUIRE_BARRIER  "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
> -#define PPC_RELEASE_BARRIER  stringify_in_c(LWSYNC) "\n"
> +#define PPC_ACQUIRE_BARRIER   "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
> +#define PPC_RELEASE_BARRIER   stringify_in_c(LWSYNC) "\n"
> +#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
> +#define PPC_ATOMIC_EXIT_BARRIER       "\n" stringify_in_c(sync) "\n"
>  #else
>  #define PPC_ACQUIRE_BARRIER
>  #define PPC_RELEASE_BARRIER
> +#define PPC_ATOMIC_ENTRY_BARRIER
> +#define PPC_ATOMIC_EXIT_BARRIER
>  #endif
> 
>  #endif /* __KERNEL__ */
> 
> 

_______________________________________________
Linuxppc-dev mailing list
[email protected]
https://lists.ozlabs.org/listinfo/linuxppc-dev

Reply via email to