Re: [PATCH] locking/xchg/alpha: Remove memory barriers from the _local() variants

2018-02-27 Thread Andrea Parri
[+ Will]

I'm not sure how this happened; Will, you at least figure as Reported-by: ;-)

  Andrea


On Tue, Feb 27, 2018 at 05:00:58AM +0100, Andrea Parri wrote:
> Commits 79d442461df74 ("locking/xchg/alpha: Clean up barrier usage by using
> smp_mb() in place of __ASM__MB") and 472e8c55cf662 ("locking/xchg/alpha:
> Fix xchg() and cmpxchg() memory ordering bugs") ended up adding unnecessary
> barriers to the _local variants, which the previous code took care to avoid.
> 
> Fix them by adding the smp_mb() into the cmpxchg() macro rather than into
> the ____cmpxchg() variants.
> 
> Fixes: 79d442461df74 ("locking/xchg/alpha: Clean up barrier usage by using smp_mb() in place of __ASM__MB")
> Fixes: 472e8c55cf662 ("locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs")
> Reported-by: Will Deacon <will.dea...@arm.com>
> Signed-off-by: Andrea Parri <parri.and...@gmail.com>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Alan Stern <st...@rowland.harvard.edu>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: Ivan Kokshaysky <i...@jurassic.park.msu.ru>
> Cc: Linus Torvalds <torva...@linux-foundation.org>
> Cc: Matt Turner <matts...@gmail.com>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Richard Henderson <r...@twiddle.net>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: linux-alpha@vger.kernel.org
> ---
>  arch/alpha/include/asm/cmpxchg.h | 20 ++++++++++++++++----
>  arch/alpha/include/asm/xchg.h    | 27 ---------------------------
>  2 files changed, 16 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
> index 8a2b331e43feb..6c7c394524714 100644
> --- a/arch/alpha/include/asm/cmpxchg.h
> +++ b/arch/alpha/include/asm/cmpxchg.h
> @@ -38,19 +38,31 @@
>  #define ____cmpxchg(type, args...)	__cmpxchg ##type(args)
>  #include <asm/xchg.h>
>  
> +/*
> + * The leading and the trailing memory barriers guarantee that these
> + * operations are fully ordered.
> + */
>  #define xchg(ptr, x) \
>  ({   \
> + __typeof__(*(ptr)) __ret;   \
>   __typeof__(*(ptr)) _x_ = (x);   \
> - (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_,  \
> -  sizeof(*(ptr)));   \
> + smp_mb();   \
> + __ret = (__typeof__(*(ptr)))\
> + __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr)));  \
> + smp_mb();   \
> + __ret;  \
>  })
>  
>  #define cmpxchg(ptr, o, n)   \
>  ({   \
> + __typeof__(*(ptr)) __ret;   \
>   __typeof__(*(ptr)) _o_ = (o);   \
>   __typeof__(*(ptr)) _n_ = (n);   \
> - (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,   \
> - (unsigned long)_n_, sizeof(*(ptr)));\
> + smp_mb();   \
> + __ret = (__typeof__(*(ptr))) __cmpxchg((ptr),   \
> + (unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\
> + smp_mb();   \
> + __ret;  \
>  })
>  
>  #define cmpxchg64(ptr, o, n) \
> diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
> index e2b59fac5257d..7adb80c6746ac 100644
> --- a/arch/alpha/include/asm/xchg.h
> +++ b/arch/alpha/include/asm/xchg.h
> @@ -12,10 +12,6 @@
>   * Atomic exchange.
>   * Since it can be used to implement critical sections
>   * it must clobber "memory" (also for interrupts in UP).
> - *
> - * The leading and the trailing memory barriers guarantee that these
> - * operations are fully ordered.
> - *
>   */
>  
>  static inline unsigned long
> @@ -23,7 +19,6 @@ ____xchg(_u8, volatile char *m, unsigned long val)
>  {
>   unsigned long ret, tmp, addr64;
>  
> - smp_mb();
>   __asm__ __volatile__(
>   "   andnot  %4,7,%3\n"
>   "   insbl   %1,%4,%1\n"
> @@ -38,7 +33,6 @@ ____xchg(_u8, volatile char *m

[PATCH] locking/xchg/alpha: Remove memory barriers from the _local() variants

2018-02-26 Thread Andrea Parri
Commits 79d442461df74 ("locking/xchg/alpha: Clean up barrier usage by using
smp_mb() in place of __ASM__MB") and 472e8c55cf662 ("locking/xchg/alpha:
Fix xchg() and cmpxchg() memory ordering bugs") ended up adding unnecessary
barriers to the _local variants, which the previous code took care to avoid.

Fix them by adding the smp_mb() into the cmpxchg() macro rather than into
the ____cmpxchg() variants.

Fixes: 79d442461df74 ("locking/xchg/alpha: Clean up barrier usage by using smp_mb() in place of __ASM__MB")
Fixes: 472e8c55cf662 ("locking/xchg/alpha: Fix xchg() and cmpxchg() memory ordering bugs")
Reported-by: Will Deacon <will.dea...@arm.com>
Signed-off-by: Andrea Parri <parri.and...@gmail.com>
Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Alan Stern <st...@rowland.harvard.edu>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Ivan Kokshaysky <i...@jurassic.park.msu.ru>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Matt Turner <matts...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Richard Henderson <r...@twiddle.net>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: linux-alpha@vger.kernel.org
---
 arch/alpha/include/asm/cmpxchg.h | 20 ++++++++++++++++----
 arch/alpha/include/asm/xchg.h    | 27 ---------------------------
 2 files changed, 16 insertions(+), 31 deletions(-)

diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 8a2b331e43feb..6c7c394524714 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -38,19 +38,31 @@
 #define ____cmpxchg(type, args...)	__cmpxchg ##type(args)
 #include <asm/xchg.h>
 
+/*
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ */
 #define xchg(ptr, x)   \
 ({ \
+   __typeof__(*(ptr)) __ret;   \
__typeof__(*(ptr)) _x_ = (x);   \
-   (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_,  \
-sizeof(*(ptr)));   \
+   smp_mb();   \
+   __ret = (__typeof__(*(ptr)))\
+   __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr)));  \
+   smp_mb();   \
+   __ret;  \
 })
 
 #define cmpxchg(ptr, o, n) \
 ({ \
+   __typeof__(*(ptr)) __ret;   \
__typeof__(*(ptr)) _o_ = (o);   \
__typeof__(*(ptr)) _n_ = (n);   \
-   (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,   \
-   (unsigned long)_n_, sizeof(*(ptr)));\
+   smp_mb();   \
+   __ret = (__typeof__(*(ptr))) __cmpxchg((ptr),   \
+   (unsigned long)_o_, (unsigned long)_n_, sizeof(*(ptr)));\
+   smp_mb();   \
+   __ret;  \
 })
 
 #define cmpxchg64(ptr, o, n)   \
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index e2b59fac5257d..7adb80c6746ac 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -12,10 +12,6 @@
  * Atomic exchange.
  * Since it can be used to implement critical sections
  * it must clobber "memory" (also for interrupts in UP).
- *
- * The leading and the trailing memory barriers guarantee that these
- * operations are fully ordered.
- *
  */
 
 static inline unsigned long
@@ -23,7 +19,6 @@ ____xchg(_u8, volatile char *m, unsigned long val)
 {
unsigned long ret, tmp, addr64;
 
-   smp_mb();
__asm__ __volatile__(
"   andnot  %4,7,%3\n"
"   insbl   %1,%4,%1\n"
@@ -38,7 +33,6 @@ ____xchg(_u8, volatile char *m, unsigned long val)
".previous"
	: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
-   smp_mb();
 
return ret;
 }
@@ -48,7 +42,6 @@ ____xchg(_u16, volatile short *m, unsigned long val)
 {
unsigned long ret, tmp, addr64;
 
-   smp_mb();
__asm__ __volatile__(
"   andnot  %4,7,%3\n"
"   inswl   %1,%4,%1\n"
@@ -63,7 +56,6 @@ ____xchg(_u16, volatile short *m

[PATCH 1/2] locking/xchg/alpha: Use smp_mb() in place of __ASM__MB

2018-02-22 Thread Andrea Parri
Replace each occurrence of __ASM__MB with a (trailing) smp_mb() in
xchg(), cmpxchg(), and remove the now unused __ASM__MB definitions;
this improves readability, with no additional synchronization cost.

Suggested-by: Will Deacon <will.dea...@arm.com>
Signed-off-by: Andrea Parri <parri.and...@gmail.com>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
Cc: Alan Stern <st...@rowland.harvard.edu>
Cc: Ivan Kokshaysky <i...@jurassic.park.msu.ru>
Cc: Matt Turner <matts...@gmail.com>
Cc: Richard Henderson <r...@twiddle.net>
Cc: linux-alpha@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
---
 arch/alpha/include/asm/cmpxchg.h |  6 ------
 arch/alpha/include/asm/xchg.h    | 16 ++++++++--------
 2 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/arch/alpha/include/asm/cmpxchg.h b/arch/alpha/include/asm/cmpxchg.h
index 46ebf14aed4e5..8a2b331e43feb 100644
--- a/arch/alpha/include/asm/cmpxchg.h
+++ b/arch/alpha/include/asm/cmpxchg.h
@@ -6,7 +6,6 @@
  * Atomic exchange routines.
  */
 
-#define __ASM__MB
 #define ____xchg(type, args...)	__xchg ## type ## _local(args)
 #define ____cmpxchg(type, args...)	__cmpxchg ## type ## _local(args)
 #include <asm/xchg.h>
@@ -33,10 +32,6 @@
cmpxchg_local((ptr), (o), (n)); \
 })
 
-#ifdef CONFIG_SMP
-#undef __ASM__MB
-#define __ASM__MB  "\tmb\n"
-#endif
 #undef ____xchg
 #undef ____cmpxchg
 #define ____xchg(type, args...)	__xchg ##type(args)
@@ -64,7 +59,6 @@
cmpxchg((ptr), (o), (n));   \
 })
 
-#undef __ASM__MB
 #undef ____cmpxchg
 
 #endif /* _ALPHA_CMPXCHG_H */
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index e2660866ce972..e1facf6fc2446 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -28,12 +28,12 @@ ____xchg(_u8, volatile char *m, unsigned long val)
"   or  %1,%2,%2\n"
"   stq_c   %2,0(%3)\n"
"   beq %2,2f\n"
-   __ASM__MB
".subsection 2\n"
"2: br  1b\n"
".previous"
	: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+   smp_mb();
 
return ret;
 }
@@ -52,12 +52,12 @@ ____xchg(_u16, volatile short *m, unsigned long val)
"   or  %1,%2,%2\n"
"   stq_c   %2,0(%3)\n"
"   beq %2,2f\n"
-   __ASM__MB
".subsection 2\n"
"2: br  1b\n"
".previous"
	: "=&r" (ret), "=&r" (val), "=&r" (tmp), "=&r" (addr64)
: "r" ((long)m), "1" (val) : "memory");
+   smp_mb();
 
return ret;
 }
@@ -72,12 +72,12 @@ ____xchg(_u32, volatile int *m, unsigned long val)
"   bis $31,%3,%1\n"
"   stl_c %1,%2\n"
"   beq %1,2f\n"
-   __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
	: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+   smp_mb();
 
return val;
 }
@@ -92,12 +92,12 @@ ____xchg(_u64, volatile long *m, unsigned long val)
"   bis $31,%3,%1\n"
"   stq_c %1,%2\n"
"   beq %1,2f\n"
-   __ASM__MB
".subsection 2\n"
"2: br 1b\n"
".previous"
	: "=&r" (val), "=&r" (dummy), "=m" (*m)
: "rI" (val), "m" (*m) : "memory");
+   smp_mb();
 
return val;
 }
@@ -150,12 +150,12 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
"   stq_c   %2,0(%4)\n"
"   beq %2,3f\n"
"2:\n"
-   __ASM__MB
".subsection 2\n"
"3: br  1b\n"
".previous"
	: "=&r" (prev), "=&r" (new), "=&r" (tmp), "=&r" (cmp), "=&r" (addr64)
: "r" ((long)m), "Ir" (old), "1" (new) : "memory");
+   smp_mb();
 
return prev;
 }
@@ -177,12 +177,12 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
"   stq_c   %2,0(%4)\n"
"   beq %2,3f\n"
"2:\n"
-   __ASM__MB
".subsection 2\n"
"3: br  1b\n

[PATCH 0/2] locking/xchg/alpha: Additional fixes

2018-02-22 Thread Andrea Parri
Hi,

A couple more fixes, on top of tip:locking/urgent, to Alpha's
implementations of xchg() and cmpxchg():

  - The first, suggested by Will, removing the macro '__ASM__MB'
in favour of smp_mb();

  - The second, reported by Will, adding a leading smp_mb().

Compile tested only.

Cheers,
  Andrea

Andrea Parri (2):
  locking/xchg/alpha: Use smp_mb() in place of __ASM__MB
  locking/xchg/alpha: Add leading smp_mb() to xchg(), cmpxchg()

 arch/alpha/include/asm/cmpxchg.h |  6 ------
 arch/alpha/include/asm/xchg.h    | 37 ++++++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 17 deletions(-)

-- 
2.7.4



Re: [PATCH] xchg/alpha: Add unconditional memory barrier to cmpxchg

2018-02-21 Thread Andrea Parri
On Wed, Feb 21, 2018 at 11:21:38AM +, Will Deacon wrote:
> Hi Andrea,
> 
> On Tue, Feb 20, 2018 at 07:45:56PM +0100, Andrea Parri wrote:
> > Continuing along with the fight against smp_read_barrier_depends() [1]
> > (or rather, against its improper use), add an unconditional barrier to
> > cmpxchg.  This guarantees that dependency ordering is preserved when a
> > dependency is headed by an unsuccessful cmpxchg.  As it turns out, the
> > change could enable further simplification of LKMM as proposed in [2].
> > 
> > [1] https://marc.info/?l=linux-kernel&m=150884953419377&w=2
> >     https://marc.info/?l=linux-kernel&m=150884946319353&w=2
> >     https://marc.info/?l=linux-kernel&m=151215810824468&w=2
> >     https://marc.info/?l=linux-kernel&m=151215816324484&w=2
> > 
> > [2] https://marc.info/?l=linux-kernel&m=151881978314872&w=2
> > 
> > Signed-off-by: Andrea Parri <parri.and...@gmail.com>
> > Acked-by: Peter Zijlstra <pet...@infradead.org>
> > Cc: Will Deacon <will.dea...@arm.com>
> > Cc: "Paul E. McKenney" <paul...@linux.vnet.ibm.com>
> > Cc: Alan Stern <st...@rowland.harvard.edu>
> > Cc: Richard Henderson <r...@twiddle.net>
> > Cc: Ivan Kokshaysky <i...@jurassic.park.msu.ru>
> > Cc: Matt Turner <matts...@gmail.com>
> > Cc: linux-alpha@vger.kernel.org
> > Cc: linux-ker...@vger.kernel.org
> > ---
> >  arch/alpha/include/asm/xchg.h | 15 +++++++--------
> >  1 file changed, 7 insertions(+), 8 deletions(-)
> > 
> > diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
> > index 68dfb3cb71454..e2660866ce972 100644
> > --- a/arch/alpha/include/asm/xchg.h
> > +++ b/arch/alpha/include/asm/xchg.h
> > @@ -128,10 +128,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
> >   * store NEW in MEM.  Return the initial value in MEM.  Success is
> >   * indicated by comparing RETURN with OLD.
> >   *
> > - * The memory barrier should be placed in SMP only when we actually
> > - * make the change. If we don't change anything (so if the returned
> > - * prev is equal to old) then we aren't acquiring anything new and
> > - * we don't need any memory barrier as far I can tell.
> > + * The memory barrier is placed in SMP unconditionally, in order to
> > + * guarantee that dependency ordering is preserved when a dependency
> > + * is headed by an unsuccessful operation.
> >   */
> >  
> >  static inline unsigned long
> > @@ -150,8 +149,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
> > "   or  %1,%2,%2\n"
> > "   stq_c   %2,0(%4)\n"
> > "   beq %2,3f\n"
> > -   __ASM__MB
> > "2:\n"
> > +   __ASM__MB
> > ".subsection 2\n"
> > "3: br  1b\n"
> > ".previous"
> 
> It might be better just to add smp_read_barrier_depends() into the cmpxchg
> macro, then remove all of the __ASM__MB stuff.

Mmh, it might be better to add smp_mb() into the cmpxchg macro (after the
operation), then remove all the __ASM__MB stuff.
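
Concretely, a rough (and untested) sketch of that direction, with the
barrier unconditional in the macro (the "_ret_" temporary is just for
illustration):

	#define cmpxchg(ptr, o, n)						\
	({									\
		__typeof__(*(ptr)) _o_ = (o);					\
		__typeof__(*(ptr)) _n_ = (n);					\
		__typeof__(*(ptr)) _ret_;					\
		_ret_ = (__typeof__(*(ptr))) __cmpxchg((ptr),			\
			(unsigned long)_o_, (unsigned long)_n_,			\
			sizeof(*(ptr)));					\
		smp_mb();	/* unconditional, also on failure */		\
		_ret_;								\
	})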


> 
> That said, I don't actually understand how the Alpha cmpxchg or xchg
> implementations satisfy the memory model, since they only appear to have
> a barrier after the operation.
> 
> So MP using xchg:
> 
> WRITE_ONCE(x, 1)
> xchg(y, 1)
> 
> smp_load_acquire(y) == 1
> READ_ONCE(x) == 0
> 
> would be allowed. What am I missing?

Good question ;-)  The absence of an smp_mb() (or of an __ASM__MB) before
the operation did upset me.

If this question remains pending, I'll send a patch to add these barriers.
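
For the record, here is the example above transcribed as a litmus test
(the transcription and the test name are mine, so please double-check);
with a fully ordered xchg() the "exists" clause can never be satisfied,
whereas an xchg() lacking the leading barrier would allow it:

C MP+xchg+acquire
(*
 * Hypothetical transcription of Will's example.
 *)
{
}

P0(int *x, int *y)
{
	WRITE_ONCE(*x, 1);
	r0 = xchg(y, 1);
}

P1(int *x, int *y)
{
	r1 = smp_load_acquire(y);
	r2 = READ_ONCE(*x);
}

exists
(1:r1=1 /\ 1:r2=0)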


> 
> Since I'm in the mood for dumb questions, do we need to care about
> this_cpu_cmpxchg? I'm sure I've seen code that allows concurrent access to
> per-cpu variables, but the asm-generic implementation of this_cpu_cmpxchg
> doesn't use READ_ONCE.

Frankly, I'm not sure whether this is an issue in the generic implementation
of this_cpu_* or, rather, in that code.  Let me dig a bit more into this ...
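
(For context, the asm-generic fallback reads, if I remember it correctly,
roughly as follows -- paraphrased from include/asm-generic/percpu.h, not
verbatim; the point being the plain loads/stores rather than
READ_ONCE()/WRITE_ONCE():)

	#define raw_cpu_generic_cmpxchg(pcp, oval, nval)		\
	({								\
		typeof(pcp) *__p = raw_cpu_ptr(&(pcp));			\
		typeof(pcp) __ret;					\
		__ret = *__p;			/* plain load */	\
		if (__ret == (oval))					\
			*__p = nval;		/* plain store */	\
		__ret;							\
	})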

  Andrea


> 
> Will


Re: [RFC PATCH 1/2] arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables

2017-10-05 Thread Andrea Parri
Hi Will,

none of my comments below represent objections to this patch, but
let me remark:


On Thu, Oct 05, 2017 at 05:31:54PM +0100, Will Deacon wrote:
> Hi Paul,
> 
> On Tue, Oct 03, 2017 at 12:11:10PM -0700, Paul E. McKenney wrote:
> > On Fri, Sep 29, 2017 at 05:33:49PM +0100, Will Deacon wrote:
> > > On Fri, Sep 29, 2017 at 09:29:39AM -0700, Paul E. McKenney wrote:
> > > > On Fri, Sep 29, 2017 at 10:08:43AM +0100, Will Deacon wrote:
> > > > > Ok, but where does that leave us wrt my initial proposal of moving
> > > > > smp_read_barrier_depends() into READ_ONCE and getting rid of
> > > > > lockless_dereference?
> > > > > 
> > > > > Michael (or anybody else running mainline on SMP Alpha) -- would you 
> > > > > be
> > > > > able to give the diff below a spin and see whether there's a 
> > > > > measurable
> > > > > performance impact?
> > > > 
> > > > This will be a sensitive test.  The smp_read_barrier_depends() can be
> > > > removed from lockless_dereference().  Without this removal Alpha will
> > > > get two memory barriers from rcu_dereference() and friends.
> > > 
> > > Oh yes, good point. I was trying to keep the diff simple, but you're
> > > right that this is packing too many barriers. Fixed diff below.
> > 
> > Not seeing any objections thus far.  If there are none by (say) the
> > end of this week, I would be happy to queue a patch for the 4.16
> > merge window.  That should give ample opportunity for further review
> > and testing.
> 
> Ok, full patch below.
> 
> Will
> 
> --->8
> 
> From 15956d0cc6b37208d8542b1858a8d8b64227acf4 Mon Sep 17 00:00:00 2001
> From: Will Deacon 
> Date: Thu, 5 Oct 2017 16:57:36 +0100
> Subject: [PATCH] locking/barriers: Kill lockless_dereference
> 
> lockless_dereference is a nice idea, but it's gained little traction in
> kernel code since its introduction three years ago. This is partly
> because it's a pain to type, but also because using READ_ONCE instead
> will work correctly on all architectures apart from Alpha, which is a
> fully supported but somewhat niche architecture these days.

lockless_dereference might be a mouthful, but it does (explicitly)
say/remark: "Yep, we are relying on the following address dep. to
be "in strong-ppo" ".

Such information will be lost or, at least, not immediately clear
by just reading a READ_ONCE(). (And Yes, this information is only
relevant when we "include" Alpha in the picture/analysis.)
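
(For reference, the helper being removed reads essentially as follows --
quoting include/linux/compiler.h from memory, modulo a type-check line --
and it is precisely the smp_read_barrier_depends() below that documents
the address dependency now being folded into READ_ONCE():)

	#define lockless_dereference(p) \
	({ \
		typeof(p) _________p1 = READ_ONCE(p); \
		smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
		(_________p1); \
	})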


> 
> This patch moves smp_read_barrier_depends() (a NOP on all architectures
> other than Alpha) from lockless_dereference into READ_ONCE, converts
> the few actual users over to READ_ONCE and then finally removes
> lockless_dereference altogether.

Notice that several "potential users" of lockless_dereference are
currently hidden in other call sites for smp_read_barrier_depends
(i.e., cases where this barrier is not called from within a lockless
or an RCU dereference).

Some of these usages (e.g.,

  include/linux/percpu-refcount.h:__ref_is_percpu,
  mm/ksm.c:get_ksm_page,
  security/keys/keyring.c:search_nested_keyrings )

precede this barrier with a READ_ONCE; others (e.g.,

  arch/alpha/include/asm/pgtable.h:pmd_offset,
  net/ipv4/netfilter/arp_tables.c:arpt_do_table
  kernel/events/uprobes.c:get_trampoline_vaddr )

with a plain read.

There also appear to be cases where the barrier is preceded by an
ACCESS_ONCE (cf. fs/dcache.c:prepend_name) or by an xchg_release
(cf. kernel/locking/qspinlock.c:queued_spin_lock_slowpath), and
it would not be difficult to imagine/create different usages.
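
Schematically, and with hypothetical call sites just to illustrate the
two patterns (none of this is from the kernel sources):

	struct foo { int data; };
	extern struct foo *shared;

	int covered_by_this_patch(void)
	{
		struct foo *p = READ_ONCE(shared);	/* barrier now folded in */

		return p->data;
	}

	int still_needs_an_audit(void)
	{
		struct foo *p = shared;			/* plain load ...	*/

		smp_read_barrier_depends();		/* ... the explicit barrier
							   stays, unless the load
							   is converted to
							   READ_ONCE()		*/
		return p->data;
	}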


> 
> Signed-off-by: Will Deacon 

I understand that we all agree we're missing a Tested-by here ;-).

  Andrea


> ---
>  Documentation/memory-barriers.txt   | 12 
>  .../translations/ko_KR/memory-barriers.txt  | 12 
>  arch/x86/events/core.c  |  2 +-
>  arch/x86/include/asm/mmu_context.h  |  4 ++--
>  arch/x86/kernel/ldt.c   |  2 +-
>  drivers/md/dm-mpath.c   | 20 ++--
>  fs/dcache.c |  4 ++--
>  fs/overlayfs/ovl_entry.h|  2 +-
>  fs/overlayfs/readdir.c  |  2 +-
>  include/linux/compiler.h | 21 +
>  include/linux/rculist.h |  4 ++--
>  include/linux/rcupdate.h|  4 ++--
>  kernel/events/core.c|  4 ++--
>  kernel/seccomp.c|  2 +-
>  kernel/task_work.c  |  2 +-
>  mm/slab.h   |  2 +-
>  16 files changed, 28 insertions(+), 71 deletions(-)
> 
> diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
> index b759a60624fd..470a682f3fa4 100644
> --- 

Re: Question about DEC Alpha memory ordering

2017-02-14 Thread Andrea Parri
On Mon, Feb 13, 2017 at 01:24:36PM -0800, Paul E. McKenney wrote:
> On Mon, Feb 13, 2017 at 04:06:21PM -0500, Alan Stern wrote:
> > On Mon, 13 Feb 2017, Paul E. McKenney wrote:
> > 
> > > On Mon, Feb 13, 2017 at 08:14:23PM +0100, Tobias Klausmann wrote:
> > > > Hi! 
> > > > 
> > > > On Mon, 13 Feb 2017, Paul E. McKenney wrote:
> > > > > On Mon, Feb 13, 2017 at 01:53:27PM -0500, bob smith wrote:
> > > > > > On 2/13/17 1:39 PM, Paul E. McKenney wrote:
> > > > > > > can real DEC Alpha hardware end up with both instances of "r1"
> > > > > > > having the value 1?
> > > > > > 
> > > > > > I thought this question reminded me of something, so I found this:
> > > > > > > https://www.kernel.org/doc/Documentation/memory-barriers.txt
> > > > > > 
> > > > > > and I pasted in the content - David Howells is one of the authors 
> > > > > > and
> > > > > > maybe that is why the question sort of reminded me.
> > > > > > 
> > > > > > Maybe someone has an update but this is what was said then.
> > > > > 
> > > > > Well, thank you for pointing me to this, but my question was intended 
> > > > > to
> > > > > check whether or not the words I helped to write in 
> > > > > memory-barriers.txt
> > > > > are in fact accurate.  So if you have an SMP DEC Alpha system that you
> > > > > could provide remote access to, that would be very helpful!
> > > > 
> > > > I have a 4-cpu ES40. Send me a test program and I'll gladly run
> > > > it for you.
> > > 
> > > Andrea, could you please convert the litmus test below and send it to
> > > Tobias?
> > > 
> > >   Thanx, Paul
> > > 
> > > 
> > > 
> > > C auto/C-LB-LRW+OB-Dv
> > > (*
> > >  * Result: Never
> > >  * 
> > >  *)
> > > {
> > > }
> > > 
> > > P0(int *u0, int *x1)
> > > {
> > >   r1 = READ_ONCE(*u0);
> > >   smp_mb();
> > >   WRITE_ONCE(*x1, 1);
> > > }
> > > 
> > > 
> > > P1(int *u0, int *x1)
> > > {
> > >   r1 = rcu_dereference(*x1);
> > 
> > No, please, not this.  It should be:
> > 
> > r1 = READ_ONCE(*x1);
> > 
> > That is, the auto/C-LB-LRW+OB-Ov.litmus test.
> > 
> > >   WRITE_ONCE(*u0, r1);
> > > }
> > > 
> > > exists
> > > (0:r1=1 /\ 1:r1=1)
> 
> Sorry, here is the correct one in full.
> 
>   Thanx, Paul
> 
> 
> 
> C auto/C-LB-LRW+OB-Ov
> (*
>  * Result: Maybe
>  * P0-P1 rf OB-Ov: Never->Maybe: Note lack of C11 guarantee, control dependency
>  * P1 Ov,LRW: Note lack of C11 guarantee, control dependency
>  *)
> {
> }
> 
> P0(int *u0, int *x1)
> {
>   r1 = READ_ONCE(*u0);
>   smp_mb();
>   WRITE_ONCE(*x1, 1);
> }
> 
> 
> P1(int *u0, int *x1)
> {
>   r1 = READ_ONCE(*x1);
>   WRITE_ONCE(*u0, r1);
> }
> 
> exists
> (0:r1=1 /\ 1:r1=1)
> 

The (automatically generated) module for this test is at

   http://retis.sssup.it/~a.parri/lkmm/C-LB-LRW+OB-Ov.tgz ;

the test is run by cat-ing /sys/kernel/litmus/p_count: this will execute
the thread bodies for "runs * size" iterations; results can be sensitive
to the "stride" and "affinity increment" parameters (cf. the Makefile);
statistics for each experiment are printed on stdout.

Please let me know should you find any problem with this. Thank you,

  Andrea

Disclaimer: I'm not "excited", to use a euphemism, to post such ugly
C code to LKML ...; _most importantly_, I've certainly never tested this
on any Alpha machine ...

