Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-10 Thread Palmer Dabbelt
On Thu, 06 Jul 2017 19:14:25 PDT (-0700), boqun.f...@gmail.com wrote:
> On Thu, Jul 06, 2017 at 06:04:13PM -0700, Palmer Dabbelt wrote:
> [...]
>> >> +#define __smp_load_acquire(p)					\
>> >> +do {								\
>> >> + union { typeof(*p) __val; char __c[1]; } __u =  \
>> >> + { .__val = (__force typeof(*p)) (v) };  \
>> >> + compiletime_assert_atomic_type(*p); \
>> >> + switch (sizeof(*p)) {   \
>> >> + case 1: \
>> >> + case 2: \
>> >> + __u.__val = READ_ONCE(*p);  \
>> >> + smb_mb();   \
>> >> + break;  \
>> >> + case 4: \
>> >> + __asm__ __volatile__ (  \
>> >> + "amoor.w.aq %1, zero, %0"   \
>> >> + : "+A" (*p) \
>> >> + : "=r" (__u.__val)  \
>> >> + : "memory");\
>> >> + break;  \
>> >> + case 8: \
>> >> + __asm__ __volatile__ (  \
>> >> + "amoor.d.aq %1, zero, %0"   \
>> >> + : "+A" (*p) \
>> >> + : "=r" (__u.__val)  \
>> >> + : "memory");\
>> >> + break;  \
>> >> + }   \
>> >> + __u.__val;  \
>> >> +} while (0)
>> >
>> > 'creative' use of amoswap and amoor :-)
>> >
>> > You should really look at a normal load with ordering instruction
>> > though, that amoor.aq is a rmw and will promote the cacheline to
>> > exclusive (and dirty it).
>>
>> The thought here was that implementations could elide the MW by pattern
>> matching the "zero" (x0, the architectural zero register) forms of AMOs where
>> it's interesting.  I talked to one of our microarchitecture guys, and while he
>> agrees that's easy he points out that eliding half the AMO may wreak havoc on
>> the consistency model.  Since we're not sure what the memory model is actually
>> going to look like, we thought it'd be best to just write the simplest code
>> here
>>
>>   /*
>>* TODO_RISCV_MEMORY_MODEL: While we could emit AMOs for the W and D sized
>>* accesses here, it's questionable if that actually helps or not: the lack of
>>* offsets in the AMOs means they're usually preceded by an addi, so they
>>* probably won't save code space.  For now we'll just emit the fence.
>>*/
>>   #define __smp_store_release(p, v)   \
>>   ({  \
>>   compiletime_assert_atomic_type(*p); \
>>   smp_mb();   \
>>   WRITE_ONCE(*p, v);  \
>>   })
>>
>>   #define __smp_load_acquire(p)   \
>>   ({  \
>>   union{typeof(*p) __p; long __l;} __u;   \
>
> AFAICT, there seems to be an endian issue if you do this. No?
>
> Let us assume typeof(*p) is char and *p == 1, and on a big endian 32bit
> platform:
>
>>   compiletime_assert_atomic_type(*p); \
>>   __u.__l = READ_ONCE(*p);\
>
>   READ_ONCE(*p) is 1 so
>   __u.__l is 0x00 00 00 01 now
>
>>   smp_mb();   \
>>   __u.__p;\
>
>   __u.__p is then 0x00.
>
> Am I missing something here?

We're little endian (though I might have still screwed it up).  I didn't really
bother looking because...

> Even so why not use the simple definition as in include/asm-generic/barrier.h?

...that's much better -- I forgot there were generic versions, as we used to
have a much more complicated one.

  
https://github.com/riscv/riscv-linux/commit/910d2bf4c3c349b670a1d839462e32e122ac70a5
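
For reference, the generic versions in include/asm-generic/barrier.h that we
now fall back to are (roughly -- quoting from memory, so the temporary names
may differ) just the plain access next to a full fence:

  #define __smp_store_release(p, v)					\
  do {									\
  	compiletime_assert_atomic_type(*p);				\
  	__smp_mb();							\
  	WRITE_ONCE(*p, v);						\
  } while (0)

  #define __smp_load_acquire(p)					\
  ({									\
  	typeof(*p) ___p1 = READ_ONCE(*p);				\
  	compiletime_assert_atomic_type(*p);				\
  	__smp_mb();							\
  	___p1;								\
  })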

Thanks!


Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-10 Thread Palmer Dabbelt
On Fri, 07 Jul 2017 01:08:19 PDT (-0700), pet...@infradead.org wrote:
> On Thu, Jul 06, 2017 at 06:04:13PM -0700, Palmer Dabbelt wrote:
>>   +/*
>>   + * TODO_RISCV_MEMORY_MODEL: I don't think RISC-V is allowed to perform a
>>   + * speculative load, but we're going to wait on a formal memory model in order
>>   + * to ensure this is safe to elide.
>>   + */
>>   +#define smp_acquire__after_ctrl_dep()  smp_mb()
>
> So typically a control dependency already provides read->write ordering,
> by virtue of speculative writes being BAD.
>
> So a control dependency only needs to provide read->read ordering in
> addition to the existing read->write ordering and hence this barrier is
> typically a smp_rmb().
>
> See the definition in asm-generic/barrier.h.
>
> Having to use a full barrier here would imply your architecture does not
> respect control dependencies, which would be BAD because we actually
> rely on them.
>
> So either the normal definition is good and you don't need to do
> anything, or you prohibit read speculation in which case you have a
> special case like TILE does.

I'd be very surprised (and very unhappy) if we ended up with speculative
writes, as that would be a huge mess.
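
Just to spell out the pattern I understand we'd break (a sketch with made-up
cpu0/cpu1 functions, x == y == 0 initially):

  int x, y;

  void cpu0(void) { if (READ_ONCE(x)) WRITE_ONCE(y, 1); }
  void cpu1(void) { if (READ_ONCE(y)) WRITE_ONCE(x, 1); }

If either store could be performed before its branch resolves, both CPUs could
observe 1 "out of thin air"; the kernel's control dependencies rely on that
never happening.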

Thanks!


Re: [patches] [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-10 Thread Palmer Dabbelt
On Fri, 07 Jul 2017 06:16:07 PDT (-0700), j.neuschae...@gmx.net wrote:
> On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
> [...]
>> +/* These barries need to enforce ordering on both devices or memory. */
>
> Very minor nit: s/barries/barriers/ (in several places)

I think this should do it

  
https://github.com/riscv/riscv-linux/commit/b356e7a2223b5e21df424ea7e9900e5bf408762f

Thanks!


Re: [patches] [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-07 Thread Jonathan Neuschäfer
On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
[...]
> +/* These barries need to enforce ordering on both devices or memory. */

Very minor nit: s/barries/barriers/ (in several places)


Jonathan Neuschäfer


signature.asc
Description: PGP signature


Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-07 Thread Peter Zijlstra
On Thu, Jul 06, 2017 at 06:04:13PM -0700, Palmer Dabbelt wrote:
> > Also probably not true. I _think_ you want a full barrier here, but
> > given the total lack of documentation on your end and the fact I've not
> > yet read the spinlock (which I suppose is below) I cannot yet state
> > more.
> 
> Ya, sorry about that -- we're waiting on a proper memory model spec.  Is there
> any other documentation I should produce?

Nah, I'll wait for your shiny new document.


>   +/*
>   + * TODO_RISCV_MEMORY_MODEL: I don't think RISC-V is allowed to perform a
>   + * speculative load, but we're going to wait on a formal memory model in order
>   + * to ensure this is safe to elide.
>   + */
>   +#define smp_acquire__after_ctrl_dep()  smp_mb()

So typically a control dependency already provides read->write ordering,
by virtue of speculative writes being BAD.

So a control dependency only needs to provide read->read ordering in
addition to the existing read->write ordering and hence this barrier is
typically a smp_rmb().

See the definition in asm-generic/barrier.h.

Having to use a full barrier here would imply your architecture does not
respect control dependencies, which would be BAD because we actually
rely on them.

So either the normal definition is good and you don't need to do
anything, or you prohibit read speculation in which case you have a
special case like TILE does.
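
Concretely, the pattern is (a sketch with made-up ready/data variables; the
asm-generic fallback is simply smp_rmb()):

	if (READ_ONCE(ready)) {
		/*
		 * The branch already orders the READ_ONCE against later
		 * stores; all this has to add is read->read ordering, hence
		 * smp_rmb() rather than a full smp_mb().
		 */
		smp_acquire__after_ctrl_dep();
		r = READ_ONCE(data);
	}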



> >> +#define __smp_load_acquire(p)					\
> >> +do {								\
> >> +  union { typeof(*p) __val; char __c[1]; } __u =  \
> >> +  { .__val = (__force typeof(*p)) (v) };  \
> >> +  compiletime_assert_atomic_type(*p); \
> >> +  switch (sizeof(*p)) {   \
> >> +  case 1: \
> >> +  case 2: \
> >> +  __u.__val = READ_ONCE(*p);  \
> >> +  smb_mb();   \
> >> +  break;  \
> >> +  case 4: \
> >> +  __asm__ __volatile__ (  \
> >> +  "amoor.w.aq %1, zero, %0"   \
> >> +  : "+A" (*p) \
> >> +  : "=r" (__u.__val)  \
> >> +  : "memory");\
> >> +  break;  \
> >> +  case 8: \
> >> +  __asm__ __volatile__ (  \
> >> +  "amoor.d.aq %1, zero, %0"   \
> >> +  : "+A" (*p) \
> >> +  : "=r" (__u.__val)  \
> >> +  : "memory");\
> >> +  break;  \
> >> +  }   \
> >> +  __u.__val;  \
> >> +} while (0)
> >
> > 'creative' use of amoswap and amoor :-)
> >
> > You should really look at a normal load with ordering instruction
> > though, that amoor.aq is a rmw and will promote the cacheline to
> > exclusive (and dirty it).
> 
> The thought here was that implementations could elide the MW by pattern
> matching the "zero" (x0, the architectural zero register) forms of AMOs where
> it's interesting.  I talked to one of our microarchitecture guys, and while he
> agrees that's easy he points out that eliding half the AMO may wreak havoc on
> the consistency model.  Since we're not sure what the memory model is actually
> going to look like, we thought it'd be best to just write the simplest code
> here
> 
>   /*
>* TODO_RISCV_MEMORY_MODEL: While we could emit AMOs for the W and D sized
>* accesses here, it's questionable if that actually helps or not: the lack of
>* offsets in the AMOs means they're usually preceded by an addi, so they
>* probably won't save code space.  For now we'll just emit the fence.
>*/
>   #define __smp_store_release(p, v)   \
>   ({  \
>   compiletime_assert_atomic_type(*p); \
>   smp_mb();   \
>   WRITE_ONCE(*p, v);  \
>   })
> 
>   #define __smp_load_acquire(p)   \
>   ({  

Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-06 Thread Boqun Feng
On Thu, Jul 06, 2017 at 06:04:13PM -0700, Palmer Dabbelt wrote:
[...]
> >> +#define __smp_load_acquire(p)					\
> >> +do {								\
> >> +  union { typeof(*p) __val; char __c[1]; } __u =  \
> >> +  { .__val = (__force typeof(*p)) (v) };  \
> >> +  compiletime_assert_atomic_type(*p); \
> >> +  switch (sizeof(*p)) {   \
> >> +  case 1: \
> >> +  case 2: \
> >> +  __u.__val = READ_ONCE(*p);  \
> >> +  smb_mb();   \
> >> +  break;  \
> >> +  case 4: \
> >> +  __asm__ __volatile__ (  \
> >> +  "amoor.w.aq %1, zero, %0"   \
> >> +  : "+A" (*p) \
> >> +  : "=r" (__u.__val)  \
> >> +  : "memory");\
> >> +  break;  \
> >> +  case 8: \
> >> +  __asm__ __volatile__ (  \
> >> +  "amoor.d.aq %1, zero, %0"   \
> >> +  : "+A" (*p) \
> >> +  : "=r" (__u.__val)  \
> >> +  : "memory");\
> >> +  break;  \
> >> +  }   \
> >> +  __u.__val;  \
> >> +} while (0)
> >
> > 'creative' use of amoswap and amoor :-)
> >
> > You should really look at a normal load with ordering instruction
> > though, that amoor.aq is a rmw and will promote the cacheline to
> > exclusive (and dirty it).
> 
> The thought here was that implementations could elide the MW by pattern
> matching the "zero" (x0, the architectural zero register) forms of AMOs where
> it's interesting.  I talked to one of our microarchitecture guys, and while he
> agrees that's easy he points out that eliding half the AMO may wreak havoc on
> the consistency model.  Since we're not sure what the memory model is actually
> going to look like, we thought it'd be best to just write the simplest code
> here
> 
>   /*
>* TODO_RISCV_MEMORY_MODEL: While we could emit AMOs for the W and D sized
>* accesses here, it's questionable if that actually helps or not: the lack of
>* offsets in the AMOs means they're usually preceded by an addi, so they
>* probably won't save code space.  For now we'll just emit the fence.
>*/
>   #define __smp_store_release(p, v)   \
>   ({  \
>   compiletime_assert_atomic_type(*p); \
>   smp_mb();   \
>   WRITE_ONCE(*p, v);  \
>   })
> 
>   #define __smp_load_acquire(p)   \
>   ({  \
>   union{typeof(*p) __p; long __l;} __u;   \

AFAICT, there seems to be an endian issue if you do this. No?

Let us assume typeof(*p) is char and *p == 1, and on a big endian 32bit
platform:

>   compiletime_assert_atomic_type(*p); \
>   __u.__l = READ_ONCE(*p);\

READ_ONCE(*p) is 1 so
__u.__l is 0x00 00 00 01 now

>   smp_mb();   \
>   __u.__p;\

__u.__p is then 0x00.

Am I missing something here?
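
i.e. the same punning in plain userspace C (nothing RISC-V specific):

  #include <stdio.h>

  int main(void)
  {
  	union { char __p; long __l; } __u;

  	__u.__l = 1;	/* stands in for __u.__l = READ_ONCE(*p) */

  	/* Prints 1 on a little endian machine, but 0 on a big endian one,
  	 * because __p then aliases the most significant byte. */
  	printf("%d\n", __u.__p);
  	return 0;
  }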

Even so why not use the simple definition as in include/asm-generic/barrier.h?

Regards,
Boqun



>   })
> 
[...]


signature.asc
Description: PGP signature


Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-06 Thread Palmer Dabbelt
On Wed, 05 Jul 2017 01:43:21 PDT (-0700), pet...@infradead.org wrote:
> On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
>> +/*
>> + * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
>> + * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
>> + * others.
>> + */
>
> Yes.. we have new documentation in the work to which I would post a link
> but for some reason copy/paste stopped working again (Konsole does that
> at times and is #$%#$%#4# annoying).
>
> Ha, found it using google...
>
>   https://marc.info/?l=linux-kernel&m=149727901125801

Thanks.

>> +/*
>> + * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
>> + * {cmp,}xchg and the operations that return, so they need a barrier.  We just
>> + * use the other implementations directly.
>> + */
>
> cmpxchg triggers an extra rule; all conditional operations only need to
> imply barriers on success. So a cmpxchg that fails, need not imply any
> ordering what so ever.
>
>> +/*
>> + * Our atomic operations set the AQ and RL bits and therefor we don't need to
>> + * fence around atomics.
>> + */
>> +#define __smb_mb__before_atomic()   barrier()
>> +#define __smb_mb__after_atomic()barrier()
>
> Ah, not quite... you need full barriers here. Because your regular
> atomic ops imply no ordering what so ever.

The new documentation helps here, too.  Thanks!

  diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
  index 82a0092a86d0..a480c0fb85e5 100644
  --- a/arch/riscv/include/asm/barrier.h
  +++ b/arch/riscv/include/asm/barrier.h
  @@ -39,11 +39,19 @@
   #define smp_wmb()  RISCV_FENCE(w,w)

   /*
  - * Our atomic operations set the AQ and RL bits and therefor we don't need to
  - * fence around atomics.
  + * These fences exist to enforce ordering around the relaxed AMOs.  The
  + * documentation defines that
  + * "
  + * atomic_fetch_add();
  + *   is equivalent to:
  + * smp_mb__before_atomic();
  + * atomic_fetch_add_relaxed();
  + * smp_mb__after_atomic();
  + * "
  + * So we emit full fences on both sides.
*/
  -#define __smb_mb__before_atomic()  barrier()
  -#define __smb_mb__after_atomic()   barrier()
  +#define __smb_mb__before_atomic()  smp_mb()
  +#define __smb_mb__after_atomic()   smp_mb()

   /*
    * These barries are meant to prevent memory operations inside a spinlock from
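
(For context, the rule the new comment quotes means our non-value-returning
atomics are completely unordered, so callers have to bracket them explicitly --
a sketch with a made-up counter:)

  atomic_t refs = ATOMIC_INIT(0);

  smp_mb__before_atomic();	/* must be a real fence on RISC-V ... */
  atomic_inc(&refs);		/* ... because this AMO is relaxed ... */
  smp_mb__after_atomic();	/* ... and so must this. */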

>> +/*
>> + * These barries are meant to prevent memory operations inside a spinlock from
>> + * moving outside of that spinlock.  Since we set the AQ and RL bits when
>> + * entering or leaving spinlocks, no additional fence needs to be performed.
>> + */
>> +#define smb_mb__before_spinlock()   barrier()
>> +#define smb_mb__after_spinlock()barrier()
>
> Also probably not true. I _think_ you want a full barrier here, but
> given the total lack of documentation on your end and the fact I've not
> yet read the spinlock (which I suppose is below) I cannot yet state
> more.

Ya, sorry about that -- we're waiting on a proper memory model spec.  Is there
any other documentation I should produce?

More below.

>> +
>> +/* FIXME: I don't think RISC-V is allowed to perform a speculative load. */
>> +#define smp_acquire__after_ctrl_dep()   barrier()
>
> That would be a very weird thing to disallow... speculative loads are
> teh awesome ;-) Note you can get the very same effect from caches when
> your stores are not globally atomic.

OK -- I guess generally the user ISA spec is written disregarding
microarchitecture, so I assumed this would be illegal.  We'll wait for the
memory model spec.

  diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h
  index a4e54f4c17eb..c039333d4a5d 100644
  --- a/arch/riscv/include/asm/barrier.h
  +++ b/arch/riscv/include/asm/barrier.h
  @@ -61,8 +61,12 @@
   #define smb_mb__before_spinlock()  smp_mb()
   #define smb_mb__after_spinlock()   smp_mb()

  -/* FIXME: I don't think RISC-V is allowed to perform a speculative load. */
  -#define smp_acquire__after_ctrl_dep()  barrier()
  +/*
  + * TODO_RISCV_MEMORY_MODEL: I don't think RISC-V is allowed to perform a
   + * speculative load, but we're going to wait on a formal memory model in order
  + * to ensure this is safe to elide.
  + */
  +#define smp_acquire__after_ctrl_dep()  smp_mb()

   /*
    * The RISC-V ISA doesn't support byte or half-word AMOs, so we fall back to a
  @@ -137,24 +141,6 @@
  __u.__val;  \
   })

  -/*
  - * The default implementation of this uses READ_ONCE and
   - * smp_acquire__after_ctrl_dep, but since we can directly do an ACQUIRE load we
  - * can avoid the extra barrier.
  - */
  -#define smp_cond_load_acquire(ptr, cond_expr) ({   \
  -   typeof(ptr) __PTR = (ptr);  \
  -   typeof(*ptr) VAL;   
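
(For reference, the asm-generic definition this now falls back to is roughly:)

  #define smp_cond_load_acquire(ptr, cond_expr) ({		\
  	typeof(ptr) __PTR = (ptr);				\
  	typeof(*ptr) VAL;					\
  	for (;;) {						\
  		VAL = READ_ONCE(*__PTR);			\
  		if (cond_expr)					\
  			break;					\
  		cpu_relax();					\
  	}							\
  	smp_acquire__after_ctrl_dep();				\
  	VAL;							\
  })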

Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-06 Thread Peter Zijlstra
On Thu, Jul 06, 2017 at 07:08:33PM +0800, Boqun Feng wrote:
> On Wed, Jul 05, 2017 at 10:43:21AM +0200, Peter Zijlstra wrote:
> > On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
> > > +/*
> > > + * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
> > > + * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
> > > + * others.
> > > + */
> > 
> > Yes.. we have new documentation in the work to which I would post a link
> > but for some reason copy/paste stopped working again (Konsole does that
> > at times and is #$%#$%#4# annoying).
> > 
> > Ha, found it using google...
> > 
> >   https://marc.info/?l=linux-kernel&m=14972790112580
> > 
> 
> The link is broken, you missed a trailing 1 ;-)
> 
>   https://marc.info/?l=linux-kernel&m=149727901125801
> 

Argh... thanks for fixing that!


Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-05 Thread Boqun Feng
On Wed, Jul 05, 2017 at 10:43:21AM +0200, Peter Zijlstra wrote:
> On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
> > +/*
> > + * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
> > + * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
> > + * others.
> > + */
> 
> Yes.. we have new documentation in the work to which I would post a link
> but for some reason copy/paste stopped working again (Konsole does that
> at times and is #$%#$%#4# annoying).
> 
> Ha, found it using google...
> 
>   https://marc.info/?l=linux-kernel&m=14972790112580
> 

The link is broken, you missed a trailing 1 ;-)

https://marc.info/?l=linux-kernel&m=149727901125801

Regards,
Boqun


signature.asc
Description: PGP signature


Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-05 Thread Boqun Feng
On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
[...]
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> new file mode 100644
> index ..81025c056412
> --- /dev/null
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -0,0 +1,138 @@
> +/*
> + * Copyright (C) 2014 Regents of the University of California
> + *
> + *   This program is free software; you can redistribute it and/or
> + *   modify it under the terms of the GNU General Public License
> + *   as published by the Free Software Foundation, version 2.
> + *
> + *   This program is distributed in the hope that it will be useful,
> + *   but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *   GNU General Public License for more details.
> + */
> +
> +#ifndef _ASM_RISCV_CMPXCHG_H
> +#define _ASM_RISCV_CMPXCHG_H
> +
> +#include 
> +
> +#ifdef CONFIG_ISA_A
> +
> +#include 
> +
> +#define __xchg(new, ptr, size, asm_or)   \
> +({   \
> + __typeof__(ptr) __ptr = (ptr);  \
> + __typeof__(new) __new = (new);  \
> + __typeof__(*(ptr)) __ret;   \
> + switch (size) { \
> + case 4: \
> + __asm__ __volatile__ (  \
> + "amoswap.w" #asm_or " %0, %2, %1"   \
> + : "=r" (__ret), "+A" (*__ptr)   \
> + : "r" (__new)); \

It seems that you miss the "memory" clobber here, same as for cmpxchg();
did you do this on purpose? AFAIK, without this clobber, compilers are
within their rights to reorder operations preceding and following this
operation, which makes it unordered.
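
i.e. something along the lines of (only the 4-byte case shown):

		__asm__ __volatile__ (				\
			"amoswap.w" #asm_or " %0, %2, %1"	\
			: "=r" (__ret), "+A" (*__ptr)		\
			: "r" (__new)				\
			: "memory");				\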

> + break;  \
> + case 8: \
> + __asm__ __volatile__ (  \
> + "amoswap.d" #asm_or " %0, %2, %1"   \
> + : "=r" (__ret), "+A" (*__ptr)   \
> + : "r" (__new)); \
> + break;  \
> + default:\
> + BUILD_BUG();\
> + }   \
> + __ret;  \
> +})
> +
> +#define xchg(ptr, x)(__xchg((x), (ptr), sizeof(*(ptr)), .aqrl))
> +
> +#define xchg32(ptr, x)   \
> +({   \
> + BUILD_BUG_ON(sizeof(*(ptr)) != 4);  \
> + xchg((ptr), (x));   \
> +})
> +
> +#define xchg64(ptr, x)   \
> +({   \
> + BUILD_BUG_ON(sizeof(*(ptr)) != 8);  \
> + xchg((ptr), (x));   \
> +})
> +
> +/*
> + * Atomic compare and exchange.  Compare OLD with MEM, if identical,
> + * store NEW in MEM.  Return the initial value in MEM.  Success is
> + * indicated by comparing RETURN with OLD.
> + */
> +#define __cmpxchg(ptr, old, new, size, lrb, scb) \
> +({   \
> + __typeof__(ptr) __ptr = (ptr);  \
> + __typeof__(old) __old = (old);  \
> + __typeof__(new) __new = (new);  \

Better write those two lines as:

	__typeof__(*(ptr)) __old = (old);			\
	__typeof__(*(ptr)) __new = (new);			\

? I'm thinking of the case where @old and @new are int and ptr is "long *":
could the asm below do the implicit conversion right, i.e. keep the
sign bit?
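
i.e. (a made-up snippet just to show the difference at the C level):

	long *p;
	int new = -1;

	__typeof__(new) __new = new;	/* stays an int */
	__typeof__(*(p)) __new2 = new;	/* -1 is sign-extended to a 64-bit
					 * long by the C conversion before
					 * the asm ever sees it */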

Regards,
Boqun

> + __typeof__(*(ptr)) __ret;   \
> + register unsigned int __rc; \
> + switch (size) { \
> + case 4: \
> + __asm__ __volatile__ (  \
> + "0:"\
> + "lr.w" #scb " %0, %2\n" \
> + "bne %0, %z3, 1f\n" \
> + "sc.w" #lrb " %1, %z4, %2\n"\
> + "bnez%1, 0b\n"  \
> + "1:"\
> + : 

Re: [PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-05 Thread Peter Zijlstra
On Tue, Jul 04, 2017 at 12:50:55PM -0700, Palmer Dabbelt wrote:
> +/*
> + * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
> + * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
> + * others.
> + */

Yes.. we have new documentation in the work to which I would post a link
but for some reason copy/paste stopped working again (Konsole does that
at times and is #$%#$%#4# annoying).

Ha, found it using google...

  https://marc.info/?l=linux-kernel&m=14972790112580


> +

> +/*
> + * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as
> + * {cmp,}xchg and the operations that return, so they need a barrier.  We just
> + * use the other implementations directly.
> + */

cmpxchg triggers an extra rule; all conditional operations only need to
imply barriers on success. So a cmpxchg that fails, need not imply any
ordering what so ever.
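
That is, for the typical retry loop (a sketch, with a made-up flag bit):

	atomic_t v;
	int old, new;

	do {
		old = atomic_read(&v);		/* needs no ordering */
		new = old | 0x1;
	} while (atomic_cmpxchg(&v, old, new) != old);
	/* only the final, successful cmpxchg has to act as a full barrier;
	 * the failed iterations can remain completely unordered */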

> +/*
> + * Our atomic operations set the AQ and RL bits and therefor we don't need to
> + * fence around atomics.
> + */
> +#define __smb_mb__before_atomic()barrier()
> +#define __smb_mb__after_atomic() barrier()

Ah, not quite... you need full barriers here. Because your regular
atomic ops imply no ordering what so ever.

> +/*
> + * These barries are meant to prevent memory operations inside a spinlock from
> + * moving outside of that spinlock.  Since we set the AQ and RL bits when
> + * entering or leaving spinlocks, no additional fence needs to be performed.
> + */
> +#define smb_mb__before_spinlock()barrier()
> +#define smb_mb__after_spinlock() barrier()

Also probably not true. I _think_ you want a full barrier here, but
given the total lack of documentation on your end and the fact I've not
yet read the spinlock (which I suppose is below) I cannot yet state
more.


> +
> +/* FIXME: I don't think RISC-V is allowed to perform a speculative load. */
> +#define smp_acquire__after_ctrl_dep()barrier()

That would be a very weird thing to disallow... speculative loads are
teh awesome ;-) Note you can get the very same effect from caches when
your stores are not globally atomic.

> +/*
> + * The RISC-V ISA doesn't support byte or half-word AMOs, so we fall back to a
> + * regular store and a fence here.  Otherwise we emit an AMO with an AQ or RL
> + * bit set and allow the microarchitecture to avoid the other half of the AMO.
> + */
> +#define __smp_store_release(p, v)\
> +do { \
> + union { typeof(*p) __val; char __c[1]; } __u =  \
> + { .__val = (__force typeof(*p)) (v) };  \
> + compiletime_assert_atomic_type(*p); \
> + switch (sizeof(*p)) {   \
> + case 1: \
> + case 2: \
> + smb_mb();   \
> + WRITE_ONCE(*p, __u.__val);  \
> + break;  \
> + case 4: \
> + __asm__ __volatile__ (  \
> + "amoswap.w.rl zero, %1, %0" \
> + : "+A" (*p), "r" (__u.__val)\
> + :   \
> + : "memory");\
> + break;  \
> + case 8: \
> + __asm__ __volatile__ (  \
> + "amoswap.d.rl zero, %1, %0" \
> + : "+A" (*p), "r" (__u.__val)\
> + :   \
> + : "memory");\
> + break;  \
> + }   \
> +} while (0)
> +
> +#define __smp_load_acquire(p)					\
> +do { \
> + union { typeof(*p) __val; char __c[1]; } __u =  \
> + { .__val = (__force typeof(*p)) (v) };  \
> + compiletime_assert_atomic_type(*p); \
> + switch (sizeof(*p)) {   \
> + case 1: \
> + case 2: \
> + __u.__val = 

[PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-04 Thread Palmer Dabbelt
This contains all the code that directly interfaces with the RISC-V
memory model.  While this code conforms to the current RISC-V ISA
specifications (user 2.2 and priv 1.10), the memory model is somewhat
underspecified in those documents.  There is a working group that hopes
to produce a formal memory model by the end of the year, but my
understanding is that the basic definitions we're relying on here won't
change significantly.

Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/include/asm/atomic.h | 337 
 arch/riscv/include/asm/barrier.h| 143 ++
 arch/riscv/include/asm/bitops.h | 228 +
 arch/riscv/include/asm/cacheflush.h |  39 
 arch/riscv/include/asm/cmpxchg.h| 138 +
 arch/riscv/include/asm/io.h | 180 +
 arch/riscv/include/asm/spinlock.h   | 167 
 arch/riscv/include/asm/spinlock_types.h |  33 
 arch/riscv/include/asm/tlb.h|  24 +++
 arch/riscv/include/asm/tlbflush.h   |  64 ++
 10 files changed, 1353 insertions(+)
 create mode 100644 arch/riscv/include/asm/atomic.h
 create mode 100644 arch/riscv/include/asm/barrier.h
 create mode 100644 arch/riscv/include/asm/bitops.h
 create mode 100644 arch/riscv/include/asm/cacheflush.h
 create mode 100644 arch/riscv/include/asm/cmpxchg.h
 create mode 100644 arch/riscv/include/asm/io.h
 create mode 100644 arch/riscv/include/asm/spinlock.h
 create mode 100644 arch/riscv/include/asm/spinlock_types.h
 create mode 100644 arch/riscv/include/asm/tlb.h
 create mode 100644 arch/riscv/include/asm/tlbflush.h

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
new file mode 100644
index ..e5a12cfa405a
--- /dev/null
+++ b/arch/riscv/include/asm/atomic.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_RISCV_ATOMIC_H
+#define _ASM_RISCV_ATOMIC_H
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+# include 
+#else
+# if (__riscv_xlen < 64)
+#  error "64-bit atomics require XLEN to be at least 64"
+# endif
+#endif
+
+#ifdef CONFIG_ISA_A
+
+#include 
+#include 
+
+#define ATOMIC_INIT(i) { (i) }
+static __always_inline int atomic_read(const atomic_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic_set(atomic_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC64_INIT(i) { (i) }
+static __always_inline int atomic64_read(const atomic64_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic64_set(atomic64_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+#endif
+
+/*
+ * First, the atomic ops that have no ordering constraints and therefor don't
+ * have the AQ or RL bits set.  These don't return anything, so there's only
+ * one version to worry about.
+ */
+#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)		\
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{										\
+	__asm__ __volatile__ (							\
+		"amo" #asm_op "." #asm_type " zero, %1, %0"			\
+		: "+A" (v->counter)						\
+		: "r" (I));							\
+}
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#else
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )   \
+ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#endif
+
+ATOMIC_OPS(add, add, +,  i)
+ATOMIC_OPS(sub, add, +, -i)
+/*
+ * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
+ * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
+ * others.
+ */
+ATOMIC_OPS(and, and, &,  i)
+ATOMIC_OPS( or,  or, |,  i)
+ATOMIC_OPS(xor, xor, ^,  i)
+
+#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+/*
+ * Atomic ops that have ordered, relaxed, acquire, and release variants.
+ * There are two flavors of these: the arithmetic ops have both fetch and return
+ * versions, while the logical ops only have fetch versions.
+ */
+#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix)
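
(For illustration only, not part of the diff: ATOMIC_OP(add, add, +, i, w, int, )
above expands to roughly the following.)

  static __always_inline void atomic_add(int i, atomic_t *v)
  {
  	__asm__ __volatile__ (
  		"amoadd.w zero, %1, %0"	/* AMO result discarded into x0 */
  		: "+A" (v->counter)
  		: "r" (i));
  }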

[PATCH 2/9] RISC-V: Atomic and Locking Code

2017-07-04 Thread Palmer Dabbelt
This contains all the code that directly interfaces with the RISC-V
memory model.  While this code corforms to the current RISC-V ISA
specifications (user 2.2 and priv 1.10), the memory model is somewhat
underspecified in those documents.  There is a working group that hopes
to produce a formal memory model by the end of the year, but my
understanding is that the basic definitions we're relying on here won't
change significantly.

Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/include/asm/atomic.h | 337 
 arch/riscv/include/asm/barrier.h| 143 ++
 arch/riscv/include/asm/bitops.h | 228 +
 arch/riscv/include/asm/cacheflush.h |  39 
 arch/riscv/include/asm/cmpxchg.h| 138 +
 arch/riscv/include/asm/io.h | 180 +
 arch/riscv/include/asm/spinlock.h   | 167 
 arch/riscv/include/asm/spinlock_types.h |  33 
 arch/riscv/include/asm/tlb.h|  24 +++
 arch/riscv/include/asm/tlbflush.h   |  64 ++
 10 files changed, 1353 insertions(+)
 create mode 100644 arch/riscv/include/asm/atomic.h
 create mode 100644 arch/riscv/include/asm/barrier.h
 create mode 100644 arch/riscv/include/asm/bitops.h
 create mode 100644 arch/riscv/include/asm/cacheflush.h
 create mode 100644 arch/riscv/include/asm/cmpxchg.h
 create mode 100644 arch/riscv/include/asm/io.h
 create mode 100644 arch/riscv/include/asm/spinlock.h
 create mode 100644 arch/riscv/include/asm/spinlock_types.h
 create mode 100644 arch/riscv/include/asm/tlb.h
 create mode 100644 arch/riscv/include/asm/tlbflush.h

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
new file mode 100644
index ..e5a12cfa405a
--- /dev/null
+++ b/arch/riscv/include/asm/atomic.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_RISCV_ATOMIC_H
+#define _ASM_RISCV_ATOMIC_H
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+# include 
+#else
+# if (__riscv_xlen < 64)
+#  error "64-bit atomics require XLEN to be at least 64"
+# endif
+#endif
+
+#ifdef CONFIG_ISA_A
+
+#include 
+#include 
+
+#define ATOMIC_INIT(i) { (i) }
+static __always_inline int atomic_read(const atomic_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic_set(atomic_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC64_INIT(i) { (i) }
+static __always_inline int atomic64_read(const atomic64_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic64_set(atomic64_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+#endif
+
+/*
+ * First, the atomic ops that have no ordering constraints and therefor don't
+ * have the AQ or RL bits set.  These don't return anything, so there's only
+ * one version to worry about.
+ */
+#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)           \
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{                                                                           \
+   __asm__ __volatile__ (                                                  \
+       "amo" #asm_op "." #asm_type " zero, %1, %0"                         \
+       : "+A" (v->counter)                                                 \
+       : "r" (I));                                                         \
+}
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#else
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )   \
+ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#endif
+
+ATOMIC_OPS(add, add, +,  i)
+ATOMIC_OPS(sub, add, +, -i)
+/*
+ * FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
+ * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
+ * others.
+ */
+ATOMIC_OPS(and, and, &,  i)
+ATOMIC_OPS( or,  or, |,  i)
+ATOMIC_OPS(xor, xor, ^,  i)
+
+#undef ATOMIC_OP
+#undef ATOMIC_OPS
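
(For concreteness -- not part of the patch -- here is a hand-expanded sketch of
what ATOMIC_OPS(add, add, +, i) generates for the 32-bit "w"/int case via the
ATOMIC_OP macro above:)

  /* Hand-expanded ATOMIC_OP(add, add, +, i, w, int, ).  Writing the AMO
   * result to x0 ("zero") discards the old value, so this is a plain
   * unordered read-modify-write with no AQ/RL bits set. */
  static __always_inline void atomic_add(int i, atomic_t *v)
  {
          __asm__ __volatile__ (
                  "amoadd.w zero, %1, %0"
                  : "+A" (v->counter)
                  : "r" (i));
  }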
+
+/*
+ * Atomic ops that have ordered, relaxed, acquire, and release variants.
+ * There are two flavors of these: the arithmetic ops have both fetch and return
+ * versions, while the logical ops only have fetch versions.
+ */
+#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \
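
(The rest of this hunk is cut off in the archive.  Based on the non-returning
ops above, a fetch variant with acquire ordering would presumably expand to
something like the sketch below; the ".aq" suffix, the returned old value, and
the atomic_fetch_add_acquire name are assumptions about the truncated macro,
not text from the patch.)

  /* Illustrative sketch only: fetch-and-add returning the old value, with
   * acquire ordering requested via the ".aq" suffix on the AMO. */
  static __always_inline int atomic_fetch_add_acquire(int i, atomic_t *v)
  {
          int ret;

          __asm__ __volatile__ (
                  "amoadd.w.aq %1, %2, %0"
                  : "+A" (v->counter), "=r" (ret)
                  : "r" (i)
                  : "memory");
          return ret;
  }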

[PATCH 2/9] RISC-V: Atomic and Locking Code

2017-06-28 Thread Palmer Dabbelt
This contains all the code that directly interfaces with the RISC-V
memory model.  While this code conforms to the current RISC-V ISA
specifications (user 2.2 and priv 1.10), the memory model is somewhat
underspecified in those documents.  There is a working group that hopes
to produce a formal memory model by the end of the year, but my
understanding is that the basic definitions we're relying on here won't
change significantly.

Signed-off-by: Palmer Dabbelt 
---
 arch/riscv/include/asm/atomic.h | 330 
 arch/riscv/include/asm/barrier.h| 139 ++
 arch/riscv/include/asm/bitops.h | 228 ++
 arch/riscv/include/asm/cacheflush.h |  39 
 arch/riscv/include/asm/cmpxchg.h| 138 +
 arch/riscv/include/asm/io.h | 178 +
 arch/riscv/include/asm/spinlock.h   | 167 
 arch/riscv/include/asm/spinlock_types.h |  33 
 arch/riscv/include/asm/tlb.h|  24 +++
 arch/riscv/include/asm/tlbflush.h   |  64 +++
 10 files changed, 1340 insertions(+)
 create mode 100644 arch/riscv/include/asm/atomic.h
 create mode 100644 arch/riscv/include/asm/barrier.h
 create mode 100644 arch/riscv/include/asm/bitops.h
 create mode 100644 arch/riscv/include/asm/cacheflush.h
 create mode 100644 arch/riscv/include/asm/cmpxchg.h
 create mode 100644 arch/riscv/include/asm/io.h
 create mode 100644 arch/riscv/include/asm/spinlock.h
 create mode 100644 arch/riscv/include/asm/spinlock_types.h
 create mode 100644 arch/riscv/include/asm/tlb.h
 create mode 100644 arch/riscv/include/asm/tlbflush.h

diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h
new file mode 100644
index ..7c5bfd18fe4e
--- /dev/null
+++ b/arch/riscv/include/asm/atomic.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_RISCV_ATOMIC_H
+#define _ASM_RISCV_ATOMIC_H
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+# include 
+#else
+# if (__riscv_xlen < 64)
+#  error "64-bit atomics require XLEN to be at least 64"
+# endif
+#endif
+
+#ifdef CONFIG_ISA_A
+
+#include 
+#include 
+
+#define ATOMIC_INIT(i) { (i) }
+static __always_inline int atomic_read(const atomic_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic_set(atomic_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+
+#ifndef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC64_INIT(i) { (i) }
+static __always_inline int atomic64_read(const atomic64_t *v)
+{
+   return READ_ONCE(v->counter);
+}
+static __always_inline void atomic64_set(atomic64_t *v, int i)
+{
+   WRITE_ONCE(v->counter, i);
+}
+#endif
+
+/* First, the atomic ops that have no ordering constraints and therefore don't
+ * have the AQ or RL bits set.  These don't return anything, so there's only
+ * one version to worry about.
+ */
+#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix)           \
+static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \
+{                                                                           \
+   __asm__ __volatile__ (                                                  \
+       "amo" #asm_op "." #asm_type " zero, %1, %0"                         \
+       : "+A" (v->counter)                                                 \
+       : "r" (I));                                                         \
+}
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )
+#else
+#define ATOMIC_OPS(op, asm_op, c_op, I)\
+ATOMIC_OP (op, asm_op, c_op, I, w,  int,   )   \
+ATOMIC_OP (op, asm_op, c_op, I, d, long, 64)
+#endif
+
+ATOMIC_OPS(add, add, +,  i)
+ATOMIC_OPS(sub, add, +, -i)
+/* FIXME: I could only find documentation that atomic_{add,sub,inc,dec} are
+ * barrier-free.  I'm assuming that and/or/xor have the same constraints as the
+ * others.
+ */
+ATOMIC_OPS(and, and, &,  i)
+ATOMIC_OPS( or,  or, |,  i)
+ATOMIC_OPS(xor, xor, ^,  i)
+
+#undef ATOMIC_OP
+#undef ATOMIC_OPS
+
+/* Atomic ops that have ordered, relaxed, acquire, and release variants.
+ * There are two flavors of these: the arithmetic ops have both fetch and return
+ * versions, while the logical ops only have fetch versions. */
+#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \
