Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-18 Thread Matthew Macy
>
>
> > PowerPC barrier instructions are needed to prevent reordering.
> >
> > Correct. The current lkpi implementation also assumes that device
> > endian == host endian. The Linux generic accessors will do use endian
> > macros to byte swap where necessary.
>
> Yes, these functions are used to access little-endian registers so byte
> swapping is needed on big-endian machines.  For PowerPC Linux also defines
> functions to access big-endian registers, but we probably don't need those.
>

None of our currently supported drivers use those macros (most devices are
LE) so I’ve omitted them.

-M



> > The following change fixes radeon attach issues:
> >
> https://github.com/POWER9BSD/freebsd/commit/be6c98f5c2e2ed9a4935ac5b67c468b75f3b4457
>
> +/* prevent prefetching of coherent DMA data ahead of a dma-complete */
> +#ifndef __io_ar
> +#ifdef rmb
> +#define __io_ar()  rmb()
> +#else
> +#define __io_ar()  __compiler_membar();
> +#endif
> +#endif
> +
> +/* flush writes to coherent DMA data before possibly triggering a DMA
> read */
> +#ifndef __io_bw
> +#ifdef wmb
> +#define __io_bw()  wmb()
> +#else
> +#define __io_bw()  __compiler_membar();
> +#endif
> +#endif
>
> ...
>
>  static inline uint16_t
>  readw(const volatile void *addr)
>  {
> uint16_t v;
>
> -   __compiler_membar();
> -   v = *(const volatile uint16_t *)addr;
> -   __compiler_membar();
> +   __io_br();
> +   v = le16toh(__raw_readw(addr));
> +   __io_ar();
> return (v);
>  }
>
> For x86 rmb and wmb are defined as lfence and sfence instructions which
> shouldn't be necessary here.
>
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-18 Thread Matthew Macy
Correct. This is just the generic case. We just need to define the __io
macros as __compiler_membar in x86/io.h

Cheers.
-M

On Sun, Nov 18, 2018 at 13:08 Tijl Coosemans  wrote:

> On Sun, 18 Nov 2018 12:10:25 -0800 Matthew Macy 
> wrote:
> >> Note that these functions are normally used on uncacheable memory which
> >> is strongly ordered on x86.  There should be no reordering at all.  On
> >> PowerPC barrier instructions are needed to prevent reordering.
> >
> > Correct. The current lkpi implementation also assumes that device
> > endian == host endian. The Linux generic accessors will do use endian
> > macros to byte swap where necessary.
>
> Yes, these functions are used to access little-endian registers so byte
> swapping is needed on big-endian machines.  For PowerPC Linux also defines
> functions to access big-endian registers, but we probably don't need those.
>
> > The following change fixes radeon attach issues:
> >
> https://github.com/POWER9BSD/freebsd/commit/be6c98f5c2e2ed9a4935ac5b67c468b75f3b4457
>
> +/* prevent prefetching of coherent DMA data ahead of a dma-complete */
> +#ifndef __io_ar
> +#ifdef rmb
> +#define __io_ar()  rmb()
> +#else
> +#define __io_ar()  __compiler_membar();
> +#endif
> +#endif
> +
> +/* flush writes to coherent DMA data before possibly triggering a DMA
> read */
> +#ifndef __io_bw
> +#ifdef wmb
> +#define __io_bw()  wmb()
> +#else
> +#define __io_bw()  __compiler_membar();
> +#endif
> +#endif
>
> ...
>
>  static inline uint16_t
>  readw(const volatile void *addr)
>  {
> uint16_t v;
>
> -   __compiler_membar();
> -   v = *(const volatile uint16_t *)addr;
> -   __compiler_membar();
> +   __io_br();
> +   v = le16toh(__raw_readw(addr));
> +   __io_ar();
> return (v);
>  }
>
> For x86 rmb and wmb are defined as lfence and sfence instructions which
> shouldn't be necessary here.
>
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-18 Thread Tijl Coosemans
On Sun, 18 Nov 2018 12:10:25 -0800 Matthew Macy  wrote:
>> Note that these functions are normally used on uncacheable memory which
>> is strongly ordered on x86.  There should be no reordering at all.  On
>> PowerPC barrier instructions are needed to prevent reordering.  
> 
> Correct. The current lkpi implementation also assumes that device
> endian == host endian. The Linux generic accessors will do use endian
> macros to byte swap where necessary.

Yes, these functions are used to access little-endian registers so byte
swapping is needed on big-endian machines.  For PowerPC Linux also defines
functions to access big-endian registers, but we probably don't need those.

> The following change fixes radeon attach issues:
> https://github.com/POWER9BSD/freebsd/commit/be6c98f5c2e2ed9a4935ac5b67c468b75f3b4457

+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar()  rmb()
+#else
+#define __io_ar()  __compiler_membar();
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw()  wmb()
+#else
+#define __io_bw()  __compiler_membar();
+#endif
+#endif

...

 static inline uint16_t
 readw(const volatile void *addr)
 {
uint16_t v;
 
-   __compiler_membar();
-   v = *(const volatile uint16_t *)addr;
-   __compiler_membar();
+   __io_br();
+   v = le16toh(__raw_readw(addr));
+   __io_ar();
return (v);
 }

For x86 rmb and wmb are defined as lfence and sfence instructions which
shouldn't be necessary here.
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-18 Thread Matthew Macy
> Note that these functions are normally used on uncacheable memory which
> is strongly ordered on x86.  There should be no reordering at all.  On
> PowerPC barrier instructions are needed to prevent reordering.

Correct. The current lkpi implementation also assumes that device
endian == host endian. The Linux generic accessors will do use endian
macros to byte swap where necessary.

The following change fixes radeon attach issues:
https://github.com/POWER9BSD/freebsd/commit/be6c98f5c2e2ed9a4935ac5b67c468b75f3b4457
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-18 Thread Tijl Coosemans
On Sat, 17 Nov 2018 14:55:05 -0800 Matthew Macy  wrote:
> When looking at powerpc io.h raw and relaxed are not aliases, but it
> appears that on x86, they are:
> https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/io.h
> 
> Sorry for the noise. But let's starting moving the x86 specific
> atomic.h and io.h under asm/x86.

Yes, I agree that the file should be moved to an arch specific location.
And the functions should probably be implemented using inline asm like
they are on Linux in case there are differences in the way compilers treat
the C code versus the asm code.

> On Sat, Nov 17, 2018 at 2:01 PM Matthew Macy  wrote:
>> You should probably revert this. The implied understanding of the
>> _relaxed version is incorrect. compiler_membar is there to prevent
>> instruction reordering which is possible on FreeBSD because the accesses
>> are done in C. The relaxed variants still do not permit instruction
>> reordering. On Linux __compiler_member (referred to as barrier there)
>> isn’t necessary in the mmio accessors because they always use volatile
>> asm which can’t be reordered. The distinction between the relaxed and
>> non relaxed variants is that the relaxed variant lacks memory barriers
>> (sync / lwsync / eieio on ppc, membar on sparc, etc).

Quoting https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html:

"Note that the compiler can move even volatile asm instructions relative
to other code, including across jump instructions."

So I think the Linux implementation of the relaxed variant lacks both CPU
and compiler barriers.

>> Most of the time we don’t run in to problems on x86 because with TSO
>> the only reordering possible is writes that happen before reads can
>> become visible in memory after they occur in the instruction stream.

Note that these functions are normally used on uncacheable memory which
is strongly ordered on x86.  There should be no reordering at all.  On
PowerPC barrier instructions are needed to prevent reordering.
___
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"


Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-17 Thread Matthew Macy
When looking at powerpc io.h raw and relaxed are not aliases, but it
appears that on x86, they are:
https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/io.h

Sorry for the noise. But let's starting moving the x86 specific
atomic.h and io.h under asm/x86.

Thanks.



On Sat, Nov 17, 2018 at 2:01 PM Matthew Macy  wrote:
>
> You should probably revert this. The implied understanding of the _relaxed 
> version is incorrect. compiler_membar is there to prevent instruction 
> reordering which is possible on FreeBSD because the accesses are done in C. 
> The relaxed variants still do not permit instruction reordering. On Linux 
> __compiler_member (referred to as barrier there) isn’t necessary in the mmio 
> accessors because they always use volatile asm which can’t be reordered. The 
> distinction between the relaxed and non relaxed variants is that the relaxed 
> variant lacks memory barriers (sync / lwsync / eieio on ppc, membar on sparc, 
> etc). Most of the time we don’t run in to problems on x86 because with TSO 
> the only reordering possible is writes that happen before reads can become 
> visible in memory after they occur in the instruction stream.
>
> Thanks.
> -M
>
> On Mon, Oct 22, 2018 at 13:55 Tijl Coosemans  wrote:
>>
>> Author: tijl
>> Date: Mon Oct 22 20:55:35 2018
>> New Revision: 339618
>> URL: https://svnweb.freebsd.org/changeset/base/339618
>>
>> Log:
>>   Define linuxkpi readq for 64-bit architectures.  It is used by drm-kmod.
>>   Currently the compiler picks up the definition in machine/cpufunc.h.
>>
>>   Add compiler memory barriers to read* and write*.  The Linux x86
>>   implementation of these functions uses inline asm with "memory" clobber.
>>   The Linux x86 implementation of read_relaxed* and write_relaxed* uses the
>>   same inline asm without "memory" clobber.
>>
>>   Implement ioread* and iowrite* in terms of read* and write* so they also
>>   have memory barriers.
>>
>>   Qualify the addr parameter in write* as volatile.
>>
>>   Like Linux, define macros with the same name as the inline functions.
>>
>>   Only define 64-bit versions on 64-bit architectures because generally
>>   32-bit architectures can't do atomic 64-bit loads and stores.
>>
>>   Regroup the functions a bit and add brief comments explaining what they do:
>>   - __raw_read*, __raw_write*: atomic, no barriers, no byte swapping
>>   - read_relaxed*, write_relaxed*: atomic, no barriers, little-endian
>>   - read*, write*: atomic, with barriers, little-endian
>>
>>   Add a comment that says our implementation of ioread* and iowrite*
>>   only handles MMIO and does not support port IO.
>>
>>   Reviewed by:  hselasky
>>   MFC after:3 days
>>
>> Modified:
>>   head/sys/compat/linuxkpi/common/include/linux/io.h
>>
>> Modified: head/sys/compat/linuxkpi/common/include/linux/io.h
>> ==
>> --- head/sys/compat/linuxkpi/common/include/linux/io.h  Mon Oct 22 20:22:33 
>> 2018(r339617)
>> +++ head/sys/compat/linuxkpi/common/include/linux/io.h  Mon Oct 22 20:55:35 
>> 2018(r339618)
>> @@ -38,153 +38,309 @@
>>  #include 
>>  #include 
>>
>> +/*
>> + * XXX This is all x86 specific.  It should be bus space access.
>> + */
>> +
>> +/* Access MMIO registers atomically without barriers and byte swapping. */
>> +
>> +static inline uint8_t
>> +__raw_readb(const volatile void *addr)
>> +{
>> +   return (*(const volatile uint8_t *)addr);
>> +}
>> +#define__raw_readb(addr)   __raw_readb(addr)
>> +
>> +static inline void
>> +__raw_writeb(uint8_t v, volatile void *addr)
>> +{
>> +   *(volatile uint8_t *)addr = v;
>> +}
>> +#define__raw_writeb(v, addr)   __raw_writeb(v, addr)
>> +
>> +static inline uint16_t
>> +__raw_readw(const volatile void *addr)
>> +{
>> +   return (*(const volatile uint16_t *)addr);
>> +}
>> +#define__raw_readw(addr)   __raw_readw(addr)
>> +
>> +static inline void
>> +__raw_writew(uint16_t v, volatile void *addr)
>> +{
>> +   *(volatile uint16_t *)addr = v;
>> +}
>> +#define__raw_writew(v, addr)   __raw_writew(v, addr)
>> +
>>  static inline uint32_t
>>  __raw_readl(const volatile void *addr)
>>  {
>> -   return *(const volatile uint32_t *)addr;
>> +   return (*(const volatile uint32_t *)addr);
>>  }
>> +#define__raw_readl(addr)   __raw_readl(addr)
>>
>>  static inline void
>> -__raw_writel(uint32_t b, volatile void *addr)
>> +__raw_writel(uint32_t v, volatile void *addr)
>>  {
>> -   *(volatile uint32_t *)addr = b;
>> +   *(volatile uint32_t *)addr = v;
>>  }
>> +#define__raw_writel(v, addr)   __raw_writel(v, addr)
>>
>> +#ifdef __LP64__
>>  static inline uint64_t
>>  __raw_readq(const volatile void *addr)
>>  {
>> -   return *(const volatile uint64_t *)addr;
>> +   return (*(const volatile uint64_t *)addr);
>>  }
>> +#define__raw_readq(addr)   __raw_readq(addr)
>>
>>  static inline void
>> 

Re: svn commit: r339618 - head/sys/compat/linuxkpi/common/include/linux

2018-11-17 Thread Matthew Macy
You should probably revert this. The implied understanding of the _relaxed
version is incorrect. compiler_membar is there to prevent instruction
reordering which is possible on FreeBSD because the accesses are done in C.
The relaxed variants still do not permit instruction reordering. On Linux
__compiler_member (referred to as barrier there) isn’t necessary in the
mmio accessors because they always use volatile asm which can’t be
reordered. The distinction between the relaxed and non relaxed variants is
that the relaxed variant lacks memory barriers (sync / lwsync / eieio on
ppc, membar on sparc, etc). Most of the time we don’t run in to problems on
x86 because with TSO the only reordering possible is writes that happen
before reads can become visible in memory after they occur in the
instruction stream.

Thanks.
-M

On Mon, Oct 22, 2018 at 13:55 Tijl Coosemans  wrote:

> Author: tijl
> Date: Mon Oct 22 20:55:35 2018
> New Revision: 339618
> URL: https://svnweb.freebsd.org/changeset/base/339618
>
> Log:
>   Define linuxkpi readq for 64-bit architectures.  It is used by drm-kmod.
>   Currently the compiler picks up the definition in machine/cpufunc.h.
>
>   Add compiler memory barriers to read* and write*.  The Linux x86
>   implementation of these functions uses inline asm with "memory" clobber.
>   The Linux x86 implementation of read_relaxed* and write_relaxed* uses the
>   same inline asm without "memory" clobber.
>
>   Implement ioread* and iowrite* in terms of read* and write* so they also
>   have memory barriers.
>
>   Qualify the addr parameter in write* as volatile.
>
>   Like Linux, define macros with the same name as the inline functions.
>
>   Only define 64-bit versions on 64-bit architectures because generally
>   32-bit architectures can't do atomic 64-bit loads and stores.
>
>   Regroup the functions a bit and add brief comments explaining what they
> do:
>   - __raw_read*, __raw_write*: atomic, no barriers, no byte swapping
>   - read_relaxed*, write_relaxed*: atomic, no barriers, little-endian
>   - read*, write*: atomic, with barriers, little-endian
>
>   Add a comment that says our implementation of ioread* and iowrite*
>   only handles MMIO and does not support port IO.
>
>   Reviewed by:  hselasky
>   MFC after:3 days
>
> Modified:
>   head/sys/compat/linuxkpi/common/include/linux/io.h
>
> Modified: head/sys/compat/linuxkpi/common/include/linux/io.h
>
> ==
> --- head/sys/compat/linuxkpi/common/include/linux/io.h  Mon Oct 22
> 20:22:33 2018(r339617)
> +++ head/sys/compat/linuxkpi/common/include/linux/io.h  Mon Oct 22
> 20:55:35 2018(r339618)
> @@ -38,153 +38,309 @@
>  #include 
>  #include 
>
> +/*
> + * XXX This is all x86 specific.  It should be bus space access.
> + */
> +
> +/* Access MMIO registers atomically without barriers and byte swapping. */
> +
> +static inline uint8_t
> +__raw_readb(const volatile void *addr)
> +{
> +   return (*(const volatile uint8_t *)addr);
> +}
> +#define__raw_readb(addr)   __raw_readb(addr)
> +
> +static inline void
> +__raw_writeb(uint8_t v, volatile void *addr)
> +{
> +   *(volatile uint8_t *)addr = v;
> +}
> +#define__raw_writeb(v, addr)   __raw_writeb(v, addr)
> +
> +static inline uint16_t
> +__raw_readw(const volatile void *addr)
> +{
> +   return (*(const volatile uint16_t *)addr);
> +}
> +#define__raw_readw(addr)   __raw_readw(addr)
> +
> +static inline void
> +__raw_writew(uint16_t v, volatile void *addr)
> +{
> +   *(volatile uint16_t *)addr = v;
> +}
> +#define__raw_writew(v, addr)   __raw_writew(v, addr)
> +
>  static inline uint32_t
>  __raw_readl(const volatile void *addr)
>  {
> -   return *(const volatile uint32_t *)addr;
> +   return (*(const volatile uint32_t *)addr);
>  }
> +#define__raw_readl(addr)   __raw_readl(addr)
>
>  static inline void
> -__raw_writel(uint32_t b, volatile void *addr)
> +__raw_writel(uint32_t v, volatile void *addr)
>  {
> -   *(volatile uint32_t *)addr = b;
> +   *(volatile uint32_t *)addr = v;
>  }
> +#define__raw_writel(v, addr)   __raw_writel(v, addr)
>
> +#ifdef __LP64__
>  static inline uint64_t
>  __raw_readq(const volatile void *addr)
>  {
> -   return *(const volatile uint64_t *)addr;
> +   return (*(const volatile uint64_t *)addr);
>  }
> +#define__raw_readq(addr)   __raw_readq(addr)
>
>  static inline void
> -__raw_writeq(uint64_t b, volatile void *addr)
> +__raw_writeq(uint64_t v, volatile void *addr)
>  {
> -   *(volatile uint64_t *)addr = b;
> +   *(volatile uint64_t *)addr = v;
>  }
> +#define__raw_writeq(v, addr)   __raw_writeq(v, addr)
> +#endif
>
> -/*
> - * XXX This is all x86 specific.  It should be bus space access.
> - */
>  #definemmiowb()barrier()
>
> -#undef writel
> +/* Access little-endian MMIO registers atomically with memory barriers. */
> +
>