Re: [PATCH] powerpc: provide __bswapdi2

2013-05-14 Thread David Woodhouse
On Tue, 2013-05-14 at 11:25 +1000, Michael Neuling wrote:
 
  So, if we are just stealing the output of gcc, why not just use the C
  version (at least for 32 bit)?
 
 Woodhouse: can we just do this?

Sure, if you don't mind GCC optimising the contents of your C function
by turning it into a call to libgcc's __bswapdi2() :)

OK, you might be able to do some archaeology and determine that the only
compiler that emits calls to __bswapdi2() is GCC 4.4, and furthermore
that the same compiler *doesn't* have the wit to notice that the
contents of the function are a 64-bit byteswap, so it's never going to
happen. But I don't like that approach. I'd feel I have to sacrifice a
goat *anyway*, and I don't have a spare goat.

Although now I come to explicitly explain why I did it that way... it
occurs to me that the libgcc version is just written in C, and the
compiler evidently trusts itself not to optimise that into a recursive
call. Is there a compiler switch which guarantees that, which we could
use without other unwanted side-effects?
 
-- 
David Woodhouse                            Open Source Technology Centre
david.woodho...@intel.com  Intel Corporation





smime.p7s
Description: S/MIME cryptographic signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Anton Blanchard
On Fri, 10 May 2013 22:18:27 +0100
David Woodhouse dw...@infradead.org wrote:

 From: David Woodhouse david.woodho...@intel.com
 
 Some versions of GCC apparently expect this to be provided by libgcc.

Thanks Dave. We were discussing this with Alan Modra and he doesn't
think the 64bit target should ever emit a call to __bswapdi2. Did you
only see it on 32bit, or 64bit as well?

Alan: I notice Dave is adding calls to __builtin_bswap, perhaps some
versions of the 64bit compiler did emit __bswapdi2 calls for that.

Anton

 
 Signed-off-by: David Woodhouse david.woodho...@intel.com
 ---
 Untested.
 
 diff --git a/arch/powerpc/kernel/misc_32.S
 b/arch/powerpc/kernel/misc_32.S index 19e096b..f077dc2 100644
 --- a/arch/powerpc/kernel/misc_32.S
 +++ b/arch/powerpc/kernel/misc_32.S
 @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
   li  r3,2
   blr
  
 +_GLOBAL(__bswapdi2)
 + rlwinm  10,4,8,0xffffffff
 + rlwinm  11,3,8,0xffffffff
 + rlwimi  10,4,24,0,7
 + rlwimi  11,3,24,0,7
 + rlwimi  10,4,24,16,23
 + rlwimi  11,3,24,16,23
 + mr  4,11
 + mr  3,10
 + blr
 +
  _GLOBAL(abs)
   srawi   r4,r3,31
   xor r3,r3,r4
 diff --git a/arch/powerpc/kernel/misc_64.S
 b/arch/powerpc/kernel/misc_64.S index 5cfa800..3b2e6e8 100644
 --- a/arch/powerpc/kernel/misc_64.S
 +++ b/arch/powerpc/kernel/misc_64.S
 @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache)
   isync
   blr
  
 +_GLOBAL(__bswapdi2)
 + srdi    8,3,32
 + rlwinm  7,3,8,0xffffffff
 + rlwimi  7,3,24,0,7
 + rlwinm  9,8,8,0xffffffff
 + rlwimi  7,3,24,16,23
 + rlwimi  9,8,24,0,7
 + rlwimi  9,8,24,16,23
 + sldi    7,7,32
 + or  7,7,9
 + mr  3,7
 + blr
  
  #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
  /*
 diff --git a/arch/powerpc/kernel/ppc_ksyms.c
 b/arch/powerpc/kernel/ppc_ksyms.c index 78b8766..c296665 100644
 --- a/arch/powerpc/kernel/ppc_ksyms.c
 +++ b/arch/powerpc/kernel/ppc_ksyms.c
 @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3);
  int __ucmpdi2(unsigned long long, unsigned long long);
  EXPORT_SYMBOL(__ucmpdi2);
  #endif
 -
 +long long __bswapdi2(long long);
 +EXPORT_SYMBOL(__bswapdi2);
  EXPORT_SYMBOL(memcpy);
  EXPORT_SYMBOL(memset);
  EXPORT_SYMBOL(memmove);
 

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Michael Neuling
David Woodhouse dw...@infradead.org wrote:

 From: David Woodhouse david.woodho...@intel.com
 
 Some versions of GCC apparently expect this to be provided by libgcc.
 
 Signed-off-by: David Woodhouse david.woodho...@intel.com
 ---
 Untested.
 
 diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
 index 19e096b..f077dc2 100644
 --- a/arch/powerpc/kernel/misc_32.S
 +++ b/arch/powerpc/kernel/misc_32.S
 @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
   li  r3,2
   blr
  
 +_GLOBAL(__bswapdi2)
 + rlwinm  10,4,8,0xffffffff
 + rlwinm  11,3,8,0xffffffff
 + rlwimi  10,4,24,0,7
 + rlwimi  11,3,24,0,7
 + rlwimi  10,4,24,16,23
 + rlwimi  11,3,24,16,23
 + mr  4,11
 + mr  3,10
 + blr
 +

This doesn't work for me but the below does:

_GLOBAL(__bswapdi2)
rotlwi  r9,r4,8
rotlwi  r10,r3,8
rlwimi  r9,r4,24,0,7
rlwimi  r10,r3,24,0,7
rlwimi  r9,r4,24,16,23
rlwimi  r10,r3,24,16,23
mr  r4,r10
mr  r3,r9
blr

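stolen from GCC -O2 output of:
  unsigned long long __bswapdi2(unsigned long long x)
  {
      return ((x & 0x00000000000000ffULL) << 56) |
             ((x & 0x000000000000ff00ULL) << 40) |
             ((x & 0x0000000000ff0000ULL) << 24) |
             ((x & 0x00000000ff000000ULL) <<  8) |
             ((x & 0x000000ff00000000ULL) >>  8) |
             ((x & 0x0000ff0000000000ULL) >> 24) |
             ((x & 0x00ff000000000000ULL) >> 40) |
             ((x & 0xff00000000000000ULL) >> 56);
  }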

  _GLOBAL(abs)
   srawi   r4,r3,31
   xor r3,r3,r4
 diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
 index 5cfa800..3b2e6e8 100644
 --- a/arch/powerpc/kernel/misc_64.S
 +++ b/arch/powerpc/kernel/misc_64.S
 @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache)
   isync
   blr
  
 +_GLOBAL(__bswapdi2)
 + srdi    8,3,32
 + rlwinm  7,3,8,0xffffffff
 + rlwimi  7,3,24,0,7
 + rlwinm  9,8,8,0xffffffff
 + rlwimi  7,3,24,16,23
 + rlwimi  9,8,24,0,7
 + rlwimi  9,8,24,16,23
 + sldi    7,7,32
 + or  7,7,9
 + mr  3,7
 + blr

This works but we should add r to the register names.

I'll repost

Mikey

  
  #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
  /*
 diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
 index 78b8766..c296665 100644
 --- a/arch/powerpc/kernel/ppc_ksyms.c
 +++ b/arch/powerpc/kernel/ppc_ksyms.c
 @@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3);
  int __ucmpdi2(unsigned long long, unsigned long long);
  EXPORT_SYMBOL(__ucmpdi2);
  #endif
 -
 +long long __bswapdi2(long long);
 +EXPORT_SYMBOL(__bswapdi2);
  EXPORT_SYMBOL(memcpy);
  EXPORT_SYMBOL(memset);
  EXPORT_SYMBOL(memmove);
 
 -- 
 dwmw2
 
 ___
 Linuxppc-dev mailing list
 Linuxppc-dev@lists.ozlabs.org
 https://lists.ozlabs.org/listinfo/linuxppc-dev
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Alan Modra
On Mon, May 13, 2013 at 04:48:19PM +1000, Anton Blanchard wrote:
 On Fri, 10 May 2013 22:18:27 +0100
 David Woodhouse dw...@infradead.org wrote:
 
  From: David Woodhouse david.woodho...@intel.com
  
  Some versions of GCC apparently expect this to be provided by libgcc.
 
 Thanks Dave. We were discussing this with Alan Modra and he doesn't
 think the 64bit target should ever emit a call to __bswapdi2. Did you
 only see it on 32bit, or 64bit as well?
 
 Alan: I notice Dave is adding calls to __builtin_bswap, perhaps some
 versions of the 64bit compiler did emit __bswapdi2 calls for that.

I did a little digging, and it looks like gcc-4.4 will emit __bswapdi2
calls.  Support in rs6000.md appeared 2009-06-25.

-- 
Alan Modra
Australia Development Lab, IBM
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Gabriel Paubert
On Mon, May 13, 2013 at 05:09:59PM +1000, Michael Neuling wrote:
 David Woodhouse dw...@infradead.org wrote:
 
  From: David Woodhouse david.woodho...@intel.com
  
  Some versions of GCC apparently expect this to be provided by libgcc.
  
  Signed-off-by: David Woodhouse david.woodho...@intel.com
  ---
  Untested.
  
  diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
  index 19e096b..f077dc2 100644
  --- a/arch/powerpc/kernel/misc_32.S
  +++ b/arch/powerpc/kernel/misc_32.S
  @@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
  li  r3,2
  blr
   
  +_GLOBAL(__bswapdi2)
  +   rlwinm  10,4,8,0xffffffff
  +   rlwinm  11,3,8,0xffffffff
  +   rlwimi  10,4,24,0,7
  +   rlwimi  11,3,24,0,7
  +   rlwimi  10,4,24,16,23
  +   rlwimi  11,3,24,16,23
  +   mr  4,11
  +   mr  3,10
  +   blr
  +
 
 This doesn't work for me but the below does:
 
 _GLOBAL(__bswapdi2)
   rotlwi  r9,r4,8
   rotlwi  r10,r3,8
   rlwimi  r9,r4,24,0,7
   rlwimi  r10,r3,24,0,7
   rlwimi  r9,r4,24,16,23
   rlwimi  r10,r3,24,16,23
   mr  r4,r10
   mr  r3,r9
   blr
 

Actually, I'd swap the two mr instructions to never
have an instruction that uses the result from the
previous one. 


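 stolen from GCC -O2 output of:
   unsigned long long __bswapdi2(unsigned long long x)
   {
       return ((x & 0x00000000000000ffULL) << 56) |
              ((x & 0x000000000000ff00ULL) << 40) |
              ((x & 0x0000000000ff0000ULL) << 24) |
              ((x & 0x00000000ff000000ULL) <<  8) |
              ((x & 0x000000ff00000000ULL) >>  8) |
              ((x & 0x0000ff0000000000ULL) >> 24) |
              ((x & 0x00ff000000000000ULL) >> 40) |
              ((x & 0xff00000000000000ULL) >> 56);
   }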
 
   _GLOBAL(abs)
  srawi   r4,r3,31
  xor r3,r3,r4
  diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
  index 5cfa800..3b2e6e8 100644
  --- a/arch/powerpc/kernel/misc_64.S
  +++ b/arch/powerpc/kernel/misc_64.S
  @@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache)
  isync
  blr
   
  +_GLOBAL(__bswapdi2)
  +   srdi    8,3,32
  +   rlwinm  7,3,8,0xffffffff
  +   rlwimi  7,3,24,0,7
  +   rlwinm  9,8,8,0xffffffff
  +   rlwimi  7,3,24,16,23
  +   rlwimi  9,8,24,0,7
  +   rlwimi  9,8,24,16,23
  +   sldi    7,7,32
  +   or  7,7,9
  +   mr  3,7
  +   blr
 
 This works but we should add r to the register names.
 

And merge the last two instructions as a single or r3,r7,r9.


Gabriel
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread David Woodhouse
On Mon, 2013-05-13 at 16:50 +0930, Alan Modra wrote:
 I did a little digging, and it looks like gcc-4.4 will emit __bswapdi2
 calls.  Support in rs6000.md appeared 2009-06-25.

That's where it was seen. I don't have anything that ancient any more so
it didn't show up in my testing, but Al saw it on a Debian system.

-- 
David Woodhouse                            Open Source Technology Centre
david.woodho...@intel.com  Intel Corporation


smime.p7s
Description: S/MIME cryptographic signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread David Woodhouse
On Mon, 2013-05-13 at 17:09 +1000, Michael Neuling wrote:
 
 This doesn't work for me but the below does:
 ...
 
 stolen from GCC -O2 output of:
   unsigned long long __bswapdi2(unsigned long long x)

Hm, so was mine:

[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32
        .file   "bswapdi2.c"
        .section        ".text"
        .align 2
        .p2align 4,,15
        .globl __bswapdi2
        .type   __bswapdi2, @function
__bswapdi2:
        rlwinm 10,4,8,0xffffffff
        rlwinm 11,3,8,0xffffffff
        rlwimi 10,4,24,0,7
        rlwimi 11,3,24,0,7
        rlwimi 10,4,24,16,23
        rlwimi 11,3,24,16,23
        mr 4,11
        mr 3,10
        blr
        .size   __bswapdi2,.-__bswapdi2
        .ident  "GCC: (GNU) 4.7.2 20121105 (Red Hat 4.7.2-2.aa.20121114svn)"

On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote:
 Actually, I'd swap the two mr instructions to never
 have an instruction that uses the result from the
 previous one. 

Bad GCC. No biscuit.

Should we file a PR? 

-- 
David Woodhouse                            Open Source Technology Centre
david.woodho...@intel.com  Intel Corporation


smime.p7s
Description: S/MIME cryptographic signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread David Woodhouse
On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote:
 
 On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote:
  Actually, I'd swap the two mr instructions to never
  have an instruction that uses the result from the
  previous one. 
 
 Bad GCC. No biscuit.
 
 Should we file a PR? 

Maybe not. If you tell it to tune for an in-order machine like Cell, it
swaps them round. Although now I'm confused about which of POWER[567]
were in-order:

[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32  | grep -B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10
[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=cell | 
grep -B1 mr
rlwimi 11,3,24,16,23
mr 3,10
mr 4,11
[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power5 
| grep -B1 mr
rlwimi 11,3,24,16,23
mr 3,10
mr 4,11
[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power6 
| grep -B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10
[dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=power7 
| grep -B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10




-- 
David Woodhouse                            Open Source Technology Centre
david.woodho...@intel.com  Intel Corporation


smime.p7s
Description: S/MIME cryptographic signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Joakim Tjernlund
Linuxppc-dev 
linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org 
wrote 2013/05/13 12:38:13:
 
 On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote:
  
  On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote:
   Actually, I'd swap the two mr instructions to never
   have an instruction that uses the result from the
   previous one. 
  
  Bad GCC. No biscuit.
  
  Should we file a PR? 
 
 Maybe not. If you tell it to tune for an in-order machine like Cell, it
 swaps them round. Although now I'm confused about which of POWER[567]
 were in-order:
 
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32  | grep 
-B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
-mtune=cell | grep -B1 mr
rlwimi 11,3,24,16,23
mr 3,10
mr 4,11
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
-mtune=power5 | grep -B1 mr
rlwimi 11,3,24,16,23
mr 3,10
mr 4,11
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
-mtune=power6 | grep -B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
-mtune=power7 | grep -B1 mr
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10

A bit rusty on the ppc asm but can you not remove the mr completely:
 rlwimi 10,4,24,16,23
 rlwimi 11,3,24,16,23
 mr 4,11
 mr 3,10
to
 rlwimi 4,4,24,16,23
 rlwimi 3,3,24,16,23

 Jocke
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Joakim Tjernlund
Linuxppc-dev 
linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org 
wrote on 2013/05/13 12:51:59:
 
 Linuxppc-dev 
 linuxppc-dev-bounces+joakim.tjernlund=transmode...@lists.ozlabs.org 
 wrote 2013/05/13 12:38:13:
  
  On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote:
   
   On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote:
Actually, I'd swap the two mr instructions to never
have an instruction that uses the result from the
previous one. 
   
   Bad GCC. No biscuit.
   
   Should we file a PR? 
  
  Maybe not. If you tell it to tune for an in-order machine like Cell, 
it
  swaps them round. Although now I'm confused about which of POWER[567]
  were in-order:
  
  [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32  | 
grep 
 -B1 mr
 rlwimi 11,3,24,16,23
 mr 4,11
 mr 3,10
  [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=cell | grep -B1 mr
 rlwimi 11,3,24,16,23
 mr 3,10
 mr 4,11
  [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power5 | grep -B1 mr
 rlwimi 11,3,24,16,23
 mr 3,10
 mr 4,11
  [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power6 | grep -B1 mr
 rlwimi 11,3,24,16,23
 mr 4,11
 mr 3,10
  [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power7 | grep -B1 mr
 rlwimi 11,3,24,16,23
 mr 4,11
 mr 3,10
 
 A bit rusty on the ppc asm but can you not remove the mr completely:
  rlwimi 10,4,24,16,23
  rlwimi 11,3,24,16,23
  mr 4,11
  mr 3,10
 to
  rlwimi 4,4,24,16,23
  rlwimi 3,3,24,16,23

Oops, that got twisted. Forget my comment.
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Gabriel Paubert
On Mon, May 13, 2013 at 11:38:13AM +0100, David Woodhouse wrote:
 On Mon, 2013-05-13 at 11:33 +0100, David Woodhouse wrote:
  
  On Mon, 2013-05-13 at 09:33 +0200, Gabriel Paubert wrote:
   Actually, I'd swap the two mr instructions to never
   have an instruction that uses the result from the
   previous one. 
  
  Bad GCC. No biscuit.
  
  Should we file a PR? 
 
 Maybe not. If you tell it to tune for an in-order machine like Cell, it
 swaps them round. Although now I'm confused about which of POWER[567]
 were in-order:

It was Power6 IIRC. On this kind of fine point, don't rely too much
on what GCC produces.

 
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32  | grep -B1 
 mr
   rlwimi 11,3,24,16,23
   mr 4,11
   mr 3,10
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 -mtune=cell 
 | grep -B1 mr
   rlwimi 11,3,24,16,23
   mr 3,10
   mr 4,11
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power5 | grep -B1 mr
   rlwimi 11,3,24,16,23
   mr 3,10
   mr 4,11
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power6 | grep -B1 mr
   rlwimi 11,3,24,16,23
   mr 4,11
   mr 3,10
 [dwmw2@i7 ~]$ powerpc64-linux-gnu-gcc -O2 -S -o- bswapdi2.c -m32 
 -mtune=power7 | grep -B1 mr
   rlwimi 11,3,24,16,23
   mr 4,11
   mr 3,10

I don't know of any processor in which putting the mr 3,10 first can cause 
stalls, so
even a generic tuning should put it first.

Gabriel
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Segher Boessenkool
I did a little digging, and it looks like gcc-4.4 will emit  
__bswapdi2

calls.  Support in rs6000.md appeared 2009-06-25.


That's where it was seen. I don't have anything that ancient any  
more so

it didn't show up in my testing, but Al saw it on a Debian system.


It should never happen on 32-bit -- it is broken into two bswapsi's --
although, old compiler, who knows.  Lack of testing makes some people
nervous though ;-)


Segher

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Segher Boessenkool

rlwinm 10,4,8,0xffffffff
rlwinm 11,3,8,0xffffffff
rlwimi 10,4,24,0,7
rlwimi 11,3,24,0,7
rlwimi 10,4,24,16,23
rlwimi 11,3,24,16,23
mr 4,11
mr 3,10



Actually, I'd swap the two mr instructions to never
have an instruction that uses the result from the
previous one.


Bad GCC. No biscuit.

Should we file a PR?


This is scheduled just fine.  Every pair of instructions here can
execute together (on most CPUs, if not all); all instructions after
it are dependent on previous instructions.  There also is no issue
(group) restriction that makes this scheduling suboptimal afaics.


Segher

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Stephen Rothwell
Hi Mikey,

On Mon, 13 May 2013 17:09:59 +1000 Michael Neuling mi...@neuling.org wrote:

 This doesn't work for me but the below does:
 
 _GLOBAL(__bswapdi2)
   rotlwi  r9,r4,8
   rotlwi  r10,r3,8
   rlwimi  r9,r4,24,0,7
   rlwimi  r10,r3,24,0,7
   rlwimi  r9,r4,24,16,23
   rlwimi  r10,r3,24,16,23
   mr  r4,r10
   mr  r3,r9
   blr
 
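 stolen from GCC -O2 output of:
   unsigned long long __bswapdi2(unsigned long long x)
   {
       return ((x & 0x00000000000000ffULL) << 56) |
              ((x & 0x000000000000ff00ULL) << 40) |
              ((x & 0x0000000000ff0000ULL) << 24) |
              ((x & 0x00000000ff000000ULL) <<  8) |
              ((x & 0x000000ff00000000ULL) >>  8) |
              ((x & 0x0000ff0000000000ULL) >> 24) |
              ((x & 0x00ff000000000000ULL) >> 40) |
              ((x & 0xff00000000000000ULL) >> 56);
   }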

So, if we are just stealing the output of gcc, why not just use the C
version (at least for 32 bit)?

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au


pgp7QZrN15nN7.pgp
Description: PGP signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH] powerpc: provide __bswapdi2

2013-05-13 Thread Michael Neuling
On 14 May 2013 11:09, Stephen Rothwell s...@canb.auug.org.au wrote:

 Hi Mikey,

 On Mon, 13 May 2013 17:09:59 +1000 Michael Neuling mi...@neuling.org
wrote:
 
  This doesn't work for me but the below does:
 
  _GLOBAL(__bswapdi2)
rotlwi  r9,r4,8
rotlwi  r10,r3,8
rlwimi  r9,r4,24,0,7
rlwimi  r10,r3,24,0,7
rlwimi  r9,r4,24,16,23
rlwimi  r10,r3,24,16,23
mr  r4,r10
mr  r3,r9
blr
 
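  stolen from GCC -O2 output of:
    unsigned long long __bswapdi2(unsigned long long x)
    {
        return ((x & 0x00000000000000ffULL) << 56) |
               ((x & 0x000000000000ff00ULL) << 40) |
               ((x & 0x0000000000ff0000ULL) << 24) |
               ((x & 0x00000000ff000000ULL) <<  8) |
               ((x & 0x000000ff00000000ULL) >>  8) |
               ((x & 0x0000ff0000000000ULL) >> 24) |
               ((x & 0x00ff000000000000ULL) >> 40) |
               ((x & 0xff00000000000000ULL) >> 56);
    }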

 So, if we are just stealing the output of gcc, why not just use the C
 version (at least for 32 bit)?

Woodhouse: can we just do this?

Mikey
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc: provide __bswapdi2

2013-05-10 Thread David Woodhouse
From: David Woodhouse david.woodho...@intel.com

Some versions of GCC apparently expect this to be provided by libgcc.

Signed-off-by: David Woodhouse david.woodho...@intel.com
---
Untested.

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 19e096b..f077dc2 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -657,6 +657,17 @@ _GLOBAL(__ucmpdi2)
li  r3,2
blr
 
+_GLOBAL(__bswapdi2)
+   rlwinm  10,4,8,0xffffffff
+   rlwinm  11,3,8,0xffffffff
+   rlwimi  10,4,24,0,7
+   rlwimi  11,3,24,0,7
+   rlwimi  10,4,24,16,23
+   rlwimi  11,3,24,16,23
+   mr  4,11
+   mr  3,10
+   blr
+
 _GLOBAL(abs)
srawi   r4,r3,31
xor r3,r3,r4
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 5cfa800..3b2e6e8 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -234,6 +234,18 @@ _GLOBAL(__flush_dcache_icache)
isync
blr
 
+_GLOBAL(__bswapdi2)
+   srdi    8,3,32
+   rlwinm  7,3,8,0xffffffff
+   rlwimi  7,3,24,0,7
+   rlwinm  9,8,8,0xffffffff
+   rlwimi  7,3,24,16,23
+   rlwimi  9,8,24,0,7
+   rlwimi  9,8,24,16,23
+   sldi    7,7,32
+   or  7,7,9
+   mr  3,7
+   blr
 
 #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
 /*
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 78b8766..c296665 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -143,7 +143,8 @@ EXPORT_SYMBOL(__lshrdi3);
 int __ucmpdi2(unsigned long long, unsigned long long);
 EXPORT_SYMBOL(__ucmpdi2);
 #endif
-
+long long __bswapdi2(long long);
+EXPORT_SYMBOL(__bswapdi2);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
 EXPORT_SYMBOL(memmove);

-- 
dwmw2



smime.p7s
Description: S/MIME cryptographic signature
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev