On Thu, 2018-05-24 at 11:33:18 UTC, Christophe Leroy wrote:
> The generic csum_ipv6_magic() generates pretty bad code, as the disassembly below shows:
> 
> 00000000 <csum_ipv6_magic>: (PPC32)
>    0: 81 23 00 00     lwz     r9,0(r3)
>    4: 81 03 00 04     lwz     r8,4(r3)
>    8: 7c e7 4a 14     add     r7,r7,r9
>    c: 7d 29 38 10     subfc   r9,r9,r7
>   10: 7d 4a 51 10     subfe   r10,r10,r10
>   14: 7d 27 42 14     add     r9,r7,r8
>   18: 7d 2a 48 50     subf    r9,r10,r9
>   1c: 80 e3 00 08     lwz     r7,8(r3)
>   20: 7d 08 48 10     subfc   r8,r8,r9
>   24: 7d 4a 51 10     subfe   r10,r10,r10
>   28: 7d 29 3a 14     add     r9,r9,r7
>   2c: 81 03 00 0c     lwz     r8,12(r3)
>   30: 7d 2a 48 50     subf    r9,r10,r9
>   34: 7c e7 48 10     subfc   r7,r7,r9
>   38: 7d 4a 51 10     subfe   r10,r10,r10
>   3c: 7d 29 42 14     add     r9,r9,r8
>   40: 7d 2a 48 50     subf    r9,r10,r9
>   44: 80 e4 00 00     lwz     r7,0(r4)
>   48: 7d 08 48 10     subfc   r8,r8,r9
>   4c: 7d 4a 51 10     subfe   r10,r10,r10
>   50: 7d 29 3a 14     add     r9,r9,r7
>   54: 7d 2a 48 50     subf    r9,r10,r9
>   58: 81 04 00 04     lwz     r8,4(r4)
>   5c: 7c e7 48 10     subfc   r7,r7,r9
>   60: 7d 4a 51 10     subfe   r10,r10,r10
>   64: 7d 29 42 14     add     r9,r9,r8
>   68: 7d 2a 48 50     subf    r9,r10,r9
>   6c: 80 e4 00 08     lwz     r7,8(r4)
>   70: 7d 08 48 10     subfc   r8,r8,r9
>   74: 7d 4a 51 10     subfe   r10,r10,r10
>   78: 7d 29 3a 14     add     r9,r9,r7
>   7c: 7d 2a 48 50     subf    r9,r10,r9
>   80: 81 04 00 0c     lwz     r8,12(r4)
>   84: 7c e7 48 10     subfc   r7,r7,r9
>   88: 7d 4a 51 10     subfe   r10,r10,r10
>   8c: 7d 29 42 14     add     r9,r9,r8
>   90: 7d 2a 48 50     subf    r9,r10,r9
>   94: 7d 08 48 10     subfc   r8,r8,r9
>   98: 7d 4a 51 10     subfe   r10,r10,r10
>   9c: 7d 29 2a 14     add     r9,r9,r5
>   a0: 7d 2a 48 50     subf    r9,r10,r9
>   a4: 7c a5 48 10     subfc   r5,r5,r9
>   a8: 7c 63 19 10     subfe   r3,r3,r3
>   ac: 7d 29 32 14     add     r9,r9,r6
>   b0: 7d 23 48 50     subf    r9,r3,r9
>   b4: 7c c6 48 10     subfc   r6,r6,r9
>   b8: 7c 63 19 10     subfe   r3,r3,r3
>   bc: 7c 63 48 50     subf    r3,r3,r9
>   c0: 54 6a 80 3e     rotlwi  r10,r3,16
>   c4: 7c 63 52 14     add     r3,r3,r10
>   c8: 7c 63 18 f8     not     r3,r3
>   cc: 54 63 84 3e     rlwinm  r3,r3,16,16,31
>   d0: 4e 80 00 20     blr
> 
> 0000000000000000 <.csum_ipv6_magic>: (PPC64)
>    0: 81 23 00 00     lwz     r9,0(r3)
>    4: 80 03 00 04     lwz     r0,4(r3)
>    8: 81 63 00 08     lwz     r11,8(r3)
>    c: 7c e7 4a 14     add     r7,r7,r9
>   10: 7f 89 38 40     cmplw   cr7,r9,r7
>   14: 7d 47 02 14     add     r10,r7,r0
>   18: 7d 30 10 26     mfocrf  r9,1
>   1c: 55 29 f7 fe     rlwinm  r9,r9,30,31,31
>   20: 7d 4a 4a 14     add     r10,r10,r9
>   24: 7f 80 50 40     cmplw   cr7,r0,r10
>   28: 7d 2a 5a 14     add     r9,r10,r11
>   2c: 80 03 00 0c     lwz     r0,12(r3)
>   30: 81 44 00 00     lwz     r10,0(r4)
>   34: 7d 10 10 26     mfocrf  r8,1
>   38: 55 08 f7 fe     rlwinm  r8,r8,30,31,31
>   3c: 7d 29 42 14     add     r9,r9,r8
>   40: 81 04 00 04     lwz     r8,4(r4)
>   44: 7f 8b 48 40     cmplw   cr7,r11,r9
>   48: 7d 29 02 14     add     r9,r9,r0
>   4c: 7d 70 10 26     mfocrf  r11,1
>   50: 55 6b f7 fe     rlwinm  r11,r11,30,31,31
>   54: 7d 29 5a 14     add     r9,r9,r11
>   58: 7f 80 48 40     cmplw   cr7,r0,r9
>   5c: 7d 29 52 14     add     r9,r9,r10
>   60: 7c 10 10 26     mfocrf  r0,1
>   64: 54 00 f7 fe     rlwinm  r0,r0,30,31,31
>   68: 7d 69 02 14     add     r11,r9,r0
>   6c: 7f 8a 58 40     cmplw   cr7,r10,r11
>   70: 7c 0b 42 14     add     r0,r11,r8
>   74: 81 44 00 08     lwz     r10,8(r4)
>   78: 7c f0 10 26     mfocrf  r7,1
>   7c: 54 e7 f7 fe     rlwinm  r7,r7,30,31,31
>   80: 7c 00 3a 14     add     r0,r0,r7
>   84: 7f 88 00 40     cmplw   cr7,r8,r0
>   88: 7d 20 52 14     add     r9,r0,r10
>   8c: 80 04 00 0c     lwz     r0,12(r4)
>   90: 7d 70 10 26     mfocrf  r11,1
>   94: 55 6b f7 fe     rlwinm  r11,r11,30,31,31
>   98: 7d 29 5a 14     add     r9,r9,r11
>   9c: 7f 8a 48 40     cmplw   cr7,r10,r9
>   a0: 7d 29 02 14     add     r9,r9,r0
>   a4: 7d 70 10 26     mfocrf  r11,1
>   a8: 55 6b f7 fe     rlwinm  r11,r11,30,31,31
>   ac: 7d 29 5a 14     add     r9,r9,r11
>   b0: 7f 80 48 40     cmplw   cr7,r0,r9
>   b4: 7d 29 2a 14     add     r9,r9,r5
>   b8: 7c 10 10 26     mfocrf  r0,1
>   bc: 54 00 f7 fe     rlwinm  r0,r0,30,31,31
>   c0: 7d 29 02 14     add     r9,r9,r0
>   c4: 7f 85 48 40     cmplw   cr7,r5,r9
>   c8: 7c 09 32 14     add     r0,r9,r6
>   cc: 7d 50 10 26     mfocrf  r10,1
>   d0: 55 4a f7 fe     rlwinm  r10,r10,30,31,31
>   d4: 7c 00 52 14     add     r0,r0,r10
>   d8: 7f 80 30 40     cmplw   cr7,r0,r6
>   dc: 7d 30 10 26     mfocrf  r9,1
>   e0: 55 29 ef fe     rlwinm  r9,r9,29,31,31
>   e4: 7c 09 02 14     add     r0,r9,r0
>   e8: 54 03 80 3e     rotlwi  r3,r0,16
>   ec: 7c 03 02 14     add     r0,r3,r0
>   f0: 7c 03 00 f8     not     r3,r0
>   f4: 78 63 84 22     rldicl  r3,r3,48,48
>   f8: 4e 80 00 20     blr
> 
> This patch implements it in assembly for both PPC32 and PPC64.
> 
> Link: https://github.com/linuxppc/linux/issues/9
> Signed-off-by: Christophe Leroy <christophe.le...@c-s.fr>
> Reviewed-by: Segher Boessenkool <seg...@kernel.crashing.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/e9c4943a107b56696e4872cdffdba6

cheers

Reply via email to