The branch master has been updated
       via  1b1ff9b94d5cfa7879ef6a1a4101fe4db9cb9a9c (commit)
      from  3dcbb6c4a395d56dfa561145d89017ff958bb18e (commit)


- Log -----------------------------------------------------------------
commit 1b1ff9b94d5cfa7879ef6a1a4101fe4db9cb9a9c
Author: Andy Polyakov <[email protected]>
Date:   Fri Mar 8 14:40:56 2019 +0100

    sha/asm/keccak1600-ppc64.pl: up to 10% performance improvement.
    
    Reviewed-by: Matt Caswell <[email protected]>
    Reviewed-by: Richard Levitte <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/8444)

-----------------------------------------------------------------------

Summary of changes:
 crypto/sha/asm/keccak1600-ppc64.pl | 40 +++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/crypto/sha/asm/keccak1600-ppc64.pl b/crypto/sha/asm/keccak1600-ppc64.pl
index 5c23841..876632b 100755
--- a/crypto/sha/asm/keccak1600-ppc64.pl
+++ b/crypto/sha/asm/keccak1600-ppc64.pl
@@ -27,10 +27,10 @@
 #
 #              r=1088(*)
 #
-# PPC970/G5    14.6/+120%
-# POWER7       10.3/+100%
-# POWER8       11.5/+85%
-# POWER9       9.4/+45%
+# PPC970/G5    14.0/+130%
+# POWER7       9.7/+110%
+# POWER8       10.6/+100%
+# POWER9       8.2/+66%
 #
 # (*)  Corresponds to SHA3-256. Percentage after slash is improvement
 #      over gcc-4.x-generated KECCAK_1X_ALT code. Newer compilers do
@@ -384,19 +384,19 @@ KeccakF1600:
 .type  dword_le_load,\@function
 .align 5
 dword_le_load:
-       lbzu    r0,1(r3)
-       lbzu    r4,1(r3)
-       lbzu    r5,1(r3)
+       lbz     r0,1(r3)
+       lbz     r4,2(r3)
+       lbz     r5,3(r3)
        insrdi  r0,r4,8,48
-       lbzu    r4,1(r3)
+       lbz     r4,4(r3)
        insrdi  r0,r5,8,40
-       lbzu    r5,1(r3)
+       lbz     r5,5(r3)
        insrdi  r0,r4,8,32
-       lbzu    r4,1(r3)
+       lbz     r4,6(r3)
        insrdi  r0,r5,8,24
-       lbzu    r5,1(r3)
+       lbz     r5,7(r3)
        insrdi  r0,r4,8,16
-       lbzu    r4,1(r3)
+       lbzu    r4,8(r3)
        insrdi  r0,r5,8,8
        insrdi  r0,r4,8,0
        blr
@@ -657,21 +657,21 @@ SHA3_squeeze:
        ${UCMP}i $len,8
        blt     .Lsqueeze_tail
 
-       stbu    r0,1($out)
+       stb     r0,1($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,2($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,3($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,4($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,5($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,6($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stb     r0,7($out)
        srdi    r0,r0,8
-       stbu    r0,1($out)
+       stbu    r0,8($out)
 
        subic.  $len,$len,8
        beq     .Lsqueeze_done

Reply via email to