The branch master has been updated
       via  07a470729c4ace678fba6aeeeaf506436aa856e2 (commit)
      from  d86925e6bdaf3544615860e932d66207662d335e (commit)


- Log -----------------------------------------------------------------
commit 07a470729c4ace678fba6aeeeaf506436aa856e2
Author: Ard Biesheuvel <[email protected]>
Date:   Thu Nov 21 18:13:41 2019 +0100

    chacha/asm/chacha-armv8.pl: preserve FP registers d8 and d9 correctly
    
    Depending on the size of the input, we may take different paths through
    the accelerated arm64 ChaCha20 routines, each of which uses a different
    subset of the FP registers, some of which need to be preserved and
    restored, as required by the AArch64 calling convention (AAPCS64).
    
    In some cases (e.g., when the input size is 640 bytes), we call the 512
    byte NEON path followed directly by the scalar path. In this case, we
    preserve and restore d8 and d9, only to clobber them again immediately
    before handing over to the scalar path, which does not touch the FP
    registers at all and hence does not restore them either.
    
    Fix this by moving the restoration of d8 and d9 to a later stage in the
    512 byte routine, either before calling the scalar path, or when exiting
    the function.
    
    Fixes #10470
    CLA: trivial
    
    Reviewed-by: Paul Dale <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/10497)

-----------------------------------------------------------------------

Summary of changes:
 crypto/chacha/asm/chacha-armv8.pl | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl
index aed873d57e..7868389f71 100755
--- a/crypto/chacha/asm/chacha-armv8.pl
+++ b/crypto/chacha/asm/chacha-armv8.pl
@@ -1232,8 +1232,7 @@ $code.=<<___;
        adds    $len,$len,#512
        ushr    $ONE,$ONE,#1                    // 4 -> 2
 
-       ldp     d8,d9,[sp,#128+0]               // meet ABI requirements
-       ldp     d10,d11,[sp,#128+16]
+       ldp     d10,d11,[sp,#128+16]            // meet ABI requirements
        ldp     d12,d13,[sp,#128+32]
        ldp     d14,d15,[sp,#128+48]
 
@@ -1250,6 +1249,7 @@ $code.=<<___;
        ld1     {$CTR,$ROT24},[$key]
        b.hs    .Loop_outer_neon
 
+       ldp     d8,d9,[sp,#0]                   // meet ABI requirements
        eor     @K[1],@K[1],@K[1]
        eor     @K[2],@K[2],@K[2]
        eor     @K[3],@K[3],@K[3]
@@ -1259,6 +1259,7 @@ $code.=<<___;
        b       .Loop_outer
 
 .Ldone_512_neon:
+       ldp     d8,d9,[sp,#128+0]               // meet ABI requirements
        ldp     x19,x20,[x29,#16]
        add     sp,sp,#128+64
        ldp     x21,x22,[x29,#32]

Reply via email to