The branch master has been updated
       via  40c24d74deaad8a0ad7566a68ea5ea757bc3ccef (commit)
      from  c30bc4e2093f47a37736944da548653bc08d774d (commit)


- Log -----------------------------------------------------------------
commit 40c24d74deaad8a0ad7566a68ea5ea757bc3ccef
Author: David Benjamin <david...@google.com>
Date:   Wed Dec 29 13:05:12 2021 -0500

    Don't use __ARMEL__/__ARMEB__ in aarch64 assembly
    
    GCC's __ARMEL__ and __ARMEB__ defines denote little- and big-endian arm,
    respectively. They are not defined on aarch64, which instead uses
    __AARCH64EL__ and __AARCH64EB__.
    
    However, OpenSSL's assembly originally used the 32-bit defines on both
    platforms and even defined __ARMEL__ and __ARMEB__ in arm_arch.h. This is
    less portable and can even interfere with other headers, which use
    __ARMEL__ to detect little-endian arm.
    
    Over time, the aarch64 assembly has switched to the correct defines,
    such as in 32bbb62ea634239e7cb91d6450ba23517082bab6. This commit
    finishes the job: poly1305-armv8.pl needed a fix, and the dual-arch
    armx.pl files get one more transform to convert the 32-bit defines to
    their 64-bit equivalents.
    
    (There is an even more official endianness detector, __ARM_BIG_ENDIAN in
    the Arm C Language Extensions. But I've stuck with the GCC ones here, as
    switching to it would be a larger change.)
    
    Reviewed-by: Matt Caswell <m...@openssl.org>
    Reviewed-by: Tomas Mraz <to...@openssl.org>
    Reviewed-by: Paul Dale <pa...@openssl.org>
    Reviewed-by: Bernd Edlinger <bernd.edlin...@hotmail.de>
    (Merged from https://github.com/openssl/openssl/pull/17373)

-----------------------------------------------------------------------
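
For context, the dual-arch armx.pl files generate both the 32-bit and
64-bit flavours from a single source and post-process each emitted line
(the "if ($flavour =~ /64/)" loop visible in the hunks below). What
follows is a minimal standalone Perl sketch of the substitution this
commit adds; the sample input lines are hypothetical, chosen only to
illustrate the rewrite:

    #!/usr/bin/perl
    # Sketch of the 64-bit post-processing pass in the dual-arch
    # armx.pl files. The sample lines are made up for illustration.
    use strict;
    use warnings;

    my @lines = (
        "#ifdef __ARMEB__",
        "        rev     x8,x8           // flip bytes on big-endian",
        "#endif",
    );

    for (@lines) {
        # The substitution added by this commit: rewrite the 32-bit
        # arm endianness defines to their aarch64 equivalents
        # (__ARMEB__ -> __AARCH64EB__, __ARMEL__ -> __AARCH64EL__).
        s/__ARME([BL])__/__AARCH64E$1__/g;
        print $_, "\n";
    }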

Summary of changes:
 crypto/aes/asm/aesv8-armx.pl          |  3 +++
 crypto/arm_arch.h                     |  5 -----
 crypto/modes/asm/ghashv8-armx.pl      |  3 +++
 crypto/poly1305/asm/poly1305-armv8.pl | 24 ++++++++++++------------
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index c323179b2b..da10c44030 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -3613,6 +3613,9 @@ if ($flavour =~ /64/) {                   ######## 64-bit code
        s/\.[ui]?64//o and s/\.16b/\.2d/go;
        s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+       # Switch preprocessor checks to aarch64 versions.
+       s/__ARME([BL])__/__AARCH64E$1__/go;
+
        print $_,"\n";
     }
 } else {                               ######## 32-bit code
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index ca48045670..848f06542c 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -21,11 +21,6 @@
 #  elif defined(__GNUC__)
 #   if   defined(__aarch64__)
 #    define __ARM_ARCH__ 8
-#    if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-#     define __ARMEB__
-#    else
-#     define __ARMEL__
-#    endif
   /*
    * Why doesn't gcc define __ARM_ARCH__? Instead it defines
    * bunch of below macros. See all_architectures[] table in
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 57f893e77c..a1cfad0ef6 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -755,6 +755,9 @@ if ($flavour =~ /64/) {                     ######## 64-bit code
        s/\.[uisp]?64//o and s/\.16b/\.2d/go;
        s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;
 
+       # Switch preprocessor checks to aarch64 versions.
+       s/__ARME([BL])__/__AARCH64E$1__/go;
+
        print $_,"\n";
     }
 } else {                               ######## 32-bit code
diff --git a/crypto/poly1305/asm/poly1305-armv8.pl b/crypto/poly1305/asm/poly1305-armv8.pl
index 20816c4283..e2c7f2822c 100755
--- a/crypto/poly1305/asm/poly1305-armv8.pl
+++ b/crypto/poly1305/asm/poly1305-armv8.pl
@@ -86,7 +86,7 @@ poly1305_init:
        ldp     $r0,$r1,[$inp]          // load key
        mov     $s1,#0xfffffffc0fffffff
        movk    $s1,#0x0fff,lsl#48
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $r0,$r0                 // flip bytes
        rev     $r1,$r1
 #endif
@@ -136,7 +136,7 @@ poly1305_blocks:
 .Loop:
        ldp     $t0,$t1,[$inp],#16      // load input
        sub     $len,$len,#16
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $t0,$t0
        rev     $t1,$t1
 #endif
@@ -204,13 +204,13 @@ poly1305_emit:
        csel    $h0,$h0,$d0,eq
        csel    $h1,$h1,$d1,eq
 
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        ror     $t0,$t0,#32             // flip nonce words
        ror     $t1,$t1,#32
 #endif
        adds    $h0,$h0,$t0             // accumulate nonce
        adc     $h1,$h1,$t1
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $h0,$h0                 // flip output bytes
        rev     $h1,$h1
 #endif
@@ -345,7 +345,7 @@ poly1305_blocks_neon:
        adcs    $h1,$h1,xzr
        adc     $h2,$h2,xzr
 
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $d0,$d0
        rev     $d1,$d1
 #endif
@@ -391,7 +391,7 @@ poly1305_blocks_neon:
        ldp     $d0,$d1,[$inp],#16      // load input
        sub     $len,$len,#16
        add     $s1,$r1,$r1,lsr#2       // s1 = r1 + (r1 >> 2)
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $d0,$d0
        rev     $d1,$d1
 #endif
@@ -476,7 +476,7 @@ poly1305_blocks_neon:
        lsl     $padbit,$padbit,#24
        add     x15,$ctx,#48
 
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     x8,x8
        rev     x12,x12
        rev     x9,x9
@@ -512,7 +512,7 @@ poly1305_blocks_neon:
        ld1     {$S2,$R3,$S3,$R4},[x15],#64
        ld1     {$S4},[x15]
 
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     x8,x8
        rev     x12,x12
        rev     x9,x9
@@ -573,7 +573,7 @@ poly1305_blocks_neon:
        umull   $ACC1,$IN23_0,${R1}[2]
         ldp    x9,x13,[$in2],#48
        umull   $ACC0,$IN23_0,${R0}[2]
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
         rev    x8,x8
         rev    x12,x12
         rev    x9,x9
@@ -638,7 +638,7 @@ poly1305_blocks_neon:
        umlal   $ACC4,$IN01_2,${R2}[0]
        umlal   $ACC1,$IN01_2,${S4}[0]
        umlal   $ACC2,$IN01_2,${R0}[0]
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
         rev    x8,x8
         rev    x12,x12
         rev    x9,x9
@@ -922,13 +922,13 @@ poly1305_emit_neon:
        csel    $h0,$h0,$d0,eq
        csel    $h1,$h1,$d1,eq
 
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        ror     $t0,$t0,#32             // flip nonce words
        ror     $t1,$t1,#32
 #endif
        adds    $h0,$h0,$t0             // accumulate nonce
        adc     $h1,$h1,$t1
-#ifdef __ARMEB__
+#ifdef __AARCH64EB__
        rev     $h0,$h0                 // flip output bytes
        rev     $h1,$h1
 #endif
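
As a sanity check after regeneration, one can grep the emitted 64-bit
assembly for any remaining 32-bit endianness defines. A hypothetical
Perl one-liner (the output file name poly1305-armv8.S is an assumption;
perlasm writes wherever the build points it):

    perl -ne 'print "$ARGV:$.: $_" if /__ARME[BL]__/' poly1305-armv8.S

No output means the transform caught every occurrence.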
