Add support for the new POWER8 vcipher instructions. OPENSSL_ppccap_P
is used to choose which implementation to use, the regular one or the
POWER8 one.

Vcipher instructions use the straightforward decryption described in
FIPS-197 instead of the equivalent decryption that requires MixColumns
to be applied to subkeys. In order to use the same AES_set_decrypt_key()
function regardless of which AES_decrypt() implementation is used,
ppc_AES_decrypt_compact() was changed so that the order of the
MixColumns and AddRoundKey steps is reversed.

Also, the ppc_AES_[en|de]crypt functions were renamed to
ppc_AES_[en|de]crypt_nocompact, and the AES_[en|de]crypt functions in
aes-ppc.pl were renamed to ppc_AES_[en|de]crypt, to create a clear
distinction between them and the newly added
ppc_vcipher_AES_[en|de]crypt functions.
---
 Configure                 |  6 +--
 crypto/aes/aes_core.c     |  3 ++
 crypto/aes/asm/aes-ppc.pl | 96 ++++++++++++++++++++++++++++++++++++-----------
 crypto/ppccap.c           | 28 ++++++++++++++
 4 files changed, 109 insertions(+), 24 deletions(-)

diff --git a/Configure b/Configure
index cf43c8d..0794b83 100755
--- a/Configure
+++ b/Configure
@@ -347,7 +347,7 @@ my %table=(
 # *-generic* is endian-neutral target, but ./config is free to
 # throw in -D[BL]_ENDIAN, whichever appropriate...
 "linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer 
-Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL 
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc",   "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG 
RC4_CHAR RC4_CHUNK DES_RISC1 
DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-ppc",   "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall 
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK 
DES_RISC1 
DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 # It's believed that majority of ARM toolchains predefine appropriate -march.
 # If you compiler does not, do complement config command line with one!
 "linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR 
RC4_CHUNK DES_INT DES_UNROLL 
BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -364,8 +364,8 @@ my %table=(
 "linux-aout",  "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -march=i486 
-Wall::(unknown):::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out",
 ####
 "linux-generic64","gcc:-DTERMIO -O3 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT 
DES_UNROLL 
BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 
DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
-"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_RISC1 
DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
+"linux-ppc64", "gcc:-m64 -DB_ENDIAN -DTERMIO -O3 -Wall 
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR 
RC4_CHUNK DES_RISC1 
DES_UNROLL:${ppc64_asm}:linux64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
+"linux-ppc64le","gcc:-m64 -DL_ENDIAN -DTERMIO -O3 -Wall 
-DAES_NO_MIXCOL_DECR_KEY::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR 
RC4_CHUNK DES_RISC1 
DES_UNROLL:${ppc64_asm}:linux64le:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::",
 "linux-ia64",  "gcc:-DL_ENDIAN -DTERMIO -O3 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL 
DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-ia64-icc","icc:-DL_ENDIAN -DTERMIO -O2 -Wall::-D_REENTRANT::-ldl 
-no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 
DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "linux-x86_64",        "gcc:-m64 -DL_ENDIAN -DTERMIO -O3 
-Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT 
DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c
index f333c16..07b1d7a 100644
--- a/crypto/aes/aes_core.c
+++ b/crypto/aes/aes_core.c
@@ -1333,6 +1333,8 @@ int AES_set_decrypt_key(const unsigned char *userKey, 
const int bits,
                temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
                temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
        }
+
+#ifndef AES_NO_MIXCOL_DECR_KEY
        /* apply the inverse MixColumn transform to all round keys but the 
first and the last: */
        for (i = 1; i < (key->rounds); i++) {
                rk += 4;
@@ -1363,6 +1365,7 @@ int AES_set_decrypt_key(const unsigned char *userKey, 
const int bits,
 #endif
                }
        }
+#endif
        return 0;
 }
 
diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
index b38bce1..0e0e557 100644
--- a/crypto/aes/asm/aes-ppc.pl
+++ b/crypto/aes/asm/aes-ppc.pl
@@ -9,12 +9,12 @@
 
 # Needs more work: key setup, CBC routine...
 #
-# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
+# ppc_AES_[en|de]crypt_nocompact perform at 18 cycles per byte processed with
 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
 # 4.0. But these are not the ones currently used! Their "compact"
 # counterparts are, for security reason. ppc_AES_encrypt_compact runs
-# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
-# at 1/3 of ppc_AES_decrypt.
+# at 1/2 of ppc_AES_encrypt_nocompact speed, while ppc_AES_decrypt_compact -
+# at 1/3 of ppc_AES_decrypt_nocompact.
 
 # February 2010
 #
@@ -109,10 +109,16 @@ $acc15="r31";
 $mask80=$Tbl2;
 $mask1b=$Tbl3;
 
+# Registers used by vcipher functions
+my $rnds="r6";
+my $state = "vr0";
+my $subkey="vr1";
+
 $code.=<<___;
 .machine       "any"
 .text
-
+___
+$code.=<<___;
 .align 7
 LAES_Te:
        mflr    r0
@@ -334,12 +340,11 @@ $code.=<<___;
 .byte  0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
 
 
-.globl .AES_encrypt
+.globl .ppc_AES_encrypt
 .align 7
-.AES_encrypt:
+.ppc_AES_encrypt:
        $STU    $sp,-$FRAME($sp)
        mflr    r0
-
        $PUSH   $out,`$FRAME-$SIZE_T*19`($sp)
        $PUSH   r14,`$FRAME-$SIZE_T*18`($sp)
        $PUSH   r15,`$FRAME-$SIZE_T*17`($sp)
@@ -523,7 +528,7 @@ Lenc_done:
        .long   0
 
 .align 5
-Lppc_AES_encrypt:
+Lppc_AES_encrypt_nocompact:
        lwz     $acc00,240($key)
        addi    $Tbl1,$Tbl0,3
        lwz     $t0,0($key)
@@ -810,11 +815,11 @@ Lenc_compact_done:
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
-.size  .AES_encrypt,.-.AES_encrypt
+.size  .ppc_AES_encrypt,.-.ppc_AES_encrypt
 
-.globl .AES_decrypt
+.globl .ppc_AES_decrypt
 .align 7
-.AES_decrypt:
+.ppc_AES_decrypt:
        $STU    $sp,-$FRAME($sp)
        mflr    r0
 
@@ -1001,7 +1006,7 @@ Ldec_done:
        .long   0
 
 .align 5
-Lppc_AES_decrypt:
+Lppc_AES_decrypt_nocompact:
        lwz     $acc00,240($key)
        addi    $Tbl1,$Tbl0,3
        lwz     $t0,0($key)
@@ -1164,14 +1169,14 @@ $code.=<<___ if ($SIZE_T==8);
 ___
 $code.=<<___;
        mtctr   $acc00
+       xor     $s0,$s0,$t0
+       xor     $s1,$s1,$t1
+       xor     $s2,$s2,$t2
+       xor     $s3,$s3,$t3
 .align 4
 Ldec_compact_loop:
-       xor     $s0,$s0,$t0
-       xor     $s1,$s1,$t1
        rlwinm  $acc00,$s0,`32-24`,24,31
-       xor     $s2,$s2,$t2
        rlwinm  $acc01,$s1,`32-24`,24,31
-       xor     $s3,$s3,$t3
        rlwinm  $acc02,$s2,`32-24`,24,31
        rlwinm  $acc03,$s3,`32-24`,24,31
        rlwinm  $acc04,$s3,`32-16`,24,31
@@ -1223,6 +1228,11 @@ Ldec_compact_loop:
        lwz     $t3,12($key)
        or      $s3,$s3,$acc15
 
+       xor     $s0,$s0,$t0
+       xor     $s1,$s1,$t1
+       xor     $s2,$s2,$t2
+       xor     $s3,$s3,$t3
+
        addi    $key,$key,16
        bdz     Ldec_compact_done
 ___
@@ -1438,15 +1448,59 @@ $code.=<<___;
        b       Ldec_compact_loop
 .align 4
 Ldec_compact_done:
-       xor     $s0,$s0,$t0
-       xor     $s1,$s1,$t1
-       xor     $s2,$s2,$t2
-       xor     $s3,$s3,$t3
        blr
        .long   0
        .byte   0,12,0x14,0,0,0,0,0
-.size  .AES_decrypt,.-.AES_decrypt
+.size  .ppc_AES_decrypt,.-.ppc_AES_decrypt
+___
 
+# Skeleton for encryption and decryption
+sub vcipher_aes_block {
+my ($func, $instr) = @_;
+$code.=<<___;
+.globl .$func
+.align 7
+.$func:
+       # Load number of rounds and input block
+       lwz     $rnds, 240($key)
+       lxvd2x  $state + 32, 0, $inp
+       # Initial round
+       lxvd2x  $subkey + 32, 0, $key
+       vxor    $state, $state, $subkey
+       addi    $key, $key, 16
+       # Check rounds
+       cmpldi  $rnds, 10
+       ble     L${func}10
+       cmpldi  $rnds, 12
+       ble     L${func}12
+___
+for ($i = 0; $i < 13; $i++) {
+$code.="L${func}12:" if ($i == 2);
+$code.="L${func}10:" if ($i == 4);
+$code.=<<___;
+       lxvd2x  $subkey + 32, 0, $key
+       $instr  $state, $state, $subkey
+       addi    $key, $key, 16
+___
+}
+$code.=<<___;
+       # Last round
+       lxvd2x  $subkey + 32, 0, $key
+       ${instr}last    $state, $state, $subkey
+       # Store output block
+       stxvd2x $state + 32, 0, $out
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,0,0
+.size  .${func},.-.${func}
+___
+}
+
+# Create vcipher AES functions
+vcipher_aes_block('ppc_vcipher_AES_encrypt', 'vcipher');
+vcipher_aes_block('ppc_vcipher_AES_decrypt', 'vncipher');
+
+$code.=<<___;
 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
 .align 7
 ___
diff --git a/crypto/ppccap.c b/crypto/ppccap.c
index b38fc09..585c4b6 100644
--- a/crypto/ppccap.c
+++ b/crypto/ppccap.c
@@ -9,6 +9,7 @@
 #endif
 #include <crypto.h>
 #include <openssl/bn.h>
+#include <openssl/aes.h>
 
 #define PPC_FPU64      (1<<0)
 #define PPC_ALTIVEC    (1<<1)
@@ -18,6 +19,33 @@ static int OPENSSL_ppccap_P = 0;
 
 static sigset_t all_masked;
 
+void ppc_AES_encrypt(const unsigned char *in, unsigned char *out,
+                    const AES_KEY *key);
+void ppc_AES_decrypt(const unsigned char *in, unsigned char *out,
+                    const AES_KEY *key);
+void ppc_vcipher_AES_encrypt(const unsigned char *in, unsigned char *out,
+                        const AES_KEY *key);
+void ppc_vcipher_AES_decrypt(const unsigned char *in, unsigned char *out,
+                        const AES_KEY *key);
+
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+                const AES_KEY *key)
+{
+       if (OPENSSL_ppccap_P & PPC_VCIPHER)
+               ppc_vcipher_AES_encrypt(in, out, key);
+       else
+               ppc_AES_encrypt(in, out, key);
+}
+
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+                const AES_KEY *key)
+{
+       if (OPENSSL_ppccap_P & PPC_VCIPHER)
+               ppc_vcipher_AES_decrypt(in, out, key);
+       else
+               ppc_AES_decrypt(in, out, key);
+}
+
 #ifdef OPENSSL_BN_ASM_MONT
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const 
BN_ULONG *np, const BN_ULONG *n0, int num)
        {
-- 
1.7.12

______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       openssl-dev@openssl.org
Automated List Manager                           majord...@openssl.org

Reply via email to