Author: Leonidas Da Silva Barbosa <leosi...@linux.vnet.ibm.com>
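Add a POWER8 assembly implementation of AES-XTS (aes_p8_xts_encrypt) to aesp8-ppc.pl and hook it into the EVP XTS key setup.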

Signed-off-by: Paulo Flabiano Smorigo <pfsmor...@linux.vnet.ibm.com>
Signed-off-by: Leonidas Da Silva Barbosa <leosi...@linux.vnet.ibm.com>
---
 crypto/aes/asm/aesp8-ppc.pl | 237 +++++++++++++++++++++++++++++++++++++++++++-
 crypto/evp/e_aes.c          |   7 ++
 2 files changed, 243 insertions(+), 1 deletion(-)

diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index a1891cc..28ae77c 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -84,6 +84,19 @@ Lconsts:
        .byte   0,12,0x14,0,0,0,0,0
 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
 
+.align 7
+gf:                              # 16-byte constant that Ltweak loads with lvx
+.long   0x87000000, 0x00000000, 0x00000000, 0x00000000  ?rev
+Lgf:                             # helper: returns the address of gf in r12
+       mflr   r0                 # keep the caller's LR in r0
+       bcl 20,21,\$+4            # branch-and-link to the next instruction, so LR = its address
+       mflr r12                  # r12 = address of this instruction (Lgf+8)
+       addi r12,r12,-24          # step back over 8 bytes of code and 16 bytes of data: r12 = gf
+       mtlr  r0                  # restore the caller's LR
+       blr                       # return with r12 pointing at gf
+       .long 0
+       .byte 0,12,0x14,0,0,0,0,0
+
 .globl .${prefix}_set_encrypt_key
 .align 5
 .${prefix}_set_encrypt_key:
@@ -1886,6 +1899,228 @@ Lctr32_enc8x_done:
 .size  .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
 ___
 }}     }}}
+#######
+{{
+my ($inp,$out,$len,$key,$tweak,$enc,$rounds,$idx)=map("r$_",(3..10));
+my ($rndkey0, $rndkey1,$inout,$tmp)=map("v$_",(0..3));
+my ($intweak,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+                       map("v$_",(4..10));
+
+
+$code.=<<___;
+.globl .${prefix}_xts_encrypt
+.align 5
+.${prefix}_xts_encrypt:
+    ${UCMP}i   $len,16
+    bltlr-                            # less than one full block: return without doing anything
+
+    cmpwi      $enc,0                 # cr0 is consumed by the beq at the end of this setup
+    lis        r0,0xffe0
+    mfspr      $vrsave,256            # save the caller's VRSAVE (SPR 256)
+    mtspr      256,r0                 # mask 0xffe00000 covers v0-v10. NOTE(review): Ltweak also uses v11-v16 - confirm
+
+    li         $idx,15
+    vxor       $rndkey0,$rndkey0,$rndkey0   # all-zero vector, used below to build outmask
+    le?vspltisb $tmp,0x0f             # little-endian builds only: constant used to flip permute controls
+
+    lvx        $intweak,0,$tweak      # aligned quadword holding the first tweak byte
+    lvsl       $inpperm,0,$tweak      # permute control for the tweak pointer's misalignment
+    lvx        $inptail,$idx,$tweak   # aligned quadword holding tweak byte 15
+    le?vxor    $inpperm,$inpperm,$tmp
+    vperm      $intweak,$intweak,$inptail,$inpperm   # intweak = the 16 tweak bytes
+
+    neg        r11,$inp               # r11 = -inp, feeds lvsr below
+    ?lvsl      $keyperm,0,$key        # permute control used to realign round keys
+    lwz        $rounds,240($key)      # 32-bit field at offset 240 of the key structure
+
+    lvsr       $inpperm,0,r11         # permute control for unaligned input
+    lvx        $inptail,0,$inp        # first aligned quadword of input
+    addi       $inp,$inp,15           # next lvx then fetches the quadword holding the block's last byte
+    le?vxor    $inpperm,$inpperm,$tmp
+
+    ?lvsr      $outperm,0,$out        # permute control for unaligned output
+    vspltisb   $outmask,-1
+    lvx        $outhead,0,$out        # current contents of the first aligned output quadword
+    ?vperm     $outmask,$rndkey0,$outmask,$outperm   # vsel mask splicing carried bytes with new ones
+    le?vxor    $outperm,$outperm,$tmp
+
+    srwi       $rounds,$rounds,1
+    li         $idx,16                # byte offset of the second round key
+    subi       $rounds,$rounds,1      # loop count = rounds/2 - 1 (each iteration does two rounds)
+
+    beq  Lxts_dec      # enc == 0 (cr0 from the cmpwi above): decrypt
+    b Lxts_enc         # otherwise encrypt
+Ltweak:                          # next tweak: shift left one bit, xor in the gf constant on carry-out
+       mflr    r11               # save LR across the call to Lgf
+       xor     r12,r12,r12
+       addi    r12,r12,0x1
+       lvsr    v11,0,r12         # NOTE(review): looks unused - r12 and v11 are both overwritten below
+
+       bl Lgf                    # r12 = address of the gf constant
+       mtlr    r11               # restore LR
+
+       vor     v11,v4,v4         # v11 = copy of the current tweak (v4 is intweak)
+       vspltisb  v15,1           # create a mask 0101010...01
+       vsl     v13,$intweak,v15  # shift tweak left 1 bit
+       vor     $intweak,v13,v13
+       vand    v13,v13,v15       # bits that crossed a byte boundary during the shift
+
+       vxor    v13,v4,v13        # clear them, leaving each byte shifted on its own
+       vxor    v12,v12,v12
+       vsldoi  v12,v11,v12,0xf   # v12 = byte 15 of the old tweak in byte 0, zeros elsewhere
+
+       vspltisb v14,0x06
+       vspltisb v16,0x02
+       vsl      v14,v16,v14      # create a mask of 808080..80 bytes to check carry bits
+       vspltisb v16,0x0f
+       vand     v11,v11,v14      # top bit of every old tweak byte, i.e. the per-byte carries
+
+       vsro     v11,v11,v16      # shift the carries right by one byte
+       vsr      v11,v11,v16          # and by 7 more bits, landing on bit 0 of the following byte
+
+       vxor     v13,v13,v11      # apply mask and finally we have a tweak shifted 1 bit
+       vor      $intweak,v13,v13
+       vxor     v13,v13,v13
+
+       vand     v14,v12,v14      # top bit of the old byte 15: set means the gf constant must be applied
+       vxor     v16,v16,v16
+       vcmpequb v16,v16,v14      # 0xff in every byte where v14 is zero
+
+       lvx      v11,0,r12        # load the gf constant
+
+       vspltisb v15, -1
+       vxor     v16, v16,v15     # invert: 0xff only where the carry bit was set
+       vand     v11,v16,v11      # gf constant if there was a carry, zero otherwise
+       vxor     $intweak,v11,$intweak
+
+       cmpwi    $enc,0
+       beq      Lxts_dec         # enc == 0: decrypt, otherwise fall through to Lxts_enc
+
+Lxts_enc:                                      # encrypt one 16-byte block with the current tweak
+    vmr        $inout,$inptail                 # quadword fetched earlier becomes the head of this block
+    lvx        $inptail,0,$inp                 # quadword holding the rest of this block
+    addi       $inp,$inp,16
+    mtctr      $rounds                         # CTR drives Loop_xts_enc
+    subi       $len,$len,16                    # one block consumed
+
+    lvx        $rndkey0,0,$key
+    vperm      $inout,$inout,$inptail,$inpperm # extract the 16 input bytes
+    lvx        $rndkey1,$idx,$key
+    addi       $idx,$idx,16
+    ?vperm     $rndkey0,$rndkey0,$rndkey1,$keyperm   # realign round key 0
+    vxor       $inout,$inout,$rndkey0          # xor in round key 0
+    lvx        $rndkey0,$idx,$key
+    addi       $idx,$idx,16
+    vxor       $inout,$inout,$intweak          #P = T xor PP
+
+Loop_xts_enc:                                  # two cipher rounds per iteration
+    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
+    vcipher     $inout,$inout,$rndkey1
+    lvx         $rndkey1,$idx,$key             # prefetch the next round key
+
+    addi        $idx,$idx,16
+    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
+    vcipher     $inout,$inout,$rndkey0
+    lvx         $rndkey0,$idx,$key
+
+    addi        $idx,$idx,16
+    bdnz        Loop_xts_enc
+
+    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
+    vcipher     $inout,$inout,$rndkey1
+    lvx         $rndkey1,$idx,$key
+    li          $idx,16                        # reset the key offset for the next block
+    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
+    vcipherlast $inout,$inout,$rndkey0         # final round
+    vxor        $inout,$inout,$intweak         #C = T xor CC
+    ${UCMP}i    $len,16                        # is there another full block (tested by bge below)
+
+    vperm       $tmp,$inout,$inout,$outperm    # rotate the result for the unaligned store
+    vsel        $inout,$outhead,$tmp,$outmask  # splice bytes carried from the previous store
+    vmr         $outhead,$tmp                  # carry the remaining bytes to the next store
+    stvx        $inout,0,$out
+    addi        $out,$out,16
+    bge     Ltweak                             # more data: compute the next tweak and go again
+
+   b       Lxts_done
+
+Lxts_dec:                                      # decrypt one 16-byte block with the current tweak
+    vmr        $inout,$inptail                 # quadword fetched earlier becomes the head of this block
+    lvx        $inptail,0,$inp                 # quadword holding the rest of this block
+    addi       $inp,$inp,16
+    mtctr      $rounds                         # CTR drives Loop_xts_dec
+    subi       $len,$len,16                    # one block consumed
+
+    lvx        $rndkey0,0,$key
+    vperm      $inout,$inout,$inptail,$inpperm # extract the 16 input bytes
+    lvx        $rndkey1,$idx,$key
+    addi       $idx,$idx,16
+    ?vperm     $rndkey0,$rndkey0,$rndkey1,$keyperm   # realign round key 0
+    vxor       $inout,$inout,$rndkey0          # xor in round key 0
+    lvx        $rndkey0,$idx,$key
+    addi       $idx,$idx,16
+    vxor       $inout,$inout,$intweak    # xor the tweak into the input block
+
+Loop_xts_dec:                                  # two inverse-cipher rounds per iteration
+    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
+    vncipher    $inout,$inout,$rndkey1
+    lvx         $rndkey1,$idx,$key             # prefetch the next round key
+
+    addi        $idx,$idx,16
+    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
+    vncipher    $inout,$inout,$rndkey0
+    lvx         $rndkey0,$idx,$key
+
+    addi        $idx,$idx,16
+    bdnz        Loop_xts_dec
+
+    ?vperm      $rndkey1,$rndkey1,$rndkey0,$keyperm
+    vncipher    $inout,$inout,$rndkey1
+    lvx         $rndkey1,$idx,$key
+    li          $idx,16                        # reset the key offset for the next block
+    ?vperm      $rndkey0,$rndkey0,$rndkey1,$keyperm
+    vncipherlast $inout,$inout,$rndkey0        # final round
+    vxor        $inout,$inout,$intweak   # xor the tweak into the output block
+    ${UCMP}i    $len,16                        # is there another full block (tested by bge below)
+
+    vperm       $tmp,$inout,$inout,$outperm    # rotate the result for the unaligned store
+    vsel        $inout,$outhead,$tmp,$outmask  # splice bytes carried from the previous store
+    vmr         $outhead,$tmp                  # carry the remaining bytes to the next store
+    stvx        $inout,0,$out
+    addi        $out,$out,16
+    bge     Ltweak                             # more data, otherwise fall through to Lxts_done
+
+Lxts_done:                                            # flush pending output, store the tweak back, return
+       addi            $out,$out,-1                   # back into the last quadword that was only partly written
+       lvx             $inout,0,$out                  # its current memory contents
+       vsel            $inout,$outhead,$inout,$outmask   # keep existing bytes, add the carried tail
+       stvx            $inout,0,$out
+
+       neg             $enc,$tweak                    # enc register reused as scratch: -tweak
+       li              $idx,15
+       vxor            $rndkey0,$rndkey0,$rndkey0
+       vspltisb        $outmask,-1
+       le?vspltisb     $tmp,0x0f
+       ?lvsl           $outperm,0,$enc                # permute control for the tweak buffer's misalignment
+       ?vperm          $outmask,$rndkey0,$outmask,$outperm   # rebuild the splice mask for the tweak buffer
+       le?vxor         $outperm,$outperm,$tmp
+       lvx             $outhead,0,$tweak              # quadword holding the first tweak byte
+       vperm           $intweak,$intweak,$intweak,$outperm   # rotate the tweak into store position
+       vsel            $inout,$outhead,$intweak,$outmask
+       lvx             $inptail,$idx,$tweak           # quadword holding tweak byte 15
+       stvx            $inout,0,$tweak                # first part of the updated tweak
+       vsel            $inout,$intweak,$inptail,$outmask
+       stvx            $inout,$idx,$tweak             # remaining part of the updated tweak
+
+       mtspr           256,$vrsave                    # restore the caller's VRSAVE
+       blr
+       .long   0
+       .byte   0,12,0x14,0,0,0,6,0
+       .long 0
+.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+___
+}}
 
 my $consts=1;
 foreach(split("\n",$code)) {
@@ -1918,7 +2153,7 @@ foreach(split("\n",$code)) {
            print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
            next;
        }
-       $consts=0 if (m/Lconsts:/o);    # end of table
+       $consts=0 if (m/Lgf:/o);    # end of table
 
        # instructions prefixed with '?' are endian-specific and need
        # to be adjusted accordingly...
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index c906e6f..b83af7b 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -196,6 +196,7 @@ void AES_xts_decrypt(const char *inp, char *out, size_t len,
 #  define HWAES_encrypt aes_p8_encrypt
 #  define HWAES_decrypt aes_p8_decrypt
 #  define HWAES_cbc_encrypt aes_p8_cbc_encrypt
+#  define HWAES_xts_encrypt aes_p8_xts_encrypt
 #  define HWAES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks
 # endif
 
@@ -1039,6 +1040,9 @@ void HWAES_decrypt(const unsigned char *in, unsigned char *out,
 void HWAES_cbc_encrypt(const unsigned char *in, unsigned char *out,
                        size_t length, const AES_KEY *key,
                        unsigned char *ivec, const int enc);
+void HWAES_xts_encrypt(const unsigned char *in, unsigned char *out,
+                       size_t length, const AES_KEY *key,
+                       unsigned char *ivec, const int enc);
 void HWAES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
                                 size_t len, const AES_KEY *key,
                                 const unsigned char ivec[16]);
@@ -1850,6 +1854,9 @@ static int aes_xts_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                 xctx->xts.block2 = (block128_f) HWAES_encrypt;
 
                 xctx->xts.key1 = &xctx->ks1;
+
+                if (HWAES_xts_encrypt)
+                    xctx->stream = HWAES_xts_encrypt;
                 break;
             } else
 # endif
-- 
2.5.5


-- 
Paulo Flabiano Smorigo
IBM Linux Technology Center


-- 
Ticket here: http://rt.openssl.org/Ticket/Display.html?id=4491
Please log in as guest with password guest if prompted

-- 
openssl-dev mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-dev

Reply via email to