This patch adds support for the Intel AES-NI instruction set on the x86_64
platform.

Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
instructions that are going to be introduced in the next generation of
Intel processors, as of 2009. These instructions enable fast and secure
data encryption and decryption, using the Advanced Encryption Standard
(AES), defined by FIPS Publication number 197.  The architecture
introduces six instructions that offer full hardware support for
AES. Four of them support high performance data encryption and
decryption, and the other two instructions support the AES key
expansion procedure.

The white paper can be downloaded from:

http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf


- AES implementation based on AES-NI is put in crypto/aes/asm/aes-intel.S

- AES-NI operates on XMM registers, so the key structure needs to be
  128-bit aligned. A pad field is added to AES_KEY, and the key structure
  pointer is rounded up to a 128-bit boundary on entry to the AES-NI
  implementation.

- At each AES entry point in crypto/aes/asm/aes-x86_64.pl,
  OPENSSL_ia32cap_P is checked; if the corresponding bit (57) is set,
  execution branches into the AES-NI based implementation.

- The AES-NI based implementation cannot benefit from a specialized
  AES_cbc_encrypt, so the general C implementation is used. To resolve
  the name conflict, the original AES_cbc_encrypt is renamed to
  AES_cbc_encrypt_def and put in crypto/aes/aes_cbc_def.c.


Signed-off-by: Huang Ying <[EMAIL PROTECTED]>

---
 Configure                    |   20 +-
 crypto/aes/Makefile          |    9 -
 crypto/aes/aes.h             |    5 
 crypto/aes/aes_cbc.c         |   66 -------
 crypto/aes/aes_cbc_def.c     |  130 ++++++++++++++
 crypto/aes/asm/aes-intel.S   |  374 +++++++++++++++++++++++++++++++++++++++++++
 crypto/aes/asm/aes-x86_64.pl |   20 ++
 7 files changed, 546 insertions(+), 78 deletions(-)

--- /dev/null
+++ b/crypto/aes/asm/aes-intel.S
@@ -0,0 +1,374 @@
+/*
+ * ====================================================================
+ * Written by Huang Ying <[EMAIL PROTECTED]> for the OpenSSL
+ * project to add support for Intel new AES instructions. Rights for
+ * redistribution and usage in source and binary forms are granted
+ * according to the OpenSSL license.
+ * ====================================================================
+ */
+
+.align 16                              /* in: xmm0 = previous round key, xmm1 = aeskeygenassist result, rcx = output slot */
+key_expansion_128:                     /* out: xmm0 = next round key, also stored at (rcx); rcx += 16; clobbers xmm1, xmm4 */
+       movaps %xmm1, %xmm4             # keep a copy of the keygenassist result
+       psrldq $12, %xmm1               # lane 0 = top dword t, other lanes zero
+       pxor %xmm0, %xmm1               # lane 0 = t ^ w0
+       palignr $12, %xmm4, %xmm1       # shift lanes up by one, lane 0 = t again
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1       # xmm1 = t, t^w0, t^w0^w1, t^w0^w1^w2
+       pxor %xmm1, %xmm0               # xmm0 lane i = t ^ w0 ^ ... ^ wi
+
+       movaps %xmm0, (%rcx)            # aligned store: rcx has to be 16-byte aligned
+       add $0x10, %rcx                 # advance to the next round key slot
+       ret
+
+.align 16                              /* in: xmm0 = four key words, xmm2 low qword = next two words, xmm1 = aeskeygenassist of xmm2 */
+key_expansion_192:                     /* in/out: rcx = output slot, r9 = zero or all-ones phase flag (inverted on every call) */
+       pshufd $0b01010101, %xmm1, %xmm1        # broadcast dword 1 of the keygenassist result (t)
+       movaps %xmm1, %xmm4
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1               # shift lanes up by one, lane 0 = t
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm1, %xmm0                       # xmm0 lane i = t ^ w0 ^ ... ^ wi (next four words)
+
+       pshufd $0b11111111, %xmm0, %xmm3        # broadcast the newest word of xmm0
+       pxor %xmm2, %xmm3
+       palignr $12, %xmm0, %xmm3
+       pxor %xmm2, %xmm3                       # low qword of xmm3 = following two words; only that half is used later
+
+       test %r9, %r9                           # ZF = (r9 == 0)
+       not %r9                                 # flip the phase; not leaves the flags untouched
+       jnz 1f                                  # decided by the value r9 had before the flip
+
+       movaps %xmm0, %xmm1                     # r9 was zero: emit two 16-byte blocks
+       pslldq $8, %xmm2                        # move the two pending words to the high qword
+       palignr $8, %xmm2, %xmm1                # xmm1 = two pending words, then low two words of xmm0
+       movaps %xmm1, (%rcx)
+       add $0x10, %rcx
+       movaps %xmm3, %xmm2                     # xmm2 = source for the next keygenassist
+       palignr $8, %xmm0, %xmm3                # xmm3 = high two words of xmm0, then the two new words
+       movaps %xmm3, (%rcx)
+       add $0x10, %rcx
+       ret
+1:                                             # r9 was non-zero: emit one block, two words stay pending in xmm2
+       movaps %xmm0, (%rcx)
+       add $0x10, %rcx
+       movaps %xmm3, %xmm2
+       ret
+
+.align 16                              /* in: xmm0, xmm2 = previous two round keys, xmm1 = aeskeygenassist of xmm2, rcx = output slot */
+key_expansion_256:                     /* stores the next two round keys at (rcx); only one when r9 is non-zero; rcx advances by 16 per store */
+       movaps %xmm1, %xmm4             # keep a copy of the keygenassist result
+       psrldq $12, %xmm1               # lane 0 = top dword t, other lanes zero
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1       # shift lanes up by one, lane 0 = t again
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm0, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm1, %xmm0               # xmm0 lane i = t ^ w0 ^ ... ^ wi
+
+       movaps %xmm0, (%rcx)            # first new round key
+       add $0x10, %rcx
+
+       test %r9, %r9
+       jnz 1f                          # caller set r9: skip the second round key
+
+       # aeskeygenassist $0x1, %xmm0, %xmm1
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+
+       pshufd $0b10101010, %xmm1, %xmm1        # broadcast dword 2 of the keygenassist result
+       movaps %xmm1, %xmm4
+       pxor %xmm2, %xmm1
+       palignr $12, %xmm4, %xmm1               # same running-xor chain as above, applied to xmm2
+       pxor %xmm2, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm2, %xmm1
+       palignr $12, %xmm4, %xmm1
+       pxor %xmm1, %xmm2
+
+       movaps %xmm2, (%rcx)            # second new round key
+       add $0x10, %rcx
+1:
+       ret
+
+.align 16                              /* in: rdi = user key, esi = key length in bits (128, 192 or 256), rdx = key structure */
+.global intel_AES_set_encrypt_key      /* out: rax = 0 on success, -1 if rdi or rdx is zero, -2 for any other key length */
+intel_AES_set_encrypt_key:             /* on success rdx = aligned key structure and esi = round count (used by intel_AES_set_decrypt_key) */
+       test %rdi, %rdi
+       jz 3f                           # null user key
+       test %rdx, %rdx
+       jz 3f                           # null key structure
+       add $0xf, %rdx                  # make key struct 128-bit aligned
+       and $0xfffffffffffffff0, %rdx   # i.e. round rdx up to a multiple of 16
+       movups (%rdi), %xmm0            # user key (first 16 bytes)
+       movaps %xmm0, (%rdx)            # round key 0
+       lea 0x10(%rdx), %rcx            # rcx = slot for the next round key
+       cmp $256, %esi
+       jnz 1f
+       mov $14, %esi                   # esi is reused as the round count
+       movl %esi, 240(%rdx)            # 14 rounds for 256
+       movups 0x10(%rdi), %xmm2        # other user key
+       movaps %xmm2, (%rcx)            # round key 1
+       add $0x10, %rcx
+       xor %r9, %r9                    # r9 = 0: key_expansion_256 emits two round keys per call
+       # aeskeygenassist $0x1, %xmm2, %xmm1    # round 1
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+       call key_expansion_256
+       # aeskeygenassist $0x2, %xmm2, %xmm1    # round 2
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+       call key_expansion_256
+       # aeskeygenassist $0x4, %xmm2, %xmm1    # round 3
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+       call key_expansion_256
+       # aeskeygenassist $0x8, %xmm2, %xmm1    # round 4
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+       call key_expansion_256
+       # aeskeygenassist $0x10, %xmm2, %xmm1   # round 5
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+       call key_expansion_256
+       # aeskeygenassist $0x20, %xmm2, %xmm1   # round 6
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+       call key_expansion_256
+       # aeskeygenassist $0x40, %xmm2, %xmm1   # round 7
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+       not %r9                         # r9 = all-ones: the final call emits only one round key
+       call key_expansion_256
+       xor %rax, %rax                  # return 0
+       ret
+1:                                     # not 256 bits: try 192
+       cmp $192, %esi
+       jnz 2f
+       mov $12, %esi                   # esi is reused as the round count
+       movl %esi, 240(%rdx)            # 12 rounds for 192
+       movq 0x10(%rdi), %xmm2          # other user key
+       xor %r9, %r9                    # phase flag for key_expansion_192, which inverts it on every call
+       # aeskeygenassist $0x1, %xmm2, %xmm1    # round 1
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x01
+       call key_expansion_192
+       # aeskeygenassist $0x2, %xmm2, %xmm1    # round 2
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x02
+       call key_expansion_192
+       # aeskeygenassist $0x4, %xmm2, %xmm1    # round 3
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x04
+       call key_expansion_192
+       # aeskeygenassist $0x8, %xmm2, %xmm1    # round 4
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x08
+       call key_expansion_192
+       # aeskeygenassist $0x10, %xmm2, %xmm1   # round 5
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x10
+       call key_expansion_192
+       # aeskeygenassist $0x20, %xmm2, %xmm1   # round 6
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x20
+       call key_expansion_192
+       # aeskeygenassist $0x40, %xmm2, %xmm1   # round 7
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x40
+       call key_expansion_192
+       # aeskeygenassist $0x80, %xmm2, %xmm1   # round 8
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xca, 0x80
+       call key_expansion_192
+       xor %rax, %rax                  # return 0
+       ret
+2:                                     # not 192 bits: try 128
+       cmp $128, %esi
+       jnz 4f
+       mov $10, %esi                   # esi is reused as the round count
+       movl %esi, 240(%rdx)            # 10 rounds for 128
+       # aeskeygenassist $0x1, %xmm0, %xmm1    # round 1
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x01
+       call key_expansion_128
+       # aeskeygenassist $0x2, %xmm0, %xmm1    # round 2
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x02
+       call key_expansion_128
+       # aeskeygenassist $0x4, %xmm0, %xmm1    # round 3
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x04
+       call key_expansion_128
+       # aeskeygenassist $0x8, %xmm0, %xmm1    # round 4
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x08
+       call key_expansion_128
+       # aeskeygenassist $0x10, %xmm0, %xmm1   # round 5
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x10
+       call key_expansion_128
+       # aeskeygenassist $0x20, %xmm0, %xmm1   # round 6
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x20
+       call key_expansion_128
+       # aeskeygenassist $0x40, %xmm0, %xmm1   # round 7
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x40
+       call key_expansion_128
+       # aeskeygenassist $0x80, %xmm0, %xmm1   # round 8
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x80
+       call key_expansion_128
+       # aeskeygenassist $0x1b, %xmm0, %xmm1   # round 9
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x1b
+       call key_expansion_128
+       # aeskeygenassist $0x36, %xmm0, %xmm1   # round 10
+       .byte 0x66, 0x0f, 0x3a, 0xdf, 0xc8, 0x36
+       call key_expansion_128
+       xor %eax, %eax                  # return 0 (the 32-bit write also clears the upper half of rax)
+       ret
+3:                                     # null pointer argument
+       mov $-1, %rax
+       ret
+4:                                     # key length is not 128, 192 or 256
+       mov $-2, %rax
+       ret
+
+.align 16                              /* same arguments and return value as intel_AES_set_encrypt_key */
+.global intel_AES_set_decrypt_key      /* builds the encryption schedule, reverses its order, then applies aesimc to the inner round keys */
+intel_AES_set_decrypt_key:
+       call intel_AES_set_encrypt_key  # on success leaves rdx = aligned schedule and esi = round count
+       test %rax, %rax
+       jnz 3f                          # pass the error code through unchanged
+       lea 0x10(%rdx), %rcx            # rcx = second round key
+       shl $4, %esi                    # round count * 16 = offset of the last round key
+       add %rdx, %rsi                  # rsi = last round key
+       mov %rsi, %rdi                  # rdi = end marker for the aesimc loop below
+1:                                     # swap round keys from both ends towards the middle
+       movaps (%rdx), %xmm0
+       movaps (%rsi), %xmm1
+       movaps %xmm0, (%rsi)
+       movaps %xmm1, (%rdx)
+       lea 0x10(%rdx), %rdx
+       lea -0x10(%rsi), %rsi
+       cmp %rdx, %rsi
+       ja 1b                           # continue while rsi is still above rdx
+2:                                     # aesimc on every round key except the first and the last
+       movaps (%rcx), %xmm0
+       # aesimc %xmm0, %xmm1
+       .byte 0x66, 0x0f, 0x38, 0xdb, 0xc8
+       movaps %xmm1, (%rcx)
+       lea 0x10(%rcx), %rcx
+       cmp %rdi, %rcx
+       jnz 2b                          # stop when rcx reaches the last round key
+3:                                     # rax still holds the result of intel_AES_set_encrypt_key
+       ret
+
+.align 16                              /* in: rdi = 16-byte input block, rsi = 16-byte output block, rdx = key structure */
+.global intel_AES_encrypt              /* clobbers rcx, rdx, xmm0, xmm1 and the flags */
+intel_AES_encrypt:
+       add $0xf, %rdx                  # make key struct 128-bit aligned
+       and $0xfffffffffffffff0, %rdx   # same rounding as in intel_AES_set_encrypt_key
+
+       movups (%rdi), %xmm0            # input
+       movaps (%rdx), %xmm1            # key
+       mov 240(%rdx), %ecx             # round count
+       pxor %xmm1, %xmm0               # round 0
+       lea 0x30(%rdx), %rdx            # bias rdx so the shared tail can use fixed offsets
+       cmp $12, %cl
+       jb 2f                           # round count below 12: enter at the shared tail
+       lea 0x20(%rdx), %rdx            # lea leaves the flags from cmp intact
+       je 1f                           # exactly 12 rounds
+       lea 0x20(%rdx), %rdx            # more than 12 rounds: two extra rounds first
+       movaps -0x60(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps -0x50(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+1:                                     # entry for 12 rounds: two rounds, then the shared tail
+       movaps -0x40(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps -0x30(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+2:                                     # shared tail: nine aesenc rounds followed by aesenclast
+       movaps -0x20(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps -0x10(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps (%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x10(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x20(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x30(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x40(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x50(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x60(%rdx), %xmm1
+       # aesenc %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xdc, 0xc1
+       movaps 0x70(%rdx), %xmm1
+       # aesenclast %xmm1, %xmm0               # last round
+       .byte 0x66, 0x0f, 0x38, 0xdd, 0xc1
+       movups %xmm0, (%rsi)            # output
+       ret
+
+.align 16                              /* in: rdi = 16-byte input block, rsi = 16-byte output block, rdx = key structure */
+.global intel_AES_decrypt              /* presumably expects the schedule built by intel_AES_set_decrypt_key; clobbers rcx, rdx, xmm0, xmm1, flags */
+intel_AES_decrypt:
+       add $0xf, %rdx                  # make key struct 128-bit aligned
+       and $0xfffffffffffffff0, %rdx   # same rounding as in intel_AES_set_encrypt_key
+
+       movups (%rdi), %xmm0            # input
+       movaps (%rdx), %xmm1            # key
+       mov 240(%rdx), %ecx             # round count
+       pxor %xmm1, %xmm0               # round 0
+       lea 0x30(%rdx), %rdx            # bias rdx so the shared tail can use fixed offsets
+       cmp $12, %cl
+       jb 2f                           # round count below 12: enter at the shared tail
+       lea 0x20(%rdx), %rdx            # lea leaves the flags from cmp intact
+       je 1f                           # exactly 12 rounds
+       lea 0x20(%rdx), %rdx            # more than 12 rounds: two extra rounds first
+       movaps -0x60(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps -0x50(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+1:                                     # entry for 12 rounds: two rounds, then the shared tail
+       movaps -0x40(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps -0x30(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+2:                                     # shared tail: nine aesdec rounds followed by aesdeclast
+       movaps -0x20(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps -0x10(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps (%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x10(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x20(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x30(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x40(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x50(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x60(%rdx), %xmm1
+       # aesdec %xmm1, %xmm0
+       .byte 0x66, 0x0f, 0x38, 0xde, 0xc1
+       movaps 0x70(%rdx), %xmm1
+       # aesdeclast %xmm1, %xmm0               # last round
+       .byte 0x66, 0x0f, 0x38, 0xdf, 0xc1
+       movups %xmm0, (%rsi)            # output
+       ret
--- a/crypto/aes/aes.h
+++ b/crypto/aes/aes.h
@@ -80,6 +80,8 @@ struct aes_key_st {
     unsigned int rd_key[4 *(AES_MAXNR + 1)];
 #endif
     int rounds;
+    /* Intel AES-NI needs aes_key_st to be 128-bit aligned */
+    unsigned int pad[3];
 };
 typedef struct aes_key_st AES_KEY;
 
@@ -100,6 +102,9 @@ void AES_ecb_encrypt(const unsigned char
 void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
        size_t length, const AES_KEY *key,
        unsigned char *ivec, const int enc);
+void AES_cbc_encrypt_def(const unsigned char *in, unsigned char *out,
+       size_t length, const AES_KEY *key,
+       unsigned char *ivec, const int enc);
 void AES_cfb128_encrypt(const unsigned char *in, unsigned char *out,
        size_t length, const AES_KEY *key,
        unsigned char *ivec, int *num, const int enc);
--- a/crypto/aes/Makefile
+++ b/crypto/aes/Makefile
@@ -11,7 +11,7 @@ CFLAG=-g
 MAKEFILE=      Makefile
 AR=            ar r
 
-AES_ENC=aes_core.o aes_cbc.o
+AES_ENC=aes_core.o aes_cbc.o aes_cbc_def.o
 
 CFLAGS= $(INCLUDES) $(CFLAG)
 ASFLAGS= $(INCLUDES) $(ASFLAG)
@@ -23,8 +23,8 @@ TEST=
 APPS=
 
 LIB=$(TOP)/libcrypto.a
-LIBSRC=aes_core.c aes_misc.c aes_ecb.c aes_cbc.c aes_cfb.c aes_ofb.c \
-       aes_ctr.c aes_ige.c aes_wrap.c
+LIBSRC=aes_core.c aes_misc.c aes_ecb.c aes_cbc.c aes_cbc_def.c aes_cfb.c \
+       aes_ofb.c aes_ctr.c aes_ige.c aes_wrap.c
 LIBOBJ=aes_misc.o aes_ecb.o aes_cfb.o aes_ofb.o aes_ctr.o aes_ige.o aes_wrap.o 
\
        $(AES_ENC)
 
@@ -54,6 +54,9 @@ aes-586.s:    asm/aes-586.pl ../perlasm/x86
 aes-x86_64.s: asm/aes-x86_64.pl
        $(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@
 
+aes-intel.s: asm/aes-intel.S
+       $(CC) $(CFLAGS) -E asm/aes-intel.S > $@
+
 aes-sparcv9.s: asm/aes-sparcv9.pl
        $(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
 
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -584,6 +584,10 @@ $code.=<<___;
 .type  AES_encrypt,[EMAIL PROTECTED],3
 .align 16
 AES_encrypt:
+       mov     OPENSSL_ia32cap_P(%rip),%rax
+       bt      \$57,%rax
+       jc      intel_AES_encrypt
+
        push    %rbx
        push    %rbp
        push    %r12
@@ -1176,6 +1180,10 @@ $code.=<<___;
 .type  AES_decrypt,[EMAIL PROTECTED],3
 .align 16
 AES_decrypt:
+       mov     OPENSSL_ia32cap_P(%rip),%rax
+       bt      \$57,%rax
+       jc      intel_AES_decrypt
+
        push    %rbx
        push    %rbp
        push    %r12
@@ -1271,6 +1279,10 @@ $code.=<<___;
 .type  AES_set_encrypt_key,[EMAIL PROTECTED],3
 .align 16
 AES_set_encrypt_key:
+       mov     OPENSSL_ia32cap_P(%rip),%rax
+       bt      \$57,%rax
+       jc      intel_AES_set_encrypt_key
+
        call    _x86_64_AES_set_encrypt_key
        ret
 .size  AES_set_encrypt_key,.-AES_set_encrypt_key
@@ -1528,6 +1540,10 @@ $code.=<<___;
 .type  AES_set_decrypt_key,[EMAIL PROTECTED],3
 .align 16
 AES_set_decrypt_key:
+       mov     OPENSSL_ia32cap_P(%rip),%rax
+       bt      \$57,%rax
+       jc      intel_AES_set_decrypt_key
+
        push    %rdx                    # save key schedule
        call    _x86_64_AES_set_encrypt_key
        cmp     \$0,%eax
@@ -1620,6 +1636,10 @@ $code.=<<___;
 .align 16
 .extern        OPENSSL_ia32cap_P
 AES_cbc_encrypt:
+       mov     OPENSSL_ia32cap_P(%rip),%rax
+       bt      \$57,%rax
+       jc      AES_cbc_encrypt_def
+
        cmp     \$0,%rdx        # check length
        je      .Lcbc_just_ret
        push    %rbx
--- a/Configure
+++ b/Configure
@@ -120,16 +120,16 @@ my $x86_asm="x86cpuid.o:bn-586.o co-586.
 
 my $x86_elf_asm="$x86_asm:elf";
 
-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o 
x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
-my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::void";
-my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o 
aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o 
aes_cbc_def.o aes-intel.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
+my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes_cbc_def.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o 
rc4_skey.o:::::void";
+my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o 
sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes_cbc_def.o 
aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
 my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
 my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o::::::::::::void";
 my $mips3_asm=":bn-mips3.o::::::::::::void";
-my $s390x_asm=":bn-s390x.o::aes_cbc.o aes-s390x.o:::sha1-s390x.o 
sha256-s390x.o sha512-s390x.o:::::::void";
-my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o 
aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::void";
-my $ppc32_asm="ppccpuid.o:bn-ppc.o::aes_core.o aes_cbc.o 
aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
-my $ppc64_asm="ppccpuid.o:bn-ppc.o ppc-mont.o::aes_core.o aes_cbc.o 
aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
+my $s390x_asm=":bn-s390x.o::aes_cbc.o aes_cbc_def.o aes-s390x.o:::sha1-s390x.o 
sha256-s390x.o sha512-s390x.o:::::::void";
+my $armv4_asm=":bn_asm.o armv4-mont.o::aes_cbc.o aes_cbc_def.o 
aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::void";
+my $ppc32_asm="ppccpuid.o:bn-ppc.o::aes_core.o aes_cbc.o aes_cbc_def.o 
aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::";
+my $ppc64_asm="ppccpuid.o:bn-ppc.o ppc-mont.o::aes_core.o aes_cbc.o 
aes_cbc_def.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o sha512-ppc.o::::::";
 my $no_asm=":::::::::::::void";
 
 # As for $BSDthreads. Idea is to maintain "collective" set of flags,
@@ -178,7 +178,7 @@ my %table=(
 "debug-linux-ppro","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG 
-DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -mcpu=pentiumpro 
-Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn",
 "debug-linux-elf","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG 
-DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 
-Wall::-D_REENTRANT::-lefence -ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "debug-linux-elf-noefence","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG 
-DBN_CTX_DEBUG -DCRYPTO_MDEBUG -DL_ENDIAN -DTERMIO -g -march=i486 
-Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o 
crypt586.o:aes_x86core.o aes_cbc.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o 
sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"debug-linux-ia32-aes", "gcc:-DAES_EXPERIMENTAL -DL_ENDIAN -DTERMIO -O3 
-fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} 
${x86_gcc_opts}:x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o 
crypt586.o:aes_x86core.o aes_cbc.o aes_cbc_def.o:bf-586.o:md5-586.o:sha1-586.o 
sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o 
wp-mmx.o::elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
 "dist",                "cc:-O::(unknown)::::::",
 
 # Basic configs that should work on any (32 and less bit) box
@@ -479,7 +479,7 @@ my %table=(
 # Visual C targets
 #
 # Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
-"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 
-DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE 
-D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT 
RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o 
aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32",
+"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 
-DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE 
-D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT 
RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o 
aes_cbc_def.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o 
sha512-ia64.o:::::::ias:win32",
 "VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 
-DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE 
-D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT 
RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o 
x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o 
sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o::ml64:win32",
 # x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
 # 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE'
@@ -629,7 +629,7 @@ my $rc2     ="crypto/rc2/rc2.h";
 my $bf ="crypto/bf/bf_locl.h";
 my $bn_asm     ="bn_asm.o";
 my $des_enc="des_enc.o fcrypt_b.o";
-my $aes_enc="aes_core.o aes_cbc.o";
+my $aes_enc="aes_core.o aes_cbc.o aes_cbc_def.o";
 my $bf_enc     ="bf_enc.o";
 my $cast_enc="c_enc.o";
 my $rc4_enc="rc4_enc.o rc4_skey.o";
--- a/crypto/aes/aes_cbc.c
+++ b/crypto/aes/aes_cbc.c
@@ -62,69 +62,5 @@
 void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
                      size_t len, const AES_KEY *key,
                      unsigned char *ivec, const int enc) {
-
-       size_t n;
-       unsigned char tmp[AES_BLOCK_SIZE];
-       const unsigned char *iv = ivec;
-
-       assert(in && out && key && ivec);
-       assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
-
-       if (AES_ENCRYPT == enc) {
-               while (len >= AES_BLOCK_SIZE) {
-                       for(n=0; n < AES_BLOCK_SIZE; ++n)
-                               out[n] = in[n] ^ iv[n];
-                       AES_encrypt(out, out, key);
-                       iv = out;
-                       len -= AES_BLOCK_SIZE;
-                       in += AES_BLOCK_SIZE;
-                       out += AES_BLOCK_SIZE;
-               }
-               if (len) {
-                       for(n=0; n < len; ++n)
-                               out[n] = in[n] ^ iv[n];
-                       for(n=len; n < AES_BLOCK_SIZE; ++n)
-                               out[n] = iv[n];
-                       AES_encrypt(out, out, key);
-                       iv = out;
-               }
-               memcpy(ivec,iv,AES_BLOCK_SIZE);
-       } else if (in != out) {
-               while (len >= AES_BLOCK_SIZE) {
-                       AES_decrypt(in, out, key);
-                       for(n=0; n < AES_BLOCK_SIZE; ++n)
-                               out[n] ^= iv[n];
-                       iv = in;
-                       len -= AES_BLOCK_SIZE;
-                       in  += AES_BLOCK_SIZE;
-                       out += AES_BLOCK_SIZE;
-               }
-               if (len) {
-                       AES_decrypt(in,tmp,key);
-                       for(n=0; n < len; ++n)
-                               out[n] = tmp[n] ^ iv[n];
-                       iv = in;
-               }
-               memcpy(ivec,iv,AES_BLOCK_SIZE);
-       } else {
-               while (len >= AES_BLOCK_SIZE) {
-                       memcpy(tmp, in, AES_BLOCK_SIZE);
-                       AES_decrypt(in, out, key);
-                       for(n=0; n < AES_BLOCK_SIZE; ++n)
-                               out[n] ^= ivec[n];
-                       memcpy(ivec, tmp, AES_BLOCK_SIZE);
-                       len -= AES_BLOCK_SIZE;
-                       in += AES_BLOCK_SIZE;
-                       out += AES_BLOCK_SIZE;
-               }
-               if (len) {
-                       memcpy(tmp, in, AES_BLOCK_SIZE);
-                       AES_decrypt(tmp, out, key);
-                       for(n=0; n < len; ++n)
-                               out[n] ^= ivec[n];
-                       for(n=len; n < AES_BLOCK_SIZE; ++n)
-                               out[n] = tmp[n];
-                       memcpy(ivec, tmp, AES_BLOCK_SIZE);
-               }
-       }
+    AES_cbc_encrypt_def(in, out, len, key, ivec, enc); /* thin wrapper around the generic C CBC code */
 }
--- /dev/null
+++ b/crypto/aes/aes_cbc_def.c
@@ -0,0 +1,130 @@
+/* crypto/aes/aes_cbc_def.c -*- mode:C; c-file-style: "eay" -*- */
+/* ====================================================================
+ * Copyright (c) 1998-2002 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+#  define NDEBUG
+# endif
+#endif
+#include <assert.h>
+
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+void AES_cbc_encrypt_def(const unsigned char *in, unsigned char *out,
+                        size_t len, const AES_KEY *key,
+                        unsigned char *ivec, const int enc) {
+       /* Generic C AES-CBC; ivec holds the IV and is updated on return. */
+       unsigned char saved[AES_BLOCK_SIZE];
+       const unsigned char *chain = ivec;
+       size_t i;
+
+       assert(in && out && key && ivec);
+       assert((AES_ENCRYPT == enc)||(AES_DECRYPT == enc));
+
+       if (enc == AES_ENCRYPT) {
+               /* Every ciphertext block becomes the next chaining value. */
+               for (; len >= AES_BLOCK_SIZE; len -= AES_BLOCK_SIZE) {
+                       for (i = 0; i < AES_BLOCK_SIZE; i++)
+                               out[i] = in[i] ^ chain[i];
+                       AES_encrypt(out, out, key);
+                       chain = out;
+                       in += AES_BLOCK_SIZE;
+                       out += AES_BLOCK_SIZE;
+               }
+               if (len != 0) {
+                       /* Bytes past len are taken from the chaining value. */
+                       for (i = 0; i < AES_BLOCK_SIZE; i++)
+                               out[i] = (i < len) ? (in[i] ^ chain[i]) : chain[i];
+                       AES_encrypt(out, out, key);
+                       chain = out;
+               }
+               memcpy(ivec, chain, AES_BLOCK_SIZE);
+               return;
+       }
+
+       if (in != out) {
+               /* Separate buffers: the input itself is the chaining value. */
+               for (; len >= AES_BLOCK_SIZE; len -= AES_BLOCK_SIZE) {
+                       AES_decrypt(in, out, key);
+                       for (i = 0; i < AES_BLOCK_SIZE; i++)
+                               out[i] ^= chain[i];
+                       chain = in;
+                       in += AES_BLOCK_SIZE;
+                       out += AES_BLOCK_SIZE;
+               }
+               if (len != 0) {
+                       AES_decrypt(in, saved, key);
+                       for (i = 0; i < len; i++)
+                               out[i] = saved[i] ^ chain[i];
+                       chain = in;
+               }
+               memcpy(ivec, chain, AES_BLOCK_SIZE);
+               return;
+       }
+       /* In place: save each ciphertext block before it is overwritten. */
+       for (; len >= AES_BLOCK_SIZE; len -= AES_BLOCK_SIZE) {
+               memcpy(saved, in, AES_BLOCK_SIZE);
+               AES_decrypt(in, out, key);
+               for (i = 0; i < AES_BLOCK_SIZE; i++)
+                       out[i] ^= ivec[i];
+               memcpy(ivec, saved, AES_BLOCK_SIZE);
+               in += AES_BLOCK_SIZE;
+               out += AES_BLOCK_SIZE;
+       }
+       if (len != 0) {
+               memcpy(saved, in, AES_BLOCK_SIZE);
+               AES_decrypt(saved, out, key);
+               for (i = 0; i < AES_BLOCK_SIZE; i++)
+                       out[i] = (i < len) ? (out[i] ^ ivec[i]) : saved[i];
+               memcpy(ivec, saved, AES_BLOCK_SIZE);
+       }
+}

Attachment: signature.asc
Description: This is a digitally signed message part

Reply via email to