Author: jkim
Date: Mon May 16 19:30:27 2016
New Revision: 299966
URL: https://svnweb.freebsd.org/changeset/base/299966

Log:
  - Make libcrypto.so position independent on i386.
  - Enable a linker error when libcrypto.so contains a relocation against text.
  - Add a "Do not modify" comment to the generated source files.
  - Set the CC environment variable for the Perl scripts to enable AVX instructions.
  - Update __FreeBSD_version to indicate that libcrypto.so is position independent.

  Note this is a direct commit because head has the OpenSSL 1.0.2 branch, but it
  is based on r299389, r299462, r299464, r299479, and r299480.
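
  As a quick sanity check (illustrative commands only, not part of this
  commit; adjust the path to the installed library), a position-independent
  libcrypto.so should no longer carry a TEXTREL tag in its dynamic section:

    # expect no output once the library is built without text relocations
    readelf -d /lib/libcrypto.so.7 | grep TEXTREL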

Added:
  stable/10/secure/lib/libcrypto/i386/aes-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/aes-586.s
  stable/10/secure/lib/libcrypto/i386/aesni-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/aesni-x86.s
  stable/10/secure/lib/libcrypto/i386/bf-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bf-586.s
  stable/10/secure/lib/libcrypto/i386/bf-686.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bf-686.s
  stable/10/secure/lib/libcrypto/i386/bn-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/bn-586.s
  stable/10/secure/lib/libcrypto/i386/cast-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/cast-586.s
  stable/10/secure/lib/libcrypto/i386/cmll-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/cmll-x86.s
  stable/10/secure/lib/libcrypto/i386/co-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/co-586.s
  stable/10/secure/lib/libcrypto/i386/crypt586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/crypt586.s
  stable/10/secure/lib/libcrypto/i386/des-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/des-586.s
  stable/10/secure/lib/libcrypto/i386/ghash-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/ghash-x86.s
  stable/10/secure/lib/libcrypto/i386/md5-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/md5-586.s
  stable/10/secure/lib/libcrypto/i386/rc4-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rc4-586.s
  stable/10/secure/lib/libcrypto/i386/rc5-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rc5-586.s
  stable/10/secure/lib/libcrypto/i386/rmd-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/rmd-586.s
  stable/10/secure/lib/libcrypto/i386/sha1-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha1-586.s
  stable/10/secure/lib/libcrypto/i386/sha256-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha256-586.s
  stable/10/secure/lib/libcrypto/i386/sha512-586.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/sha512-586.s
  stable/10/secure/lib/libcrypto/i386/vpaes-x86.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/vpaes-x86.s
  stable/10/secure/lib/libcrypto/i386/wp-mmx.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/wp-mmx.s
  stable/10/secure/lib/libcrypto/i386/x86-gf2m.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86-gf2m.s
  stable/10/secure/lib/libcrypto/i386/x86-mont.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86-mont.s
  stable/10/secure/lib/libcrypto/i386/x86cpuid.S
     - copied, changed from r299965, stable/10/secure/lib/libcrypto/i386/x86cpuid.s
Deleted:
  stable/10/secure/lib/libcrypto/i386/aes-586.s
  stable/10/secure/lib/libcrypto/i386/aesni-x86.s
  stable/10/secure/lib/libcrypto/i386/bf-586.s
  stable/10/secure/lib/libcrypto/i386/bf-686.s
  stable/10/secure/lib/libcrypto/i386/bn-586.s
  stable/10/secure/lib/libcrypto/i386/cast-586.s
  stable/10/secure/lib/libcrypto/i386/cmll-x86.s
  stable/10/secure/lib/libcrypto/i386/co-586.s
  stable/10/secure/lib/libcrypto/i386/crypt586.s
  stable/10/secure/lib/libcrypto/i386/des-586.s
  stable/10/secure/lib/libcrypto/i386/ghash-x86.s
  stable/10/secure/lib/libcrypto/i386/md5-586.s
  stable/10/secure/lib/libcrypto/i386/rc4-586.s
  stable/10/secure/lib/libcrypto/i386/rc5-586.s
  stable/10/secure/lib/libcrypto/i386/rmd-586.s
  stable/10/secure/lib/libcrypto/i386/sha1-586.s
  stable/10/secure/lib/libcrypto/i386/sha256-586.s
  stable/10/secure/lib/libcrypto/i386/sha512-586.s
  stable/10/secure/lib/libcrypto/i386/vpaes-x86.s
  stable/10/secure/lib/libcrypto/i386/wp-mmx.s
  stable/10/secure/lib/libcrypto/i386/x86-gf2m.s
  stable/10/secure/lib/libcrypto/i386/x86-mont.s
  stable/10/secure/lib/libcrypto/i386/x86cpuid.s
Modified:
  stable/10/secure/lib/libcrypto/Makefile
  stable/10/secure/lib/libcrypto/Makefile.asm
  stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/aesni-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/bsaes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/cmll-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/ghash-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/md5-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/modexp512-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/rc4-md5-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/rc4-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha1-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha256-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/sha512-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/vpaes-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/wp-x86_64.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-gf2m.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-mont.S
  stable/10/secure/lib/libcrypto/amd64/x86_64-mont5.S
  stable/10/secure/lib/libcrypto/amd64/x86_64cpuid.S
  stable/10/sys/sys/param.h

Modified: stable/10/secure/lib/libcrypto/Makefile
==============================================================================
--- stable/10/secure/lib/libcrypto/Makefile     Mon May 16 19:10:59 2016        (r299965)
+++ stable/10/secure/lib/libcrypto/Makefile     Mon May 16 19:30:27 2016        (r299966)
@@ -7,7 +7,6 @@ SUBDIR=         engines
 
 LIB=           crypto
 SHLIB_MAJOR=   7
-ALLOW_SHARED_TEXTREL=
 
 NO_LINT=
 
@@ -26,7 +25,7 @@ SRCS= cpt_err.c cryptlib.c cversion.c ex
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+= x86_64cpuid.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= x86cpuid.s
+SRCS+= x86cpuid.S
 .else
 SRCS+= mem_clr.c
 .endif
@@ -38,7 +37,7 @@ SRCS+=        aes_cfb.c aes_ctr.c aes_ecb.c aes
 SRCS+= aes-x86_64.S aesni-sha1-x86_64.S aesni-x86_64.S bsaes-x86_64.S \
        vpaes-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= aes-586.s aesni-x86.s vpaes-x86.s
+SRCS+= aes-586.S aesni-x86.S vpaes-x86.S
 .else
 SRCS+= aes_cbc.c aes_core.c
 .endif
@@ -63,9 +62,9 @@ INCS+=        asn1.h asn1_mac.h asn1t.h
 SRCS+= bf_cfb64.c bf_ecb.c bf_ofb64.c bf_skey.c
 .if ${MACHINE_CPUARCH} == "i386"
 .if ${MACHINE_CPU:Mi686}
-SRCS+= bf-686.s
+SRCS+= bf-686.S
 .else
-SRCS+= bf-586.s
+SRCS+= bf-586.S
 .endif
 .else
 SRCS+= bf_enc.c
@@ -87,7 +86,7 @@ SRCS+=        bn_add.c bn_blind.c bn_const.c bn
 SRCS+= modexp512-x86_64.S x86_64-gcc.c x86_64-gf2m.S x86_64-mont.S \
        x86_64-mont5.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= bn-586.s co-586.s x86-gf2m.s x86-mont.s
+SRCS+= bn-586.S co-586.S x86-gf2m.S x86-mont.S
 .else
 SRCS+= bn_asm.c
 .endif
@@ -102,7 +101,7 @@ SRCS+=      cmll_cfb.c cmll_ctr.c cmll_ecb.c 
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+= cmll_misc.c cmll-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= cmll-x86.s
+SRCS+= cmll-x86.S
 .else
 SRCS+= camellia.c cmll_cbc.c cmll_misc.c
 .endif
@@ -136,7 +135,7 @@ SRCS+=      cbc_cksm.c cbc_enc.c cfb64ede.c c
        fcrypt.c ofb64ede.c ofb64enc.c ofb_enc.c pcbc_enc.c qud_cksm.c \
        rand_key.c read2pwd.c rpc_enc.c set_key.c str2key.c xcbc_enc.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+= crypt586.s des-586.s
+SRCS+= crypt586.S des-586.S
 .else
 SRCS+= des_enc.c fcrypt_b.c
 .endif
@@ -219,7 +218,7 @@ SRCS+=      md5_dgst.c md5_one.c
 .if ${MACHINE_CPUARCH} == "amd64"
 SRCS+= md5-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= md5-586.s
+SRCS+= md5-586.S
 .endif
 INCS+= md5.h
 
@@ -232,7 +231,7 @@ SRCS+=      cbc128.c ccm128.c cfb128.c ctr128
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+= ghash-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= ghash-x86.s
+SRCS+= ghash-x86.S
 .endif
 INCS+= modes.h
 
@@ -278,7 +277,7 @@ SRCS+=      rc4_utl.c
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+= rc4-md5-x86_64.S rc4-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= rc4-586.s
+SRCS+= rc4-586.S
 .else
 SRCS+= rc4_enc.c rc4_skey.c
 .endif
@@ -287,7 +286,7 @@ INCS+=      rc4.h
 # rc5
 SRCS+= rc5_ecb.c rc5_skey.c rc5cfb64.c rc5ofb64.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+= rc5-586.s
+SRCS+= rc5-586.S
 .else
 SRCS+= rc5_enc.c
 .endif
@@ -296,7 +295,7 @@ INCS+=      rc5.h
 # ripemd
 SRCS+= rmd_dgst.c rmd_one.c
 .if ${MACHINE_CPUARCH} == "i386"
-SRCS+= rmd-586.s
+SRCS+= rmd-586.S
 .endif
 INCS+= ripemd.h
 
@@ -316,7 +315,7 @@ SRCS+=      sha1_one.c sha1dgst.c sha256.c sh
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+= sha1-x86_64.S sha256-x86_64.S sha512-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= sha1-586.s sha256-586.s sha512-586.s
+SRCS+= sha1-586.S sha256-586.S sha512-586.S
 .endif
 INCS+= sha.h
 
@@ -347,7 +346,7 @@ SRCS+=      wp_dgst.c
 .if ${MACHINE_CPUARCH} == "amd64" 
 SRCS+= wp-x86_64.S
 .elif ${MACHINE_CPUARCH} == "i386"
-SRCS+= wp-mmx.s wp_block.c
+SRCS+= wp-mmx.S wp_block.c
 .else
 SRCS+= wp_block.c
 .endif
@@ -379,9 +378,6 @@ CFLAGS+=    -I${LCRYPTO_SRC}/crypto/asn1
 CFLAGS+=       -I${LCRYPTO_SRC}/crypto/evp
 CFLAGS+=       -I${LCRYPTO_SRC}/crypto/modes
 
-.if !empty(SRCS:M*.s)
-AFLAGS+=       --noexecstack
-.endif
 .if !empty(SRCS:M*.S)
 ACFLAGS+=      -Wa,--noexecstack
 .endif
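
  Since every i386 assembly source is now a preprocessed .S file, only the
  ACFLAGS branch above is still used.  A hedged way to confirm the built
  library still requests a non-executable stack (assumes a readelf from base
  or binutils):

    # the GNU_STACK program header flags should read RW, not RWE
    readelf -lW libcrypto.so.7 | grep GNU_STACK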

Modified: stable/10/secure/lib/libcrypto/Makefile.asm
==============================================================================
--- stable/10/secure/lib/libcrypto/Makefile.asm Mon May 16 19:10:59 2016        (r299965)
+++ stable/10/secure/lib/libcrypto/Makefile.asm Mon May 16 19:30:27 2016        (r299966)
@@ -1,8 +1,8 @@
 # $FreeBSD$
-# Use this to help generate the asm *.[Ss] files after an import.  It is not
+# Use this to help generate the asm *.S files after an import.  It is not
 # perfect by any means, but does what is needed.
-# Do a 'make -f Makefile.asm all' and it will generate *.s.  Move them
-# to the i386 subdir, and correct any exposed paths and $ FreeBSD $ tags.
+# Do a 'make -f Makefile.asm all' and it will generate *.S.  Move them
+# to the arch subdir, and correct any exposed paths and $ FreeBSD $ tags.
 
 .include "Makefile.inc"
 
@@ -39,31 +39,39 @@ SRCS+=      ghash-x86_64.pl
 SRCS+= rc4-md5-x86_64.pl rc4-x86_64.pl
 
 # sha
-SRCS+= sha1-x86_64.pl sha512-x86_64.pl
+SRCS+= sha1-x86_64.pl
 
 # whrlpool
 SRCS+= wp-x86_64.pl
 
-ASM=   ${SRCS:S/.pl/.S/}
-ASM+=  sha256-x86_64.S x86_64cpuid.S
+# cpuid
+SRCS+= x86_64cpuid.pl
 
-all:   ${ASM}
+SHA_ASM=       sha256-x86_64 sha512-x86_64
+SHA_SRC=       sha512-x86_64.pl
+SHA_TMP=       ${SHA_ASM:S/$/.s/}
 
-CLEANFILES+=   ${SRCS:M*.pl:S/.pl$/.cmt/} ${SRCS:M*.pl:S/.pl$/.S/}
-CLEANFILES+=   sha256-x86_64.cmt sha256-x86_64.S x86_64cpuid.cmt x86_64cpuid.S
-.SUFFIXES:     .pl .cmt
+ASM=   ${SRCS:R:S/$/.S/} ${SHA_ASM:S/$/.S/}
 
-.pl.cmt:
-       ( cd `dirname ${.IMPSRC}`/.. ; perl ${.IMPSRC} ${.OBJDIR}/${.TARGET} )
+all:   ${ASM}
 
-.cmt.S:
-       ( echo '        # $$'FreeBSD'$$'; cat ${.IMPSRC} ) > ${.TARGET}
+CLEANFILES=    ${ASM} ${SHA_ASM:S/$/.s/}
+.SUFFIXES:     .pl
 
-sha256-x86_64.cmt: sha512-x86_64.pl
-       ( cd `dirname ${.ALLSRC}`/.. ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+.pl.S:
+       ( echo '# $$'FreeBSD'$$' ;\
+       echo '# Do not modify. This file is auto-generated from ${.IMPSRC:T}.' ;\
+       env CC=cc perl ${.IMPSRC} elf ) > ${.TARGET}
 
-x86_64cpuid.cmt: x86_64cpuid.pl
-       ( cd `dirname ${.ALLSRC}` ; perl ${.ALLSRC} ${.OBJDIR}/${.TARGET} )
+${SHA_TMP}: ${SHA_SRC}
+       env CC=cc perl ${.ALLSRC} elf ${.TARGET}
+
+.for s in ${SHA_ASM}
+${s}.S: ${s}.s
+       ( echo '        # $$'FreeBSD'$$' ;\
+       echo '  # Do not modify. This file is auto-generated from ${SHA_SRC}.' ;\
+       cat ${s}.s ) > ${.TARGET}
+.endfor
 
 .elif ${MACHINE_CPUARCH} == "i386"
 
@@ -126,16 +134,22 @@ SRCS+=    wp-mmx.pl
 # cpuid
 SRCS+= x86cpuid.pl
 
-ASM=   ${SRCS:S/.pl/.s/}
+ASM=   ${SRCS:R:S/$/.S/}
 
 all:   ${ASM}
 
-CLEANFILES+=   ${SRCS:M*.pl:S/.pl$/.s/}
+CLEANFILES=    ${ASM}
 .SUFFIXES:     .pl
 
-.pl.s:
-       ( echo '        # $$'FreeBSD'$$' ;\
-       perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ) > ${.TARGET}
+.pl.S:
+       ( echo '# $$'FreeBSD'$$' ;\
+       echo '# Do not modify. This file is auto-generated from ${.IMPSRC:T}.' ;\
+       echo '#ifdef PIC' ;\
+       env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} -fpic -DPIC ;\
+       echo '#else' ;\
+       env CC=cc perl ${PERLPATH} ${.IMPSRC} elf ${CFLAGS} ;\
+       echo '#endif') |\
+       sed -E 's|(\.file[[:blank:]]+)".*"|\1"${.TARGET}"|' > ${.TARGET}
 .endif
 
 .include <bsd.prog.mk>
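
  With the rules above, each generated i386 *.S embeds both a PIC body (built
  with -fpic -DPIC) and a non-PIC body under #ifdef PIC.  A hedged sketch of
  regenerating the files after an OpenSSL import, following the comment at
  the top of Makefile.asm (assumes an i386 build host or MACHINE_CPUARCH set
  accordingly):

    cd secure/lib/libcrypto
    make -f Makefile.asm all    # runs the .pl.S rules and emits the *.S files
    # then move the results to the arch subdir and fix up exposed paths
    # and $FreeBSD$ tags, e.g.:
    mv *.S i386/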

Modified: stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S
==============================================================================
--- stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S   Mon May 16 19:10:59 2016        (r299965)
+++ stable/10/secure/lib/libcrypto/amd64/aes-x86_64.S   Mon May 16 19:30:27 2016        (r299966)
@@ -1,4 +1,5 @@
-       # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from aes-x86_64.pl.
 .text  
 .type  _x86_64_AES_encrypt,@function
 .align 16

Modified: stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S
==============================================================================
--- stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S    Mon May 16 19:10:59 2016        (r299965)
+++ stable/10/secure/lib/libcrypto/amd64/aesni-sha1-x86_64.S    Mon May 16 19:30:27 2016        (r299966)
@@ -1,4 +1,5 @@
-       # $FreeBSD$
+# $FreeBSD$
+# Do not modify. This file is auto-generated from aesni-sha1-x86_64.pl.
 .text  
 
 
@@ -9,6 +10,11 @@ aesni_cbc_sha1_enc:
 
        movl    OPENSSL_ia32cap_P+0(%rip),%r10d
        movl    OPENSSL_ia32cap_P+4(%rip),%r11d
+       andl    $268435456,%r11d
+       andl    $1073741824,%r10d
+       orl     %r11d,%r10d
+       cmpl    $1342177280,%r10d
+       je      aesni_cbc_sha1_enc_avx
        jmp     aesni_cbc_sha1_enc_ssse3
        .byte   0xf3,0xc3
 .size  aesni_cbc_sha1_enc,.-aesni_cbc_sha1_enc
@@ -1385,6 +1391,1343 @@ aesni_cbc_sha1_enc_ssse3:
 .Lepilogue_ssse3:
        .byte   0xf3,0xc3
 .size  aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3
+.type  aesni_cbc_sha1_enc_avx,@function
+.align 16
+aesni_cbc_sha1_enc_avx:
+       movq    8(%rsp),%r10
+
+
+       pushq   %rbx
+       pushq   %rbp
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+       leaq    -104(%rsp),%rsp
+
+
+       vzeroall
+       movq    %rdi,%r12
+       movq    %rsi,%r13
+       movq    %rdx,%r14
+       movq    %rcx,%r15
+       vmovdqu (%r8),%xmm11
+       movq    %r8,88(%rsp)
+       shlq    $6,%r14
+       subq    %r12,%r13
+       movl    240(%r15),%r8d
+       addq    $112,%r15
+       addq    %r10,%r14
+
+       leaq    K_XX_XX(%rip),%r11
+       movl    0(%r9),%eax
+       movl    4(%r9),%ebx
+       movl    8(%r9),%ecx
+       movl    12(%r9),%edx
+       movl    %ebx,%esi
+       movl    16(%r9),%ebp
+
+       vmovdqa 64(%r11),%xmm6
+       vmovdqa 0(%r11),%xmm9
+       vmovdqu 0(%r10),%xmm0
+       vmovdqu 16(%r10),%xmm1
+       vmovdqu 32(%r10),%xmm2
+       vmovdqu 48(%r10),%xmm3
+       vpshufb %xmm6,%xmm0,%xmm0
+       addq    $64,%r10
+       vpshufb %xmm6,%xmm1,%xmm1
+       vpshufb %xmm6,%xmm2,%xmm2
+       vpshufb %xmm6,%xmm3,%xmm3
+       vpaddd  %xmm9,%xmm0,%xmm4
+       vpaddd  %xmm9,%xmm1,%xmm5
+       vpaddd  %xmm9,%xmm2,%xmm6
+       vmovdqa %xmm4,0(%rsp)
+       vmovdqa %xmm5,16(%rsp)
+       vmovdqa %xmm6,32(%rsp)
+       vmovups -112(%r15),%xmm13
+       vmovups 16-112(%r15),%xmm14
+       jmp     .Loop_avx
+.align 16
+.Loop_avx:
+       addl    0(%rsp),%ebp
+       vmovups 0(%r12),%xmm12
+       vxorps  %xmm13,%xmm12,%xmm12
+       vxorps  %xmm12,%xmm11,%xmm11
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -80(%r15),%xmm15
+       xorl    %edx,%ecx
+       vpalignr        $8,%xmm0,%xmm1,%xmm4
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       vpaddd  %xmm3,%xmm9,%xmm9
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       vpsrldq $4,%xmm3,%xmm8
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       vpxor   %xmm0,%xmm4,%xmm4
+       shrdl   $2,%ebx,%ebx
+       addl    %esi,%ebp
+       vpxor   %xmm2,%xmm8,%xmm8
+       addl    4(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm8,%xmm4,%xmm4
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       vmovdqa %xmm9,48(%rsp)
+       xorl    %ecx,%edi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -64(%r15),%xmm14
+       addl    %ebp,%edx
+       vpsrld  $31,%xmm4,%xmm8
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       addl    8(%rsp),%ecx
+       xorl    %ebx,%eax
+       vpslldq $12,%xmm4,%xmm10
+       vpaddd  %xmm4,%xmm4,%xmm4
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       vpsrld  $30,%xmm10,%xmm9
+       vpor    %xmm8,%xmm4,%xmm4
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %esi,%ecx
+       vpslld  $2,%xmm10,%xmm10
+       vpxor   %xmm9,%xmm4,%xmm4
+       addl    12(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -48(%r15),%xmm15
+       vpxor   %xmm10,%xmm4,%xmm4
+       andl    %ebp,%edi
+       xorl    %eax,%ebp
+       vmovdqa 0(%r11),%xmm10
+       xorl    %eax,%edi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %edi,%ebx
+       addl    16(%rsp),%eax
+       xorl    %ebp,%edx
+       vpalignr        $8,%xmm1,%xmm2,%xmm5
+       movl    %ebx,%edi
+       shldl   $5,%ebx,%ebx
+       vpaddd  %xmm4,%xmm10,%xmm10
+       andl    %edx,%esi
+       xorl    %ebp,%edx
+       vpsrldq $4,%xmm4,%xmm9
+       xorl    %ebp,%esi
+       addl    %ebx,%eax
+       vpxor   %xmm1,%xmm5,%xmm5
+       shrdl   $7,%ecx,%ecx
+       addl    %esi,%eax
+       vpxor   %xmm3,%xmm9,%xmm9
+       addl    20(%rsp),%ebp
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -32(%r15),%xmm14
+       xorl    %edx,%ecx
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       vpxor   %xmm9,%xmm5,%xmm5
+       andl    %ecx,%edi
+       xorl    %edx,%ecx
+       vmovdqa %xmm10,0(%rsp)
+       xorl    %edx,%edi
+       addl    %eax,%ebp
+       vpsrld  $31,%xmm5,%xmm9
+       shrdl   $7,%ebx,%ebx
+       addl    %edi,%ebp
+       addl    24(%rsp),%edx
+       xorl    %ecx,%ebx
+       vpslldq $12,%xmm5,%xmm8
+       vpaddd  %xmm5,%xmm5,%xmm5
+       movl    %ebp,%edi
+       shldl   $5,%ebp,%ebp
+       andl    %ebx,%esi
+       xorl    %ecx,%ebx
+       vpsrld  $30,%xmm8,%xmm10
+       vpor    %xmm9,%xmm5,%xmm5
+       xorl    %ecx,%esi
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -16(%r15),%xmm15
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       vpslld  $2,%xmm8,%xmm8
+       vpxor   %xmm10,%xmm5,%xmm5
+       addl    28(%rsp),%ecx
+       xorl    %ebx,%eax
+       movl    %edx,%esi
+       shldl   $5,%edx,%edx
+       vpxor   %xmm8,%xmm5,%xmm5
+       andl    %eax,%edi
+       xorl    %ebx,%eax
+       vmovdqa 16(%r11),%xmm8
+       xorl    %ebx,%edi
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       addl    32(%rsp),%ebx
+       xorl    %eax,%ebp
+       vpalignr        $8,%xmm2,%xmm3,%xmm6
+       movl    %ecx,%edi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 0(%r15),%xmm14
+       vpaddd  %xmm5,%xmm8,%xmm8
+       andl    %ebp,%esi
+       xorl    %eax,%ebp
+       vpsrldq $4,%xmm5,%xmm10
+       xorl    %eax,%esi
+       addl    %ecx,%ebx
+       vpxor   %xmm2,%xmm6,%xmm6
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       vpxor   %xmm4,%xmm10,%xmm10
+       addl    36(%rsp),%eax
+       xorl    %ebp,%edx
+       movl    %ebx,%esi
+       shldl   $5,%ebx,%ebx
+       vpxor   %xmm10,%xmm6,%xmm6
+       andl    %edx,%edi
+       xorl    %ebp,%edx
+       vmovdqa %xmm8,16(%rsp)
+       xorl    %ebp,%edi
+       addl    %ebx,%eax
+       vpsrld  $31,%xmm6,%xmm10
+       shrdl   $7,%ecx,%ecx
+       addl    %edi,%eax
+       addl    40(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 16(%r15),%xmm15
+       xorl    %edx,%ecx
+       vpslldq $12,%xmm6,%xmm9
+       vpaddd  %xmm6,%xmm6,%xmm6
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       andl    %ecx,%esi
+       xorl    %edx,%ecx
+       vpsrld  $30,%xmm9,%xmm8
+       vpor    %xmm10,%xmm6,%xmm6
+       xorl    %edx,%esi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %esi,%ebp
+       vpslld  $2,%xmm9,%xmm9
+       vpxor   %xmm8,%xmm6,%xmm6
+       addl    44(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm9,%xmm6,%xmm6
+       andl    %ebx,%edi
+       xorl    %ecx,%ebx
+       vmovdqa 16(%r11),%xmm9
+       xorl    %ecx,%edi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 32(%r15),%xmm14
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       addl    48(%rsp),%ecx
+       xorl    %ebx,%eax
+       vpalignr        $8,%xmm3,%xmm4,%xmm7
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       vpaddd  %xmm6,%xmm9,%xmm9
+       andl    %eax,%esi
+       xorl    %ebx,%eax
+       vpsrldq $4,%xmm6,%xmm8
+       xorl    %ebx,%esi
+       addl    %edx,%ecx
+       vpxor   %xmm3,%xmm7,%xmm7
+       shrdl   $7,%ebp,%ebp
+       addl    %esi,%ecx
+       vpxor   %xmm5,%xmm8,%xmm8
+       addl    52(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 48(%r15),%xmm15
+       vpxor   %xmm8,%xmm7,%xmm7
+       andl    %ebp,%edi
+       xorl    %eax,%ebp
+       vmovdqa %xmm9,32(%rsp)
+       xorl    %eax,%edi
+       addl    %ecx,%ebx
+       vpsrld  $31,%xmm7,%xmm8
+       shrdl   $7,%edx,%edx
+       addl    %edi,%ebx
+       addl    56(%rsp),%eax
+       xorl    %ebp,%edx
+       vpslldq $12,%xmm7,%xmm10
+       vpaddd  %xmm7,%xmm7,%xmm7
+       movl    %ebx,%edi
+       shldl   $5,%ebx,%ebx
+       andl    %edx,%esi
+       xorl    %ebp,%edx
+       vpsrld  $30,%xmm10,%xmm9
+       vpor    %xmm8,%xmm7,%xmm7
+       xorl    %ebp,%esi
+       addl    %ebx,%eax
+       shrdl   $7,%ecx,%ecx
+       addl    %esi,%eax
+       vpslld  $2,%xmm10,%xmm10
+       vpxor   %xmm9,%xmm7,%xmm7
+       addl    60(%rsp),%ebp
+       cmpl    $11,%r8d
+       jb      .Lvaesenclast1
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 64(%r15),%xmm14
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 80(%r15),%xmm15
+       je      .Lvaesenclast1
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 96(%r15),%xmm14
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 112(%r15),%xmm15
+.Lvaesenclast1:
+       vaesenclast     %xmm15,%xmm11,%xmm11
+       vmovups 16-112(%r15),%xmm14
+       xorl    %edx,%ecx
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       vpxor   %xmm10,%xmm7,%xmm7
+       andl    %ecx,%edi
+       xorl    %edx,%ecx
+       vmovdqa 16(%r11),%xmm10
+       xorl    %edx,%edi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %edi,%ebp
+       vpalignr        $8,%xmm6,%xmm7,%xmm9
+       vpxor   %xmm4,%xmm0,%xmm0
+       addl    0(%rsp),%edx
+       xorl    %ecx,%ebx
+       movl    %ebp,%edi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm1,%xmm0,%xmm0
+       andl    %ebx,%esi
+       xorl    %ecx,%ebx
+       vmovdqa %xmm10,%xmm8
+       vpaddd  %xmm7,%xmm10,%xmm10
+       xorl    %ecx,%esi
+       vmovups 16(%r12),%xmm12
+       vxorps  %xmm13,%xmm12,%xmm12
+       vmovups %xmm11,0(%r13,%r12,1)
+       vxorps  %xmm12,%xmm11,%xmm11
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -80(%r15),%xmm15
+       addl    %ebp,%edx
+       vpxor   %xmm9,%xmm0,%xmm0
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       addl    4(%rsp),%ecx
+       xorl    %ebx,%eax
+       vpsrld  $30,%xmm0,%xmm9
+       vmovdqa %xmm10,48(%rsp)
+       movl    %edx,%esi
+       shldl   $5,%edx,%edx
+       andl    %eax,%edi
+       xorl    %ebx,%eax
+       vpslld  $2,%xmm0,%xmm0
+       xorl    %ebx,%edi
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       addl    8(%rsp),%ebx
+       xorl    %eax,%ebp
+       movl    %ecx,%edi
+       shldl   $5,%ecx,%ecx
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -64(%r15),%xmm14
+       vpor    %xmm9,%xmm0,%xmm0
+       andl    %ebp,%esi
+       xorl    %eax,%ebp
+       vmovdqa %xmm0,%xmm10
+       xorl    %eax,%esi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       addl    12(%rsp),%eax
+       xorl    %ebp,%edx
+       movl    %ebx,%esi
+       shldl   $5,%ebx,%ebx
+       andl    %edx,%edi
+       xorl    %ebp,%edx
+       xorl    %ebp,%edi
+       addl    %ebx,%eax
+       shrdl   $7,%ecx,%ecx
+       addl    %edi,%eax
+       vpalignr        $8,%xmm7,%xmm0,%xmm10
+       vpxor   %xmm5,%xmm1,%xmm1
+       addl    16(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -48(%r15),%xmm15
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       vpxor   %xmm2,%xmm1,%xmm1
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       vmovdqa %xmm8,%xmm9
+       vpaddd  %xmm0,%xmm8,%xmm8
+       shrdl   $7,%ebx,%ebx
+       addl    %esi,%ebp
+       vpxor   %xmm10,%xmm1,%xmm1
+       addl    20(%rsp),%edx
+       xorl    %ecx,%edi
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vpsrld  $30,%xmm1,%xmm10
+       vmovdqa %xmm8,0(%rsp)
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       vpslld  $2,%xmm1,%xmm1
+       addl    24(%rsp),%ecx
+       xorl    %ebx,%esi
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       xorl    %eax,%esi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -32(%r15),%xmm14
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %esi,%ecx
+       vpor    %xmm10,%xmm1,%xmm1
+       addl    28(%rsp),%ebx
+       xorl    %eax,%edi
+       vmovdqa %xmm1,%xmm8
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %edi,%ebx
+       vpalignr        $8,%xmm0,%xmm1,%xmm8
+       vpxor   %xmm6,%xmm2,%xmm2
+       addl    32(%rsp),%eax
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       shldl   $5,%ebx,%ebx
+       vpxor   %xmm3,%xmm2,%xmm2
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       vmovdqa 32(%r11),%xmm10
+       vpaddd  %xmm1,%xmm9,%xmm9
+       shrdl   $7,%ecx,%ecx
+       addl    %esi,%eax
+       vpxor   %xmm8,%xmm2,%xmm2
+       addl    36(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -16(%r15),%xmm15
+       xorl    %edx,%edi
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       vpsrld  $30,%xmm2,%xmm8
+       vmovdqa %xmm9,16(%rsp)
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %edi,%ebp
+       vpslld  $2,%xmm2,%xmm2
+       addl    40(%rsp),%edx
+       xorl    %ecx,%esi
+       movl    %ebp,%edi
+       shldl   $5,%ebp,%ebp
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       vpor    %xmm8,%xmm2,%xmm2
+       addl    44(%rsp),%ecx
+       xorl    %ebx,%edi
+       vmovdqa %xmm2,%xmm9
+       movl    %edx,%esi
+       shldl   $5,%edx,%edx
+       xorl    %eax,%edi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 0(%r15),%xmm14
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       vpalignr        $8,%xmm1,%xmm2,%xmm9
+       vpxor   %xmm7,%xmm3,%xmm3
+       addl    48(%rsp),%ebx
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       shldl   $5,%ecx,%ecx
+       vpxor   %xmm4,%xmm3,%xmm3
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       vmovdqa %xmm10,%xmm8
+       vpaddd  %xmm2,%xmm10,%xmm10
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       vpxor   %xmm9,%xmm3,%xmm3
+       addl    52(%rsp),%eax
+       xorl    %ebp,%edi
+       movl    %ebx,%esi
+       shldl   $5,%ebx,%ebx
+       vpsrld  $30,%xmm3,%xmm9
+       vmovdqa %xmm10,32(%rsp)
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       shrdl   $7,%ecx,%ecx
+       addl    %edi,%eax
+       vpslld  $2,%xmm3,%xmm3
+       addl    56(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 16(%r15),%xmm15
+       xorl    %edx,%esi
+       movl    %eax,%edi
+       shldl   $5,%eax,%eax
+       xorl    %ecx,%esi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %esi,%ebp
+       vpor    %xmm9,%xmm3,%xmm3
+       addl    60(%rsp),%edx
+       xorl    %ecx,%edi
+       vmovdqa %xmm3,%xmm10
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       xorl    %ebx,%edi
+       addl    %ebp,%edx
+       shrdl   $7,%eax,%eax
+       addl    %edi,%edx
+       vpalignr        $8,%xmm2,%xmm3,%xmm10
+       vpxor   %xmm0,%xmm4,%xmm4
+       addl    0(%rsp),%ecx
+       xorl    %ebx,%esi
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       vpxor   %xmm5,%xmm4,%xmm4
+       xorl    %eax,%esi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 32(%r15),%xmm14
+       addl    %edx,%ecx
+       vmovdqa %xmm8,%xmm9
+       vpaddd  %xmm3,%xmm8,%xmm8
+       shrdl   $7,%ebp,%ebp
+       addl    %esi,%ecx
+       vpxor   %xmm10,%xmm4,%xmm4
+       addl    4(%rsp),%ebx
+       xorl    %eax,%edi
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       vpsrld  $30,%xmm4,%xmm10
+       vmovdqa %xmm8,48(%rsp)
+       xorl    %ebp,%edi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %edi,%ebx
+       vpslld  $2,%xmm4,%xmm4
+       addl    8(%rsp),%eax
+       xorl    %ebp,%esi
+       movl    %ebx,%edi
+       shldl   $5,%ebx,%ebx
+       xorl    %edx,%esi
+       addl    %ebx,%eax
+       shrdl   $7,%ecx,%ecx
+       addl    %esi,%eax
+       vpor    %xmm10,%xmm4,%xmm4
+       addl    12(%rsp),%ebp
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 48(%r15),%xmm15
+       xorl    %edx,%edi
+       vmovdqa %xmm4,%xmm8
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       xorl    %ecx,%edi
+       addl    %eax,%ebp
+       shrdl   $7,%ebx,%ebx
+       addl    %edi,%ebp
+       vpalignr        $8,%xmm3,%xmm4,%xmm8
+       vpxor   %xmm1,%xmm5,%xmm5
+       addl    16(%rsp),%edx
+       xorl    %ecx,%esi
+       movl    %ebp,%edi
+       shldl   $5,%ebp,%ebp
+       vpxor   %xmm6,%xmm5,%xmm5
+       xorl    %ebx,%esi
+       addl    %ebp,%edx
+       vmovdqa %xmm9,%xmm10
+       vpaddd  %xmm4,%xmm9,%xmm9
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       vpxor   %xmm8,%xmm5,%xmm5
+       addl    20(%rsp),%ecx
+       xorl    %ebx,%edi
+       movl    %edx,%esi
+       shldl   $5,%edx,%edx
+       vpsrld  $30,%xmm5,%xmm8
+       vmovdqa %xmm9,0(%rsp)
+       xorl    %eax,%edi
+       cmpl    $11,%r8d
+       jb      .Lvaesenclast2
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 64(%r15),%xmm14
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 80(%r15),%xmm15
+       je      .Lvaesenclast2
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups 96(%r15),%xmm14
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups 112(%r15),%xmm15
+.Lvaesenclast2:
+       vaesenclast     %xmm15,%xmm11,%xmm11
+       vmovups 16-112(%r15),%xmm14
+       addl    %edx,%ecx
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       vpslld  $2,%xmm5,%xmm5
+       addl    24(%rsp),%ebx
+       xorl    %eax,%esi
+       movl    %ecx,%edi
+       shldl   $5,%ecx,%ecx
+       xorl    %ebp,%esi
+       addl    %ecx,%ebx
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       vpor    %xmm8,%xmm5,%xmm5
+       addl    28(%rsp),%eax
+       xorl    %ebp,%edi
+       vmovdqa %xmm5,%xmm9
+       movl    %ebx,%esi
+       shldl   $5,%ebx,%ebx
+       xorl    %edx,%edi
+       addl    %ebx,%eax
+       shrdl   $7,%ecx,%ecx
+       addl    %edi,%eax
+       vpalignr        $8,%xmm4,%xmm5,%xmm9
+       vpxor   %xmm2,%xmm6,%xmm6
+       movl    %ecx,%edi
+       vmovups 32(%r12),%xmm12
+       vxorps  %xmm13,%xmm12,%xmm12
+       vmovups %xmm11,16(%r13,%r12,1)
+       vxorps  %xmm12,%xmm11,%xmm11
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -80(%r15),%xmm15
+       xorl    %edx,%ecx
+       addl    32(%rsp),%ebp
+       andl    %edx,%edi
+       vpxor   %xmm7,%xmm6,%xmm6
+       andl    %ecx,%esi
+       shrdl   $7,%ebx,%ebx
+       vmovdqa %xmm10,%xmm8
+       vpaddd  %xmm5,%xmm10,%xmm10
+       addl    %edi,%ebp
+       movl    %eax,%edi
+       vpxor   %xmm9,%xmm6,%xmm6
+       shldl   $5,%eax,%eax
+       addl    %esi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       vpsrld  $30,%xmm6,%xmm9
+       vmovdqa %xmm10,16(%rsp)
+       movl    %ebx,%esi
+       xorl    %ecx,%ebx
+       addl    36(%rsp),%edx
+       andl    %ecx,%esi
+       vpslld  $2,%xmm6,%xmm6
+       andl    %ebx,%edi
+       shrdl   $7,%eax,%eax
+       addl    %esi,%edx
+       movl    %ebp,%esi
+       shldl   $5,%ebp,%ebp
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -64(%r15),%xmm14
+       addl    %edi,%edx
+       xorl    %ecx,%ebx
+       addl    %ebp,%edx
+       vpor    %xmm9,%xmm6,%xmm6
+       movl    %eax,%edi
+       xorl    %ebx,%eax
+       vmovdqa %xmm6,%xmm10
+       addl    40(%rsp),%ecx
+       andl    %ebx,%edi
+       andl    %eax,%esi
+       shrdl   $7,%ebp,%ebp
+       addl    %edi,%ecx
+       movl    %edx,%edi
+       shldl   $5,%edx,%edx
+       addl    %esi,%ecx
+       xorl    %ebx,%eax
+       addl    %edx,%ecx
+       movl    %ebp,%esi
+       xorl    %eax,%ebp
+       addl    44(%rsp),%ebx
+       andl    %eax,%esi
+       andl    %ebp,%edi
+       vaesenc %xmm14,%xmm11,%xmm11
+       vmovups -48(%r15),%xmm15
+       shrdl   $7,%edx,%edx
+       addl    %esi,%ebx
+       movl    %ecx,%esi
+       shldl   $5,%ecx,%ecx
+       addl    %edi,%ebx
+       xorl    %eax,%ebp
+       addl    %ecx,%ebx
+       vpalignr        $8,%xmm5,%xmm6,%xmm10
+       vpxor   %xmm3,%xmm7,%xmm7
+       movl    %edx,%edi
+       xorl    %ebp,%edx
+       addl    48(%rsp),%eax
+       andl    %ebp,%edi
+       vpxor   %xmm0,%xmm7,%xmm7
+       andl    %edx,%esi
+       shrdl   $7,%ecx,%ecx
+       vmovdqa 48(%r11),%xmm9
+       vpaddd  %xmm6,%xmm8,%xmm8
+       addl    %edi,%eax
+       movl    %ebx,%edi
+       vpxor   %xmm10,%xmm7,%xmm7
+       shldl   $5,%ebx,%ebx
+       addl    %esi,%eax
+       xorl    %ebp,%edx
+       addl    %ebx,%eax
+       vpsrld  $30,%xmm7,%xmm10
+       vmovdqa %xmm8,32(%rsp)
+       movl    %ecx,%esi
+       vaesenc %xmm15,%xmm11,%xmm11
+       vmovups -32(%r15),%xmm14
+       xorl    %edx,%ecx
+       addl    52(%rsp),%ebp
+       andl    %edx,%esi
+       vpslld  $2,%xmm7,%xmm7
+       andl    %ecx,%edi
+       shrdl   $7,%ebx,%ebx
+       addl    %esi,%ebp
+       movl    %eax,%esi
+       shldl   $5,%eax,%eax
+       addl    %edi,%ebp
+       xorl    %edx,%ecx
+       addl    %eax,%ebp
+       vpor    %xmm10,%xmm7,%xmm7
+       movl    %ebx,%edi
+       xorl    %ecx,%ebx
+       vmovdqa %xmm7,%xmm8
+       addl    56(%rsp),%edx
+       andl    %ecx,%edi

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***