The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=74d73bb743c759e6d4d67435d383d501585c4680

commit 74d73bb743c759e6d4d67435d383d501585c4680
Author:     John Baldwin <[email protected]>
AuthorDate: 2023-08-22 04:02:29 +0000
Commit:     John Baldwin <[email protected]>
CommitDate: 2023-08-22 04:02:29 +0000

    libcrypto: Generate new files added in OpenSSL 3.0.
    
    Reviewed by:    gallatin, ngie, emaste
    Differential Revision:  https://reviews.freebsd.org/D41538
---
 secure/lib/libcrypto/arch/amd64/aes-x86_64.S   | 2680 ++++++++++
 secure/lib/libcrypto/arch/amd64/bsaes-x86_64.S | 2619 ++++++++++
 secure/lib/libcrypto/arch/i386/aes-586.S       | 6644 ++++++++++++++++++++++++
 3 files changed, 11943 insertions(+)

diff --git a/secure/lib/libcrypto/arch/amd64/aes-x86_64.S b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
new file mode 100644
index 000000000000..fc375184a20a
--- /dev/null
+++ b/secure/lib/libcrypto/arch/amd64/aes-x86_64.S
@@ -0,0 +1,2680 @@
+/* Do not modify. This file is auto-generated from aes-x86_64.pl. */
+.text  
+.type  _x86_64_AES_encrypt,@function  /* internal routine: transforms the 4-word state in eax/ebx/ecx/edx using table lookups */
+.align 16
+_x86_64_AES_encrypt:  /* in: r15 = pointer to 16-byte blocks XORed into the state (presumably round keys), r14 = table base; writes esi, edi, ebp, r8d, r10d-r13d, r15 */
+.cfi_startproc 
+       xorl    0(%r15),%eax  /* XOR the first 16 bytes at r15 into the state */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+
+       movl    240(%r15),%r13d  /* r13d = 32-bit field at offset 240 from r15 (presumably the round count - confirm) */
+       subl    $1,%r13d  /* the loop below runs that value minus one times */
+       jmp     .Lenc_loop  /* skip the alignment padding in front of the loop */
+.align 16
+.Lenc_loop:
+
+       movzbl  %al,%esi  /* indices = low byte of eax, ebx, ecx */
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movl    0(%r14,%rsi,8),%r10d  /* each index selects an 8-byte table entry; r10d/r11d/r12d start the new words */
+       movl    0(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r12d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movzbl  %dl,%ebp
+       xorl    3(%r14,%rsi,8),%r10d  /* byte offsets 3, 2 and 1 into an entry read a differently aligned 32-bit word from it */
+       xorl    3(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r8d  /* r8d starts the fourth new word */
+
+       movzbl  %dh,%esi
+       shrl    $16,%ecx  /* bring the upper two bytes of ecx down into cl/ch */
+       movzbl  %ah,%ebp
+       xorl    3(%r14,%rsi,8),%r12d
+       shrl    $16,%edx
+       xorl    3(%r14,%rbp,8),%r8d
+
+       shrl    $16,%ebx
+       leaq    16(%r15),%r15  /* advance r15 to the next 16-byte block */
+       shrl    $16,%eax
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       xorl    2(%r14,%rsi,8),%r10d
+       xorl    2(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r12d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movzbl  %bl,%ebp
+       xorl    1(%r14,%rsi,8),%r10d
+       xorl    1(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r8d
+
+       movl    12(%r15),%edx  /* edx is no longer needed as an index: start reloading the state from the block at r15 */
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movl    0(%r15),%eax
+       xorl    1(%r14,%rdi,8),%r12d
+       xorl    1(%r14,%rbp,8),%r8d
+
+       movl    4(%r15),%ebx
+       movl    8(%r15),%ecx
+       xorl    %r10d,%eax  /* new state = block at r15 XOR the accumulated lookups */
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       subl    $1,%r13d  /* count down the remaining iterations */
+       jnz     .Lenc_loop
+       movzbl  %al,%esi  /* tail after the loop: each output word is assembled one byte lane at a time */
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movzbl  2(%r14,%rsi,8),%r10d  /* single byte at offset 2 of the entry becomes bits 0-7 */
+       movzbl  2(%r14,%rdi,8),%r11d
+       movzbl  2(%r14,%rbp,8),%r12d
+
+       movzbl  %dl,%esi
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movzbl  2(%r14,%rsi,8),%r8d
+       movl    0(%r14,%rdi,8),%edi
+       movl    0(%r14,%rbp,8),%ebp
+
+       andl    $0x0000ff00,%edi  /* keep only bits 8-15 of the loaded word */
+       andl    $0x0000ff00,%ebp
+
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+       shrl    $16,%ecx
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       shrl    $16,%edx
+       movl    0(%r14,%rsi,8),%esi
+       movl    0(%r14,%rdi,8),%edi
+
+       andl    $0x0000ff00,%esi
+       andl    $0x0000ff00,%edi
+       shrl    $16,%ebx
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+       shrl    $16,%eax
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       movl    0(%r14,%rsi,8),%esi
+       movl    0(%r14,%rdi,8),%edi
+       movl    0(%r14,%rbp,8),%ebp
+
+       andl    $0x00ff0000,%esi  /* keep only bits 16-23 */
+       andl    $0x00ff0000,%edi
+       andl    $0x00ff0000,%ebp
+
+       xorl    %esi,%r10d
+       xorl    %edi,%r11d
+       xorl    %ebp,%r12d
+
+       movzbl  %bl,%esi
+       movzbl  %dh,%edi
+       movzbl  %ah,%ebp
+       movl    0(%r14,%rsi,8),%esi
+       movl    2(%r14,%rdi,8),%edi  /* word read at offset 2 of the entry; only bits 24-31 are kept below */
+       movl    2(%r14,%rbp,8),%ebp
+
+       andl    $0x00ff0000,%esi
+       andl    $0xff000000,%edi
+       andl    $0xff000000,%ebp
+
+       xorl    %esi,%r8d
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movl    16+12(%r15),%edx  /* 16+N(%r15): the block one past r15, which is not advanced in the tail */
+       movl    2(%r14,%rsi,8),%esi
+       movl    2(%r14,%rdi,8),%edi
+       movl    16+0(%r15),%eax
+
+       andl    $0xff000000,%esi
+       andl    $0xff000000,%edi
+
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+
+       movl    16+4(%r15),%ebx
+       movl    16+8(%r15),%ecx
+       xorl    %r10d,%eax  /* result = last block XOR the assembled words, left in eax/ebx/ecx/edx */
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+.byte  0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  _x86_64_AES_encrypt,.-_x86_64_AES_encrypt
+.type  _x86_64_AES_encrypt_compact,@function  /* internal routine: same state registers (eax/ebx/ecx/edx), but every lookup is a single byte from a byte table at r14 */
+.align 16
+_x86_64_AES_encrypt_compact:  /* in: r15 = pointer to 16-byte blocks XORed into the state, r14 = byte table, caller's 8(%rsp) = end pointer for r15 */
+.cfi_startproc 
+       leaq    128(%r14),%r8  /* bias the base by 128 so the eight loads below span -128..+96 */
+       movl    0-128(%r8),%edi  /* eight loads 32 bytes apart across table bytes 0-255; the values are overwritten unused (presumably a cache prefetch) */
+       movl    32-128(%r8),%ebp
+       movl    64-128(%r8),%r10d
+       movl    96-128(%r8),%r11d
+       movl    128-128(%r8),%edi
+       movl    160-128(%r8),%ebp
+       movl    192-128(%r8),%r10d
+       movl    224-128(%r8),%r11d
+       jmp     .Lenc_loop_compact  /* skip the alignment padding in front of the loop */
+.align 16
+.Lenc_loop_compact:
+       xorl    0(%r15),%eax  /* XOR the next 16 bytes at r15 into the state */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+       leaq    16(%r15),%r15  /* advance r15 to the following block */
+       movzbl  %al,%r10d  /* split the state into byte indices */
+       movzbl  %bl,%r11d
+       movzbl  %cl,%r12d
+       movzbl  %dl,%r8d
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       shrl    $16,%ecx
+       movzbl  %dh,%ebp
+       movzbl  (%r14,%r10,1),%r10d  /* replace each index by its table byte; these four become bits 0-7 of the new words */
+       movzbl  (%r14,%r11,1),%r11d
+       movzbl  (%r14,%r12,1),%r12d
+       movzbl  (%r14,%r8,1),%r8d
+
+       movzbl  (%r14,%rsi,1),%r9d
+       movzbl  %ah,%esi
+       movzbl  (%r14,%rdi,1),%r13d
+       movzbl  %cl,%edi
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+
+       shll    $8,%r9d  /* move the looked-up byte to bits 8-15 before merging */
+       shrl    $16,%edx
+       shll    $8,%r13d
+       xorl    %r9d,%r10d
+       shrl    $16,%eax
+       movzbl  %dl,%r9d
+       shrl    $16,%ebx
+       xorl    %r13d,%r11d
+       shll    $8,%ebp
+       movzbl  %al,%r13d
+       movzbl  (%r14,%rdi,1),%edi
+       xorl    %ebp,%r12d
+
+       shll    $8,%esi
+       movzbl  %bl,%ebp
+       shll    $16,%edi  /* bits 16-23 lane */
+       xorl    %esi,%r8d
+       movzbl  (%r14,%r9,1),%r9d
+       movzbl  %dh,%esi
+       movzbl  (%r14,%r13,1),%r13d
+       xorl    %edi,%r10d
+
+       shrl    $8,%ecx  /* ecx and ebx are reduced to their remaining top byte and used directly as indices */
+       movzbl  %ah,%edi
+       shll    $16,%r9d
+       shrl    $8,%ebx
+       shll    $16,%r13d
+       xorl    %r9d,%r11d
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rcx,1),%edx
+       movzbl  (%r14,%rbx,1),%ecx
+
+       shll    $16,%ebp
+       xorl    %r13d,%r12d
+       shll    $24,%esi  /* bits 24-31 lane */
+       xorl    %ebp,%r8d
+       shll    $24,%edi
+       xorl    %esi,%r10d
+       shll    $24,%edx
+       xorl    %edi,%r11d
+       shll    $24,%ecx
+       movl    %r10d,%eax  /* write the substituted words back to eax/ebx/ecx/edx */
+       movl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       cmpq    16(%rsp),%r15  /* 16(%rsp) here is the caller's 8(%rsp) (return address is at 0(%rsp)); AES_encrypt stores the end pointer there */
+       je      .Lenc_compact_done  /* last block reached: skip the mixing step */
+       movl    $0x80808080,%r10d  /* mixing step for eax/ebx: first isolate the top bit of every byte */
+       movl    $0x80808080,%r11d
+       andl    %eax,%r10d
+       andl    %ebx,%r11d
+       movl    %r10d,%esi
+       movl    %r11d,%edi
+       shrl    $7,%r10d
+       leal    (%rax,%rax,1),%r8d  /* r8d = 2*eax (all four bytes shifted left at once) */
+       shrl    $7,%r11d
+       leal    (%rbx,%rbx,1),%r9d
+       subl    %r10d,%esi  /* 0x80 - 0x01 = 0x7f in each byte whose top bit was set, 0 elsewhere */
+       subl    %r11d,%edi
+       andl    $0xfefefefe,%r8d  /* discard bits that the doubling carried across byte boundaries */
+       andl    $0xfefefefe,%r9d
+       andl    $0x1b1b1b1b,%esi  /* 0x1b for bytes whose top bit was set */
+       andl    $0x1b1b1b1b,%edi
+       movl    %eax,%r10d
+       movl    %ebx,%r11d
+       xorl    %esi,%r8d  /* r8d/r9d = eax/ebx doubled per byte with the 0x1b correction */
+       xorl    %edi,%r9d
+
+       xorl    %r8d,%eax  /* combine the word, its doubled form and rotations by 8/16/24 bits (presumably MixColumns) */
+       xorl    %r9d,%ebx
+       movl    $0x80808080,%r12d  /* the same doubling sequence for ecx/edx is interleaved from here */
+       roll    $24,%eax
+       movl    $0x80808080,%ebp
+       roll    $24,%ebx
+       andl    %ecx,%r12d
+       andl    %edx,%ebp
+       xorl    %r8d,%eax
+       xorl    %r9d,%ebx
+       movl    %r12d,%esi
+       rorl    $16,%r10d
+       movl    %ebp,%edi
+       rorl    $16,%r11d
+       leal    (%rcx,%rcx,1),%r8d
+       shrl    $7,%r12d
+       xorl    %r10d,%eax
+       shrl    $7,%ebp
+       xorl    %r11d,%ebx
+       rorl    $8,%r10d
+       leal    (%rdx,%rdx,1),%r9d
+       rorl    $8,%r11d
+       subl    %r12d,%esi
+       subl    %ebp,%edi
+       xorl    %r10d,%eax
+       xorl    %r11d,%ebx
+
+       andl    $0xfefefefe,%r8d
+       andl    $0xfefefefe,%r9d
+       andl    $0x1b1b1b1b,%esi
+       andl    $0x1b1b1b1b,%edi
+       movl    %ecx,%r12d
+       movl    %edx,%ebp
+       xorl    %esi,%r8d  /* r8d/r9d = ecx/edx doubled per byte */
+       xorl    %edi,%r9d
+
+       rorl    $16,%r12d
+       xorl    %r8d,%ecx
+       rorl    $16,%ebp
+       xorl    %r9d,%edx
+       roll    $24,%ecx
+       movl    0(%r14),%esi  /* loads from 0/64/128/192(%r14): esi, edi, r8d, r9d are all rewritten at the loop top before being read (presumably cache touches) */
+       roll    $24,%edx
+       xorl    %r8d,%ecx
+       movl    64(%r14),%edi
+       xorl    %r9d,%edx
+       movl    128(%r14),%r8d
+       xorl    %r12d,%ecx
+       rorl    $8,%r12d
+       xorl    %ebp,%edx
+       rorl    $8,%ebp
+       xorl    %r12d,%ecx
+       movl    192(%r14),%r9d
+       xorl    %ebp,%edx
+       jmp     .Lenc_loop_compact
+.align 16
+.Lenc_compact_done:
+       xorl    0(%r15),%eax  /* XOR the final 16-byte block at r15 into the result */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+.byte  0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  _x86_64_AES_encrypt_compact,.-_x86_64_AES_encrypt_compact
+.globl AES_encrypt  /* exported entry; as used below: rdi = 16-byte input, rsi = 16-byte output, rdx = key structure with a 32-bit field at offset 240 */
+.type  AES_encrypt,@function
+.align 16
+.globl asm_AES_encrypt
+.hidden        asm_AES_encrypt
+asm_AES_encrypt:  /* second, hidden-visibility name for the same entry point */
+AES_encrypt:
+.cfi_startproc 
+.byte  243,15,30,250  /* bytes f3 0f 1e fa encode endbr64 */
+       movq    %rsp,%rax  /* rax = rsp at entry; kept for the epilogue and used as the CFA register meanwhile */
+.cfi_def_cfa_register  %rax
+       pushq   %rbx  /* save the callee-saved registers this function and its helper overwrite */
+.cfi_offset    %rbx,-16
+       pushq   %rbp
+.cfi_offset    %rbp,-24
+       pushq   %r12
+.cfi_offset    %r12,-32
+       pushq   %r13
+.cfi_offset    %r13,-40
+       pushq   %r14
+.cfi_offset    %r14,-48
+       pushq   %r15
+.cfi_offset    %r15,-56
+
+
+       leaq    -63(%rdx),%rcx  /* rcx = key pointer - 63 */
+       andq    $-64,%rsp  /* round rsp down to a 64-byte boundary */
+       subq    %rsp,%rcx
+       negq    %rcx
+       andq    $0x3c0,%rcx  /* rcx = (rsp - key + 63) & 0x3c0, a multiple of 64 */
+       subq    %rcx,%rsp  /* lower rsp by that amount (presumably to keep the frame from aliasing the key in cache - confirm) */
+       subq    $32,%rsp  /* 32-byte frame: 0 = key pointer, 8 = end pointer, 16 = output pointer, 24 = entry rsp */
+
+       movq    %rsi,16(%rsp)  /* output pointer, needed after the call */
+       movq    %rax,24(%rsp)  /* entry rsp */
+.cfi_escape    0x0f,0x05,0x77,0x18,0x06,0x23,0x08  /* DW_CFA_def_cfa_expression: CFA = *(rsp+24) + 8 */
+.Lenc_prologue:
+
+       movq    %rdx,%r15  /* r15 = key pointer, as the helper expects */
+       movl    240(%r15),%r13d  /* r13d = field at offset 240 of the key structure (presumably the round count) */
+
+       movl    0(%rdi),%eax  /* load the 16 input bytes into the state registers */
+       movl    4(%rdi),%ebx
+       movl    8(%rdi),%ecx
+       movl    12(%rdi),%edx
+
+       shll    $4,%r13d  /* r13 = field * 16 */
+       leaq    (%r15,%r13,1),%rbp  /* rbp = key pointer + field*16: the r15 value at which the helper stops looping */
+       movq    %r15,(%rsp)
+       movq    %rbp,8(%rsp)  /* read by the helper as 16(%rsp) once the return address is pushed */
+
+
+       leaq    .LAES_Te+2048(%rip),%r14  /* r14 = .LAES_Te + 2048 */
+       leaq    768(%rsp),%rbp
+       subq    %r14,%rbp
+       andq    $0x300,%rbp  /* rbp = (rsp + 768 - r14) & 0x300: one of 0, 0x100, 0x200, 0x300 */
+       leaq    (%r14,%rbp,1),%r14  /* r14 += that offset, so the table address used depends on the stack position */
+
+       call    _x86_64_AES_encrypt_compact
+
+       movq    16(%rsp),%r9  /* r9 = saved output pointer */
+       movq    24(%rsp),%rsi  /* rsi = saved entry rsp */
+.cfi_def_cfa   %rsi,8
+       movl    %eax,0(%r9)  /* store the 16 result bytes */
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       movq    -48(%rsi),%r15  /* reload the callee-saved registers from the slots pushed below the entry rsp */
+.cfi_restore   %r15
+       movq    -40(%rsi),%r14
+.cfi_restore   %r14
+       movq    -32(%rsi),%r13
+.cfi_restore   %r13
+       movq    -24(%rsi),%r12
+.cfi_restore   %r12
+       movq    -16(%rsi),%rbp
+.cfi_restore   %rbp
+       movq    -8(%rsi),%rbx
+.cfi_restore   %rbx
+       leaq    (%rsi),%rsp  /* rsp = entry rsp */
+.cfi_def_cfa_register  %rsp
+.Lenc_epilogue:
+       .byte   0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  AES_encrypt,.-AES_encrypt
+.type  _x86_64_AES_decrypt,@function  /* internal routine: transforms the 4-word state in eax/ebx/ecx/edx using table lookups */
+.align 16
+_x86_64_AES_decrypt:  /* in: r15 = pointer to 16-byte blocks XORed into the state (presumably round keys), r14 = table base; writes esi, edi, ebp, r8d, r10d-r13d, r15 */
+.cfi_startproc 
+       xorl    0(%r15),%eax  /* XOR the first 16 bytes at r15 into the state */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+
+       movl    240(%r15),%r13d  /* r13d = 32-bit field at offset 240 from r15 (presumably the round count - confirm) */
+       subl    $1,%r13d  /* the loop below runs that value minus one times */
+       jmp     .Ldec_loop  /* skip the alignment padding in front of the loop */
+.align 16
+.Ldec_loop:
+
+       movzbl  %al,%esi  /* indices = low byte of eax, ebx, ecx */
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movl    0(%r14,%rsi,8),%r10d  /* each index selects an 8-byte table entry; r10d/r11d/r12d start the new words */
+       movl    0(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r12d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movzbl  %dl,%ebp
+       xorl    3(%r14,%rsi,8),%r10d  /* byte offsets 3, 2 and 1 into an entry read a differently aligned 32-bit word from it */
+       xorl    3(%r14,%rdi,8),%r11d
+       movl    0(%r14,%rbp,8),%r8d  /* r8d starts the fourth new word */
+
+       movzbl  %bh,%esi
+       shrl    $16,%eax  /* bring the upper two bytes of eax down into al/ah */
+       movzbl  %ch,%ebp
+       xorl    3(%r14,%rsi,8),%r12d
+       shrl    $16,%edx
+       xorl    3(%r14,%rbp,8),%r8d
+
+       shrl    $16,%ebx
+       leaq    16(%r15),%r15  /* advance r15 to the next 16-byte block */
+       shrl    $16,%ecx
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       xorl    2(%r14,%rsi,8),%r10d
+       xorl    2(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r12d
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       movzbl  %bl,%ebp
+       xorl    1(%r14,%rsi,8),%r10d
+       xorl    1(%r14,%rdi,8),%r11d
+       xorl    2(%r14,%rbp,8),%r8d
+
+       movzbl  %dh,%esi
+       movl    12(%r15),%edx  /* edx is no longer needed as an index: start reloading the state from the block at r15 */
+       movzbl  %ah,%ebp
+       xorl    1(%r14,%rsi,8),%r12d
+       movl    0(%r15),%eax
+       xorl    1(%r14,%rbp,8),%r8d
+
+       xorl    %r10d,%eax  /* new state = block at r15 XOR the accumulated lookups */
+       movl    4(%r15),%ebx
+       movl    8(%r15),%ecx
+       xorl    %r12d,%ecx
+       xorl    %r11d,%ebx
+       xorl    %r8d,%edx
+       subl    $1,%r13d  /* count down the remaining iterations */
+       jnz     .Ldec_loop
+       leaq    2048(%r14),%r14  /* tail: switch r14 to the byte-indexed table 2048 bytes further on */
+       movzbl  %al,%esi
+       movzbl  %bl,%edi
+       movzbl  %cl,%ebp
+       movzbl  (%r14,%rsi,1),%r10d  /* one table byte per index; these become bits 0-7 of the output words */
+       movzbl  (%r14,%rdi,1),%r11d
+       movzbl  (%r14,%rbp,1),%r12d
+
+       movzbl  %dl,%esi
+       movzbl  %dh,%edi
+       movzbl  %ah,%ebp
+       movzbl  (%r14,%rsi,1),%r8d
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $8,%edi  /* move the looked-up byte to bits 8-15 before merging */
+       shll    $8,%ebp
+
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+       shrl    $16,%edx
+
+       movzbl  %bh,%esi
+       movzbl  %ch,%edi
+       shrl    $16,%eax
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+
+       shll    $8,%esi
+       shll    $8,%edi
+       shrl    $16,%ebx
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+       shrl    $16,%ecx
+
+       movzbl  %cl,%esi
+       movzbl  %dl,%edi
+       movzbl  %al,%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $16,%esi  /* bits 16-23 lane */
+       shll    $16,%edi
+       shll    $16,%ebp
+
+       xorl    %esi,%r10d
+       xorl    %edi,%r11d
+       xorl    %ebp,%r12d
+
+       movzbl  %bl,%esi
+       movzbl  %bh,%edi
+       movzbl  %ch,%ebp
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movzbl  (%r14,%rbp,1),%ebp
+
+       shll    $16,%esi
+       shll    $24,%edi  /* bits 24-31 lane */
+       shll    $24,%ebp
+
+       xorl    %esi,%r8d
+       xorl    %edi,%r10d
+       xorl    %ebp,%r11d
+
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       movl    16+12(%r15),%edx  /* 16+N(%r15): the block one past r15, which is not advanced in the tail */
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%edi
+       movl    16+0(%r15),%eax
+
+       shll    $24,%esi
+       shll    $24,%edi
+
+       xorl    %esi,%r12d
+       xorl    %edi,%r8d
+
+       movl    16+4(%r15),%ebx
+       movl    16+8(%r15),%ecx
+       leaq    -2048(%r14),%r14  /* undo the +2048 applied at the start of the tail */
+       xorl    %r10d,%eax  /* result = last block XOR the assembled words, left in eax/ebx/ecx/edx */
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+.byte  0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  _x86_64_AES_decrypt,.-_x86_64_AES_decrypt
+.type  _x86_64_AES_decrypt_compact,@function  /* internal routine: state in eax/ebx/ecx/edx; every lookup is a single byte from a byte table at r14 */
+.align 16
+_x86_64_AES_decrypt_compact:  /* in: r15 = pointer to 16-byte blocks XORed into the state, r14 = byte table, caller's 8(%rsp) = end pointer for r15 */
+.cfi_startproc 
+       leaq    128(%r14),%r8  /* bias the base by 128 so the eight loads below span -128..+96 */
+       movl    0-128(%r8),%edi  /* eight loads 32 bytes apart across table bytes 0-255; the values are overwritten unused (presumably a cache prefetch) */
+       movl    32-128(%r8),%ebp
+       movl    64-128(%r8),%r10d
+       movl    96-128(%r8),%r11d
+       movl    128-128(%r8),%edi
+       movl    160-128(%r8),%ebp
+       movl    192-128(%r8),%r10d
+       movl    224-128(%r8),%r11d
+       jmp     .Ldec_loop_compact  /* skip the alignment padding in front of the loop */
+
+.align 16
+.Ldec_loop_compact:
+       xorl    0(%r15),%eax  /* XOR the next 16 bytes at r15 into the state */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+       leaq    16(%r15),%r15  /* advance r15 to the following block */
+       movzbl  %al,%r10d  /* split the state into byte indices */
+       movzbl  %bl,%r11d
+       movzbl  %cl,%r12d
+       movzbl  %dl,%r8d
+       movzbl  %dh,%esi
+       movzbl  %ah,%edi
+       shrl    $16,%edx
+       movzbl  %bh,%ebp
+       movzbl  (%r14,%r10,1),%r10d  /* replace each index by its table byte; these four become bits 0-7 of the new words */
+       movzbl  (%r14,%r11,1),%r11d
+       movzbl  (%r14,%r12,1),%r12d
+       movzbl  (%r14,%r8,1),%r8d
+
+       movzbl  (%r14,%rsi,1),%r9d
+       movzbl  %ch,%esi
+       movzbl  (%r14,%rdi,1),%r13d
+       movzbl  (%r14,%rbp,1),%ebp
+       movzbl  (%r14,%rsi,1),%esi
+
+       shrl    $16,%ecx
+       shll    $8,%r13d  /* move the looked-up byte to bits 8-15 before merging */
+       shll    $8,%r9d
+       movzbl  %cl,%edi
+       shrl    $16,%eax
+       xorl    %r9d,%r10d
+       shrl    $16,%ebx
+       movzbl  %dl,%r9d
+
+       shll    $8,%ebp
+       xorl    %r13d,%r11d
+       shll    $8,%esi
+       movzbl  %al,%r13d
+       movzbl  (%r14,%rdi,1),%edi
+       xorl    %ebp,%r12d
+       movzbl  %bl,%ebp
+
+       shll    $16,%edi  /* bits 16-23 lane */
+       xorl    %esi,%r8d
+       movzbl  (%r14,%r9,1),%r9d
+       movzbl  %bh,%esi
+       movzbl  (%r14,%rbp,1),%ebp
+       xorl    %edi,%r10d
+       movzbl  (%r14,%r13,1),%r13d
+       movzbl  %ch,%edi
+
+       shll    $16,%ebp
+       shll    $16,%r9d
+       shll    $16,%r13d
+       xorl    %ebp,%r8d
+       movzbl  %dh,%ebp
+       xorl    %r9d,%r11d
+       shrl    $8,%eax  /* eax is reduced to its remaining top byte and used directly as an index */
+       xorl    %r13d,%r12d
+
+       movzbl  (%r14,%rsi,1),%esi
+       movzbl  (%r14,%rdi,1),%ebx
+       movzbl  (%r14,%rbp,1),%ecx
+       movzbl  (%r14,%rax,1),%edx
+
+       movl    %r10d,%eax  /* write the substituted words back to eax/ebx/ecx/edx */
+       shll    $24,%esi  /* bits 24-31 lane */
+       shll    $24,%ebx
+       shll    $24,%ecx
+       xorl    %esi,%eax
+       shll    $24,%edx
+       xorl    %r11d,%ebx
+       xorl    %r12d,%ecx
+       xorl    %r8d,%edx
+       cmpq    16(%rsp),%r15  /* 16(%rsp) here is the caller's 8(%rsp) (return address is at 0(%rsp)); AES_decrypt stores the end pointer there */
+       je      .Ldec_compact_done  /* last block reached: skip the mixing step */
+
+       movq    256+0(%r14),%rsi  /* rsi, rdi, rbp = three 64-bit constants stored right after the 256-byte table; used as masks below (values not visible here - presumably 0x80.., 0xfe.., 0x1b.. replicated) */
+       shlq    $32,%rbx  /* pack ebx above eax in rax and edx above ecx in rcx: two state words per 64-bit register */
+       shlq    $32,%rdx
+       movq    256+8(%r14),%rdi
+       orq     %rbx,%rax
+       orq     %rdx,%rcx
+       movq    256+16(%r14),%rbp
+       movq    %rsi,%r9  /* step 1: r8, r11 = mask/shift/subtract/xor sequence applied to rax, rcx (presumably per-byte doubling) */
+       movq    %rsi,%r12
+       andq    %rax,%r9
+       andq    %rcx,%r12
+       movq    %r9,%rbx
+       movq    %r12,%rdx
+       shrq    $7,%r9
+       leaq    (%rax,%rax,1),%r8
+       shrq    $7,%r12
+       leaq    (%rcx,%rcx,1),%r11
+       subq    %r9,%rbx
+       subq    %r12,%rdx
+       andq    %rdi,%r8
+       andq    %rdi,%r11
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %rbx,%r8
+       xorq    %rdx,%r11
+       movq    %rsi,%r10  /* step 2: r9, r12 = the same sequence applied to r8, r11 */
+       movq    %rsi,%r13
+
+       andq    %r8,%r10
+       andq    %r11,%r13
+       movq    %r10,%rbx
+       movq    %r13,%rdx
+       shrq    $7,%r10
+       leaq    (%r8,%r8,1),%r9
+       shrq    $7,%r13
+       leaq    (%r11,%r11,1),%r12
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       andq    %rdi,%r9
+       andq    %rdi,%r12
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %rbx,%r9
+       xorq    %rdx,%r12
+       movq    %rsi,%r10  /* step 3: r10, r13 = the same sequence applied to r9, r12 */
+       movq    %rsi,%r13
+
+       andq    %r9,%r10
+       andq    %r12,%r13
+       movq    %r10,%rbx
+       movq    %r13,%rdx
+       shrq    $7,%r10
+       xorq    %rax,%r8  /* interleaved: fold the original words into the step-1 and step-2 results */
+       shrq    $7,%r13
+       xorq    %rcx,%r11
+       subq    %r10,%rbx
+       subq    %r13,%rdx
+       leaq    (%r9,%r9,1),%r10
+       leaq    (%r12,%r12,1),%r13
+       xorq    %rax,%r9
+       xorq    %rcx,%r12
+       andq    %rdi,%r10
+       andq    %rdi,%r13
+       andq    %rbp,%rbx
+       andq    %rbp,%rdx
+       xorq    %rbx,%r10
+       xorq    %rdx,%r13
+
+       xorq    %r10,%rax  /* combine the three step results with the original words */
+       xorq    %r13,%rcx
+       xorq    %r10,%r8
+       xorq    %r13,%r11
+       movq    %rax,%rbx  /* unpack: rbx/rdx receive the upper halves of rax/rcx after the shifts below */
+       movq    %rcx,%rdx
+       xorq    %r10,%r9
+       shrq    $32,%rbx
+       xorq    %r13,%r12
+       shrq    $32,%rdx
+       xorq    %r8,%r10
+       roll    $8,%eax  /* 32-bit rotates by 8, 24 and 16 bits are applied to each half separately from here on */
+       xorq    %r11,%r13
+       roll    $8,%ecx
+       xorq    %r9,%r10
+       roll    $8,%ebx
+       xorq    %r12,%r13
+
+       roll    $8,%edx
+       xorl    %r10d,%eax
+       shrq    $32,%r10
+       xorl    %r13d,%ecx
+       shrq    $32,%r13
+       xorl    %r10d,%ebx
+       xorl    %r13d,%edx
+
+       movq    %r8,%r10
+       roll    $24,%r8d
+       movq    %r11,%r13
+       roll    $24,%r11d
+       shrq    $32,%r10
+       xorl    %r8d,%eax
+       shrq    $32,%r13
+       xorl    %r11d,%ecx
+       roll    $24,%r10d
+       movq    %r9,%r8
+       roll    $24,%r13d
+       movq    %r12,%r11
+       shrq    $32,%r8
+       xorl    %r10d,%ebx
+       shrq    $32,%r11
+       xorl    %r13d,%edx
+
+       movq    0(%r14),%rsi  /* loads from 0/64/128/192/256(%r14): rsi, rdi, rbp, r10, r13 are all rewritten at the loop top before being read (presumably cache touches) */
+       roll    $16,%r9d
+       movq    64(%r14),%rdi
+       roll    $16,%r12d
+       movq    128(%r14),%rbp
+       roll    $16,%r8d
+       movq    192(%r14),%r10
+       xorl    %r9d,%eax
+       roll    $16,%r11d
+       xorl    %r12d,%ecx
+       movq    256(%r14),%r13
+       xorl    %r8d,%ebx
+       xorl    %r11d,%edx
+       jmp     .Ldec_loop_compact
+.align 16
+.Ldec_compact_done:
+       xorl    0(%r15),%eax  /* XOR the final 16-byte block at r15 into the result */
+       xorl    4(%r15),%ebx
+       xorl    8(%r15),%ecx
+       xorl    12(%r15),%edx
+.byte  0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  _x86_64_AES_decrypt_compact,.-_x86_64_AES_decrypt_compact
+.globl AES_decrypt  /* exported entry; as used below: rdi = 16-byte input, rsi = 16-byte output, rdx = key structure with a 32-bit field at offset 240 */
+.type  AES_decrypt,@function
+.align 16
+.globl asm_AES_decrypt
+.hidden        asm_AES_decrypt
+asm_AES_decrypt:  /* second, hidden-visibility name for the same entry point */
+AES_decrypt:
+.cfi_startproc 
+.byte  243,15,30,250  /* bytes f3 0f 1e fa encode endbr64 */
+       movq    %rsp,%rax  /* rax = rsp at entry; kept for the epilogue and used as the CFA register meanwhile */
+.cfi_def_cfa_register  %rax
+       pushq   %rbx  /* save the callee-saved registers this function and its helper overwrite */
+.cfi_offset    %rbx,-16
+       pushq   %rbp
+.cfi_offset    %rbp,-24
+       pushq   %r12
+.cfi_offset    %r12,-32
+       pushq   %r13
+.cfi_offset    %r13,-40
+       pushq   %r14
+.cfi_offset    %r14,-48
+       pushq   %r15
+.cfi_offset    %r15,-56
+
+
+       leaq    -63(%rdx),%rcx  /* rcx = key pointer - 63 */
+       andq    $-64,%rsp  /* round rsp down to a 64-byte boundary */
+       subq    %rsp,%rcx
+       negq    %rcx
+       andq    $0x3c0,%rcx  /* rcx = (rsp - key + 63) & 0x3c0, a multiple of 64 */
+       subq    %rcx,%rsp  /* lower rsp by that amount (presumably to keep the frame from aliasing the key in cache - confirm) */
+       subq    $32,%rsp  /* 32-byte frame: 0 = key pointer, 8 = end pointer, 16 = output pointer, 24 = entry rsp */
+
+       movq    %rsi,16(%rsp)  /* output pointer, needed after the call */
+       movq    %rax,24(%rsp)  /* entry rsp */
+.cfi_escape    0x0f,0x05,0x77,0x18,0x06,0x23,0x08  /* DW_CFA_def_cfa_expression: CFA = *(rsp+24) + 8 */
+.Ldec_prologue:
+
+       movq    %rdx,%r15  /* r15 = key pointer, as the helper expects */
+       movl    240(%r15),%r13d  /* r13d = field at offset 240 of the key structure (presumably the round count) */
+
+       movl    0(%rdi),%eax  /* load the 16 input bytes into the state registers */
+       movl    4(%rdi),%ebx
+       movl    8(%rdi),%ecx
+       movl    12(%rdi),%edx
+
+       shll    $4,%r13d  /* r13 = field * 16 */
+       leaq    (%r15,%r13,1),%rbp  /* rbp = key pointer + field*16: the r15 value at which the helper stops looping */
+       movq    %r15,(%rsp)
+       movq    %rbp,8(%rsp)  /* read by the helper as 16(%rsp) once the return address is pushed */
+
+
+       leaq    .LAES_Td+2048(%rip),%r14  /* r14 = .LAES_Td + 2048 */
+       leaq    768(%rsp),%rbp
+       subq    %r14,%rbp
+       andq    $0x300,%rbp  /* rbp = (rsp + 768 - r14) & 0x300: one of 0, 0x100, 0x200, 0x300 */
+       leaq    (%r14,%rbp,1),%r14  /* r14 += that offset */
+       shrq    $3,%rbp  /* plus one eighth of it again (0, 0x20, 0x40 or 0x60), i.e. a total step of 0x120 per unit */
+       addq    %rbp,%r14
+
+       call    _x86_64_AES_decrypt_compact
+
+       movq    16(%rsp),%r9  /* r9 = saved output pointer */
+       movq    24(%rsp),%rsi  /* rsi = saved entry rsp */
+.cfi_def_cfa   %rsi,8
+       movl    %eax,0(%r9)  /* store the 16 result bytes */
+       movl    %ebx,4(%r9)
+       movl    %ecx,8(%r9)
+       movl    %edx,12(%r9)
+
+       movq    -48(%rsi),%r15  /* reload the callee-saved registers from the slots pushed below the entry rsp */
+.cfi_restore   %r15
+       movq    -40(%rsi),%r14
+.cfi_restore   %r14
+       movq    -32(%rsi),%r13
+.cfi_restore   %r13
+       movq    -24(%rsi),%r12
+.cfi_restore   %r12
+       movq    -16(%rsi),%rbp
+.cfi_restore   %rbp
+       movq    -8(%rsi),%rbx
+.cfi_restore   %rbx
+       leaq    (%rsi),%rsp  /* rsp = entry rsp */
+.cfi_def_cfa_register  %rsp
+.Ldec_epilogue:
+       .byte   0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  AES_decrypt,.-AES_decrypt
+.globl AES_set_encrypt_key  /* exported wrapper: saves registers, calls _x86_64_AES_set_encrypt_key with the argument registers untouched, then returns */
+.type  AES_set_encrypt_key,@function
+.align 16
+AES_set_encrypt_key:
+.cfi_startproc 
+.byte  243,15,30,250  /* bytes f3 0f 1e fa encode endbr64 */
+       pushq   %rbx  /* six pushes: rbx ends up at 48(%rsp), rbp at 40(%rsp) after the subq below */
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %rbx,-16
+       pushq   %rbp
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %rbp,-24
+       pushq   %r12
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %r12,-32
+       pushq   %r13
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %r13,-40
+       pushq   %r14
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %r14,-48
+       pushq   %r15
+.cfi_adjust_cfa_offset 8
+.cfi_offset    %r15,-56
+       subq    $8,%rsp  /* 8 more bytes: 56 bytes below the return address in total */
+.cfi_adjust_cfa_offset 8
+.Lenc_key_prologue:
+
+       call    _x86_64_AES_set_encrypt_key  /* the value it leaves in eax is passed through as this function's return value */
+
+       movq    40(%rsp),%rbp  /* only rbp and rbx are reloaded; r12-r15 are presumably left untouched by the helper - confirm against its full body */
+.cfi_restore   %rbp
+       movq    48(%rsp),%rbx
+.cfi_restore   %rbx
+       addq    $56,%rsp  /* release the six saved slots and the 8-byte pad in one step */
+.cfi_adjust_cfa_offset -56
+.Lenc_key_epilogue:
+       .byte   0xf3,0xc3  /* bytes f3 c3 encode "rep ret" */
+.cfi_endproc   
+.size  AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.type  _x86_64_AES_set_encrypt_key,@function
+.align 16
+_x86_64_AES_set_encrypt_key:
+.cfi_startproc 
+       movl    %esi,%ecx
+       movq    %rdi,%rsi
+       movq    %rdx,%rdi
+
+       testq   $-1,%rsi
+       jz      .Lbadpointer
+       testq   $-1,%rdi
+       jz      .Lbadpointer
+
+       leaq    .LAES_Te(%rip),%rbp
+       leaq    2048+128(%rbp),%rbp
+
+
+       movl    0-128(%rbp),%eax
+       movl    32-128(%rbp),%ebx
+       movl    64-128(%rbp),%r8d
+       movl    96-128(%rbp),%edx
+       movl    128-128(%rbp),%eax
+       movl    160-128(%rbp),%ebx
+       movl    192-128(%rbp),%r8d
+       movl    224-128(%rbp),%edx
+
+       cmpl    $128,%ecx
+       je      .L10rounds
+       cmpl    $192,%ecx
*** 10995 LINES SKIPPED ***

Reply via email to