Make a macro for the main encode/decode routine. Only a small handful
of lines differ between the enc and dec paths. This will also become
the main scatter/gather update routine.
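
The enc/dec differences are resolved at assembly time with .ifc tests on
the macro's \operation argument. For example, in the partial-block tail
only the GHASH input differs; a condensed sketch of that spot (taken from
the macro body in the hunk below, with the masking and byte-swap steps
omitted):

	pxor	%xmm1, %xmm0	# XOR Encrypt(K, Yn) into the data block
.ifc \operation, dec
	pxor	%xmm2, %xmm8	# dec: hash the saved ciphertext input
.else
	pxor	%xmm0, %xmm8	# enc: hash the ciphertext just produced
.endif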

Signed-off-by: Dave Watson <davejwat...@fb.com>
---
 arch/x86/crypto/aesni-intel_asm.S | 293 +++++++++++++++-----------------------
 1 file changed, 114 insertions(+), 179 deletions(-)
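
After this change both entry points collapse to the same skeleton
(condensed from the hunks below):

	ENTRY(aesni_gcm_dec)		# aesni_gcm_enc is identical with "enc"
		FUNC_SAVE
		GCM_INIT
		GCM_ENC_DEC dec
		GCM_COMPLETE
		FUNC_RESTORE
		ret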

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 529c542..8021fd1 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -222,6 +222,118 @@ ALL_F:      .octa 0xffffffffffffffffffffffffffffffff
        mov     %r13, %r12
 .endm
 
+# GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
+# struct has been initialized by GCM_INIT.
+# Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
+# Clobbers rax, r10-r13, and xmm0-xmm15
+.macro GCM_ENC_DEC operation
+       # Encrypt/Decrypt first few blocks
+
+       and     $(3<<4), %r12
+       jz      _initial_num_blocks_is_0_\@
+       cmp     $(2<<4), %r12
+       jb      _initial_num_blocks_is_1_\@
+       je      _initial_num_blocks_is_2_\@
+_initial_num_blocks_is_3_\@:
+       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
+       sub     $48, %r13
+       jmp     _initial_blocks_\@
+_initial_num_blocks_is_2_\@:
+       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
+       sub     $32, %r13
+       jmp     _initial_blocks_\@
+_initial_num_blocks_is_1_\@:
+       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
+       sub     $16, %r13
+       jmp     _initial_blocks_\@
+_initial_num_blocks_is_0_\@:
+       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
+%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
+_initial_blocks_\@:
+
+       # Main loop - Encrypt/Decrypt remaining blocks
+
+       cmp     $0, %r13
+       je      _zero_cipher_left_\@
+       sub     $64, %r13
+       je      _four_cipher_left_\@
+_crypt_by_4_\@:
+       GHASH_4_ENCRYPT_4_PARALLEL_\operation   %xmm9, %xmm10, %xmm11, %xmm12, \
+       %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
+       %xmm7, %xmm8, enc
+       add     $64, %r11
+       sub     $64, %r13
+       jne     _crypt_by_4_\@
+_four_cipher_left_\@:
+       GHASH_LAST_4    %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
+%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
+_zero_cipher_left_\@:
+       mov     %arg4, %r13
+       and     $15, %r13                       # %r13 = arg4 (mod 16)
+       je      _multiple_of_16_bytes_\@
+
+       # Handle the last <16 Byte block separately
+       paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
+        movdqa SHUF_MASK(%rip), %xmm10
+       PSHUFB_XMM %xmm10, %xmm0
+
+       ENCRYPT_SINGLE_BLOCK    %xmm0, %xmm1        # Encrypt(K, Yn)
+
+       lea (%arg3,%r11,1), %r10
+       mov %r13, %r12
+       READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
+
+       lea ALL_F+16(%rip), %r12
+       sub %r13, %r12
+.ifc \operation, dec
+       movdqa  %xmm1, %xmm2
+.endif
+       pxor    %xmm1, %xmm0            # XOR Encrypt(K, Yn)
+       movdqu  (%r12), %xmm1
+       # get the appropriate mask to mask out top 16-r13 bytes of xmm0
+       pand    %xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
+.ifc \operation, dec
+       pand    %xmm1, %xmm2
+       movdqa SHUF_MASK(%rip), %xmm10
+       PSHUFB_XMM %xmm10 ,%xmm2
+
+       pxor %xmm2, %xmm8
+.else
+       movdqa SHUF_MASK(%rip), %xmm10
+       PSHUFB_XMM %xmm10,%xmm0
+
+       pxor    %xmm0, %xmm8
+.endif
+
+       GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
+.ifc \operation, enc
+       # GHASH computation for the last <16 byte block
+       movdqa SHUF_MASK(%rip), %xmm10
+       # shuffle xmm0 back to output as ciphertext
+       PSHUFB_XMM %xmm10, %xmm0
+.endif
+
+       # Output %r13 bytes
+       MOVQ_R64_XMM %xmm0, %rax
+       cmp $8, %r13
+       jle _less_than_8_bytes_left_\@
+       mov %rax, (%arg2 , %r11, 1)
+       add $8, %r11
+       psrldq $8, %xmm0
+       MOVQ_R64_XMM %xmm0, %rax
+       sub $8, %r13
+_less_than_8_bytes_left_\@:
+       mov %al,  (%arg2, %r11, 1)
+       add $1, %r11
+       shr $8, %rax
+       sub $1, %r13
+       jne _less_than_8_bytes_left_\@
+_multiple_of_16_bytes_\@:
+.endm
+
 # GCM_COMPLETE Finishes update of tag of last partial block
 # Output: Authorization Tag (AUTH_TAG)
 # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
@@ -1245,93 +1357,7 @@ ENTRY(aesni_gcm_dec)
        FUNC_SAVE
 
        GCM_INIT
-
-        # Decrypt first few blocks
-
-       and $(3<<4), %r12
-       jz _initial_num_blocks_is_0_decrypt
-       cmp $(2<<4), %r12
-       jb _initial_num_blocks_is_1_decrypt
-       je _initial_num_blocks_is_2_decrypt
-_initial_num_blocks_is_3_decrypt:
-       INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec
-       sub     $48, %r13
-       jmp     _initial_blocks_decrypted
-_initial_num_blocks_is_2_decrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec
-       sub     $32, %r13
-       jmp     _initial_blocks_decrypted
-_initial_num_blocks_is_1_decrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec
-       sub     $16, %r13
-       jmp     _initial_blocks_decrypted
-_initial_num_blocks_is_0_decrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec
-_initial_blocks_decrypted:
-       cmp     $0, %r13
-       je      _zero_cipher_left_decrypt
-       sub     $64, %r13
-       je      _four_cipher_left_decrypt
-_decrypt_by_4:
-       GHASH_4_ENCRYPT_4_PARALLEL_DEC  %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec
-       add     $64, %r11
-       sub     $64, %r13
-       jne     _decrypt_by_4
-_four_cipher_left_decrypt:
-       GHASH_LAST_4    %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_decrypt:
-       mov     %arg4, %r13
-       and     $15, %r13                               # %r13 = arg4 (mod 16)
-       je      _multiple_of_16_bytes_decrypt
-
-        # Handle the last <16 byte block separately
-
-       paddd ONE(%rip), %xmm0         # increment CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10, %xmm0
-
-       ENCRYPT_SINGLE_BLOCK  %xmm0, %xmm1    # E(K, Yn)
-
-       lea (%arg3,%r11,1), %r10
-       mov %r13, %r12
-       READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-       lea ALL_F+16(%rip), %r12
-       sub %r13, %r12
-       movdqa  %xmm1, %xmm2
-       pxor %xmm1, %xmm0            # Ciphertext XOR E(K, Yn)
-       movdqu (%r12), %xmm1
-       # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0
-       pand %xmm1, %xmm0            # mask out top 16-%r13 bytes of %xmm0
-       pand    %xmm1, %xmm2
-        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10 ,%xmm2
-
-       pxor %xmm2, %xmm8
-       GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-
-        # output %r13 bytes
-       MOVQ_R64_XMM    %xmm0, %rax
-       cmp     $8, %r13
-       jle     _less_than_8_bytes_left_decrypt
-       mov     %rax, (%arg2 , %r11, 1)
-       add     $8, %r11
-       psrldq  $8, %xmm0
-       MOVQ_R64_XMM    %xmm0, %rax
-       sub     $8, %r13
-_less_than_8_bytes_left_decrypt:
-       mov     %al,  (%arg2, %r11, 1)
-       add     $1, %r11
-       shr     $8, %rax
-       sub     $1, %r13
-       jne     _less_than_8_bytes_left_decrypt
-_multiple_of_16_bytes_decrypt:
+       GCM_ENC_DEC dec
        GCM_COMPLETE
        FUNC_RESTORE
        ret
@@ -1417,98 +1443,7 @@ ENTRY(aesni_gcm_enc)
        FUNC_SAVE
 
        GCM_INIT
-        # Encrypt first few blocks
-
-       and     $(3<<4), %r12
-       jz      _initial_num_blocks_is_0_encrypt
-       cmp     $(2<<4), %r12
-       jb      _initial_num_blocks_is_1_encrypt
-       je      _initial_num_blocks_is_2_encrypt
-_initial_num_blocks_is_3_encrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc
-       sub     $48, %r13
-       jmp     _initial_blocks_encrypted
-_initial_num_blocks_is_2_encrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc
-       sub     $32, %r13
-       jmp     _initial_blocks_encrypted
-_initial_num_blocks_is_1_encrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc
-       sub     $16, %r13
-       jmp     _initial_blocks_encrypted
-_initial_num_blocks_is_0_encrypt:
-       INITIAL_BLOCKS_ENC_DEC  %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
-%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc
-_initial_blocks_encrypted:
-
-        # Main loop - Encrypt remaining blocks
-
-       cmp     $0, %r13
-       je      _zero_cipher_left_encrypt
-       sub     $64, %r13
-       je      _four_cipher_left_encrypt
-_encrypt_by_4_encrypt:
-       GHASH_4_ENCRYPT_4_PARALLEL_ENC  %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \
-%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc
-       add     $64, %r11
-       sub     $64, %r13
-       jne     _encrypt_by_4_encrypt
-_four_cipher_left_encrypt:
-       GHASH_LAST_4    %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
-%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
-_zero_cipher_left_encrypt:
-       mov     %arg4, %r13
-       and     $15, %r13                       # %r13 = arg4 (mod 16)
-       je      _multiple_of_16_bytes_encrypt
-
-         # Handle the last <16 Byte block separately
-       paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
-        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10, %xmm0
-
-       ENCRYPT_SINGLE_BLOCK    %xmm0, %xmm1        # Encrypt(K, Yn)
-
-       lea (%arg3,%r11,1), %r10
-       mov %r13, %r12
-       READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
-
-       lea ALL_F+16(%rip), %r12
-       sub %r13, %r12
-       pxor    %xmm1, %xmm0            # Plaintext XOR Encrypt(K, Yn)
-       movdqu  (%r12), %xmm1
-       # get the appropriate mask to mask out top 16-r13 bytes of xmm0
-       pand    %xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
-        movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10,%xmm0
-
-       pxor    %xmm0, %xmm8
-       GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
-       # GHASH computation for the last <16 byte block
-       movdqa SHUF_MASK(%rip), %xmm10
-       PSHUFB_XMM %xmm10, %xmm0
-
-       # shuffle xmm0 back to output as ciphertext
-
-        # Output %r13 bytes
-       MOVQ_R64_XMM %xmm0, %rax
-       cmp $8, %r13
-       jle _less_than_8_bytes_left_encrypt
-       mov %rax, (%arg2 , %r11, 1)
-       add $8, %r11
-       psrldq $8, %xmm0
-       MOVQ_R64_XMM %xmm0, %rax
-       sub $8, %r13
-_less_than_8_bytes_left_encrypt:
-       mov %al,  (%arg2, %r11, 1)
-       add $1, %r11
-       shr $8, %rax
-       sub $1, %r13
-       jne _less_than_8_bytes_left_encrypt
-_multiple_of_16_bytes_encrypt:
-_return_T_encrypt:
+       GCM_ENC_DEC enc
        GCM_COMPLETE
        FUNC_RESTORE
        ret
-- 
2.9.5
