On Fri, 28 Nov 2025 06:01:26 GMT, Jiangli Zhou <[email protected]> wrote:
>> Please review the fix in StubGenerator::aesgcm_avx512 and
>> StubGenerator::aesgcm_avx2 to handle some edge cases with input sizes that
>> are not multiple of the block size.
>>
>> Thanks to Thomas Holenstein and Lukas Zobernig for analyzing the issue and
>> providing the test case!
>
> Jiangli Zhou has updated the pull request incrementally with one additional
> commit since the last revision:
>
> Change to break before operators.
Oh man, the `pos`/`len` modifications in current code are confusing. I
scratched my head for quite a while trying to comprehend why does `__
bind(MESG_BELOW_32_BLKS)` split the `pos += 16` and `len -= 16`? On a surface,
that just looks like a bug.
But looks that way because we do `initial_blocks_16_avx512` twice, do `pos +=
16` twice, but only do the `len += 32` after the second call. Which does not
help if we shortcut after the first call. In fact, I am not at all sure that
checking `len < 32` _without_ modifying `len` beforehand does not break the
assumptions downstream:
initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl, CTR_CHECK,
rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234, SHUF_MASK,
stack_offset);
__ addl(pos, 16 * 16);
__ cmpl(len, 32 * 16);
__ jcc(Assembler::below, MESG_BELOW_32_BLKS);
Really, in these kind of intrinsics, _you want_ to make sure `pos` and `len`
updates are tightly bound together, otherwise these kinds of mistakes would
keep happening. You will lose on code density a bit, but would have more
readable and robust code.
Shouldn't it be like this?
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
index 1e728ffa279..a16e25b075d 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_aes.cpp
@@ -3475,12 +3475,14 @@ void StubGenerator::aesgcm_avx512(Register in, Register
len, Register ct, Regist
initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl,
CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK, stack_offset);
__ addl(pos, 16 * 16);
+ __ subl(len, 16 * 16);
+
__ cmpl(len, 32 * 16);
__ jcc(Assembler::below, MESG_BELOW_32_BLKS);
initial_blocks_16_avx512(in, out, ct, pos, key, avx512_subkeyHtbl,
CTR_CHECK, rounds, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK, stack_offset + 16);
__ addl(pos, 16 * 16);
- __ subl(len, 32 * 16);
+ __ subl(len, 16 * 16);
__ cmpl(len, 32 * 16);
__ jcc(Assembler::below, NO_BIG_BLKS);
@@ -3491,24 +3493,27 @@ void StubGenerator::aesgcm_avx512(Register in, Register
len, Register ct, Regist
ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl,
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK,
true, true, false, false, false,
ghashin_offset, aesout_offset, HashKey_32);
__ addl(pos, 16 * 16);
+ __ subl(len, 16 * 16);
ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl,
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK,
true, false, true, false, true,
ghashin_offset + 16, aesout_offset + 16, HashKey_16);
__ evmovdquq(AAD_HASHx, ZTMP4, Assembler::AVX_512bit);
__ addl(pos, 16 * 16);
- __ subl(len, 32 * 16);
+ __ subl(len, 16 * 16);
__ jmp(ENCRYPT_BIG_BLKS_NO_HXOR);
__ bind(ENCRYPT_BIG_NBLKS);
ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl,
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK,
false, true, false, false, false,
ghashin_offset, aesout_offset, HashKey_32);
__ addl(pos, 16 * 16);
+ __ subl(len, 16 * 16);
+
ghash16_encrypt_parallel16_avx512(in, out, ct, pos, avx512_subkeyHtbl,
CTR_CHECK, rounds, key, CTR_BLOCKx, AAD_HASHx, ADDBE_4x4, ADDBE_1234, ADD_1234,
SHUF_MASK,
false, false, true, true, true,
ghashin_offset + 16, aesout_offset + 16, HashKey_16);
__ movdqu(AAD_HASHx, ZTMP4);
__ addl(pos, 16 * 16);
- __ subl(len, 32 * 16);
+ __ subl(len, 16 * 16);
__ bind(NO_BIG_BLKS);
__ cmpl(len, 16 * 16);
@@ -3525,9 +3530,9 @@ void StubGenerator::aesgcm_avx512(Register in, Register
len, Register ct, Regist
ghash16_avx512(false, true, false, false, true, in, pos, avx512_subkeyHtbl,
AAD_HASHx, SHUF_MASK, stack_offset, 16 * 16, 0, HashKey_16);
__ addl(pos, 16 * 16);
+ __ subl(len, 16 * 16);
__ bind(MESG_BELOW_32_BLKS);
- __ subl(len, 16 * 16);
gcm_enc_dec_last_avx512(len, in, pos, AAD_HASHx, SHUF_MASK,
avx512_subkeyHtbl, ghashin_offset, HashKey_16, true, true);
__ bind(GHASH_DONE);
-------------
PR Review: https://git.openjdk.org/jdk/pull/28363#pullrequestreview-3518173513