On Thu, 20 Feb 2025 17:33:18 GMT, Ferenc Rakoczi <d...@openjdk.org> wrote:
>> By using the aarch64 vector registers the speed of the computation of the >> ML-DSA algorithms (key generation, document signing, signature verification) >> can be approximately doubled. > > Ferenc Rakoczi has updated the pull request incrementally with four > additional commits since the last revision: > > - Accepting suggested change from Andrew Dinn > - Added comments suggested by Andrew Dinn > - Fixed copyright years > - renaming a couple of functions src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4683: > 4681: __ mulv(v19, __ T4S, v7, v19); > 4682: > 4683: __ mulv(v16, __ T4S, v16, v30); __ mulv(v16, __ T4S, v16, v30); // m = aLow * qinv src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4688: > 4686: __ mulv(v19, __ T4S, v19, v30); > 4687: > 4688: __ sqdmulh(v16, __ T4S, v16, v31); __ sqdmulh(v16, __ T4S, v16, v31); // n = hi32(2 * m * q) src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4693: > 4691: __ sqdmulh(v19, __ T4S, v19, v31); > 4692: > 4693: __ shsubv(v16, __ T4S, v24, v16); __ shsubv(v16, __ T4S, v24, v16); // a = (aHigh - n) / 2 src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4698: > 4696: __ shsubv(v19, __ T4S, v27, v19); > 4697: > 4698: __ subv(v1, __ T4S, v0, v16); __ subv(v1, __ T4S, v0, v16); // x1 = x - a src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4703: > 4701: __ subv(v7, __ T4S, v6, v19); > 4702: > 4703: __ addv(v0, __ T4S, v0, v16); __ addv(v0, __ T4S, v0, v16); // x0 = x + a src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4742: > 4740: > 4741: for (int i = 0; i < 4; i++) { > 4742: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4813: > 4811: // level 5 > 4812: for (int i = 0; i < 1024; i += 256) { > 4813: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4853: > 4851: // 
level 6 > 4852: for (int i = 0; i < 1024; i += 128) { > 4853: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4876: > 4874: // level 7 > 4875: for (int i = 0; i < 1024; i += 128) { > 4876: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4905: > 4903: > 4904: void dilithium_sub_add_montmul16() { > 4905: __ subv(v20, __ T4S, v0, v1); __ subv(v20, __ T4S, v0, v1); // b = x0 - x1 src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4910: > 4908: __ subv(v23, __ T4S, v6, v7); > 4909: > 4910: __ addv(v0, __ T4S, v0, v1); __ addv(v0, __ T4S, v0, v1); // a0 = x0 + x1 src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4915: > 4913: __ addv(v6, __ T4S, v6, v7); > 4914: > 4915: __ sqdmulh(v24, __ T4S, v20, v16); __ sqdmulh(v24, __ T4S, v20, v16); // aHigh = hi32(2 * b * c) __ mulv(v1, __ T4S, v20, v16); // aLow = lo32(b * c) src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4924: > 4922: __ mulv(v7, __ T4S, v23, v19); > 4923: > 4924: __ mulv(v1, __ T4S, v1, v30); __ mulv(v1, __ T4S, v1, v30); // m = aLow * qinv src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4929: > 4927: __ mulv(v7, __ T4S, v7, v30); > 4928: > 4929: __ sqdmulh(v1, __ T4S, v1, v31); __ sqdmulh(v1, __ T4S, v1, v31); // n = hi32(2 * m * q) src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 4934: > 4932: __ sqdmulh(v7, __ T4S, v7, v31); > 4933: > 4934: __ shsubv(v1, __ T4S, v24, v1); __ shsubv(v1, __ T4S, v24, v1); // a1 = (aHigh - n) / 2 src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5044: > 5042: // level0 > 5043: for (int i = 0; i < 1024; i += 128) { > 5044: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5115: > 5113: __ str(v31, __ Q, 
Address(coeffs, i + 224)); > 5114: dilithium_load32zetas(zetas); > 5115: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5166: > 5164: __ lea(dilithiumConsts, ExternalAddress((address) > StubRoutines::aarch64::_dilithiumConsts)); > 5165: > 5166: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q __ ldr(v29, __ Q, Address(dilithiumConsts, 48)); // rsquare src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp line 5228: > 5226: __ lea(dilithiumConsts, ExternalAddress((address) > StubRoutines::aarch64::_dilithiumConsts)); > 5227: > 5228: __ ldpq(v30, v31, Address(dilithiumConsts, 0)); __ ldpq(v30, v31, Address(dilithiumConsts, 0)); // qinv, q ------------- PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967863821 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967864748 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967865658 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866379 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967866822 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967867752 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967869143 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870036 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967870373 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871386 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967871949 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967872681 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873281 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967873918 PR Review Comment: 
https://git.openjdk.org/jdk/pull/23300#discussion_r1967874418 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967875655 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967876745 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967877717 PR Review Comment: https://git.openjdk.org/jdk/pull/23300#discussion_r1967878884