Maamoun TK <[email protected]> writes: > I considered using m4 macros, but m4 "mangles" parameter names, so it becomes > hard for the reader to keep track of the macro body. However, I'm still open to > changing it to m4 macros if you like.
The patch below seems to work. It's a drawback that m4 doesn't have named parameters, only $1, $2, ..., but I think it's good to be consistent, and I don't think the names "param1" and "param2" are that helpful compared to $2, $3. But it would be nice to have a bit more documentation of the macros. And for the registers, at least group them to make it clear which registers are the input data (C0 -- C3?) or other state (e.g., accumulators), which registers are used for the precomputed key-dependent parameters, and which registers are short-lived temporaries. In other news, I've applied for an account at https://linuxone.cloud.marist.edu, but it seems there's some manual review involved, so it's not completed yet. Regards, /Niels diff --git a/arm64/crypto/gcm-hash.asm b/arm64/crypto/gcm-hash.asm index b77b08d6..f86fb504 100644 --- a/arm64/crypto/gcm-hash.asm +++ b/arm64/crypto/gcm-hash.asm @@ -47,21 +47,22 @@ define(`R', `v18') define(`R1', `v19') C common macros: -.macro PMUL in, param1, param2 - pmull F.1q,\param2\().1d,\in\().1d - pmull2 F1.1q,\param2\().2d,\in\().2d - pmull R.1q,\param1\().1d,\in\().1d - pmull2 R1.1q,\param1\().2d,\in\().2d +C PMUL(in, param1, param2) +define(`PMUL', m4_assert_numargs(3)` + pmull F.1q,$3.1d,$1.1d + pmull2 F1.1q,$3.2d,$1.2d + pmull R.1q,$2.1d,$1.1d + pmull2 R1.1q,$2.2d,$1.2d eor F.16b,F.16b,F1.16b eor R.16b,R.16b,R1.16b -.endm - -.macro REDUCTION out +') +C REDUCTION(out) +define(`REDUCTION', m4_assert_numargs(1)` pmull T.1q,F.1d,POLY.1d eor R.16b,R.16b,T.16b ext R.16b,R.16b,R.16b,#8 - eor \out\().16b,F.16b,R.16b -.endm + eor $1.16b,F.16b,R.16b +') C void gcm_init_key (union gcm_block *table) @@ -101,13 +102,14 @@ define(`H3L', `v28') define(`H4M', `v29') define(`H4L', `v30') -.macro PMUL_PARAM in, param1, param2 - pmull2 Hp.1q,\in\().2d,POLY.2d - eor Hm.16b,\in\().16b,Hp.16b - ext \param1\().16b,Hm.16b,\in\().16b,#8 - ext \param2\().16b,\in\().16b,Hm.16b,#8 - ext \param1\().16b,\param1\().16b,\param1\().16b,#8 -.endm +C PMUL_PARAM(in, param1, 
param2) +define(`PMUL_PARAM', m4_assert_numargs(3)` + pmull2 Hp.1q,$1.2d,POLY.2d + eor Hm.16b,$1.16b,Hp.16b + ext $2.16b,Hm.16b,$1.16b,#8 + ext $3.16b,$1.16b,Hm.16b,#8 + ext $2.16b,$2.16b,$2.16b,#8 +') PROLOGUE(_nettle_gcm_init_key) add x1,TABLE,#16*H_Idx @@ -138,13 +140,13 @@ IF_LE(` C --- calculate H^2 = H*H --- - PMUL_PARAM H,H1M,H1L + PMUL_PARAM(H,H1M,H1L) - PMUL H,H1M,H1L + PMUL(H,H1M,H1L) - REDUCTION H2 + REDUCTION(H2) - PMUL_PARAM H2,H2M,H2L + PMUL_PARAM(H2,H2M,H2L) C we store to the table as doubleword-vectors in current memory endianness C because it's our own strictly internal data structure and what gcm_hash @@ -153,19 +155,19 @@ IF_LE(` C --- calculate H^3 = H^1*H^2 --- - PMUL H2,H1M,H1L + PMUL(H2,H1M,H1L) - REDUCTION H3 + REDUCTION(H3) - PMUL_PARAM H3,H3M,H3L + PMUL_PARAM(H3,H3M,H3L) C --- calculate H^4 = H^2*H^2 --- - PMUL H2,H2M,H2L + PMUL(H2,H2M,H2L) - REDUCTION H4 + REDUCTION(H4) - PMUL_PARAM H4,H4M,H4L + PMUL_PARAM(H4,H4M,H4L) st1 {H3M.2d,H3L.2d,H4M.2d,H4L.2d},[TABLE] @@ -197,16 +199,17 @@ define(`H3L', `v29') define(`H4M', `v30') define(`H4L', `v31') -.macro PMUL_SUM in, param1, param2 - pmull F2.1q,\param2\().1d,\in\().1d - pmull2 F3.1q,\param2\().2d,\in\().2d - pmull R2.1q,\param1\().1d,\in\().1d - pmull2 R3.1q,\param1\().2d,\in\().2d +C PMUL_SUM(in, param1, param2) +define(`PMUL_SUM', m4_assert_numargs(3)` + pmull F2.1q,$3.1d,$1.1d + pmull2 F3.1q,$3.2d,$1.2d + pmull R2.1q,$2.1d,$1.1d + pmull2 R3.1q,$2.2d,$1.2d eor F2.16b,F2.16b,F3.16b eor R2.16b,R2.16b,R3.16b eor F.16b,F.16b,F2.16b eor R.16b,R.16b,R2.16b -.endm +') C void gcm_hash (const struct gcm_key *key, union gcm_block *x, C size_t length, const uint8_t *data) @@ -238,12 +241,12 @@ IF_LE(` eor C0.16b,C0.16b,D.16b - PMUL C1,H3M,H3L - PMUL_SUM C2,H2M,H2L - PMUL_SUM C3,H1M,H1L - PMUL_SUM C0,H4M,H4L + PMUL(C1,H3M,H3L) + PMUL_SUM(C2,H2M,H2L) + PMUL_SUM(C3,H1M,H1L) + PMUL_SUM(C0,H4M,H4L) - REDUCTION D + REDUCTION(D) subs x4,x4,#64 b.ne L4x_loop @@ -264,10 +267,10 @@ IF_LE(` eor 
C0.16b,C0.16b,D.16b - PMUL C1,H1M,H1L - PMUL_SUM C0,H2M,H2L + PMUL(C1,H1M,H1L) + PMUL_SUM(C0,H2M,H2L) - REDUCTION D + REDUCTION(D) and LENGTH,LENGTH,#31 @@ -284,9 +287,9 @@ IF_LE(` eor C0.16b,C0.16b,D.16b - PMUL C0,H1M,H1L + PMUL(C0,H1M,H1L) - REDUCTION D + REDUCTION(D) Lmod: tst LENGTH,#15 @@ -325,9 +328,9 @@ Lmod_8_load: Lmod_8_done: eor C0.16b,C0.16b,D.16b - PMUL C0,H1M,H1L + PMUL(C0,H1M,H1L) - REDUCTION D + REDUCTION(D) Ldone: IF_LE(` -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ nettle-bugs mailing list [email protected] http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs
