[email protected] (Niels Möller) writes:
> I've tried out this mod function (for 64-bit):
>
> static void
> ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
...
> This gives a speedup of 85% over the general ecc_mod (on my machine),
> and gives about 35% speedup for scalar multiplication (both mul_g and
> mul_a). So with this change, performance of mul_g and mul_a is roughly
> midway between secp384 and secp521.
I tried a first implementation of an x86_64 mod function (below). It gives
a speedup of almost three times over the above C function. With this, the
mul_g operation is 20% slower than for secp384, and the mul_a operation
is slightly faster.
Regards,
/Niels
diff --git a/configure.ac b/configure.ac
index 3547cae4..2933facf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -476,7 +476,8 @@ asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \
asm_hogweed_optional_list=""
if test "x$enable_public_key" = "xyes" ; then
asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \
- ecc-25519-modp.asm ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm"
+ ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm \
+ ecc-25519-modp.asm ecc-curve448-modp.asm"
fi
OPT_NETTLE_OBJS=""
@@ -580,6 +581,7 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_ecc_256_redc
#undef HAVE_NATIVE_ecc_384_modp
#undef HAVE_NATIVE_ecc_384_redc
+#undef HAVE_NATIVE_ecc_curve448_modp
#undef HAVE_NATIVE_ecc_521_modp
#undef HAVE_NATIVE_ecc_521_redc
#undef HAVE_NATIVE_gcm_hash8
diff --git a/ecc-448.c b/ecc-448.c
index 7d68e1c8..2e840024 100644
--- a/ecc-448.c
+++ b/ecc-448.c
@@ -45,7 +45,11 @@
#include "ecc-448.h"
-#if GMP_NUMB_BITS == 64
+#if HAVE_NATIVE_ecc_curve448_modp
+#define ecc_448_modp nettle_ecc_curve448_modp
+void
+ecc_448_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+#elif GMP_NUMB_BITS == 64
static void
ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
{
diff --git a/x86_64/ecc-curve448-modp.asm b/x86_64/ecc-curve448-modp.asm
new file mode 100644
index 00000000..5ce81960
--- /dev/null
+++ b/x86_64/ecc-curve448-modp.asm
@@ -0,0 +1,141 @@
+C x86_64/ecc-curve448-modp.asm
+
+ifelse(<
+ Copyright (C) 2019 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+>)
+
+ .file "ecc-curve448-modp.asm"
+
+define(<RP>, <%rsi>)
+define(<X0>, <%rax>)
+define(<X1>, <%rbx>)
+define(<X2>, <%rcx>)
+define(<X3>, <%rdx>)
+define(<X4>, <%rbp>)
+define(<X5>, <%rdi>)
+define(<X6>, <%r8>)
+define(<X7>, <%r9>)
+define(<T0>, <%r10>)
+define(<T1>, <%r11>)
+define(<T2>, <%r12>)
+C nettle_ecc_curve448_modp (m, rp): fold the 14 64-bit limbs rp[0..13] into 7 limbs written back to rp[0..6], using 2^448 = 2^224 + 1 (mod p). No final comparison against p is made here.
+PROLOGUE(nettle_ecc_curve448_modp)
+ W64_ENTRY(2, 0)
+ C RP is %rsi and X5 is %rdi: the code only ever reads memory through RP, and %rdi is overwritten as scratch. NOTE(review): W64_ENTRY is defined elsewhere; presumably it maps the Windows argument registers - confirm.
+ push %rbx
+ push %rbp
+ push %r12
+ C %rbx, %rbp and %r12 are saved above and restored at the end. rp[i] lives at byte offset 8*i from RP.
+ C First load the values to be shifted by 32. T0..T2 keep unshifted copies of rp[11..13] for the doubling step.
+ mov 88(RP), X1
+ mov X1, X0
+ mov 96(RP), X2
+ mov X1, T0
+ mov 104(RP), X3
+ mov X2, T1
+ mov 56(RP), X4
+ mov X3, T2
+ mov 64(RP), X5
+ mov 72(RP), X6
+ mov 80(RP), X7
+ C Multiply by 2^32: afterwards X0..X7 (least significant first) hold
+ C 2^32 * {rp[11], rp[12], rp[13], rp[7], rp[8], rp[9], rp[10]}; X7 is the high 32 bits of rp[10].
+ shl $32, X0
+ shrd $32, X2, X1
+ shrd $32, X3, X2
+ shrd $32, X4, X3
+ shrd $32, X5, X4
+ shrd $32, X6, X5
+ shrd $32, X7, X6
+ shr $32, X7
+ C Multiply by 2: T0..T2 = 2 * {rp[11], rp[12], rp[13]};
+ C the bit shifted out of T2 is added to X7.
+ add T0, T0
+ adc T1, T1
+ adc T2, T2
+ adc $0, X7
+ C Main additions: add rp[7..10] to X0..X3 and the doubled
+ C T0..T2 to X4..X6; the carry out accumulates in X7.
+ add 56(RP), X0
+ adc 64(RP), X1
+ adc 72(RP), X2
+ adc 80(RP), X3
+ adc T0, X4
+ adc T1, X5
+ adc T2, X6
+ adc $0, X7
+ C Add the low half rp[0..6]; the carry out again accumulates in X7.
+ add (RP), X0
+ adc 8(RP), X1
+ adc 16(RP), X2
+ adc 24(RP), X3
+ adc 32(RP), X4
+ adc 40(RP), X5
+ adc 48(RP), X6
+ adc $0, X7
+ C X7 wraparound: X7 has weight 2^448, so add it at bit 0 and at bit 224 (T0 = low part into X3, T1 = high part into X4); the carry out is collected in T2.
+ mov X7, T0
+ mov X7, T1
+ shl $32, T0
+ shr $32, T1
+ xor T2, T2
+ add X7, X0
+ adc $0, X1
+ adc $0, X2
+ adc T0, X3
+ adc T1, X4
+ adc $0, X5
+ adc $0, X6
+ adc $0, T2
+ C Final carry wraparound: fold T2 (0 or 1) the same way. If T2 is set, the sum above wrapped past 2^448, leaving X5 and X6 zero, so this pass cannot carry out.
+ mov T2, T0
+ shl $32, T0
+ C Each limb is stored to rp[0..6] as soon as it is final; the interleaved mov instructions do not modify the carry flag.
+ add T2, X0
+ mov X0, (RP)
+ adc $0, X1
+ mov X1, 8(RP)
+ adc $0, X2
+ mov X2, 16(RP)
+ adc T0, X3
+ mov X3, 24(RP)
+ adc $0, X4
+ mov X4, 32(RP)
+ adc $0, X5
+ mov X5, 40(RP)
+ adc $0, X6
+ mov X6, 48(RP)
+
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ W64_EXIT(2, 0)
+ ret
+EPILOGUE(nettle_ecc_curve448_modp)
--
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
_______________________________________________
nettle-bugs mailing list
[email protected]
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs