[email protected] (Niels Möller) writes:

> I've tried out this mod function (for 64-bit):
>
> static void
> ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
...

> This gives a speedup of 85% over the general ecc_mod (on my machine),
> and gives about 35% speedup for scalar multiplication (both mul_g and
> mul_a). So with this change, performance of mul_g and mul_1 is roughly
> midway between secp384 and secp521.

I tried the first implementation of an x86_64 mod function, included
below. It gives a speedup of almost three times over the above C
function. With this, the mul_g operation is 20% slower than for secp384,
and the mul_a operation is slightly faster.

Regards,
/Niels

diff --git a/configure.ac b/configure.ac
index 3547cae4..2933facf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -476,7 +476,8 @@ asm_nettle_optional_list="gcm-hash8.asm cpuid.asm \
 asm_hogweed_optional_list=""
 if test "x$enable_public_key" = "xyes" ; then
   asm_hogweed_optional_list="ecc-192-modp.asm ecc-224-modp.asm \
-    ecc-25519-modp.asm ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm"
+    ecc-256-redc.asm ecc-384-modp.asm ecc-521-modp.asm \
+    ecc-25519-modp.asm ecc-curve448-modp.asm"
 fi
 
 OPT_NETTLE_OBJS=""
@@ -580,6 +581,7 @@ AH_VERBATIM([HAVE_NATIVE],
 #undef HAVE_NATIVE_ecc_256_redc
 #undef HAVE_NATIVE_ecc_384_modp
 #undef HAVE_NATIVE_ecc_384_redc
+#undef HAVE_NATIVE_ecc_curve448_modp
 #undef HAVE_NATIVE_ecc_521_modp
 #undef HAVE_NATIVE_ecc_521_redc
 #undef HAVE_NATIVE_gcm_hash8
diff --git a/ecc-448.c b/ecc-448.c
index 7d68e1c8..2e840024 100644
--- a/ecc-448.c
+++ b/ecc-448.c
@@ -45,7 +45,11 @@
 
 #include "ecc-448.h"
 
-#if GMP_NUMB_BITS == 64
+#if HAVE_NATIVE_ecc_curve448_modp
+#define ecc_448_modp nettle_ecc_curve448_modp
+void
+ecc_448_modp (const struct ecc_modulo *m, mp_limb_t *rp);
+#elif GMP_NUMB_BITS == 64
 static void
 ecc_448_modp(const struct ecc_modulo *m, mp_limb_t *rp)
 {
diff --git a/x86_64/ecc-curve448-modp.asm b/x86_64/ecc-curve448-modp.asm
new file mode 100644
index 00000000..5ce81960
--- /dev/null
+++ b/x86_64/ecc-curve448-modp.asm
@@ -0,0 +1,141 @@
+C x86_64/ecc-curve448-modp.asm
+
+ifelse(<
+   Copyright (C) 2019 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+>)
+
+       .file "ecc-curve448-modp.asm"
+C Register roles: RP = limb pointer rp; X0-X7 = running sum (X7 = bits from 2^448 up); T0-T2 = scratch.
+define(<RP>, <%rsi>)
+define(<X0>, <%rax>)
+define(<X1>, <%rbx>)
+define(<X2>, <%rcx>)
+define(<X3>, <%rdx>)
+define(<X4>, <%rbp>)
+define(<X5>, <%rdi>)
+define(<X6>, <%r8>)
+define(<X7>, <%r9>)
+define(<T0>, <%r10>)
+define(<T1>, <%r11>)
+define(<T2>, <%r12>)
+C Called as (m, rp) with two arguments; m is never read, and its register %rdi is reused as X5.
+PROLOGUE(nettle_ecc_curve448_modp)
+       W64_ENTRY(2, 0)
+       C Save %rbx, %rbp and %r12, which are used as X1, X4 and T2.
+       push    %rbx
+       push    %rbp
+       push    %r12
+
+       C Load limbs 7-13, the values to be shifted by 32; T0-T2 keep limbs 11-13 for doubling.
+       mov 88(RP), X1
+       mov X1, X0
+       mov 96(RP), X2
+       mov X1, T0
+       mov 104(RP), X3
+       mov X2, T1
+       mov 56(RP), X4
+       mov X3, T2
+       mov 64(RP), X5
+       mov 72(RP), X6
+       mov 80(RP), X7
+
+       C Multiply by 2^32: (X0..X7) = limbs 11-13 at bit 32, followed by limbs 7-10 at bit 224.
+       shl $32, X0
+       shrd $32, X2, X1
+       shrd $32, X3, X2
+       shrd $32, X4, X3
+       shrd $32, X5, X4
+       shrd $32, X6, X5
+       shrd $32, X7, X6
+       shr $32, X7
+
+       C Multiply by 2: (T0,T1,T2) = 2 * limbs 11-13; the bit shifted out is added to X7.
+       add T0, T0
+       adc T1, T1
+       adc T2, T2
+       adc $0, X7
+
+       C Main additions: add limbs 7-10 at bit 0 and (T0,T1,T2) at bit 256, carry into X7.
+       add 56(RP), X0
+       adc 64(RP), X1
+       adc 72(RP), X2
+       adc 80(RP), X3
+       adc T0, X4
+       adc T1, X5
+       adc T2, X6
+       adc $0, X7
+       C Add the low half of the input, limbs 0-6, again with the carry going into X7.
+       add (RP), X0
+       adc 8(RP), X1
+       adc 16(RP), X2
+       adc 24(RP), X3
+       adc 32(RP), X4
+       adc 40(RP), X5
+       adc 48(RP), X6
+       adc $0, X7
+       C X7 wraparound: add X7 at bit 0 and at bit 224 (split over X3, X4); carry out goes to T2.
+       mov X7, T0
+       mov X7, T1
+       shl $32, T0
+       shr $32, T1
+       xor T2, T2
+       add X7, X0
+       adc $0, X1
+       adc $0, X2
+       adc T0, X3
+       adc T1, X4
+       adc $0, X5
+       adc $0, X6
+       adc $0, T2
+       C Final carry wraparound: add T2 at bit 0 and at bit 224, storing each limb as it is done.
+       mov T2, T0
+       shl $32, T0
+       C NOTE(review): the carry out of the last adc below is dropped; presumably it cannot occur -- confirm.
+       add T2, X0
+       mov X0, (RP)
+       adc $0, X1
+       mov X1, 8(RP)
+       adc $0, X2
+       mov X2, 16(RP)
+       adc T0, X3
+       mov X3, 24(RP)
+       adc $0, X4
+       mov X4, 32(RP)
+       adc $0, X5
+       mov X5, 40(RP)
+       adc $0, X6
+       mov X6, 48(RP)
+
+       pop     %r12
+       pop     %rbp
+       pop     %rbx
+
+       W64_EXIT(2, 0)
+       ret
+EPILOGUE(nettle_ecc_curve448_modp)

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
_______________________________________________
nettle-bugs mailing list
[email protected]
http://lists.lysator.liu.se/mailman/listinfo/nettle-bugs

Reply via email to