Re: Deterministic (EC)DSA

2024-05-22 Thread Niels Möller
nsigned limb_size;
 
   int res;
 
@@ -78,7 +81,10 @@ dsa_verify(const struct dsa_params *params,
   mpz_init(v);
 
   /* The message digest */
-  _nettle_dsa_hash (tmp, mpz_sizeinbase (params->q, 2), digest_size, digest);
+  bit_size = mpz_sizeinbase(params->q, 2);
+  limb_size = NETTLE_BIT_SIZE_TO_LIMB_SIZE(bit_size);
+  _nettle_dsa_hash (mpz_limbs_write (tmp, limb_size), bit_size, digest_size, 
digest);
+  mpz_limbs_finish (tmp, limb_size);
   
   /* v = g^{w * h (mod q)} (mod p)  */
   mpz_mul(tmp, tmp, w);
diff --git a/ecc-ecdsa-sign.c b/ecc-ecdsa-sign.c
index 6a41c14c..522a04d4 100644
--- a/ecc-ecdsa-sign.c
+++ b/ecc-ecdsa-sign.c
@@ -40,6 +40,7 @@
 
 #include "ecdsa.h"
 #include "ecc-internal.h"
+#include "dsa-internal.h"
 
 /* Low-level ECDSA signing */
 
@@ -87,7 +88,7 @@ ecc_ecdsa_sign (const struct ecc_curve *ecc,
   ecc->q.invert (>q, kinv, kp, tp);
   
   /* Process hash digest */
-  ecc_hash (>q, hp, length, digest);
+  _nettle_dsa_hash (hp, ecc->q.bit_size, length, digest);
 
   ecc_mod_mul (>q, tp, zp, rp, tp);
   ecc_mod_add (>q, hp, hp, tp);
diff --git a/ecc-ecdsa-verify.c b/ecc-ecdsa-verify.c
index 9e324ea2..6481b6c3 100644
--- a/ecc-ecdsa-verify.c
+++ b/ecc-ecdsa-verify.c
@@ -40,6 +40,7 @@
 
 #include "ecdsa.h"
 #include "ecc-internal.h"
+#include "dsa-internal.h"
 
 /* Low-level ECDSA verify */
 
@@ -101,7 +102,7 @@ ecc_ecdsa_verify (const struct ecc_curve *ecc,
   ecc->q.invert (>q, sinv, sp, sinv + ecc->p.size);
 
   /* u1 = h / s, P1 = u1 * G */
-  ecc_hash (>q, hp, length, digest);
+  _nettle_dsa_hash (hp, ecc->q.bit_size, length, digest);
   ecc_mod_mul_canonical (>q, u1, hp, sinv, u1);
 
   /* u2 = r / s, P2 = u2 * Y */
diff --git a/ecc-gostdsa-sign.c b/ecc-gostdsa-sign.c
index c811c87e..f5a0cf7b 100644
--- a/ecc-gostdsa-sign.c
+++ b/ecc-gostdsa-sign.c
@@ -79,7 +79,7 @@ ecc_gostdsa_sign (const struct ecc_curve *ecc,
   ecc_j_to_a (ecc, 2, rp, P, P + 3*ecc->p.size);
 
   /* Process hash digest */
-  gost_hash (>q, hp, length, digest);
+  _nettle_gostdsa_hash (hp, ecc->q.bit_size, length, digest);
   if (mpn_zero_p (hp, ecc->p.size))
 mpn_add_1 (hp, hp, ecc->p.size, 1);
 
diff --git a/ecc-gostdsa-verify.c b/ecc-gostdsa-verify.c
index 0570af7e..14c12335 100644
--- a/ecc-gostdsa-verify.c
+++ b/ecc-gostdsa-verify.c
@@ -93,7 +93,7 @@ ecc_gostdsa_verify (const struct ecc_curve *ecc,
 && ecdsa_in_range (ecc, sp)))
 return 0;
 
-  gost_hash (>q, hp, length, digest);
+  _nettle_gostdsa_hash (hp, ecc->q.bit_size, length, digest);
 
   if (mpn_zero_p (hp, ecc->p.size))
 mpn_add_1 (hp, hp, ecc->p.size, 1);
diff --git a/ecc-hash.c b/ecc-hash.c
deleted file mode 100644
index 07877110..
--- a/ecc-hash.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/* ecdsa-hash.c
-
-   Copyright (C) 2013 Niels Möller
-
-   This file is part of GNU Nettle.
-
-   GNU Nettle is free software: you can redistribute it and/or
-   modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
-   Software Foundation; either version 3 of the License, or (at your
-   option) any later version.
-
-   or
-
- * the GNU General Public License as published by the Free
-   Software Foundation; either version 2 of the License, or (at your
-   option) any later version.
-
-   or both in parallel, as here.
-
-   GNU Nettle is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received copies of the GNU General Public License and
-   the GNU Lesser General Public License along with this program.  If
-   not, see http://www.gnu.org/licenses/.
-*/
-
-/* Development of Nettle's ECC support was funded by the .SE Internet Fund. */
-
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "ecc-internal.h"
-
-/* Convert hash value to an integer. If the digest is larger than
-   the ecc bit size, then we must truncate it and use the leftmost
-   bits. */
-
-/* NOTE: We don't considered the hash value to be secret, so it's ok
-   if the running time of this conversion depends on h.
-
-   Requires m->size + 1 limbs, the extra limb may be needed for
-   unusual limb sizes.
-*/
-
-void
-ecc_hash (const struct ecc_modulo *m,
- mp_limb_t *hp,
- size_t length, const uint8_t *digest)
-{
-  if (length > ((size_t) m->bit_size + 7) / 8)
-length = (m->bit_size + 7) / 8;
-
-  mpn_set_base256 (hp, m->size + 1, digest, length);
-
-  if (8 * length > m->bit_size)
-/* We got a few extra bits, at the low end. Discard them. */
-mpn_rshift (hp, hp, m->size + 1, 8*length - m->bit_size);
-}
-
-void
-gost_hash (const struct ecc_modulo *m,
-  mp_limb_t *hp,
-   

Re: Deterministic (EC)DSA

2024-05-15 Thread Niels Möller
Daiki Ueno  writes:

> I agree, that's a valid use-case.  Perhaps this might suffice:
>
>   /* Sign DIGEST with KEY using deterministic nonce generation.
>*
>* DIGEST must be of the same length of SHA-256 output, that is 32
>* octets long. */
>   void
>   ecdsa_sha256_sign_deterministic (const struct ecc_scalar *key,
>const uint8_t *digest,
>struct dsa_signature *signature);

Looks good to me. Underlying more general helper functions
(however they are organized) can be internal only.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Deterministic (EC)DSA

2024-05-14 Thread Niels Möller
Daiki Ueno  writes:

> I have a slight feeling that the current API is not easy to use, as the
> caller needs to instantiate both hash and HMAC contexts, as there are no
> correlations between the two:

On a more abstract level, are there any reasonable usecases for having
one hash function for processing the message, and a different underlying
hash function for the hmac used to produce the deterministic nonce?

> What would you say to introducing a specialized API, something like the
> following?
>
>   void
>   ecdsa_sha256_sign_deterministic (const struct ecc_scalar *key,
>size_t length,
>const uint8_t *message,
>struct dsa_signature *signature);

I think that could make sense, but we also need an advertised api function
that takes the digest as input, for applications that want to process
the message using multiple _update calls.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH v2 1/2] powerpc64: Add optimized assembly for sha256-compress-n

2024-05-05 Thread Niels Möller
  VSR(VK), TK, K
> + addiTK, TK, 4
> +
> + DOLOADS
> +
> + C "permute" state from VSA containing A,B,C,D into VSA,VSB,VSC,VSD

Can you give a bit more detail on this permutation? Does the main round
operations only use 32 bits each from the state registers? There's no
reasonable way to use a more compact representation?

> + vsldoi  VSB, VSA, VSA, 4
> + vsldoi  VSF, VSE, VSE, 4
> +
> + vsldoi  VSC, VSA, VSA, 8
> + vsldoi  VSG, VSE, VSE, 8
> +
> + vsldoi  VSD, VSA, VSA, 12
> + vsldoi  VSH, VSE, VSE, 12
> +
> + EXTENDROUNDS
> + EXTENDROUNDS
> + EXTENDROUNDS
> + NOEXTENDROUNDS
> +
> + C Reload initial state from stack
> + li  T0, 16
> + lxvw4x  VSR(VT0), 0, STATE  C VSA contains A,B,C,D
> + lxvw4x  VSR(VT1), T0, STATE C VSE contains E,F,G,H
> +
> + C Repack VSA,VSB,VSC,VSD into VSA,VSE for storing
> + vmrghw  VSA, VSA, VSB
> + vmrghw  VSC, VSC, VSD
> + vmrghw  VSE, VSE, VSF
> + vmrghw  VSG, VSG, VSH
> +
> + xxmrghd VSR(VSA), VSR(VSA), VSR(VSC)
> + xxmrghd VSR(VSE), VSR(VSE), VSR(VSG)
> +
> + vadduwm VSA, VSA, VT0
> + vadduwm VSE, VSE, VT1

It seems unfortunate to have to do this conversion for each iteration of
the loop, it would be nice if state could be converted to the
most efficient form before entering the loop, and not converted back
until after loop exit. But we probably don't have enough registers to
keep the old state exploded into many registers. And load/store of
exploded state doesn't seem that attractive either.

> + li  T0, 16
> + stxvw4x VSR(VSA), 0, STATE
> + stxvw4x VSR(VSE), T0, STATE
> +
> + bdnz.loop

Regards,
/Niels Möller

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Deterministic (EC)DSA

2024-04-18 Thread Niels Möller
Daiki Ueno  writes:

> The attached patch adds support for the deterministic DSA and ECDSA, as
> defined in RFC 6979, which enables us to use the signing function
> without randomness.

Thanks, I've had a first read, and added some comments on your MR
(https://git.lysator.liu.se/nettle/nettle/-/merge_requests/64).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH 0/2] Add optimized powerpc64 assembly for SHA2

2024-04-17 Thread Niels Möller
Eric Richter  writes:

> Thanks for the suggestion! I moved the "% 16" into that eval to clean
> up those load calls.
>
> After a bit of fiddling with m4 though, it appears that this emits
> something like "v16" without applying the translation of v16 -> 16,
> causing the assembler to choke. I did manage to get it to work with a
> naive concatenation macro like this:
>
>define(`CONCAT', `$1$2')
>define(`IV', `CONCAT(v, eval((($1) % 16) + 16))')
>
> though I feel like there is a more elegant and clear solution.

I think m4_unquote (defined and briefly documented in m4-utils.m4) might
solve this problem. Not tested, but something like

   define(`IV', `m4_unquote(v`'eval((($1) % 16) + 16))')

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-04-14 Thread Niels Möller
Niels Möller  writes:

> I've added tests that set the intial counter so that the four counter
> bytes wraps around 2^32, and I've verified that if these instructions
> should be changed to vadduwm, to get output that agrees with nettle's
> other gcm implementations.

I've committed those fixes, and a fix for big-endian support, on the branch
ppc64-gcm-aes-rebased. I think that's now ready for merging.

I see some opportunities for further improvement, but that can be done
after merge, to aid consistency with related fixes to the other ppc64
assembly files.

> Another question on powerpc64 assembly: For the byte swapping, currently
> done using the vperm instruction and a mask word, is there any reason to
> not use the xxbrd instruction (VSX Vector Byte-Reverse Doubleword)
> instead? That applies to more functions than the new gcm-aes code.

A closer look at the spec indicated that xxbrd is only available from
power9 (i.e., if the processor supports VSX, *and* supports ISA 3.0, if
I've understood it correctly).

I think it would be a good idea to consistently use pseudoops like

  .machine "power8"

in the ppc assembly files, if that would let the assembler catch
accidental use of unavailable instructions.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-04-14 Thread Niels Möller
Daiki Ueno  writes:

> Yes, I've consolidated the description and put it at the introduction.

Thanks, merged now!
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-04-13 Thread Niels Möller
Daiki Ueno  writes:

> I'm attaching a patch to update the documentation.

Thanks.

> -@subsubsection @acronym{SHAKE-256}
> +@subsubsection @acronym{SHAKE-128}
>  @cindex SHAKE

I think heading should be just "shake".

> -In addition to those SHA-3 hash functions, Nettle also provides a SHA-3
> -extendable-output function (XOF), SHAKE-256. Unlike SHA-3 hash functions,
> -SHAKE can produce an output digest of any desired length.
> +In addition to those SHA-3 hash functions, Nettle also provides a
> +SHA-3 extendable-output function (XOF) called SHAKE. Unlike hash
> +functions, SHAKE can produce an output digest of any desired
> +length. There are two variants, SHAKE-128 and SHAKE-256, with
> +different security strengths in terms of collision or preimage
> +resistance.
> +
> +SHAKE-128 internally uses a SHA-3 hash function with 128-bit security
> +strength against second preimage attacks. The hash function is not
> +usable alone with Nettle, only for the use with SHAKE-128.

I think it would be good to write in the intro that shake-256
corresponds to sha3-256, while shake-128 uses sha3 with parameters
corresponding to 128-bit security, for which there's no corresponding
plain hash function defined.

It might also make sense to explain the difference between _shake and
_shake_output functions here, and make the description under each
function a bit shorter.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH 0/2] Add optimized powerpc64 assembly for SHA2

2024-04-04 Thread Niels Möller
Eric Richter  writes:

> I do have a macro though that calculates which register number contains
> the chunk of input data based on an index -- in other words, I use
> registers v16-v31 to hold the input data, the macro just adds 16 to the
> index to get the corresponding register. Right now it operates on raw
> register numbers, should I adjust this macro to be more clear that it
> is operating on vector registers in any way, or should I look into
> changing how that is done?

If it's this macro,

   C Convert an index for W[i] to the corresponding register
   define(`IV', `eval($1 + VW0)')

and the argument $1 is always a numerical expression, then I'd suggest
deleting the definitions of VW0 - VW15 (with only a comment to document
this register usage), and something like

   define(`IV', `v`'eval($1 + 16)')

You could also consider moving the % 16 operation into this macro,

   define(`IV', `v`'eval((($1) % 16) + 16)')

which should make it clear that it can't expand to a register outside of
the intended v16-v31 range.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Issue tracker

2024-03-29 Thread Niels Möller
Hi,

I've enabled the issue tracker at
https://git.lysator.liu.se/nettle/nettle/-/issues, and filed a few
issues for old TODO items, most of which imply ABI and/or API changes.
Feel free to file additional issues for bugs, feature requests, or to
track work you are doing or planning.

My intention is to (i) keep track of desirable or in-progress changes,
and (ii) use for release planning, e.g., use a tag or "milestone" to
attach to issues that we want to be included in the next release,
similar to how misc/plan.html has been used for some of the releases in
the past.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH 0/2] Add optimized powerpc64 assembly for SHA2

2024-03-28 Thread Niels Möller
Eric Richter  writes:

> This set introduces an optimized powerpc64 assembly implementation for
> SHA256 and SHA512. This have been derived from BSD-2-Clause licensed
> code authored by IBM, originally released in the IBM POWER
> Cryptography Reference Implementation project[1], modified to work in
> Nettle, contributed under the GPL license.
>
> Development of this new implementation targetted POWER 10, however
> supports the POWER 8 ISA and above. The following commits provide the
> performance data I recorded on POWER 10, though similar improvements can
> be found on P8/P9.

Thanks, I've had a first quick look. Nice speedup, and it looks pretty
good. I wasn't aware of the vshasigma instructions.

One comment on the Nettle ppc conventions: I prefer to use register
names rather than just register numbers; that helps me avoid some
confusion when some instructions take v1 registers and others take vs1
registers. Preferably by configuring with ASM_FLAGS=-mregnames during
development. For assemblers that don't like register names (seems to be
the default), machine.m4 arranges for translation from v1 --> 1, etc.

> As an aside: I have tested this patch set on POWER 8 and POWER 10
> hardware running little-endian linux distributions, however I have not
> yet been able to test on a big-endian distro. I can confirm however that
> the original source in IPCRI does compile and pass tests for both little
> and big endian via qemu-user, so spare human error in deriving the
> version for Nettle, it is expected to be functional.

There are big-endian tests in the ci pipeline (hosted on the mirror repo
at https://gitlab.com/gnutls/nettle), using cross-compiling + qemu-user.
And I also have a similar setup locally.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Naming for names in struct nettle_hash

2024-03-28 Thread Niels Möller
Niels Möller  writes:

> Hi, I've got a bug report that sha512_224 and sha512_256 are missing in
> the list returned by nettle_get_hashes, and I'm about to add them.
>
> But then there's a question of naming convention. Currently, the 
>
>   extern const struct nettle_hash nettle_sha512_256;
>
> includes a name field set to the string "sha512-256", which is somewhat
> inconsistent with, e.g., the struct nettle_sha3_256 which includes the
> name "sha3_256".
>
> Should I just change this (patch below)?

I've decided to change those names to use underscores.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-28 Thread Niels Möller
Daiki Ueno  writes:

> Yes, that looks good to me, except _nettle_sha3_shake has a
> copy-and-paste error where SHA3_256_BLOCK_SIZE is hard-coded.

Thanks, good catch.

>> 1. Decide what should be renamed sha3_shake256_*
>
> I guess we can live with the existing interface.  For SHAKE128, we could
> only provide sha3_128_init, sha3_128_update, and
> sha3_128_shake{,_output}, without sha3_128_digest.

Sounds good to me.

>> 2. Implement shake128.
>
> I've extracted it from the ML-KEM merge request and put it here:
> https://git.lysator.liu.se/nettle/nettle/-/merge_requests/63
>
> Not sending via email as it includes a huge test vector.

Thanks, merged to the sha3-shake-updates branch. Sorry if you didn't
intend me to do that right away (I noticed some minor problems after
merge, which I've fixed). I'd like to merge to master after ci runs have
completed.

>> 3. Update docs.
>
> I can do that once we settle the interface.

Excellent. To me, interface in sha3.h now looks good.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Adding MD5/SHA1 support to RSA OAEP

2024-03-26 Thread Niels Möller
Hans Leidekker  writes:

> This was prompted by the DayZ game. I don't know if it uses md5 or sha1, I
> should ask, but I think it's reasonable to wait and see if md5 is still used.

It would be good to know precisely what's needed. A complete patch for
adding rsa_oaep_sha1 functions also needs brief updates to the tests and the
manual (nettle.texinfo).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Adding MD5/SHA1 support to RSA OAEP

2024-03-25 Thread Niels Möller
Hans Leidekker  writes:

> I noticed the arrival of an RSA OAEP implementation in GnuTLS and wanted to 
> use
> that to support the algorithm in Wine. Windows supports it using the old MD5 
> and
> SHA1 hash functions, so my question is: would you accept a patch like below 
> that
> adds these hashes?

Hi, 

I'm fine accepting patches for interop with various legacy systems, if
there's a reasonable usecase, but I don't want to add anything with md5
in it merely for completeness. Can you give a bit more details on your
usecase? Which windows functions do you want to support or interop with?
What will break if you support only the sha2-variants of RSA-OAEP?

Despite md5 and sha1 being generally deprecated, I'm not sure about
whether they're considered insecure when used for RSA-OAEP (via
wikipedia, I found this old paper that seems to imply that the
underlying hash function doesn't need to be that strong:
https://eprint.iacr.org/2006/223).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-03-24 Thread Niels Möller
Niels Möller  writes:

> One other question: In the counter updates,
>
>> C increase ctr value as input to aes_encrypt
>> vaddudm S1, S0, CNT1
>> vaddudm S2, S1, CNT1
>> vaddudm S3, S2, CNT1
>> vaddudm S4, S3, CNT1
>> vaddudm S5, S4, CNT1
>> vaddudm S6, S5, CNT1
>> vaddudm S7, S6, CNT1
>
> shouldn't that be vadduwm (32-bit word addition, rather than 64-bit
> dword addition)? As I understand it, gcm uses a 32-bit counter, which
> should wrap around without any carry to higher bits if the initial value
> is just below 2^32.

I've added tests that set the initial counter so that the four counter
bytes wrap around 2^32, and I've verified that these instructions
should be changed to vadduwm, to get output that agrees with nettle's
other gcm implementations.

Another question on powerpc64 assembly: For the byte swapping, currently
done using the vperm instruction and a mask word, is there any reason to
not use the xxbrd instruction (VSX Vector Byte-Reverse Doubleword)
instead? That applies to more functions than the new gcm-aes code.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


KEM-interface (was: Re: HPKE ready for Merge!)

2024-03-24 Thread Niels Möller
Norbert Pócs  writes:

> I took an another look at the PR, if there is anything possible to delete
> without loosing the functionality, but unfortunately didn't find anything.

To get a better understanding of the HPKE spec and its complexities,
I've tried to implement KEM x25519-sha256 (and nothing else from the
spec). Patch below.

Some notes:

1. Nettle's hkdf interface isn't that good a fit, if one wants to avoid
   memcpy calls to assemble the inputs. Below, I haven't used Nettle's
   hkdf_extract / hkdf_expand, instead doing corresponding operations
   directly on hmac_sha256. Unless I'm missing something, it seems a
   LabeledExpand function limited to at most 32 octets of output (the
   sha256 digest size) is sufficient for everything in hpke, except for
   the Export feature.

2. I don't quite like that some functions (in particular DeriveKeyPair)
   are defined so that it can fail (not for x25519, though). Having a
   success/failure indication there forces applications to have an error
   handling path, that it's rather difficult to test. I see no obvious
   way for Nettle to shield applications from that, though.

3. For those of you who have looked closer at proposed post-quantum KEM
   mechanisms, is the interface suitable for those too?

4. It seems that HPKE defines a very clean interface between the KEM and
   the rest of the message handling, with the shared_secret the only
   piece of data shared between KEM and the rest of the processing.

Regards,
/Niels

diff --git a/Makefile.in b/Makefile.in
index f027e762..eb520f7a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -225,7 +225,8 @@ hogweed_SOURCES = sexp.c sexp-format.c \
  ed25519-sha512.c ed25519-sha512-pubkey.c \
  ed25519-sha512-sign.c ed25519-sha512-verify.c \
  ed448-shake256.c ed448-shake256-pubkey.c \
- ed448-shake256-sign.c ed448-shake256-verify.c
+ ed448-shake256-sign.c ed448-shake256-verify.c \
+ kem-x25519-sha256.c
 
 OPT_SOURCES = fat-arm.c fat-arm64.c fat-ppc.c fat-s390x.c fat-x86_64.c 
mini-gmp.c
 
diff --git a/hpke-kem.h b/hpke-kem.h
new file mode 100644
index ..00b4610b
--- /dev/null
+++ b/hpke-kem.h
@@ -0,0 +1,71 @@
+/* hpke-kem.h
+
+   Key encapsulation mechanism, suitable for HPKE (RFC 9180).
+
+   Copyright (C) 2024 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+   Software Foundation; either version 3 of the License, or (at your
+   option) any later version.
+
+   or
+
+ * the GNU General Public License as published by the Free
+   Software Foundation; either version 2 of the License, or (at your
+   option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program.  If
+   not, see http://www.gnu.org/licenses/.
+*/
+
+#ifndef NETTLE_HPKE_KEM_H_INCLUDED
+#define NETTLE_HPKE_KEM_H_INCLUDED
+
+#include "nettle-types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Name mangling */
+#define get_kem_x25519_sha256 nettle_get_kem_x25519_sha256
+
+typedef int kem_derive_keypair_func (uint8_t *public_key, uint8_t *private_key,
+size_t seed_size, const uint8_t *seed);
+/* Take randomness source instead? Passing seed suites deterministic tests. */
+typedef void kem_encapsulate_func (uint8_t *shared_secret, uint8_t 
*encapsulation,
+  const uint8_t *receiver_public_key,
+  void *random_ctx, nettle_random_func 
*random);
+typedef void kem_decapsulate_func (uint8_t *shared_secret, const uint8_t 
*encapsulation,
+  const uint8_t *private_key);
+
+struct hpke_kem {
+  unsigned public_key_size;
+  unsigned private_key_size;
+  unsigned encapsulation_size;
+  unsigned shared_secret_size;
+  kem_derive_keypair_func *derive_keypair;
+  kem_encapsulate_func *encapsulate;
+  kem_decapsulate_func *decapsulate;
+};
+
+const struct hpke_kem *get_kem_x25519_sha256 (void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NETTLE_HPKE_KEM_H_INCLUDED */
diff --git a/kem-x25519-sha256.c b/kem-x25519-sha256.c
new file mode 100644
index ..186ced6c
--- /dev/null
+++ b/kem-x25519-sha256.c
@@ -0,0 +1,170 @@
+/* kem-x25519-sha256.c
+
+   KEM using curve25519, suitable for HPKE (RFC 9180).
+
+   Copyright (C) 2024 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free soft

Re: additional API for SHAKE streaming read

2024-03-24 Thread Niels Möller
Niels Möller  writes:

> I'll try to clean up and post or commit some of my changes, I'm sorry
> that will cause some conflicts.

I've pushed my changes to a branch sha3-shake-updates, does that look
reasonable to you? If so, I think the next steps are

1. Decide what should be renamed sha3_shake256_*
2. Implement shake128.
3. Update docs.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-24 Thread Niels Möller
Daiki Ueno  writes:

> Sorry for the delay, and thank you for merging it to master.  I've come
> up with the attached 3 patches on top of it, which basically do:

Thanks for moving this forward. I haven't had time to share my recent
patches either. I have a few concerns, though.

> - Apply my changes in the previous post to count index from zero, not
>   the end of the block

I'm not yet convinced this is a net win, it looks like you need a  
"% sizeof (ctx->block)" to make that work, and I'd like to avoid
divisions, in particular, since when generalizing this to also
support shake128, the divisor will no longer be constant.

> - Rename sha3_256_shake_output to sha3_shake256_output and add
>   sha3_shake256_init/update as well, as you suggested in the previous
>   conversation.  That would help us implement SHAKE128 without exposing
>   SHA3-128 digest functions and I find it easier to read when used in
>   the ML-KEM implementation.

I'm fine with adding new sha3_shake256_* names, but I think we should
keep old name (which you added for Nettle-3.6). And I think we can use
the same context struct, possibly with convenience aliases like

  #define sha3_shake256_ctx sha3_256_ctx
  #define sha3_shake256_init sha3_256_init

I agree we shouldn't define sha3_128_digest now (as far as I'm aware,
there's no authoritative spec for that), but I think we should design
the api so that it fits if added later.

I'm a bit confused by the choice of shake128 for ML-KEM, and I would
expect that if there are applications where shake128 is a reasonable
security tradeoff, then there likely are reasonable applications of
sha3_128 too. I don't understand the fine details of sha3 security
analysis, but I'd guess that for applications where second preimage (in
contrast to arbitrary collisions) is the relevant attack, sha3_128
should be as secure as sha3_shake128 with a larger output size.

> - Generalize _shake_output function independent of the underlying SHA-3
>   algorithm.

Certainly needed.

I don't think the all-in-one shake function should be deprecated, it
seems like a nice utility. What I'm not sure about is if it should
be implemented as _output +  _init (very cheap implementation) or its own
function (with less runtime overhead than _output).

I'll try to clean up and post or commit some of my changes, I'm sorry
that will cause some conflicts.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-03-20 Thread Niels Möller
Niels Möller  writes:

> Below is an updated version of gcm-aes-encrypt.asm, seems to work for
> me, and uses fewer of the regular registers. Some comments and
> questions:
>
> 1. What about the vsrX registers, 0 <= X < 32? They are used to copy
>values from and to the v registers (aka vsrX, 32 <= X < 64), e.g.,
>
> xxlor vs1, VSR(S0), VSR(S0)
>
>Can those registers be used freely, and how?

I've asked in a different forum, and as far as I understand, registers
vs0-vs13 are free to use ("volatile"), because half of each corresponds to a
volatile floating point register (fpr0-fpr13). While registers vs14-vs31
need to be saved and restored if used (the halves corresponding to
fpr14-fpr31 are non-volatile, so in principle, it would be sufficient to
save and restore those halves).

> 2. From my reading of the ELF v2 ABI spec, there's a "protected zone"
>below the stack pointer that can be used freely for storage. Is that
>right? Or maybe that's only for the ELFv2 ABI?

That appears to be the same in ELFv1 ABI, see
https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#STACK

One other question: In the counter updates,

> C increase ctr value as input to aes_encrypt
> vaddudm S1, S0, CNT1
> vaddudm S2, S1, CNT1
> vaddudm S3, S2, CNT1
> vaddudm S4, S3, CNT1
> vaddudm S5, S4, CNT1
> vaddudm S6, S5, CNT1
> vaddudm S7, S6, CNT1

shouldn't that be vadduwm (32-bit word addition, rather than 64-bit
dword addition)? As I understand it, gcm uses a 32-bit counter, which
should wrap around without any carry to higher bits if the initial value
is just below 2^32.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-03-17 Thread Niels Möller
Niels Möller  writes:

> Next, I'll have a look at register usage in the assembly code.

Below is an updated version of gcm-aes-encrypt.asm, seems to work for
me, and uses fewer of the regular registers. Some comments and
questions:

1. What about the vsrX registers, 0 <= X < 32? They are used to copy
   values from and to the v registers (aka vsrX, 32 <= X < 64), e.g.,

xxlor vs1, VSR(S0), VSR(S0)

   Can those registers be used freely, and how? If we can use them, we
   shouldn't need to save and restore any vector registers. They're not
   mentioned in powerpc64/README. Looking in the ELF v2 ABI spec, that
   seems to say that the low halves (which are used as floating point
   registers) are "volatile", but that's not quite enough?

2. From my reading of the ELF v2 ABI spec, there's a "protected zone"
   below the stack pointer that can be used freely for storage. Is that
   right? Or maybe that's only for the ELFv2 ABI?

3. Nit: In the copyright line, I'd like to delete the "All rights
   reserved" phrase. That's not in the copyright header of any other
   Nettle files, including those previously contributed by IBM.

Regards,
/Niels

-8<---
C powerpc64/p8/gcm-aes-encrypt.asm

ifelse(`
   Copyright (C) 2023- IBM Inc. All rights reserved
   This file is part of GNU Nettle.

   GNU Nettle is free software: you can redistribute it and/or
   modify it under the terms of either:

 * the GNU Lesser General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your
   option) any later version.

   or

 * the GNU General Public License as published by the Free
   Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   or both in parallel, as here.

   GNU Nettle is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see http://www.gnu.org/licenses/.
')

C Register usage:

define(`SP', `r1')
define(`TOCP', `r2')

define(`HT', `r3')
define(`SRND', `r4')
define(`SLEN', `r5')
define(`SDST', `r6')
define(`SSRC', `r7')
define(`RK', `r8')
C r9-r11 used as constant indices.
define(`LOOP', `r12')

C
C vectors used in aes encrypt output
C

define(`K', `v1')
define(`S0', `v2')
define(`S1', `v3')
define(`S2', `v4')
define(`S3', `v5')
define(`S4', `v6')
define(`S5', `v7')
define(`S6', `v8')
define(`S7', `v9')

C
C ghash assigned registers and vectors
C

define(`ZERO', `v21')
define(`POLY', `v22')
define(`POLY_L', `v0')

define(`D', `v10')
define(`H1M', `v11')
define(`H1L', `v12')
define(`H2M', `v13')
define(`H2L', `v14')
define(`H3M', `v15')
define(`H3L', `v16')
define(`H4M', `v17')
define(`H4L', `v18')
define(`R', `v19')
define(`F', `v20')
define(`R2', `v21')
define(`F2', `v22')

define(`LE_TEMP', `v30')
define(`LE_MASK', `v31')

define(`CNT1', `v28')
define(`LASTCNT', `v29')

.file "gcm-aes-encrypt.asm"

.text

 C size_t
 C _gcm_aes_encrypt(struct gcm_key *key, size_t rounds,
 C  size_t len, uint8_t *dst, const uint8_t *src)
 C

define(`FUNC_ALIGN', `5')
PROLOGUE(_nettle_gcm_aes_encrypt)
srdi. LOOP, SLEN, 7 C loop n 8 blocks
beq No_encrypt_out

C 288 byte "protected zone" is sufficient for storage.
stxv VSR(v20), -16(SP)
stxv VSR(v21), -32(SP)
stxv VSR(v22), -48(SP)
stxv VSR(v28), -64(SP)
stxv VSR(v29), -80(SP)
stxv VSR(v30), -96(SP)
stxv VSR(v31), -112(SP)

vxor ZERO,ZERO,ZERO
vspltisb CNT1, 1
vsldoi CNT1, ZERO, CNT1, 1  C counter 1

DATA_LOAD_VEC(POLY,.polynomial,r9)
IF_LE(`
li r9,0
lvsl   LE_MASK,0,r9
vspltisb   LE_TEMP,0x07
vxor   LE_MASK,LE_MASK,LE_TEMP
')
xxmrghd VSR(POLY_L),VSR(ZERO),VSR(POLY)

C load table elements
li r9,1*16
li r10,2*16
li r11,3*16
lxvd2x VSR(H1M),0,HT
lxvd2x VSR(H1L),r9,HT
lxvd2x VSR(H2M),r10,HT
lxvd2x VSR(H2L),r11,HT
addi HT, HT, 64
lxvd2x VSR(H3M),0,HT
lxvd2x VSR(H3L),r9,HT
lxvd2x VSR(H4M),r10,HT
lxvd2x VSR(H4L),r11,HT

addi HT, HT,  4048  C Advance to point to the 'CTR' field in the context
 
lxvd2x VSR(D),r9,HT C load 'X' pointer
C byte-reverse of each doubleword permuting on little-endian mode
IF_LE(`
vperm  D,D,D,LE_MASK
')

lxvb16x VSR(S0), 0, HT  C Load 'CTR'

sldi SLEN, LOOP, 7

addi LOOP, LOOP, -1

lxvd2x VSR(K),r11,HT C First subkey
vperm   K,K,K,LE_MASK

.align 5
C increase ctr v

Re: ppc64 micro optimization

2024-03-15 Thread Niels Möller
Niels Möller  writes:

> Danny Tsen  writes:
>
>> My fault. I did not include the gym-aes-crypt.c in the patch. Here is
>> the updated patch. Please apply this one and we can work from there.
>
> Thanks, now pushed onto a new branch ppc64-gcm-aes.

I've now pushed some more changes to that branch: added gcm-internal.h,
fixed the non-fat case, and moved around the nop definitions of the new
functions.

Next, I'll have a look at register usage in the assembly code.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-14 Thread Niels Möller
Daiki Ueno  writes:

>> * One could perhaps use index == 0 instead of index == block_size for
>>   the case that there is no buffered data. But the current convention
>>   does make your "if (length <= left)" nice and simple.
>
> I agree that the current convention is a bit awkward, so in the attached
> patch I changed to use index == 0 as the indicator where buffering is
> needed.  That actually makes the code simpler as we can defer buffering
> until when the data is read.  One drawback though is that it causes
> additional memcpy in a corner case where the _shake_output is used to
> retrieve data smaller than the block size.

I wonder if that will still be simpler if one also moves the
sha3_permute calls?

I have merged your previous version to a branch
add-sha3_256_shake_output, and ci looks green. So perhaps best to merge
that to master, and iterate from there?

>> * It looks a bit backwards to me that each iteration *first* copies data
>>   to the digest, and *then* calls sha3_permute. In case no more data is
>>   to be output, that sha3_permute call is wasted. It would be more
>>   natural to me to not call sha3_permute until we know the output is
>>   needed. But to fix that and still keep things nice for the first
>>   output block, I think one would need to reorganize _nettle_sha3_pad to
>>   not imply a call to sha3_permute (via sha3_absorb). So that's better
>>   done in a separate change.
>
> Right, I can do that after the current patch is settled.

I've done a bit of hacking locally. What I did was to take out the
xoring parts of sha3_absorb into its own function sha3_xor_block, and
let sha3_pad_shake use that, without any call to sha3_permute. And then
call sha3_permute as output is needed.

>> * I'm still tempted to use ctx->index = ~index rather than ctx->index =
>>   index | INDEX_HIGH_BIT. But maybe that would just be too obfuscated.
>
> I'm actually not sure how this works.  For example, if unsigned int is
> 32-bit and index is 3, wouldn't ~index turn to 0xfffffffc, while index |
> INDEX_HIGH_BIT is 0x80000003?

It would be a different representation, with the very minor advantage
that the INDEX_HIGH_BIT value isn't needed (in source code, or handled
at runtime). Like

  index = ctx->index;

  if (index < sizeof(ctx->block)) 
{ ... first call to shake_output, pad and initialize...  }
  else
index = ~index;

  assert (index <= sizeof(ctx->block));

  ... output processing ...

  ctx->index = ~index;

>> In next step, to also support shake128, we should generalize your code
>> using an internal function _sha3_shake_output taking block and block
>> size as arguments.
>
> Yes.

I've tried that in my local hack, I think it's rather straight-forward.
(I might be able to post corresponding patch later). What's unclear is
how much to share between _shake and shake_output. One could define
_shake as _shake_output + _init. The drawback I see is that (i) we would
allow _shake_output followed by _shake, which isn't proper api usage,
and (ii) _shake needs a lot less logic since it should always start by
padding, and it doesn't need to buffer any data, so it seems a bit wrong
to have it call shake_output that does this unneeded extra work.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Naming for names in struct nettle_hash

2024-03-13 Thread Niels Möller
Hi, I've got a bug report that sha512_224 and sha512_256 are missing in
the list returned by nettle_get_hashes, and I'm about to add them.

But then there's a question of naming convention. Currently, the 

  extern const struct nettle_hash nettle_sha512_256;

includes a name field set to the string "sha512-256", which is somewhat
inconsistent with, e.g., the struct nettle_sha3_256 which includes the
name "sha3_256".

Should I just change this (patch below)? Honestly, I haven't given much
thought to the conventions used here, but perhaps it makes the most
sense that naming match the names of corresponding C symbols (with
underscore rather than dash)?

Regards,
/Niels

diff --git a/nettle-meta-hashes.c b/nettle-meta-hashes.c
index 4d421182..2245dfb7 100644
--- a/nettle-meta-hashes.c
+++ b/nettle-meta-hashes.c
@@ -49,6 +49,8 @@ const struct nettle_hash * const _nettle_hashes[] = {
   &nettle_sha256,
   &nettle_sha384,
   &nettle_sha512,
+  &nettle_sha512_224,
+  &nettle_sha512_256,
   &nettle_sha3_224,
   &nettle_sha3_256,
   &nettle_sha3_384,
diff --git a/sha512-224-meta.c b/sha512-224-meta.c
index 24c42bfc..f3751e14 100644
--- a/sha512-224-meta.c
+++ b/sha512-224-meta.c
@@ -39,7 +39,7 @@
 
 const struct nettle_hash nettle_sha512_224 =
   {
-"sha512-224", sizeof(struct sha512_ctx),
+"sha512_224", sizeof(struct sha512_ctx),
 SHA512_224_DIGEST_SIZE,
 SHA512_224_BLOCK_SIZE,
 (nettle_hash_init_func *) sha512_224_init,
diff --git a/sha512-256-meta.c b/sha512-256-meta.c
index 37d17c35..181f2874 100644
--- a/sha512-256-meta.c
+++ b/sha512-256-meta.c
@@ -39,7 +39,7 @@
 
 const struct nettle_hash nettle_sha512_256 =
   {
-"sha512-256", sizeof(struct sha512_ctx),
+"sha512_256", sizeof(struct sha512_ctx),
 SHA512_256_DIGEST_SIZE,
 SHA512_256_BLOCK_SIZE,
 (nettle_hash_init_func *) sha512_256_init,
diff --git a/testsuite/meta-hash-test.c b/testsuite/meta-hash-test.c
index 6a15e7db..ec4e0d1e 100644
--- a/testsuite/meta-hash-test.c
+++ b/testsuite/meta-hash-test.c
@@ -16,6 +16,8 @@ const char* hashes[] = {
   "sha256",
   "sha384",
   "sha512",
+  "sha512_224",
+  "sha512_256",
   "sha3_224",
   "sha3_256",
   "sha3_384",


-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-11 Thread Niels Möller
Daiki Ueno  writes:

> Yes, this makes the code a lot simpler.  I'm attaching an updated patch.

Thanks, looks good to me. Some details I'm thinking about that might be
improvements:

* One could perhaps use index == 0 instead of index == block_size for
  the case that there is no buffered data. But the current convention
  does make your "if (length <= left)" nice and simple.

* It looks a bit backwards to me that each iteration *first* copies data
  to the digest, and *then* calls sha3_permute. In case no more data is
  to be output, that sha3_permute call is wasted. It would be more
  natural to me to not call sha3_permute until we know the output is
  needed. But to fix that and still keep things nice for the first
  output block, I think one would need to reorganize _nettle_sha3_pad to
  not imply a call to sha3_permute (via sha3_absorb). So that's better
  done in a separate change.

* I'm still tempted to use ctx->index = ~index rather than ctx->index =
  index | INDEX_HIGH_BIT. But maybe that would just be too obfuscated.

Anything about that you agree with, or that you think should be done
now?

In next step, to also support shake128, we should generalize your code
using an internal function _sha3_shake_output taking block and block
size as arguments.

I'm also not sure about proper naming for shake128. If I read the
Instances table at https://en.wikipedia.org/wiki/SHA-3 right, there's no
standard regular hash function corresponding to shake128. We could still
name it sha3_128_shake, but that might be confusing (there's no
corresponding sha3_128_digest, would there be any use for that?). The
alternative could be to use names sha3_shakeN_init, sha3_shakeN_update,
sha3_shakeN_digest, sha3_shakeN_output (with some of the shake256
functions, as well as the context struct, being aliases to corresponding
sha3_256 names). But aliases also have a cost in potential confusion.

> +  if (length > 0)
> +{
> +  /* Fill in the buffer for next call.  */
> +  _nettle_write_le64 (sizeof (ctx->block), ctx->block, ctx->state.a);
> +  sha3_permute (&ctx->state);
> +  memcpy (digest, ctx->block, length);
> +  ctx->index = length | INDEX_HIGH_BIT;
> +}
> +  else
> +ctx->index = sizeof (ctx->block) | INDEX_HIGH_BIT;
> +}

If I read your code right, we actually always have length > 0 at this
place. So either delete the if conditional, or change the condition of
the loop above from (length > sizeof (ctx->block)) to (length >= sizeof
(ctx->block)). The latter option would avoid a memcpy in the case that
the requested digest ends with a full block.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-10 Thread Niels Möller
Daiki Ueno  writes:

> Thank you.  The option (3) sounds like a great idea as it only needs one
> more function to be added for streaming.  I tried to implement it as the
> attached patch.

Thanks. Interface and tests looks very reasonable to me. Comments on the
implementation below.

Regards,
/Niels

> +void
> +sha3_256_shake_output(struct sha3_256_ctx *ctx,
> +   size_t length,
> +   uint8_t *digest)
> +{
> +  unsigned offset;
> +  unsigned mask = UINT_MAX >> 1;

I think I'd name the local variable "index" rather than "offset", to
match the state variable. And I think it would make sense with a define
for the high bit, something like

#define INDEX_HIGH_BIT (~((UINT_MAX) >> 1))

(one could also use something like ~0U instead of UINT_MAX, but UINT_MAX
may be more readable).

> +  /* We use the leftmost bit as a flag to indicate SHAKE is initialized.  */
> +  if (ctx->index & ~mask)
> +offset = ctx->index & mask;

The value of offset here is in the range 0 < offset <=
SHA3_256_BLOCK_SIZE, right? One could use a representation where 

  offset = ~ctx->index;

instead of bitwise operations. One would still need the condition if
(ctx->index & INDEX_HIGH_BIT), but that would typically be compiled to
the same as if ((signed int) ctx->index < 0).

I think it would also make sense with an 

  assert (ctx->index < SHA3_256_BLOCK_SIZE);

in the start of sha3_256_update, which will trigger if the update
function is called after the output function, with no init in between.

> +  else
> +{
> +  _sha3_pad_shake (&ctx->state, SHA3_256_BLOCK_SIZE, ctx->block, 
> ctx->index);
> +  /* Point at the end of block to trigger fill in of the buffer.  */
> +  offset = sizeof (ctx->block);

I think this block deserves a comment that this is the first call to
sha3_256_shake_output. For the block size, I think it would be nice to
consistently use one of SHA3_256_BLOCK_SIZE and sizeof (ctx->block).

> +}
> +
> +  for (;;)
> +{
> +  /* Write remaining data from the buffer.  */
> +  if (offset < sizeof (ctx->block))
> + {
> +   unsigned remaining;
> +
> +   remaining = MIN(length, sizeof (ctx->block) - offset);
> +   memcpy (digest, &ctx->block[offset], remaining);
> +   digest += remaining;
> +   offset += remaining;

I think handling of the leftover can be moved before the loop, and
simplified as

  unsigned left = sizeof(ctx->block) - offset;
  if (length <= left)
{
  memcpy (digest, ctx->block + offset, length);
  ctx->index = (offset + length) | INDEX_HIGH_BIT;
  return;
}
  memcpy (digest, ctx->block + offset, left);
  digest += left;
  length -= left;

followed by a loop

  for (; length >= SHA3_256_BLOCK_SIZE; 
 length -= SHA3_256_BLOCK_SIZE, digest += SHA3_256_BLOCK_SIZE)
{ 
  ... output a full block ...
}

  if (length > 0)
{
  ... do final partial block ...
  ctx->index = length | INDEX_HIGH_BIT;
}
  else 
ctx->index = SHA3_256_BLOCK_SIZE | INDEX_HIGH_BIT;


-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Relax blocking requirement of gcm_update?

2024-03-10 Thread Niels Möller
Niels Möller  writes:

> Would it be worthwhile to drop the restriction of the last sentence, and
> allow all calls to gcm_update to use any size? This requirement may be
> particularly surprising when using nettle_aead; then gcm has different
> requirements for the update function than all other aead algorithms.

Actually, it turns out at least eax and ocb have similar requirements.
So I'm now leaning towards sticking with the convention that nettle's
aead modes should require that if the associated data is passed in
several pieces, all pieces but the last must be an integral number of
blocks (similarly as for the cleartext and ciphertext inputs).

Relaxing that for gcm adds more code complexity, and it's not clear if
it's possible to fix for ocb and eax without an abi break to add
additional state to some of the related structs.

Maybe this should be pointed out more explicitly somewhere in the manual?
(Now it's noted in the docs of resp. _update function, but not for aead
in general).

For reference, I've pushed a branch "relax-gcm_update" where I've tried
this out (and which fails eax and ocb tests).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Relax blocking requirement of gcm_update?

2024-03-09 Thread Niels Möller
Justus Winter  writes:

> What happens if that restriction is violated?  As the function cannot
> signal an error, does it lead to silent corruption of the data stream?
> Or does it assert that restriction?

It triggers an assert. Likewise, if you call gcm_update after
gcm_encrypt that also triggers an assert. While I think (without
checking the code closely) an invalid mix of gcm_encrypt and gcm_decrypt
will just result in a garbage digest.

So if you want the bindings to report errors in some friendlier way on
misuse, you'd need to keep track of the state of the context and check
that each call is appropriate for the current state.
 
Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Relax blocking requirement of gcm_update?

2024-03-09 Thread Niels Möller
While looking at extended tests of the aead update function (for the
associated data), I stumbled on a restriction of gcm_update that is
different from most (all?) other update functions in Nettle. According
to the docs,

 -- Function: void gcm_update (struct gcm_ctx *CTX, const struct gcm_key
  *KEY, size_t LENGTH, const uint8_t *DATA)
 Provides associated data to be authenticated.  If used, must be
 called before ‘gcm_encrypt’ or ‘gcm_decrypt’.  All but the last
 call for each message _must_ use a length that is a multiple of the
 block size.

Would it be worthwhile to drop the restriction of the last sentence, and
allow all calls to gcm_update to use any size? This requirement may be
particularly surprising when using nettle_aead; then gcm has different
requirements for the update function than all other aead algorithms.

I think that might be doable without any ABI break, by the following
hack: reuse the ctr field of struct gcm_context as a block buffer, while
processing the associated data. The ctr field is clearly needed also for
encrypt/decrypt, but we could move initialization for that purpose from
gcm_set_iv to the first call to encrypt/decrypt.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-03-09 Thread Niels Möller
Niels Möller  writes:

> Below patch seems to fix this issue, but not entirely sure that's the
> way I want to do it.

I've pushed a fix, along the same lines, see
https://git.lysator.liu.se/nettle/nettle/-/commit/99e62003c3916fdef04a2d3327281f8f498b609e

I believe that should fix all hash update functions (and with proper test
coverage). 

There are probably a few more functions where 0, NULL should be allowed,
but currently result in ubsan issues: Corresponding aead update
functions, functions accepting optional nonces, empty messages for rsa
encryption functions, maybe some of the cipher modes.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-03-06 Thread Niels Möller
Danny Tsen  writes:

> My fault. I did not include the gym-aes-crypt.c in the patch. Here is
> the updated patch. Please apply this one and we can work from there.

Thanks, now pushed onto a new branch ppc64-gcm-aes.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-03-05 Thread Niels Möller
Danny Tsen  writes:

> Please let me know when you merge the code and we can work from there.

Hi, I tried to apply and build with the v5 patch, and noticed some problems.

Declaration of _gcm_aes_encrypt / _gcm_aes_decrypt is missing. It can go
in gcm-internal.h, like on this branch,
https://git.lysator.liu.se/nettle/nettle/-/blob/x86_64-gcm-aes/gcm-internal.h?ref_type=heads
Corresponding name mangling defines should also be in gcm-internal.h,
not in the installed gcm.h header.

The file gcm-aes.c was missing in the patch. If the dummy C versions of
_gcm_aes_*crypt are needed only for fat builds, maybe simplest to put the
definitions in fat-ppc.c (maybe one can even use the same "return 0" dummy
function for both encrypt and decrypt).

It would also be nice if you could check that the new code is used
and working in a non-fat build, configured with --disable-fat
--enable-power-crypto-ext.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Support for ML-KEM (Kyber)

2024-03-04 Thread Niels Möller
Daiki Ueno  writes:

> I created a draft patch to support ML-KEM (a post-quantum key
> encapsulation mechanism, formerly known as Kyber)[1], based on the
> explanation in the corresponding IETF draft[2]. 

Interesting. I'll have to read up a bit on it before trying to read the
code. I'm particularly interested in the kind of arithmetic that is
used. Thanks for the references.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: additional API for SHAKE streaming read

2024-03-04 Thread Niels Möller
Daiki Ueno  writes:

> When I'm trying to implement ML-KEM (Kyber), I realized that the current
> API for SHAKE (sha3_256_shake) is a bit too limited: while ML-KEM uses
> SHAKE128 as a source of pseudorandom samples[1], the current API
> requires the total number of bytes are determined prior to the call, and
> after the call the hash context is reset.

I vaguely recall discussing that when shake256 was added, and we
concluded it was good enough as a start, and could be extended later.

I think it would be nice if one could support the streaming case with
the existing struct sha3_256_ctx, and little extra wrapping. Question is
what the interface should be. I see a few variants:

1.
  void /* Essentially the same as _sha3_pad_shake */
  sha3_256_shake_start (struct sha3_256_ctx *ctx);

  void /* Unbuffered, length must be a multiple of SHA3_256_BLOCK_SIZE */
  sha3_256_shake_output (struct sha3_256_ctx *ctx,
 size_t length, uint8_t *dst);

  void /* Last call, length can be arbitrary, context reinitialized */
  sha3_256_shake_end (struct sha3_256_ctx *ctx,
  size_t length, uint8_t *dst);

Requiring all calls but the last to be full blocks is consistent with
nettle's functions for block ciphers. But since we anyway have a buffer
available (to support arbitrary sizes for streaming the input), we could
perhaps just as well reuse that buffer.

2.
  void /* Essentially the same as _sha3_pad_shake */
  sha3_256_shake_start (struct sha3_256_ctx *ctx);

  void /* Arbitrary length, no need to signal end of data */
  sha3_256_shake_output (struct sha3_256_ctx *ctx,
 size_t length, uint8_t *dst);

  void /* Explicit init call needed to start a new input message */
  sha3_256_init (struct sha3_256_ctx *ctx);

In this case, sha3_256_shake_output would use ctx->index and ctx->buffer
for partial blocks.

With some hacking (say, using the unused high bit of ctx->index to
signal that shake is in output mode), then we could have just

3.
  void /* Arbitrary length, no need to signal start or end of output */
  sha3_256_shake_output (struct sha3_256_ctx *ctx,
 size_t length, uint8_t *dst);

  void /* Explicit init call needed to start a new input message */
  sha3_256_init (struct sha3_256_ctx *ctx);

As always, naming is also a crucial question. Is _shake_output a good
name? Or _shake_read, or _shake_generate? From the terminology in the
spec (https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf), I think
"_shake_output" is reasonable.

When deciding on naming and conventions, we should strive to define
something that can be reused for later hash functions with variable
output size (called extendable-output functions, "XOF", in the spec).

So what do you think makes most sense?

To be clear, the hack I'm referring to for option (3) would be something
like

  void /* Arbitrary length, no need to signal start or end of output */
  sha3_256_shake_output (struct sha3_256_ctx *ctx,
 size_t length, uint8_t *dst)
  {   
if (!(ctx->index >> 31)) /* 32-bit unsigned int, for simplicity of example 
*/
  { 
_sha3_pad_shake (&ctx->state, SHA3_256_BLOCK_SIZE, ctx->block, 
ctx->index);
/* Not sure what representation is most suitable for index, but
   high bit must be set. */
ctx->index = ~0; 
  }
/* If leftovers in buffer (determined from index), copy to output */
/* While we still need more blocks, permute and copy one block to output */
/* If we need a partial block at the end, generate one into buffer,
   copy prefix of it to the output, and set index accordingly */
  }

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-02-22 Thread Niels Möller
Danny Tsen  writes:

> Here is the v5 patch from your comments.  Please review.

Thanks. I think this looks pretty good. Maybe I should commit it on a
branch and we can iterate from there. I'll be on vacation and mostly
offline next week, though.

> --- a/gcm-aes128.c
> +++ b/gcm-aes128.c
> @@ -63,6 +63,11 @@ void
>  gcm_aes128_encrypt(struct gcm_aes128_ctx *ctx,
>   size_t length, uint8_t *dst, const uint8_t *src)
>  {
> +  size_t done = _gcm_aes_encrypt ((struct gcm_key *)ctx, _AES128_ROUNDS, 
> length, dst, src);
> +  ctx->gcm.data_size += done;
> +  length -= done;
> +  src += done;
> +  dst += done;
>GCM_ENCRYPT(ctx, aes128_encrypt, length, dst, src);
>  }

We should come up with some preprocessor things to completely omit the
new code on architectures that don't have _gcm_aes_encrypt (possibly
with some macro to reduce duplication). I think that's the main thing
I'd like to have before merge. Otherwise, looks nice and clean.

Ah, and I think you could write &ctx->key instead of the explicit
cast.

> +C load table elements
> +li r9,1*16
> +li r10,2*16
> +li r11,3*16
> +lxvd2x VSR(H1M),0,HT
> +lxvd2x VSR(H1L),r9,HT
> +lxvd2x VSR(H2M),r10,HT
> +lxvd2x VSR(H2L),r11,HT
> +addi HT, HT, 64
> +lxvd2x VSR(H3M),0,HT
> +lxvd2x VSR(H3L),r9,HT
> +lxvd2x VSR(H4M),r10,HT
> +lxvd2x VSR(H4L),r11,HT
> +
> +li r25,0x10
> +li r26,0x20
> +li r27,0x30
> +li r28,0x40
> +li r29,0x50
> +li r30,0x60
> +li r31,0x70

I still think there's opportunity to reduce number of registers (and
corresponding load-store of callee save registers). E.g., here r9-r11 are
used for the same thing as r25-r27.

> +.align 5
> +C increase ctr value as input to aes_encrypt
> +vaddudm S1, S0, CNT1
> +vaddudm S2, S1, CNT1
> +vaddudm S3, S2, CNT1
> +vaddudm S4, S3, CNT1
> +vaddudm S5, S4, CNT1
> +vaddudm S6, S5, CNT1
> +vaddudm S7, S6, CNT1

This is a rather long dependency chain; I wonder if you could make a
measurable saving of a cycle or two by using additional CNT2 or CNT4
registers (if not, it's preferable to keep the current simple chain).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-18 Thread Niels Möller
Niels Möller  writes:

>> This is similar to this issue:
>> https://gitlab.com/gnutls/gnutls/-/issues/1306
>> where we passed NULL to sha*_update in the GnuTLS code, though it turned
>> to be a non-issue.
>
> I don't remember seeing that issue. I think it should be allowed to call
> sha*_update with 0, NULL (when size is zero, there's no reason to ever
> attempt to dereference that pointer). I'll see if I can fix that.

Below patch seems to fix this issue, but not entirely sure that's the
way I want to do it. I think I'd rather not touch the MD_* macros
defined in macros.h, and do improved macros in md-internal.h instead.
Since, for historic reasons, the macros.h file is public.

To get this thoroughly fixed, one would need tests where every nettle
function, that accepts a potentially empty buffer, is called with 0,
NULL, and make sure ubsan is happy with that.

Regards,
/Niels

diff --git a/macros.h b/macros.h
index 990d32ee..e67a403f 100644
--- a/macros.h
+++ b/macros.h
@@ -180,6 +180,8 @@ do {\
length and data. */
 #define MD_UPDATE(ctx, length, data, f, incr)  \
   do { \
+if (length == 0)   \
+  goto __md_done;  \
 if ((ctx)->index)  \
   {
\
/* Try to fill partial block */ \
diff --git a/sha256.c b/sha256.c
index 0c9c21a0..907271bc 100644
--- a/sha256.c
+++ b/sha256.c
@@ -105,6 +105,9 @@ sha256_update(struct sha256_ctx *ctx,
  size_t length, const uint8_t *data)
 {
   size_t blocks;
+  if (length == 0)
+return;
+
   if (ctx->index > 0)
 {
   /* Try to fill partial block */


-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-18 Thread Niels Möller
Daiki Ueno  writes:

> Niels Möller  writes:

>> One failure is the new side-channel test failing with mini-gmp. Which is
>> expected, the test should just be skipped in mini-gmp builds (similar to
>> several other sc tests).
>
> Yes, I'm attaching the patch for this.

I've committed and pushed that part of patch.

>> The other is a complaint from ubsan. I guess it's related to the label
>> == NULL case. I don't know what's the proper place for a fix, maybe it's
>> not in the new code. I think the Nettle APIs should generally allow size
>> == 0, ptr == NULL more or less everywhere, even where libc functions we
>> use formally require ptr != NULL.
>
> This is similar to this issue:
> https://gitlab.com/gnutls/gnutls/-/issues/1306
> where we passed NULL to sha*_update in the GnuTLS code, though it turned
> to be a non-issue.

I don't remember seeing that issue. I think it should be allowed to call
sha*_update with 0, NULL (when size is null, there's no reason to ever
attempt to dereference that pointer). I'll see if I can fix that.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-16 Thread Niels Möller
Daiki Ueno  writes:

> That said, I agree that it would be more user friendly to combine them
> and treat it as a regular error, as we do in pss_encode_mgf1.

Thanks for the update, patch merged.

I noticed that there are two failures in the ci builds. See
https://gitlab.com/gnutls/nettle/-/pipelines/1178451395.

One failure is the new side-channel test failing with mini-gmp. Which is
expected, the test should just be skipped in mini-gmp builds (similar to
several other sc tests).

The other is a complaint from ubsan. I guess it's related to the label
== NULL case. I don't know what's the proper place for a fix, maybe it's
not in the new code. I think the Nettle APIs should generally allow size
== 0, ptr == NULL more or less everywhere, even where libc functions we
use formally require ptr != NULL.

Can you have a look?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-15 Thread Niels Möller
Daiki Ueno  writes:

> Thank you; I have addressed those issues.  As for the merging, I think
> it is ready now.

Thanks, merged.

Thanks to the doc update, I now noticed the possibility of failure from
the encryption functions. Failure is propagated from _oaep_encode_mgf1,
which does

  assert (key_size >= 2 * hash->digest_size - 2);

  if (message_length > key_size - 2 * hash->digest_size - 2)
return 0;

Why is the first an assert (it could be triggered by using an unusually
small RSA key with a large hash function, say rsa_oaep_sha512_encrypt
with an old 512-bit RSA key, which from the docs isn't an obviously
invalid usage), and the other a return value to indicate failure?

One alternative could be to instead check

  if (message_length > key_size 
 || message_length + 2 + 2*hash->digest_size > key_size)
return 0;

(with two tests, to not trigger overflow in case message_length is close
to the maximum size_t value; maybe that is more defensive than necessary
since message_length large enough to trigger overflow can't be the size of
properly allocated memory area).

The opposite alternative would be to have a documented way for the
application to get the maximum message size, and have an assertion
failure for both cases. That would have the advantage that no return
value is needed, simplifying the api (at least very locally).

Another doc detail: The docs for the decrypt functions don't say
explicitly that *length is both an input and output argument. The text
for the older function rsa_decrypt could be reused (or possibly
improved).


Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: reply: reply: A new realization of ecc-sm2

2024-02-14 Thread Niels Möller
"zhongxuan (A)"  writes:

> Yes, I've tried to make a fork in
> https://git.lysator.liu.se/nettle/nettle/-/forks/new but failed, it
> just reports ' An error occurred while forking the project. Please try
> again. '.

Sorry, I don't really know how to troubleshoot. Maybe you can try if it
works better on the mirror repo at https://gitlab.com/gnutls/nettle?

I've had a new look at your latest patch. I'm not at all familiar with
sm2, and I'm hoping I don't have to fully understand all details, but I
am a bit confused.

I think one important part of your patch is about adding support for
Weierstrass curves with a different constant than a = -3.

And then I look at the spec at
https://datatracker.ietf.org/doc/html/draft-shen-sm2-ecdsa to see how
the curve really is defined. It looks like that document gives several
examples of curves, including one named "Fp-256", but the one that it
looks like you are defining, in eccdata.c, is the one defined in
Appendix D "Recommended parameters".

So my first question: Is "sm2" an appropriate name for a single curve,
or is there some more specific name for the curve in Appendix D that you
use?

Second question, when I look at that curve, it is defined like this:

:A elliptic curve on a prime field of 256 bits is recommended:
: 
: 
:y^2 = x^3 + ax + b
: 
: 
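: p=FFFFFFFE FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF 00000000 FFFFFFFF FFFFFFFF
: a=FFFFFFFE FFFFFFFF FFFFFFFF FFFFFFFF FFFFFFFF 00000000 FFFFFFFF FFFFFFFC
: b=28E9FA9E 9D9F5E34 4D5A9E4B CF6509A7 F39789F5 15AB8F92 DDBCBD41 4D940E93
: n=FFFFFFFE FFFFFFFF FFFFFFFF FFFFFFFF 7203DF6B 21C6052B 53BBF409 39D54123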
: Gx=32C4AE2C 1F198119 5F990446 6A39C994 8FE30BBF F2660BE1 715A4589 334C74C7
: Gy=BC3736A2 F4F6779C 59BDCEE3 6B692153 D0A9877C C62A4740 02DF32E5 2139F0A0

But for these parameters, we have a = p - 3 = -3 (mod p), like for all
other Weierstrass curves currently supported by Nettle! Which is good
news, since then the same point addition functions can be used, but it
also means that maybe you have done some work that isn't really needed?

Regards,
/Niels


-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-02-14 Thread Niels Möller
3)
> +vxor   F3,F3,F4
> +vxor   R3,R3,R4
> +
> +vxor   F,F,F3
> +vxor   D,R,R3
> +GHASH_REDUCE(D, F, POLY_L, R2, F2)  C R2, F2 used as temporaries

It may be possible to reduce number of registers, without making the
code slower, by accumulating differently. You ultimately accumulate the
values into F, D, before the final reduction. Maybe you don't need
separate registers for all of the F, F2, F3, F4 registers, and all of D,
R, R2, R3, R4?

If you have any short-lived registers used for the AES part, they could
also overlap with short-lived registers used for ghash.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-14 Thread Niels Möller
Daiki Ueno  writes:

> Sorry for the confusion and thank you for the explanation; now I get it.
> I pushed a change along the of option (2).  Could you take a look again?

Thanks, looks good! Two nits, and let me know at which point you'd like
to get it merged, and do further improvements as followup MRs.

Since the oaep.h header now only declares internal functions, it
shouldn't be installed (moved from HEADERS to DISTFILES in Makefile.in).

And it would be nice if the manual could give some more detail about the
label: As I understand it, the label is optional, so it's fine to pass 0,
NULL if not needed. And if used, the same label must be used for both
encrypt and decrypt.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-06 Thread Niels Möller
Daiki Ueno  writes:

> Sorry for the confusion and thank you for the explanation; now I get it.
> I pushed a change along the of option (2).  Could you take a look again?

Thanks. This is an unusually busy week for me, so I'm afraid I'll not be
able to look at this (or any of the other pending changes recently
posted to the list) until next week.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: HPKE ready for Merge!

2024-02-02 Thread Niels Möller
Norbert Pócs  writes:

> The MR is still in a draft phase if I remember correctly. The last
> modifications were not yet reviewed by Niels;
> When would you Niels have time to look into it? I would like to finish it
> up.

Thanks for the reminder. I take it your email from 11 May 2023
summarizes the latest round of changes? I'll try to have another look
over the weekend, and provide a round of feedback limited to how much time
I get to spend on it. I appreciate your effort to trim it down, but it's
nevertheless rather complex (some 1500 lines excluding the tests).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-02-01 Thread Niels Möller
Daiki Ueno  writes:

> Added `mark_bytes_undefined (1, [key->size]);` to the test
> cases doing encryption.

I'm afraid that isn't right. For one, mark_bytes_undefined is
conditioned so it only has any effect when running the sc tests. Second,
it will not produce any warnings for writes, which I think is what we'd
like to detect here. I think the options are:

1. Just don't allocate any extra byte, and valgrind should arrange for
   alerts on out-of-bounds writes without anything special.

2. Allocate an extra byte, write some random value before the call, and
   check that the value is unchanged after the call (some other tests
   do that sort of thing, it's simple, old fashioned, and doesn't depend
   on valgrind).

3. Allocate an extra byte, and mark it using VALGRIND_MAKE_MEM_NOACCESS
   (wrapped in some macro depending on the memcheck.h configure check).
   I don't think that gives any real benefit over valgrind's default
   behavior with (1), but might make sense if done in combination with
   (2).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-01-30 Thread Niels Möller
Niels Möller  writes:

> While the powerpc64 vncipher instruction really wants the original
> subkeys, not transformed. So on power, it would be better to have a
> _nettle_aes_invert that is essentially a memcpy, and then the aes
> decrypt assembly code could be reworked without the xors, and run at exactly
> the same speed as encryption. 

I've tried this out, see branch
https://git.lysator.liu.se/nettle/nettle/-/tree/ppc64-aes-invert. It
appears to give the desired improvement in aes decrypt speed, making it
run at the same speed as aes encrypt. Which is a speedup of about 80%
when benchmarked on power10 (the cfarm120 machine).

> Current _nettle_aes_invert also changes the order of the subkeys, with
> a FIXME comment suggesting that it would be better to update the order
> keys are accessed in the aes decryption functions.

I've merged the changes to keep subkey order the same for encrypt and
decrypt (so that the decrypt round loop uses subkeys starting at the end
of the array), which affects all aes implementations except s390x, which
doesn't need any subkey expansion. But I've deleted the sparc32 assembly
rather than updating it.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-01-29 Thread Niels Möller
Daiki Ueno  writes:

> The zip file is no longer accessible, but I still keep a copy and it
> seems identical to the one at:
> https://github.com/pyca/cryptography/tree/main/vectors/cryptography_vectors/asymmetric/RSA/pkcs-1v2-1d2-vec
>
> Is it OK to use the vector assuming it is public domain?

According to the closest LICENSE file,
https://github.com/pyca/cryptography/blob/main/vectors/LICENSE, it's dual
licensed apache/BSD (our choice), so I think that is fine. And if
we copy just the test vectors and not any surrounding code, it seems
questionable if that is even copyrightable.

So I think copying from there, with proper attribution, is perfectly
fine. Formally, we'll be exercising the BSD option.

> Yes, the length should match key->size; I've omitted the
> ciphertext_length argument.

Thanks. Please remove everywhere, it looks like it's still present in
some form in the test code. (You may still want to allocate an extra
byte at the end and check that it isn't modified. Alternatively, rely on
valgrind for detecting overwrites instead).

> I'm not sure about the leading zeros
> though; as far as I read, nettle_mpz_to_octets seems to keep them.

I think nettle_mpz_to_octets is fine. The problem was when the length
passed to this function was computed using nettle_mpz_sizeinbase_256_u,
like it was in a previous revision.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Question on s390x AES acceleration

2024-01-27 Thread Niels Möller
When I look at the logic in
https://git.lysator.liu.se/nettle/nettle/-/blob/master/fat-s390x.c?ref_type=heads,
automatic detection of hardware acceleration for aes128, aes192 and
aes256 is done independently for each key size, based on corresponding
bits in the "km status" words.

While when configured manually, at configure time or using
NETTLE_FAT_OVERRIDE, there are instead two flags, "msa_x1" enabling
aes128 (and sha256), and "msa_x2" enabling aes192, aes256 (and sha512).

I'm thinking that if we could enable all or none of the s390x aes
assembly, then we could make things a bit simpler, only overriding the
internal _nettle_aes_set_key and _nettle_aes_invert_key, instead of all
9 of aes{128,192,256}_{set_encrypt,set_decrypt,invert}_key.

Are there s390x machines out there that have hardware support for just
some of the aes key sizes but not all?

Regards,
/Niels
-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Sparc assembly

2024-01-24 Thread Niels Möller
Hi,

I just pushed changes to the ci script to enable sparc64 tests. It
turned out to be rather easy with current debian-based tools, despite
sparc64 no longer being a release architecture. (Install
gcc-sparc64-linux-gnu cross compiler, use qemu-sparc64, only workaround
(similar to some other archs) was that QEMU_LD_PREFIX had to be set
explicitly).

I don't know if anyone is interested in sparc performance, or
contributing more sparc64 code (I'd consider this architecture rather
obscure by now). But these tests should at least make it rather easy to
maintain the code for the time being. Even as I consider some changes to
the handling of aes decryption subkeys that will require updates to this
code.

On the other hand, I'm considering deleting the sparc32 assembly code.
Related machines like the sparc stations (I don't remember if I had a
SS-5, SS-10 or SS-20 at the time I wrote this code, but I remember I had
redhat linux installed on it rather than solaris) are out of support
since the late 1990s. 64-bit sparcs, aka sparc v9, were introduced with
the ultrasparc machines from 1995.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64 micro optimization

2024-01-24 Thread Niels Möller
Danny Tsen  writes:

> Thanks for merging the stitched implementation for PPC64 with your
> detailed information and efforts

We're not quite there yet, though. Do you think you could rebase your
work on top of recent changes? Sorry about conflicts, but I think new
macros should fit well with what you need (feel free to have additional
macros, where you find that useful).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: A new realization of ecc-sm2

2024-01-22 Thread Niels Möller
"zhongxuan (A)"  writes:

> Where should I commit my patch? The access request to fork in Nettle / nettle 
> * GitLab (liu.se)<https://git.lysator.liu.se/nettle/nettle> was denied.

Hi, I think you asked for "developer access" in the Nettle repo, which
would allow you to push any changes you like to the repository. You
can't have that, but unfortunately, I couldn't add any explanatory
message when pushing the deny button in gitlab.

The usual way to contribute code in gitlab (as well as on github), is
that you make your own "fork" of the repository, where you have full
permissions. You commit your changes to a branch in that fork, and then
create a merge request asking for changes to be merged into the original
repo.

See
https://docs.gitlab.com/ee/user/project/repository/forking_workflow.html
on how to create a fork from the gitlab ui.

When you work with git locally, you can have both your own "fork" repo
and the original repo as git "remotes".
  
And if gitlab doesn't work out for you, you could also send the updated
patch to this list.

Best regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


ppc64 micro optimization

2024-01-21 Thread Niels Möller
In preparing for merging the gcm-aes "stitched" implementation, I'm
reviewing the existing ghash code. WIP branch "ppc-ghash-macros".

I've introduced a macro GHASH_REDUCE, for the reduction logic. Besides
that, I've been able to improve scheduling of the reduction instructions
(adding in the result of vpmsumd last seems to improve parallelism, some
3% speedup of gcm_update on power10, benchmarked on cfarm120). I've also
streamlined the way load offsets are used, and trimmed the number of
needed vector registers slightly.

For the AES code, I've merged the new macros (I settled on the names
OPN_XXY and OPN_XXXY), no change in speed expected from that change.

I've also tried to understand the difference between AES encrypt and
decrypt, where decrypt is much slower, and uses an extra xor instruction
in the round loop. I think the reason for that is that other AES
implementations (including x86_64 and arm64 instructions, and Nettle's C
implementation) expect the decryption subkeys to be transformed via the
AES "MIX_COLUMN" operation, see
https://gitlab.com/gnutls/nettle/-/blob/master/aes-invert-internal.c?ref_type=heads#L163

While the powerpc64 vncipher instruction really wants the original
subkeys, not transformed. So on power, it would be better to have a
_nettle_aes_invert that is essentially a memcpy, and then the aes
decrypt assembly code could be reworked without the xors, and run at exactly
the same speed as encryption. Current _nettle_aes_invert also changes
the order of the subkeys, with a FIXME comment suggesting that it would
be better to update the order keys are accessed in the aes decryption
functions.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-01-20 Thread Niels Möller
Daiki Ueno  writes:

>>> * For tests, would it make some with some test that check that
>>>   encryption with a given message and randomness gives the expected
>>>   output? Even better if there are any authoritative testcases for that?
>
> I would be happy to add if there are any, even if they are not so
> authoritative, though I wasn't even able to find ones with compatible
> license, in particular with SHA-2 being used as an underlying hash
> algorithm for MGF-1.
>
> - Project Wycheproof (Apache 2.0):
>   
> https://github.com/google/wycheproof/blob/master/testvectors/rsa_oaep_2048_sha256_mgf1sha256_test.json
>
> - Python Cryptography (Apache 2.0 and BSD):
>   https://cryptography.io/en/latest/development/custom-vectors/rsa-oaep-sha2/
>
> In any case, I'll try to check against those vectors manually outside
> the Nettle repository to ensure the correctness.

To me it looks like those sources provide reasonable test vectors for
RSA OAEP decryption. 

On licensing, it looks like Apache and GPLv2 might be incompatible. I've
been a bit sloppy when incorporating test code (e.g., for some time I
had some testcode copied from openssl/libcrypto, to test compatibility
glue). But in this case, I think a fully correct workaround would be to
license the related test file LGPLv3 (no GPLv2 option); odd licensing
for some of the test files shouldn't matter much for Nettle applications
since the testcode isn't part of the library applications link. Proper
attribution is of course important.

But my original question was for testing of RSA *en*cryption, if there
are some deterministic testvectors with known output, with tests wiring
something non-random for the randomness input.

>>> * Is it useful to have oaep_decode_mgf1 and oaep_encode_mgf1 advertised
>>>   as public functions, or would it be better to make them internal?
>
> Made them internal functions.

Thanks. (It was maybe a mistake we didn't do that for the pss_*_mgf1
functions when added years ago).

>>> * Do you see any reasonable (i.e., with a net gain in maintainability)
>>>   way to share more code between _oaep_sec_decrypt_variable and
>>>   _pkcs1_sec_decrypt_variable?
>>
>> I did review this part, and to me it seem like it is more maintainable
>> to keep them separate, they already are tricky as it is, adding more
>> variability sounds to me would just make them more complex and
>> difficult to reason about.
>
> I agree with that, considering potential optimization opportunities by
> the compiler.

Let's leave this as is, then.

>>> * For oaep_decode_mgf1, oaep_encode_mgf1, maybe one could let the caller
>>>   allocate and pass in the appropriate hashing context? Would be easy to
>>>   do, e.g., in rsa_oaep_sha512_decrypt. But it looks like that would be
>>>   inconsistent with pss_mgf1, though (which looks like it needs a
>>>   separate hashing context).
>
> Done.

Nice. We'll still have another one allocated for each call to pss_mgf1,
if I read the code correctly.

>>> * I think it was a design mistake to represent RSA ciphertexts as mpz_t
>>>   rather then octet strings in Nettle's original RSA interfaces. I
>>>   wonder if it would make sense to let the new functions take
>>>   octet strings instead?
>
> Done.

What does the OAEP spec say about the ciphertext length? It would make
the interface easier if we say that the ciphertext length *always*
equals key->size; then one could delete passing and checking of the
ciphertext_length argument. In the current MR, it looks like leading
zero bytes are trimmed (behavior of nettle_mpz_sizeinbase_256_u), so
that ciphertext may sometimes be shorter.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Add RSA-OAEP encryption/decryption to Nettle

2024-01-15 Thread Niels Möller
Daiki Ueno  writes:

> Now that another attack on RSA encryption with PKCS#1 v1.5 padding has
> been discovered (though Nettle is not vulnerable)[1], it is recommended
> to avoid using the v1.5 scheme in new applications[2][3], and thus
> supporting RSA-OAEP in Nettle is becoming more relevant.

I agree oaep support is desirable.

> I made some modifications to the existing merge request[4], mainly to
> make it side-channel safe at decryption:
> https://git.lysator.liu.se/nettle/nettle/-/merge_requests/60

Thanks for reviving this issue, and looking into side-channel silence.

> Could you take a look when you have time?

Thanks, I've had a look, and it looks pretty good to me. Some comments
and questions:

* For tests, would it make sense with some tests that check that
  encryption with a given message and randomness gives the expected
  output? Even better if there are any authoritative testcases for that?

* Is it useful to have oaep_decode_mgf1 and oaep_encode_mgf1 advertised
  as public functions, or would it be better to make them internal?

* Do you see any reasonable (i.e., with a net gain in maintainability)
  way to share more code between _oaep_sec_decrypt_variable and
  _pkcs1_sec_decrypt_variable?

* For oaep_decode_mgf1, oaep_encode_mgf1, maybe one could let the caller
  allocate and pass in the appropriate hashing context? Would be easy to
  do, e.g., in rsa_oaep_sha512_decrypt. But it looks like that would be
  inconsistent with pss_mgf1, though (which looks like it needs a
  separate hashing context).

* I think it was a design mistake to represent RSA ciphertexts as mpz_t
  rather than octet strings in Nettle's original RSA interfaces. I
  wonder if it would make sense to let the new functions take
  octet strings instead?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64: v3: AES/GCM Performance improvement with stitched implementation

2024-01-15 Thread Niels Möller
Niels Möller  writes:

> For next step, I'm considering the refactoring of ghash to use the
> new macros.

I actually started with the macros relevant for the AES code. And it
turned out to be rather easy to do the m4 loops to operate on an
arbitrary list of registers. See branch ppc-aes-macros, in particular,
https://gitlab.com/gnutls/nettle/-/blob/ppc-aes-macros/powerpc64/machine.m4?ref_type=heads#L67

I'm not entirely happy with the naming (OP_YXX, OP_YXXX), which is on
one hand intended to correspond to the instruction pattern the macro
expands to, and at the same time correspond to the expected order of
arguments, which really doesn't work that well. Suggestions welcome.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64: v3: AES/GCM Performance improvement with stitched implementation

2023-12-27 Thread Niels Möller
Danny Tsen  writes:

> Here is another revised patch testing with NETTLE_FAT_OVERRIDE.  Same 
> performance as last version.  I also added a new test vector with 917 bytes 
> for AES/GCM tests to test multiple blocks and partial.  Attached are the 
> patch and benchmark for AES.

Thanks. I haven't yet been able to look closer, but to make progress and
start with the easy parts, I've committed the new test case. For next
step, I'm considering the refactoring of ghash to use the new macros.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64: v2, AES/GCM Performance improvement with stitched implementation

2023-12-11 Thread Niels Möller
register) arguments. Or pass the pointer to the struct gcm_aesxxx_ctx
directly (with the disadvantage that assembly code needs to know
corresponding offsets).

> --- a/powerpc64/machine.m4
> +++ b/powerpc64/machine.m4
> @@ -63,3 +63,40 @@ C INC_VR(VR, INC)
>  define(`INC_VR',`ifelse(substr($1,0,1),`v',
>  ``v'eval($2+substr($1,1,len($1)))',
>  `eval($2+$1)')')
> +
> +C Adding state and round key 0
> +C XOR_4RK0(state, state, rkey0)
> +define(`XOR_4RK0',
> +  `vxor $1, $1, $5
> +   vxor $2, $2, $5
> +   vxor $3, $3, $5
> +   vxor $4, $4, $5')
> +
> +C Do 4 vcipher/vcipherlast
> +C VCIPHER(vcipher/vcipherlast, state, state, rkey)
> +define(`VCIPHER4',
> +  `$1 $2, $2, $6
> +   $1 $3, $3, $6
> +   $1 $4, $4, $6
> +   $1 $5, $5, $6')

I think this could be generalized to an OP_4WAY macro, used as

  OP_4WAY(vxor/vcipher/vcipherlast, a, b, c, d, k) 

One could also consider generalizing it to arbitrary number of registers
with an m4 loop, to have

  OP_NWAY(op, k, a, b,..., x)

expand to

  op a, a, k
  op b, b, k
  ...
  op x, x, k

But that may be overkill if only 4-way and 8-way are used.

> +C Adding multiplication product
> +C ADD_PROD(c1, c2, a, b)
> +define(`ADD_PROD',
> +  `vxor $1,$1,$3
> +   vxor $2,$2,$4')

Maybe rename GF_ADD; ADD_PROD is not so specific.

> +C GF multification of L/M and data
> +C GF_MUL(
> +C GF_MUL(F, R, HL, HM, S)
> +define(`GF_MUL',
> +  `vpmsumd $1,$3,$5
> +   vpmsumd $2,$4,$5')

Looks like GF_MUL is only used in the pattern GF_MUL; GF_MUL;
ADD_PROD? So could perhaps combine in one macro.  With a comment saying
which operation it performs.

> --- /dev/null
> +++ b/powerpc64/p8/gcm-aes-decrypt.asm
[...]

> +define(`SAVE_GPR', `std $1, $2(SP)')
> +define(`RESTORE_GPR', `ld $1, $2(SP)')

I don't think these macros add much readability. One could possibly have
macros that take a range of registers, but not sure that's worth the
effort.

> +.align 5
> +L8x_round_loop1:
> +lxvd2x VSR(K),r11,RK
> +vperm   K,K,K,LE_MASK
> +VCIPHER4(vcipher, S0, S1, S2, S3, K)
> +VCIPHER4(vcipher, S4, S5, S6, S7, K)
> +addi r11,r11,0x10
> +bdnz L8x_round_loop1
> +
> +lxvd2x VSR(K),r11,RK
> +vperm   K,K,K,LE_MASK
> +VCIPHER4(vcipherlast, S0, S1, S2, S3, K)
> +VCIPHER4(vcipherlast, S4, S5, S6, S7, K)
> +
> +cmpdi LOOP, 0
> +beq do_ghash
> +
> +.align 5
> +Loop8x_de:

Is there a good reason why you have another copy of the decrypt round loop,
before the main loop?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Deleting obsolete assembly files?

2023-12-08 Thread Niels Möller
ni...@lysator.liu.se (Niels Möller) writes:

> Simon Josefsson  writes:
>
>> Also, remember that Niels proposal is not about removing these
>> algorithms, just dropping the assembler variant.  So they will continue
>> to work fine on these platforms, but will take advantage of more code
>> scrutiny.  I think that is a reasonable trade-off.

> And the only architectures that currently have any md5 assembly is x86
> and x86_64. On my x86_64 laptop, I see a rather modest performance gain
> of about 6% over the C version. I don't expect anyone willing to work on
> improved md5 performance, on x86_64 or on additional platforms.
 
Getting back to this thread. I've pushed a change to delete md5 assembly
on branch delete-md5-asm, for testing. I don't think carrying md5
assembly code is worth the complexity.

The arcfour assembly was deleted in the 3.9 release. Deletion candidates
remaining: 32-bit x86 (aes (non-aesni), sha1, camellia). 32-bit sparc.
32-bit ARM prior to ARMv6. Possibly also 64-bit sparc (currently, the only
sparc64 assembly is for aes, written in 2007 based on the sparc32 code,
so it's unclear how relevant it is for current sparc processors).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add DRBG-CTR-AES256.

2023-12-06 Thread Niels Möller
Simon Josefsson  writes:

> Please release 3.9 before looking at this! :-)
>
> This adds DRBG-CTR-AES256, what do you think?

I've merged this onto a branch add-drbg-ctr-aes256. I've made some
additional changes: use union nettle_block16 where that made sense,
rename Key -> key, fixed typo in testsuite/Makefile, and extracted the
output logic to its own helper function.

It could be optimized to call aes256_encrypt with more than one block at
a time, when possible, but probably not worth the extra complexity.
 
Please have a look.

For your sntrup761 patch that depends on this, will you be doing any
more work on that in the near future? In the meantime, I've reworked the
testing for side-channel silence, so it should be rather
straight-forward to add such tests for sntrup761.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Patch to detect on CPU capabilities on Apple Silicon

2023-12-05 Thread Niels Möller
Tim Kosse  writes:

> Whoops, I made a typo in the constant for SHA256, please use this
> revised patch instead.

Thanks, applied and pushed to the master-updates branch for testing.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Mailing list archive is not working

2023-12-05 Thread Niels Möller
Justus Winter  writes:

> https://lists.lysator.liu.se/mailman/hyperkitty/list/nettle-bugs@lists.lysator.liu.se/
>
> shows zero mails this year.  Not sure where to raise that, so I'm
> raising this here.

I've asked mail admins. It turned out that the integration between
mailman and hyperkitty was overlooked when the system was upgraded to
mailman3 one and a half years ago. And it seems you are the first(!) user
reporting that it's broken. Thanks for reaching out.

As of today, archives are finally receiving new mail, but unfortunately
it seems traffic since the upgrade until now isn't archived anywhere
near the list server.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: How to update OpenSSL benchmark glue?

2023-12-05 Thread Niels Möller
Simo Sorce  writes:

> On Tue, 2023-12-05 at 13:17 +1300, Amos Jeffries wrote:
>> FWIW, In Squid with OpenSSLv3 we use this:
>> 
>>   EVP_PKEY_CTX_new_id(EVP_PKEY_RSA, NULL)
>> 
>
> EVP_PKEY_CTX_new_from_name is the more proper way in OpenSSL 3.0

My current version uses EVP_RSA_gen and EVP_EC_gen, then I only need
EVP_PKEY, no EVP_PKEY_CTX. As far as I can tell from the docs, those
functions are recommended and not deprecated.

With that change, the valgrind warning disappeared as well.

I've also had to make some changes to nettle-benchmark, it seems
blowfish, cast128 and des are no longer supported for the default
provider, one would need to somehow enable the "legacy" provider, and
crashed (assert failure in the glue code) with recent openssl. It seemed
easier to just delete those benchmarks; comparative benchmarking of
those algorithms doesn't seem that interesting.

Tangent: Not sure why openssl has demoted blowfish to "legacy", if it's
just the shorter 64-bit blocksize that is considered a problem? (According to
https://www.schneier.com/academic/blowfish/: "Blowfish was created in
1993. While there is still no practical attack against the cipher, it
only has a 64-bit block length and was optimized for 32-bit CPUs. If you
are thinking of using this algorithm, I recommend that you use Twofish
instead").

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: How to update OpenSSL benchmark glue?

2023-12-03 Thread Niels Möller
Simo Sorce  writes:

> Ah you do not need to pass any property for the default provider so you
> can pass "" or even NULL.

Thanks, I now have the RSA code updated (on branch update-openssl-bench,
if anyone wants to see the details). Initialization is now 

  ctx->pkey_ctx = EVP_PKEY_CTX_new_from_name (NULL, "RSA", "");
  if (!ctx->pkey_ctx)
die ("OpenSSL EVP_PKEY_CTX_new_from_name (\"RSA\") failed.\n");
  if (EVP_PKEY_keygen_init (ctx->pkey_ctx) <= 0)
die ("OpenSSL EVP_PKEY_keygen_init failed.\n");
  if (EVP_PKEY_CTX_set_rsa_keygen_bits(ctx->pkey_ctx, size) <= 0)
die ("OpenSSL EVP_PKEY_CTX_set_rsa_keygen_bits failed.\n");
  BIGNUM *e = BN_new();
  BN_set_word(e, 65537);
  EVP_PKEY_CTX_set1_rsa_keygen_pubexp (ctx->pkey_ctx, e);
  EVP_PKEY_keygen (ctx->pkey_ctx, &ctx->key);

However, when I run this under valgrind (to check the corresponding
cleanup code doesn't leak memory), I get an error:

  ==3016684== Conditional jump or move depends on uninitialised value(s)
  ==3016684==at 0x4B0B824: EVP_PKEY_generate (in 
/usr/lib/x86_64-linux-gnu/libcrypto.so.3)
  ==3016684==by 0x10F30A: bench_openssl_rsa_init (hogweed-benchmark.c:721)
  ==3016684==by 0x10D7AE: bench_alg (hogweed-benchmark.c:153)
  ==3016684==by 0x10D7AE: main (hogweed-benchmark.c:972)
  ==3016684==

I wonder if my code is missing some initialization, or if that's an
openssl problem? It's also unclear to me when the e bignum above can be
deallocated, does EVP_PKEY_CTX_set1_rsa_keygen_pubexp imply a full copy
into the context?

Next is updating the ecdsa benchmarks, since, e.g.,
EC_KEY_new_by_curve_name, generates deprecation warnings.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: How to update OpenSSL benchmark glue?

2023-11-28 Thread Niels Möller
Simo Sorce  writes:

> This code here generates RSA keys using the modern API:
> https://github.com/latchset/pkcs11-provider/blob/main/tests/tgenkey.c#L129

Thanks, I'll look at that (but I'm off for the https://catsworkshop.dev/
in Copenhagen rest of the week).

> (Ignore the pkcs11_uri params, that's special stuff for the pkcs11
> provider).

What about the first call,

   ctx = EVP_PKEY_CTX_new_from_name(NULL, key_type, "provider=pkcs11");

What should I pass for the last argument if I just want openssl's
software implementation, no hardware tokens?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


How to update OpenSSL benchmark glue?

2023-11-23 Thread Niels Möller
The hogweed-benchmark code for benchmarking OpenSSL uses several
functions that have been deprecated in OpenSSL 3.0, like RSA_new(). I've
spent some hour attempting to update it to non-deprecated functions, but
I'm having a really hard time navigating the openssl apis and
documentation.

I've not yet been able to even generate an RSA key, using openssl
functions like EVP_PKEY_CTX_new_from_name, EVP_PKEY_keygen_init,
EVP_PKEY_CTX_set_rsa_keygen_bits, etc.

Is there anyone on the list familiar with OpenSSL that could help with
this update?

And related question: Do you see a value in hogweed-benchmark (and
nettle-benchmark) running comparative benchmarks to OpenSSL? Otherwise,
maybe it's not worth the effort to keep and maintain that code.

Regards,
/Niels
-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64: AES/GCM Performance improvement with stitched implementation

2023-11-22 Thread Niels Möller
David Edelsohn  writes:

> Calls impose a lot of overhead on Power.

Thanks, that's good to know.

> And both the efficient loop instruction and the preferred indirect call
> instruction use the CTR register.

That's one thing I wonder after having a closer look at the AES loops.

One rather common pattern in GMP and Nettle assembly loops, is to use
the same register as both index register and loop counter. A loop that
in C would conventionally be written as

  for (i = 0; i < n; i++)
dst[i] = f(src[i]);

is written in assembly closer to

  dst += n; src += n; // Base registers point at end of arrays
  n = -n; // Use negative index register
  for (; n != 0; n++)
dst[n] = f(src[n]);

This saves one register (and eliminates corresponding update
instructions), and the loop branch is based on carry flag (or zero flag)
from the index register update n++. (If the items processed by the loop
are larger than a byte, n would also be scaled by the size, and one
would do n += size rather than n++, and it still works just fine).

Would that pattern work well on power, or is it always preferable to use
the special counter register, e.g., if it provides better branch
prediction? I'm not so familiar with power assembly, but from the AES
code it looks like the relevant instructions are mtctr to initialize the
counter, and bdnz to decrement and branch.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ppc64: AES/GCM Performance improvement with stitched implementation

2023-11-22 Thread Niels Möller
Danny Tsen  writes:

> Interleaving at the instructions level may be a good option but due to
> PPC instruction pipeline this may need to have sufficient
> registers/vectors. Use same vectors to change contents in successive
> instructions may require more cycles. In that case, more
> vectors/scalar will get involved and all vectors assignment may have
> to change. That’s the reason I avoided in this case.

To investigate the potential, I would suggest some experiments with
software pipelining.

Write a loop to do 4 blocks of ctr-aes128 at a time, fully unrolling the
round loop. I think that should be 44 instructions of aes mangling, plus
instructions to setup the counter input, and do the final xor and
endianness things with the message. Arrange so that it loads the AES
state in a set of registers we can call A, operating in-place on these
registers. But at the end, arrange the XORing so that the final
cryptotext is located in a different set of registers, B.

Then, write the instructions to do ghash using the B registers as input,
I think that should be about 20-25 instructions. Interleave those as
well as possible with the AES instructions (say, two aes instructions,
one ghash instruction, etc).

Software pipelining means that each iteration of the loop does aes-ctr
on four blocks, + ghash on the output for the four *previous* blocks (so
one needs extra code outside of the loop to deal with first and last 4
blocks). Decrypt processing should be simpler.

Then you can benchmark that loop in isolation. It doesn't need to be the
complete function, the handling of first and last blocks can be omitted,
and it doesn't even have to be completely correct, as long as it's the
right instruction mix and the right data dependencies. The benchmark
should give a good idea for the potential speedup, if any, from
instruction-level interleaving.

I would hope 4-way is doable with available vector registers (and this
inner loop should be less than 100 instructions, so not too
unmanageable). Going up to 8-way (like the current AES code) would also
be interesting, but as you say, you might have a shortage of registers.
If you have to copy state between registers and memory in each iteration
of an 8-way loop (which it looks like you also have to do in your
current patch), that overhead cost may outweigh the gains you have from
more independence in the AES rounds.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Fw: ppc64: AES/GCM Performance improvement with stitched implementation

2023-11-21 Thread Niels Möller
the caller (the C code) to handle the left-over.

> --- a/powerpc64/p8/ghash-update.asm
> +++ b/powerpc64/p8/ghash-update.asm
> @@ -281,6 +281,48 @@ IF_LE(`
>  blr
>  EPILOGUE(_nettle_ghash_update)
>  
> +C
> +C GCM multification and reduction
> +C   All inputs depends on definitions
> +C
> +C .align 5
> +C .global _nettle_gf_mul_4x
> +C _nettle_gf_mul_4x:
> +define(`FUNC_ALIGN', `5')
> +PROLOGUE(_nettle_gf_mul_4x)
> +C mflr 0

Could you call this subroutine also from _ghash_update? Does function
call overhead matter?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Nettle 3.9.1 valgrind errors on debian x86_64 (rsa-sec-decrypt-test shows branch access based on input data

2023-11-16 Thread Niels Möller
Daniel Kahn Gillmor  writes:

> I'm experimenting with using valgrind on the testsuite on the debian
> platform.  on x86_64, testing 3.9.1 as built on debian (debian package
> version 3.9.1-2, built and run against libgmp10 debian package
> 2:6.3.0+dfsg-2), all valgrind tests succeed except for
> rsa-sec-decrypt-test.c.  It looks like there are some branch accesses
> based on input data.  Do you have any recommendations for further
> debugging, or steps we should take to improve the situation?

Hi, that's a bug, let me give some background.

Valgrind can be used to test for side channel silence, or more
precisely, branches or memory addresses depending on secret data, by
telling valgrind to treat the secret data as "undefined". I added logic
to do that to some tests, including rsa-sec-decrypt-test, automatically
enabled if the test is run under valgrind. 

But then that test was broken in a later fix to add more input
validation.

During the last few months, I've reorganized and expanded those tests to
have that needed instrumentation only when tests are run as part of the
sc-*-test scripts, and those tests are also run by default if valgrind
is available, including by the x86_64 native builds in the ci system.

Also, it's possible to hook valgrind into the tests by running 

  make check EMULATOR='valgrind ...'

(that will invoke test binaries with valgrind, but not test shell
scripts).

So my advice would be to either ignore that error, or do further tests
on the master branch in the git repo.

Then there are some details that are somewhat unclear. For the input
validation in rsa_sec_decrypt, since the cryptotext c is presumably
known by the attacker, it should not be a problem if the comparison c <
n leaks information about it. But then maybe the side-channel test
shouldn't mark the cryptotext input as secret at all, only the private
key?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


More tests for side-channel silence

2023-11-15 Thread Niels Möller
I've added a few more tests to check side-channel silence of ecdsa_sign,
curve25519, curve448, ed25519 and ed448.

The ci system runs these tests only for native x86_64 builds. To extend
to other architectures that are supported by valgrind, one would either
need native builds in the ci, or somehow run valgrind under qemu.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Possible bug of cnd_copy

2023-11-12 Thread Niels Möller
NIIBE Yutaka  writes:

> Niels Möller  wrote:
>> 1. Do the changes on branch
>>https://git.lysator.liu.se/nettle/nettle/-/tree/sc-is_zero?ref_type=heads
>>help?
>
> Yes.  It helps.  I confirmed the function cnd_copy has no problem
> with the change (removing != 0, and require callers to use 0/1),
> for the cases I found, using the Compiler Explorer.

Thanks for checking. I've now fixed a few compile issues from the CI
builds, and merged this branch.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Possible bug of cnd_copy

2023-11-09 Thread Niels Möller
NIIBE Yutaka  writes:

> I checked other compilers today.

Thanks for investigating! Questions:

1. Do the changes on branch
   https://git.lysator.liu.se/nettle/nettle/-/tree/sc-is_zero?ref_type=heads
   help?

2. If you install valgrind (including header files), do the recently
   added tests for side-channel silence fail when nettle is built with a
   problem compiler? They're intended to catch this kind of issues (even
   if coverage isn't yet that great).

If it's too difficult or too brittle to get compilers to do the intended
thing, we'll have to add assembly implementation for all archs of
interest. I think there were similar issues, in particular with clang,
for gmp's mpn_sec_tabselect, and that's now in assembly for many
archs.

It's unfortunate if assembly is needed for security, not just
performance, but that's already the case for AES, where the generic
implementation is leaky.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Possible bug of cnd_copy

2023-10-30 Thread Niels Möller
NIIBE Yutaka  writes:

> In the generated code, we can see the conditional jump with the variable
> CND.
>
>   x86 msvc v19.0 (WINE):
>   https://godbolt.org/z/f88edPe46
>
> IIUC, it is better to use something like NOT_EQUAL (in
> nettle/pkcs1-sec-decrypt.c) to compute the mask, too.
>
> If it is my misunderstanding (like MSVC actually is not supported),
> sorry in advance.  I'm learning important things from Nettle
> implementation.  (Thanks again for that.)

Thanks for the report. I think this deserves fixing (even if 32-bit x86
is not a high priority these days). It's a bit tricky to get the
compiler to do the intended thing.

It might also help if one could review call sites for cnd_copy and
arrange so that they pass always 0 or 1 for cnd argument.

I'll be offline the rest of this week, so I will not be able to fix or
review stuff until I'm back.

Regards,
/Niels



-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add DRBG-CTR-AES256.

2023-10-22 Thread Niels Möller
Simon Josefsson  writes:

>>> DRBG-CTR is strange in several ways (e.g., non-uniform seeds), to the
>>> point of being unsafe since it is easy to misuse it.
>>
>> Is that detailed in the paper you link to?
>
> The details and assumptions are clear from the NIST spec, but the
> subjective opinion that it is easy to mis-use is my own.

It would be nice with some reference for this critique. You also link to
https://eprint.iacr.org/2006/379.pdf, which seems related. According to
a quick look at the conclusions, it looks like DRBG AES256 does not
provide advertised security. 

Is the "code book width" in the paper the same as the cipher block size,
so that the problem is that security depends on min(key size, block
size), assuming underlying primitives are secure? I haven't read the
paper carefully.

> My initial patch contained documentation.  While it can always be
> expanded a lot more, I can't think of any further modifications.

Let's use those docs, then. I don't think I've seen any revision of the
patch after my first round of review, could you post an update (on list
or as a merge request)?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Testing for side-channel silence

2023-10-06 Thread Niels Möller
A while ago I wrote

> I'm considering adding more tests depending on valgrind, so
> then it would be nice to include valgrind in the image.

A first working version of this is now implemented. There are only a few
such tests so far: sc-cnd-memcpy-test, sc-gcm-test, sc-memeql-test,
sc-pkcs1-sec-decrypt-test, and sc-rsa-sec-decrypt-test. Adding tests will
be a good way to both verify that, e.g., curve25519 is side-channel
silent, and get a bit more confidence for documenting which Nettle
functions are supposed to be side-channel silent.

To walk through how this works, the new tests are script wrappers
running the non-sc version of the test, e.g., sc-gcm-test looks like

  #! /bin/sh
  
  srcdir=`dirname $0`
  . "${srcdir}/sc-valgrind.sh"
  
  with_valgrind ./gcm-test

In turn, with_valgrind is a simple shell function defined like

  with_valgrind () {
  type valgrind >/dev/null 2>&1 || exit 77
  NETTLE_TEST_SIDE_CHANNEL=1 valgrind -q --exit-on-first-error=yes 
--error-exitcode=1 "$@"
  }

The environment variable NETTLE_TEST_SIDE_CHANNEL is checked by the
main function shared by all test programs,

  if (getenv("NETTLE_TEST_SIDE_CHANNEL"))
{
  #if HAVE_VALGRIND_MEMCHECK_H
  if (RUNNING_ON_VALGRIND)
test_side_channel = 1;
  else
  #endif
SKIP();
}

The actual test code, in this case, gcm-test.c, calls utility functions
mark_bytes_undefined and mark_bytes_defined, which are usually no-ops,
but when valgrind is available, active, and NETTLE_TEST_SIDE_CHANNEL was
set, they use VALGRIND_MAKE_MEM_UNDEFINED and VALGRIND_MAKE_MEM_DEFINED.
In this case, it's the internal ghash functions that are tested in this
way, not the higher level gcm functions.

The tests are running in the gitlab ci for x86_64. I think it should be
easy to enable also for 32-bit x86 builds (I just need to figure out if
apt-get install libc6-dbg:i386 is the right way to get valgrind to work
for 32-bit executables).

I would like to have valgrind tests in the ci also for other archs that
are supported by valgrind. Maybe that requires native builds (on real
hardware, or qemu system emulation), maybe it would be possible to do
something like

  qemu-aarch64 /usr/aarch64-linux-gnu/bin/valgrind ./foo-test

where foo-test is a cross-compiled arm64 executable? Other options?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: CI cross builds failing, advice needed

2023-10-04 Thread Niels Möller
Niels Möller  writes:

> I'm migrating nettle's CI setup on this branch,
> https://gitlab.com/gnutls/nettle/-/tree/ci-nettle-build-images?ref_type=heads,
> which is now partly working, both for native and cross builds.

Everything seems to be up and working now, and merged to the master
branch. I'm considering doing some further refactoring of the ci config,
and then I want to get valgrind side-channel tests in.

> New failures: One report from the static analyzer, that appears valid
> (but harmless).

Fixed.

> Test failures of poly1305 on powerpc64el, which I haven't been able to
> reproduce in a local cross build + qemu. (poly1305 happens to be the
> only ppc assembly depending on power9 features).

I initially depended on qemu-user-binfmt when running the cross tests,
but that appears to not be reliable (it depends on getting the binfmt
registrations into the kernel, but since the ci-jobs are run in a
container, they use the host's kernel). After switching to starting qemu
explicitly, make check EMULATOR=qemu-xxx, all the cross tests pass. I
don't understand why the poly1305 test in particular failed initially.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: CI cross builds failing, advice needed

2023-10-01 Thread Niels Möller
Simon Josefsson  writes:

> I think that it would be better for nettle to maintain separate
> build-images and not rely on other project's build-images.  We could add
> 'gnutls/nettle-build-images' for this if you want?  Or is there some
> other more canonical home for nettle on gitlab already?

This is now setup, at https://git.lysator.liu.se/nettle/build-images,
mirrored to https://gitlab.com/gnutls/nettle-build-images, and resulting
images published at
https://gitlab.com/gnutls/nettle-build-images/container_registry/5633274
Thanks to Simon and Daiki for help setting this up.

Currently one with tools for native builds (including the gnutls build),
one with cross compilers and qemu for cross builds (with
cross-compilers and qemu), and one with clang tools, for its static
analyzer. Maybe the gnutls build should use its own image, or reuse
something from gnutls/build-images, since it has lots of additional
dependencies.

I'm migrating nettle's CI setup on this branch,
https://gitlab.com/gnutls/nettle/-/tree/ci-nettle-build-images?ref_type=heads,
which is now partly working, both for native and cross builds.

New failures: One report from the static analyzer, that appears valid
(but harmless). Test failures of poly1305 on powerpc64el, which I
haven't been able to reproduce in a local cross build + qemu. (poly1305 happens
to be the only ppc assembly depending on power9 features).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


CI cross builds failing, advice needed

2023-08-05 Thread Niels Möller
Hi,

I just noticed that Nettle cross ci builds fail, e.g.,
https://gitlab.com/gnutls/nettle/-/jobs/4791865769

This one fails with

  $ apt-get update -q
  Get:1 http://deb.debian.org/debian stable InRelease [151 kB]
  Get:2 http://deb.debian.org/debian-security stable-security InRelease
  [48.0 kB]
  Get:3 http://deb.debian.org/debian stable-updates InRelease [52.1 kB]
  Reading package lists...
  E: Repository 'http://deb.debian.org/debian stable InRelease' changed
  its 'Codename' value from 'bullseye' to 'bookworm'
  E: Repository 'http://deb.debian.org/debian-security stable-security
  InRelease' changed its 'Codename' value from 'bullseye-security' to
  'bookworm-security'
  E: Repository 'http://deb.debian.org/debian stable-updates InRelease'
  changed its 'Codename' value from 'bullseye-updates' to 'bookworm-updates'

related to recent debian release. And when trying to find how to upgrade
the image used,

  BUILD_IMAGES_PROJECT: gnutls/build-images
  DEBIAN_CROSS_BUILD: buildenv-debian-cross
[...]
  image: $CI_REGISTRY/$BUILD_IMAGES_PROJECT:$DEBIAN_CROSS_BUILD

I found the recent commit

  
https://gitlab.com/gnutls/build-images/-/commit/268e1060cce9f3532439b7f0d1b9a9eec4afce2b

deleting the corresponding docker file. I'm not that familiar with
docker images, but I guess some of the options are

1. Switch to the cross-fedora38 image
   
(https://gitlab.com/gnutls/build-images/-/blob/master/docker-cross-fedora38/Dockerfile?ref_type=heads).

2. Use some official debian image (not maintained by gnutls), and
   install needed cross compilers as part of each job.

3. Maintain a separate image (or images) for use by Nettle ci builds.
   E.g., I'm considering adding more tests depending on valgrind, so
   then it would be nice to include valgrind in the image. And it would
   be nice if all apt commands in the ci scripts could be eliminated.

Efficiency could matter: extra work, like installing lots of packages as
part of each ci job, makes the ci more expensive, and I don't know what
limits apply by default to the gnutls project on gitlab. So
advice appreciated.

Regards,
/Niels

PS. On the topic of valgrind: Is it possible/practical to use valgrind
in a cross setting, e.g., an x86_64 host, cross compiling for arm, and
then use qemu-arm to run an arm valgrind binary in turn running the arm
binary under test? Or is there some other way to run a cross valgrind?

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


At PETS 2023

2023-07-04 Thread Niels Möller
Hi,

I plan to be at PETS 2023 (Privacy Enhancing Technologies Symposium,
https://petsymposium.org/) in Lausanne next week. I'm attending with
my sigsum.org and glasklarteknik.se hats on.

Let me know if you're at the conference (or nearby) and want to meet up.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add DRBG-CTR-AES256.

2023-06-20 Thread Niels Möller
Simon Josefsson  writes:

> Fortuna is newer but I wonder if anyone will ever use Nettle to
> implement this functionality?  Maybe the Nettle documentation could
> suggest that anyone considering Yarrow should research alternatives
> first.

Do you know what GnuTLS uses for randomness? LSH (my SSH implementation)
uses Nettle's yarrow, but I guess that's rather obscure now.

> DRBG-CTR is strange in several ways (e.g., non-uniform seeds), to the
> point of being unsafe since it is easy to misuse it.

Is that detailed in the paper you link to?

> Considering Dual-EC-DRBG, perhaps standardizing "problematic" prng's
> was a design goal with 800-90A, and in that case the DRBG-CTR designs
> makes a whole lot more sense and would be an appropriate algorithm.
>
> Maybe it should only be added as internal functionality to Nettle...

It could be documented with caveats (usage for anything but tests
discouraged, with some brief motivation and/or pointers to references on
how it's bad), motivated by applications that need to comply with that
standard. Or mentioned but undocumented in a similar way as the
knuth_lfib generator. Or kept completely internal.

Since the interface (of the subset you support) is rather simple, and
according to Joachim there are some use cases, I'd lean towards
documenting it.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add Streamlined NTRU Prime sntrup761.

2023-06-20 Thread Niels Möller
Simon Josefsson  writes:

>>   * SNTRUP761_CIPHERTEXT_SIZE: Probably right, even though I'm a bit
>> confused by the "ciphertext" terminology when there's no
>> corresponding plaintext.
>
> Yeah... I think this is actually an area that could do more work, since
> the output is combined but maybe some consumers could want to split it
> up.  This could be fixed later, and there may be good reasons to NOT
> expose that internal structure.  The terminology is a bit unclear if the
> key is included in ciphertext or not.

I'm not following... What's "combined"? The output of the encapsulation
is two blobs: One is the secret key, used for local setup of ciphers,
the other is the "ciphertext", to be sent to the other end and
decapsulated using the private key. I don't see why they would ever be
combined, or split up further?

>> Things I think are desirable to do before merging an initial version:
>> Agree on naming. Rename the single-lower-case-letter macros in the .c
>> file to macro-like names. Add valgrind-based tests of side-channel silence.
>
> Agreed, let's get those fixed.

Ah, and for consistency, the public/private key should be the first
argument to the encapsulation and decapsulation functions.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add Streamlined NTRU Prime sntrup761.

2023-06-19 Thread Niels Möller
Niels Möller  writes:

> And int32_divmod_uint14 looked unused. 

My mistake, it's not unused. It is used (via int32_mod_uint14) by
F3_freeze and Fq_freeze, which appear to use signed representation, |x|
<= 1 and |x| <= (q-1)/2 respectively.

> For sorting, it may need a minor reorg to get rid of the unneeded
> variant.

See patch below. I think that makes the code simpler, but it might be
best to leave as is for now.

>> My take was that it would be nice to add sntrup761 to Nettle ASAP to
>> stabilize API and establish support for the algorithm -- we can optimize
>> or improve the implementation later on (there are many optimized
>> implementations around for different architectures out there).
>
> Makes sense, if it's clear what api and abi should look like (but, e.g.,
> use of union nettle_block16 does affect the abi, I think).

Having a closer look at the header file defining the api. I see no abi
subtleties here, only naming nits.

  * sntrup761_keypair: should be sntrup761_generate_keypair, for consistency.

  * sntrup761_enc, sntrup761_dec: Maybe abbreviate less, is
_encapsulate and _decapsulate too much? Or is _enc and _dec really
established in the area?

  * SNTRUP761_PUBLICKEY_SIZE: I think it would be more consistent with
an underscore, _PUBLIC_KEY_SIZE.

  * SNTRUP761_SECRETKEY_SIZE: I think SNTRUP761_PRIVATE_KEY_SIZE is
more consistent (maybe "private key" is not modern, but it's the
terminology used for all other asymmetric algorithms in nettle).

  * SNTRUP761_CIPHERTEXT_SIZE: Probably right, even though I'm a bit
confused by the "ciphertext" terminology when there's no
corresponding plaintext.

  * SNTRUP761_SIZE: This needs a more specific name, maybe _SECRET_SIZE,
_SHARED_SECRET_SIZE, _SESSION_KEY_SIZE, _OUTPUT_SIZE, ...? 

In your docs, I noticed a copy-paste error in the docs for
SNTRUP761_PUBLICKEY_SIZE.

Things I think are desirable to do before merging an initial version:
Agree on naming. Rename the single-lower-case-letter macros in the .c
file to macro-like names. Add valgrind-based tests of side-channel silence.

(I'd need to read both spec and implementation closer to have more
opinions on the implementation).

Regards,
/Niels

diff --git a/sntrup761.c b/sntrup761.c
index dc1ca015..fb7fd761 100644
--- a/sntrup761.c
+++ b/sntrup761.c
@@ -55,24 +55,20 @@ crypto_hash_sha512 (unsigned char *out, const unsigned char 
*in, int inlen)
   sha512_digest (, SHA512_DIGEST_SIZE, out);
 }
 
-/* from supercop-20201130/crypto_sort/int32/portable4/int32_minmax.inc */
-#define int32_MINMAX(a,b) \
+#define uint32_MINMAX(a,b) \
 do { \
-  int64_t ab = (int64_t)b ^ (int64_t)a; \
-  int64_t c = (int64_t)b - (int64_t)a; \
-  c ^= ab & (c ^ b); \
-  c >>= 31; \
-  c &= ab; \
-  a ^= c; \
-  b ^= c; \
+  uint64_t d = (uint64_t)b - (uint64_t)a; \
+  uint32_t masked_d = (d >> 32) & d; \
+  a += masked_d; \
+  b -= masked_d; \
 } while(0)
 
-/* from supercop-20201130/crypto_sort/int32/portable4/sort.c */
+/* Based on supercop-20201130/crypto_sort/int32/portable4/sort.c, but
+   using uint32_t rather than int32_t. */
 static void
-crypto_sort_int32 (void *array, long long n)
+crypto_sort_uint32 (uint32_t *x, long long n)
 {
   long long top, p, q, r, i, j;
-  int32_t *x = array;
 
   if (n < 2)
 return;
@@ -86,11 +82,11 @@ crypto_sort_int32 (void *array, long long n)
   while (i + 2 * p <= n)
{
  for (j = i; j < i + p; ++j)
-   int32_MINMAX (x[j], x[j + p]);
+   uint32_MINMAX (x[j], x[j + p]);
  i += 2 * p;
}
   for (j = i; j < n - p; ++j)
-   int32_MINMAX (x[j], x[j + p]);
+   uint32_MINMAX (x[j], x[j + p]);
 
   i = 0;
   j = 0;
@@ -101,9 +97,9 @@ crypto_sort_int32 (void *array, long long n)
  {
if (j == n - q)
  goto done;
-   int32_t a = x[j + p];
+   uint32_t a = x[j + p];
for (r = q; r > p; r >>= 1)
- int32_MINMAX (a, x[j + r]);
+ uint32_MINMAX (a, x[j + r]);
x[j + p] = a;
++j;
if (j == i + p)
@@ -116,9 +112,9 @@ crypto_sort_int32 (void *array, long long n)
{
  for (j = i; j < i + p; ++j)
{
- int32_t a = x[j + p];
+ uint32_t a = x[j + p];
  for (r = q; r > p; r >>= 1)
-   int32_MINMAX (a, x[j + r]);
+   uint32_MINMAX (a, x[j + r]);
  x[j + p] = a;
}
  i += 2 * p;
@@ -127,9 +123,9 @@ crypto_sort_int32 (void *array, long long n)
  j = i;
  while (j < n - q)
{
- int32_t a = x[j + p];
+ uint32_t a = x[j + p];
  for (r = q; r > p; r >>= 1)
-   int32_MINMAX (

Re: [PATCH] Add Streamlined NTRU Prime sntrup761.

2023-06-19 Thread Niels Möller
Simon Josefsson  writes:

> No objection, but I find it challenging to come up with a revised patch
> that I feel comfortable with in the near future.  I'm not sure I even
> understood what unused functions you noticed (and how?); that fix would
> be easy to do.  Gaining confidence in rewritten parts feels a bit more
> complicated.  Would you like to revise the code?

I may be able to try it out in the next few weeks. If I just check out
your branch, do tests work out of the box or do I need to somehow patch
in the DRBG-CTR-AES256 too? 

There are some style details in the current patch that bother me a bit,
e.g., "q" used as a regular parameter/variable in most of the code, and
then defined as a macro further down.

  #define q 4591

should be changed to something like

  #define SNTRUP761_Q 4591

(or maybe just SNTRUP_Q if it is intended to parametrize the code). 

> Aligned API/ABI's are nice, and good to get right early.  Is
> 'nettle_block16' still the right way to do this, or is this possible to
> (with arguably more readable) aligned() or alignof() attributes?

I think nettle_block16 is the way to go, for representing 16 byte blocks
where we're in control of the alignment (in contrast, e.g, to user's
plaintext/cryptotext for which we don't require any alignment).

I'd like to use alignas, and make nettle_block16 16-byte aligned at
least on x86_64 (and on other archs where assembly code can benefit from
alignment larger than that of a uint64). But alignas is C11, and I
hesitate to require that (while I'm considering requiring C99). And
since this is in public header files and part of the abi, it doesn't
make sense with compiler specific ifdefs.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add Streamlined NTRU Prime sntrup761.

2023-06-19 Thread Niels Möller
Simon Josefsson  writes:

>> In general, it makes sense to add support for post-quantum key exchange
>> methods, another candidate seems to be https://classic.mceliece.org/
>> (with the drawback of much larger pubkeys).
>
> +1

I've been asking some other people too. sntrup seems to be a good option.
Classic McEliece makes sense too, with a different underlying problem,
and a different tradeoff (possibly more conservative security, but
larger pubkeys). Other NIST candidates Saber and Kyber I'm told have
some patent issues, so I prefer not to touch them until that has been
sorted out.

So should we focus on getting sntrup761 in as the first post-quantum
key exchange algorithm?

>>> Please consider it a first iteration for early review.
>>
>> I initially looked at the arithmetics. The signed (int32) sorting and
>> division seems unused?
>
> Do you mean crypto_sort_int32?  It is called by crypto_sort_uint32.

And int32_divmod_uint14 looked unused. For sorting, it may need a minor
reorg to get rid of the unneeded variant.

>> For the side-channel silent divmod function, it seems we divide
>> exclusively with one or a few constants, then we could precompute
>> needed constants and perhaps simplify a bit.
>
> Possibly - this is reference code and supports other sntrup lengths.
> Supporting multiple lengths often leads to complexity which leads to
> reduced security.  As far as I can tell, the non-sntrup761 lengths were
> insisted upon by NIST.  So the answer depends on if we want to allow
> this code to be re-used by other sntrup lengths too.

One doesn't have to hard code a single divisor. I'm thinking of
precomputing the reciprocal (and any other magic constant depending on
the divisor), currently done at runtime as

   uint32_t v = 0x80000000;
   v /= m;

(unless compiler does lots of inlining).

> Also, do we want to deviate from audited implementations?

Good question. I think the answer is yes. Some considerations:

* We need proper tests for changes, including side-channel things that
  can be tested with valgrind.
 
* If I have to choose between audited code and readable code, I think I
  would usually go for the latter.

* It's nice to have code consistent with general style in Nettle. And
  more importantly, run-time selection of code should be done with the
  same fat machinery as for other algorithms.

* To me, it seems unlikely that we could wrap the audited reference
  implementation in a way that is both practical, and makes the audit
  provide significant confidence in the complete Nettle implementation.

> My take was that it would be nice to add sntrup761 to Nettle ASAP to
> stabilize API and establish support for the algorithm -- we can optimize
> or improve the implementation later on (there are many optimized
> implementations around for different architectures out there).

Makes sense, if it's clear what api and abi should look like (but, e.g.,
use of union nettle_block16 does affect the abi, I think).

Regards,
/Niels


-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add Streamlined NTRU Prime sntrup761.

2023-06-18 Thread Niels Möller
Simon Josefsson  writes:

> This adds sntrup761, what do you think?

What's the context/usecase? I saw some mails on the ietf-ssh list, but
it was a bit unclear to me what the status of this algorithm is.

In general, it makes sense to add support for post-quantum key exchange
methods, another candidate seems to be https://classic.mceliece.org/
(with the drawback of much larger pubkeys).

> Please consider it a first iteration for early review.

I initially looked at the arithmetics. The signed (int32) sorting and
division seems unused? For the side-channel silent divmod function, it
seems we divide exclusively with one or a few constants, then we could
precompute needed constants and perhaps simplify a bit.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: [PATCH] Add DRBG-CTR-AES256.

2023-06-18 Thread Niels Möller
Simon Josefsson  writes:

> This adds DRBG-CTR-AES256, what do you think?

Thanks, I've had a first look.

> --- /dev/null
> +++ b/drbg-ctr-aes256.c
> @@ -0,0 +1,100 @@
> +/* drbg-ctr-aes256.c
> +static void
> +drbg_ctr_aes256_update (struct aes256_ctx *Key,
> + uint8_t *V, uint8_t *provided_data)
> +{
> +  uint8_t tmp[DRBG_CTR_AES256_SEED_SIZE];
> +
> +  INCREMENT (AES_BLOCK_SIZE, V);
> +  aes256_encrypt (Key, AES_BLOCK_SIZE, tmp, V);
> +
> +  INCREMENT (AES_BLOCK_SIZE, V);
> +  aes256_encrypt (Key, AES_BLOCK_SIZE, tmp + AES_BLOCK_SIZE, V);
> +
> +  INCREMENT (AES_BLOCK_SIZE, V);
> +  aes256_encrypt (Key, AES_BLOCK_SIZE, tmp + 2 * AES_BLOCK_SIZE, V);

You could perhaps use ctr_fill16 or something similar for this,
currently a static function in ctr.c. Even though I guess it's
appropriate to aim for clarity rather than highest performance for this
function.

Hmm, or fill tmp with zeros, followed by a call to _nettle_ctr_crypt16
(implies a redundant memxor operation, but perhaps simpler code)?

> +void
> +drbg_ctr_aes256_random (struct drbg_ctr_aes256_ctx *ctx,
> + size_t n, uint8_t *dst)
> +{
> +  while (n >= AES_BLOCK_SIZE)
> +{
> +  INCREMENT (AES_BLOCK_SIZE, ctx->V);
> +  aes256_encrypt (&ctx->Key, AES_BLOCK_SIZE, dst, ctx->V);
> +  dst += AES_BLOCK_SIZE;
> +  n -= AES_BLOCK_SIZE;
> +}

Here too, could gain performance by filling the output buffer with ctr
values and do a single call to aes256_encrypt.

> +struct drbg_ctr_aes256_ctx
> +{
> +  struct aes256_ctx Key;
> +  uint8_t V[AES_BLOCK_SIZE];
> +};

It's closer to Nettle style with lower case names, at least for "key".
And V could use the type union nettle_block16 (which provides stricter
alignment than a uint8_t).

> +@subsection DRBG-CTR
> +
> +The Deterministic Random Bit Generator (DRBG) family is a complex family
> +of deterministic randomness generators published by NIST in SP 800-90A.

It would be good with a reference to the spec also in the .c or .h file.

> +We support what we believe is the reasonable parts of the CTR_DRBG
> +algorithm for AES256.  Re-seeding, personalization strings, derivation
> +functions and support for non-AES256 is not implemented.
> +Personalization strings can be implemented by the caller, if desired,
> +with xor.  If you need re-seeding or entropy derivation, we suggest that
> +you use Yarrow instead.

Side question: Is Yarrow still a reasonable recommendation, or would it
be good to add some alternative (either its successor Fortuna, or
something completely different)?

> +Our suggested use-case for DRBG-CTR is to deterministically generate
> +known values from a seed when comparing against expected values for some
> +other algorithm.

That's a rather limited usecase. Do you think it's inappropriate to use,
e.g., to expand a short secret seed into multiple subkeys?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


ANNOUNCE: Nettle-3.9.1

2023-06-01 Thread Niels Möller
I've prepared a bugfix release for GNU Nettle, a low-level
cryptographic library. The most severe of the fixed bugs was a memory
corruption bug in the new OCB implementation. See below for the complete
list of bug fixes.

The Nettle home page can be found at
https://www.lysator.liu.se/~nisse/nettle/, and the manual at
https://www.lysator.liu.se/~nisse/nettle/nettle.html.

The release can be downloaded from

  https://ftp.gnu.org/gnu/nettle/nettle-3.9.1.tar.gz
  ftp://ftp.gnu.org/gnu/nettle/nettle-3.9.1.tar.gz
  https://www.lysator.liu.se/~nisse/archive/nettle-3.9.1.tar.gz

Happy hacking,
/Niels Möller

NEWS for the Nettle 3.9.1 release

This is a bugfix release, fixing a few bugs reported for
Nettle-3.9. The bug in the new OCB code may be exploitable for
denial of service or worse, since triggering it leads to
memory corruption. Upgrading from Nettle-3.9 to the new
version is strongly recommended.

The new version is intended to be fully source and binary
compatible with Nettle-3.6. The shared library names are
libnettle.so.8.8 and libhogweed.so.6.8, with sonames
libnettle.so.8 and libhogweed.so.6.

Bug fixes:

* Fix OCB loop for processing messages of size 272 bytes or
  larger. Reported and fixed by Jussi Kivilinna.

* Fix alignment bug in the new x86_64 non-pclmul assembly
  implementation of ghash. Reported by Henrik Grubbström.

* Fix build-time memory leak in eccdata. Reported by Noah
  Watkins.

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.



signature.asc
Description: PGP signature
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ANNOUNCE: Nettle-3.9

2023-05-19 Thread Niels Möller
Jussi Kivilinna  writes:

> On 15.5.2023 23.21, Niels Möller wrote:
>> Jussi Kivilinna  writes:
>> 
>>> I ran into memory corruption issue when benchmarking new OCB code.
>>>
>>> I think "dst -= size;" in ocb_crypt_n() should be "dst += size;" ...
>>>   https://git.lysator.liu.se/nettle/nettle/-/blob/master/ocb.c#L240
>> Ooops. I think you're right. How large input sizes did you need to
>> trigger problems?
>
> Input needs to be 272 bytes (16*17B) to trigger the problem. Here's
> what I get with valgrind (nettle and benchmarking tool compiled with -Og):
>
> I tried to add test-vector (see at bottom) from libgcrypt for large input
> testing but could not get it working. Ciphertext generation works after
> fixing dst pointer increment and changing "ctx->message_count += n;" to
> "ctx->message_count += blocks;" in ocb_crypt_n(), but tag still does not
> match output from libgcrypt:

I've applied your patch (including the test fix from your other mail).
Thanks a lot!

>>> Also it would be nice if ocb_aes128 could be added to nettle_aeads
>>> array for easy access.
>> Which combination(s) of nonce size and tag size would it be useful
>> to
>> advertise like that?
>
> Would same values as for gcm_aes128 make sense (nonce=12, tag=16)?
>
> My use-case is pretty simply to find "ocb_" from nettle_aeads
> list for benchmark run and don't really care about which nonce/tag
> length gets used.

I'd like the choice to be guided by what actual applications need. Nonce
12, tag 16 sounds reasonable, but if I've understood it correctly, at
least openpgp uses something different.

For benchmarks, I hope it's not too cumbersome to define your own
struct nettle_aead, similar to what's in nettle-internal.c.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Undefined reference with clang16 and address sanitizer

2023-05-15 Thread Niels Möller
Noah Watkins  writes:

> Making it through
> further in the compilation process, though, we start to hit more
> undefined references with the sanitizers turned on with clang16:
>
> ```
> clang -fsanitize=address -fsanitize=leak -g -O2 -ggdb3 -Wall -W
> -Wno-sign-compare   -Wmissing-prototypes -Wmissing-declarations
> -Wstrict-prototypes   -Wpointer-arith -Wbad-function-cast
> -Wnested-externs -L.. -fsanitize=address -fsanitize=leak sexp-conv.o
> input.o output.o parse.o misc.o ../getopt.o ../getopt1.o -lnettle
> -lgmp  -o sexp-conv
> /usr/bin/ld: ../libnettle.so: undefined reference to
> `_nettle_aes192_encrypt_aesni'

This, and the other missing symbols, appear to all be for symbols that
are expected to be defined in assembly files. E.g, in my x86_64 build,
this symbol should be defined in the object file produced by these build
rules:

  /usr/bin/m4 /home/nisse/hack/nettle/m4-utils.m4 
/home/nisse/hack/nettle/asm.m4 config.m4 machine.m4 aes192-encrypt-2.asm 
>aes192-encrypt-2.s
  gcc -I.  -DHAVE_CONFIG_H -g -O2 -ggdb3 -Wall -W -Wno-sign-compare 
-Wmissing-prototypes -Wmissing-declarations -Wstrict-prototypes -Wpointer-arith 
-Wbad-function-cast -Wnested-externs -fpic -MT aes192-encrypt-2.o -MD -MP -MF 
aes192-encrypt-2.o.d  -c aes192-encrypt-2.s

As can be verified with 

  $ nm aes192-encrypt-2.o 
  0000000000000000 T _nettle_aes192_encrypt_aesni

So you need to investigate what corresponding steps produce in your build.

(Maybe it would make sense to change the Makefile rules to not use $(CC)
for processing assembly files, but traditionally, unix C compilers do
the right thing in that case).

Also double check that you do a make distclean or use a fresh build
directory when changing configure options, since you can get all sorts
of weird errors if you have inconsistent object files lying around.
 
Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ANNOUNCE: Nettle-3.9

2023-05-15 Thread Niels Möller
Jussi Kivilinna  writes:

> I ran into memory corruption issue when benchmarking new OCB code.
>
> I think "dst -= size;" in ocb_crypt_n() should be "dst += size;" ...
>  https://git.lysator.liu.se/nettle/nettle/-/blob/master/ocb.c#L240

Ooops. I think you're right. How large input sizes did you need to
trigger problems?

I just tried valgrind ./examples/nettle-benchmark ocb, which I would
expect to call this function with about 10 KiB data at a time, but no
crashes and no complaints from valgrind, which is puzzling. I'll need
some more investigation to reproduce the problem.

> Also it would be nice if ocb_aes128 could be added to nettle_aeads
> array for easy access.

Which combination(s) of nonce size and tag size would it be useful to
advertise like that?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Undefined reference with clang16 and address sanitizer

2023-05-15 Thread Niels Möller
Noah Watkins  writes:

> Any thoughts on this? Since this runs as part of the build process, it
> is blocking one of our pipelines.

Looks like a missing mpz_clear. I think it should be fixed with
https://git.lysator.liu.se/nettle/nettle/-/commit/966da449232766ad41b9be4f263fcccd4500bd22

Thanks for the report,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


ANNOUNCE: Nettle-3.9

2023-05-14 Thread Niels Möller
I'm happy to announce a new release of GNU Nettle, a low-level
cryptographic library. The release features new SIV-GCM and OCB
authenticated encryption modes, and improved performance for SHA256 and
Poly1305, among other things. See NEWS entries below.

The Nettle home page can be found at
https://www.lysator.liu.se/~nisse/nettle/, and the manual at
https://www.lysator.liu.se/~nisse/nettle/nettle.html.

The release can be downloaded from

  https://ftp.gnu.org/gnu/nettle/nettle-3.9.tar.gz
  ftp://ftp.gnu.org/gnu/nettle/nettle-3.9.tar.gz
  https://www.lysator.liu.se/~nisse/archive/nettle-3.9.tar.gz

Happy hacking,
/Niels Möller

NEWS for the Nettle 3.9 release

This release includes bug fixes, several new features, a few
performance improvements, and one performance regression
affecting GCM on certain platforms.

The new version is intended to be fully source and binary
compatible with Nettle-3.6. The shared library names are
libnettle.so.8.7 and libhogweed.so.6.7, with sonames
libnettle.so.8 and libhogweed.so.6.

This release includes a rewrite of the C implementation of
GHASH (dating from 2011), as well as the plain x86_64 assembly
version, to use precomputed tables in a different way, with
tables always accessed in the same sequential manner.

This should make Nettle's GHASH implementation side-channel
silent on all platforms, but considerably slower on platforms
without carry-less mul instructions. E.g., benchmarks of the C
implementation on x86_64 showed a slowdown of 3 times.

Bug fixes:

* Fix bug in ecdsa and gostdsa signature verify operation, for
  the unlikely corner case that point addition really is point
  duplication.

* Fix for chacha on Power7, nettle's assembly used an
  instruction only available on later processors. Fixed by
  Mamone Tarsha.

* GHASH implementation should now be side-channel silent on
  all architectures.

* A few portability fixes for *BSD.

New features:

* Support for the SM4 block cipher, contributed by Tianjia
  Zhang.

* Support for the Balloon password hash, contributed by Zoltan
  Fridrich.

* Support for SIV-GCM authenticated encryption mode,
  contributed by Daiki Ueno.

* Support for OCB authenticated encryption mode.

* New exported functions md5_compress, sha1_compress,
  sha256_compress, sha512_compress, based on patches from
  Corentin Labbe.

Optimizations:

* Improved sha256 performance, in particular for x86_64 and
  s390x.

* Use GMP's mpn_sec_tabselect, which is implemented in
  assembly on many platforms, and delete the similar nettle
  function. Gives a modest speedup to all ecc operations.

* Faster poly1305 for x86_64 and ppc64. New ppc code
  contributed by Mamone Tarsha.

Miscellaneous:

* New ASM_FLAGS variable recognized by configure.

* Delete all arcfour assembly code. Affects 32-bit x86, 32-bit
  and 64-bit sparc.

Known issues:

* Version 6.2.1 of GNU GMP (the most recent GMP release as of
  this writing) has a known issue for MacOS on 64-bit ARM: GMP
  assembly files use the reserved x18 register. On this
  platform it is recommended to use a GMP snapshot where this
  bug is fixed, and upgrade to a later GMP release when one
  becomes available.

* Also on MacOS, Nettle's testsuite may still break due to
  DYLD_LIBRARY_PATH being discarded under some circumstances.
  As a workaround, use

  make check EMULATOR='env DYLD_LIBRARY_PATH=$(TEST_SHLIB_DIR)'

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.



signature.asc
Description: PGP signature
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ARCFOUR doc fixes

2023-05-11 Thread Niels Möller
Simon Josefsson  writes:

> Hi
>
> What do you think?

Looks good, thanks. Merged this, as well as your other doc fix.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: ancient install-sh and texinfo.tex?

2023-05-11 Thread Niels Möller
Simon Josefsson  writes:

> How about this patch?  They are not up to date and I couldn't find
> anywhere that uses them anyway.

I agree texinfo.tex appears unused (nettle.texinfo is processed with
makeinfo and texi2pdf). If it is deleted, Makefile.in DISTFILES need
updating as well.

I think install-sh is referenced by AC_PROG_INSTALL, which is used in
configure.ac, and it seems install-sh is mentioned in the generated
configure script.

Should I just copy latest version from
https://git.savannah.gnu.org/cgit/gnulib.git/tree/build-aux/install-sh ?

Probably a good idea to also update config.guess and config.sub to
latest versions (previous update in Nettle was a year ago).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Preparing for Nettle-3.9

2023-05-10 Thread Niels Möller
Niels Möller  writes:

> If there are no show-stoppers, I may make the release as soon as this
> Wednesday evening (May 10, and for me, evening starts around 17 UTC).

Testing is going rather well. I've found and fixed one bug giving a
compile error in the tests when configured with --disable-public-key.
Beyond the configurations in the gitlab ci, I've successfully tested on

  x86_64 freebsd
  sparc solaris 10 (thanks to gcc farm)
  64-bit windows (via cross-compile + wine)

For macs, I've had mostly successful reports, except that sometimes mac
arm64 builds seem to suffer from a GMP known issue in GMP-6.2.1, with
improper use of x18 register in GMP assembly files (but I couldn't repro
on gcc104.fsffrance.org). This probably deserves a note in the NEWS
file.

That means that I hope to package and announce a release, if not
tonight, then within the next few days.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Preparing for Nettle-3.9

2023-05-08 Thread Niels Möller
Justus Winter  writes:

> Most appreciated!  I read the documentation and found only two minor
> issues (patches sent).  The interface looks good.

Thanks, both patches applied.

> We have quite a few
> tests: we instantiate it with every compatible cipher and do a roundtrip
> test, we have the test vectors from RFC7253, we have an additional set
> of test vectors with 15 byte nonces published by dkg, 

Do you have any test vectors with a dozen or so blocks? Nettle's
testsuite/ocb-test.c includes some home-made test vectors with 16
blocks, to verify the code paths that try to handle multiple blocks
more efficiently. See
https://git.lysator.liu.se/nettle/nettle/-/blob/master/testsuite/ocb-test.c#L222
and below.

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Preparing for Nettle-3.9

2023-05-07 Thread Niels Möller
Hi,

I would like to get Nettle-3.9 out (as well as any bugfix update 3.9.1
if needed) before summer. I've written the NEWS entries for 3.9 and
pushed the commits with version updates. See current NEWS draft below.

I would much appreciate 

* Testing, in particular on systems not covered by the gitlab CI (e.g.,
  bsd, macos, windows).

* Review of the NEWS file. It's based on my reading of ChangeLog, but if
  you're aware of contributions or notable changes not listed, let me
  know.

* Say if there's any ongoing work that you think ought to be included in
  this release. (I'd prefer to not delay the release for the ongoing
  work that I am aware of).

If there are no show-stoppers, I may make the release as soon as this
Wednesday evening (May 10, and for me, evening starts around 17 UTC).
That means, that if you want to help out with testing or review or
anything else related to the release, but, very understandably, can't do
that by Wednesday, just let me know your plans before Wednesday so we
can coordinate.

Regards,
/Niels

NEWS for the Nettle 3.9 release

This release includes bug fixes, several new features, a few
performance improvements, and one performance regression
affecting GCM on certain platforms.

The new version is intended to be fully source and binary
compatible with Nettle-3.6. The shared library names are
libnettle.so.8.7 and libhogweed.so.6.7, with sonames
libnettle.so.8 and libhogweed.so.6.

This release includes a rewrite of the C implementation of
GHASH (dating from 2011), as well as the plain x86_64 assembly
version, to use precomputed tables in a different way, with
tables always accessed in the same sequential manner.

This should make Nettle's GHASH implementation side-channel
silent on all platforms, but considerably slower on platforms
without carry-less mul instructions. E.g., benchmarks of the C
implementation on x86_64 showed a slowdown of 3 times.

Bug fixes:

* Fix bug in ecdsa and gostdsa signature verify operation, for
  the unlikely corner case that point addition really is point
  duplication.

* Fix for chacha on Power7, nettle's assembly used an
  instruction only available on later processors. Fixed by
  Mamone Tarsha.

* GHASH implementation should now be side-channel silent on
  all architectures.

* A few portability fixes for *BSD.

New features:

* Support for the SM4 block cipher, contributed by Tianjia
  Zhang.

* Support for the Balloon password hash, contributed by Zoltan
  Fridrich.

* Support for SIV-GCM authenticated encryption mode,
  contributed by Daiki Ueno.

* Support for OCB authenticated encryption mode.

* New exported functions md5_compress, sha1_compress,
  sha256_compress, sha512_compress, based on patches from
  Corentin Labbe.

Optimizations:

* Improved sha256 performance, in particular for x86_64 and
  s390x.

* Use GMP's mpn_sec_tabselect, which is implemented in
  assembly on many platforms, and delete the similar nettle
  function. Gives a modest speedup to all ecc operations.

* Faster poly1305 for x86_64 and ppc64. New ppc code
  contributed by Mamone Tarsha.

Miscellaneous:

* New ASM_FLAGS variable recognized by configure.

* Delete all arcfour assembly code. Affects 32-bit x86, 32-bit
  and 64-bit sparc.

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.

___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Undefined reference with clang16 and address sanitizer

2023-05-07 Thread Niels Möller
Noah Watkins  writes:

> (fwiw sanitizer does report a memory leak when eccdata is
> running at the end of make).
 
If it looks like the sanitizer could be right, can you share the error
report?

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


Re: Undefined reference with clang16 and address sanitizer

2023-04-25 Thread Niels Möller
Noah Watkins  writes:

> Hi nettle-bugs,
>
> We just updated to clang-16 and are encountering an issue compiling
> nettle with -fsanitize=address.
>
> Configured with
>
> CC=clang CXX=clang++ CFLAGS="-fsanitize=address"
> LDFLAGS="-fsanitize=address" ./configure --disable-documentation
> --enable-x86-aesni

Note that --enable-x86-aesni has no effect in a fat build (which is the
default). If you really want to unconditionally use aesni instructions, you
need --disable-fat --enable-x86-aesni.

Also, Nettle's configure modifies CFLAGS (which is somewhat debatable),
so it may be more reliable with

  CC="clang -fsanitize=address"

> I did just test with upstream nettle and the issue appears to be present 
> there.

To be clear, you mean the nettle-3.8.1 release?

> /usr/bin/ld: ../libnettle.so: undefined reference to
> `_nettle_aes192_encrypt_aesni'
> /usr/bin/ld: ../libnettle.so: undefined reference to 
> `_nettle_poly1305_set_key'
> /usr/bin/ld: ../libnettle.so: undefined reference to `_nettle_salsa20_2core'
> /usr/bin/ld: ../libnettle.so: undefined reference to `_nettle_poly1305_blocks'
> /usr/bin/ld: ../libnettle.so: undefined reference to `nettle_serpent_decrypt'
> /usr/bin/ld: ../libnettle.so: undefined reference to `_nettle_memxor_sse2'
> /usr/bin/ld: ../libnettle.so: undefined reference to `_nettle_umac_nh_n'
> /usr/bin/ld: ../libnettle.so: undefined reference to `_nettle_camellia_crypt'
> /usr/bin/ld: ../libnettle.so: undefined reference to
> `_nettle_aes128_encrypt_aesni'
> /usr/bin/ld: ../libnettle.so: undefined reference to
> `_nettle_ghash_update_pclmul'
> ```

It seems you get link errors on all the assembly files that should go
into libnettle.so. 

The way it's supposed to work, configure should symlink various .asm
files from the x86_64/ subdirectories into the build directory, where
they are preprocessed with m4 into .s files, and passed to the compiler
(unix C compiler frontends traditionally recognize .s files as assembly,
and invoke the appropriate assembler).

To troubleshoot, I would suggest 

(i) double check that you start with a clean tree, make distclean,

(ii) compare configure output and build steps between your clang-14
and clang-16 builds,

(iii) examine the contents of libnettle.so, to try to figure out if the
assembly files are missing completely, or if they're there but with some
other symbol names (those are tweaked a bit by fat build logic: names
with suffixes like _aesni and _pclmul are typical for fat builds, and
set up by the wrapper files in x86_64/fat/).

Regards,
/Niels

-- 
Niels Möller. PGP key CB4962D070D77D7FCB8BA36271D8F1FF368C6677.
Internet email is subject to wholesale government surveillance.
___
nettle-bugs mailing list -- nettle-bugs@lists.lysator.liu.se
To unsubscribe send an email to nettle-bugs-le...@lists.lysator.liu.se


  1   2   3   4   5   6   7   8   9   10   >