Re: [PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-17 Thread brian m. carlson
On Wed, Oct 17, 2018 at 06:12:41PM +0200, SZEDER Gábor wrote:
> On macOS there is a MIN macro already defined in the system headers,
> resulting in the following error:
> 
>   CC sha256/block/sha256.o
>   sha256/block/sha256.c:133:9: error: 'MIN' macro redefined 
> [-Werror,-Wmacro-redefined]
>   #define MIN(x, y) ((x) < (y) ? (x) : (y))
>   ^
>   /usr/include/sys/param.h:215:9: note: previous definition is here
>   #define MIN(a,b) (((a)<(b))?(a):(b))
>   ^
>   1 error generated.
>   make: *** [sha256/block/sha256.o] Error 1
> 
> A simple "#undef MIN" solves this issue.  However, I wonder whether we
> should #undef the other #define directives as well, just to be sure
> (and perhaps overly cautious).

I'll undefine the macros at the top.

For MIN, I'm going to go with the simple approach and pull pieces from
the block-sha1 implementation, which doesn't require this code path...

> Some GCC versions (e.g. gcc-4.8 with -O2 -Wall -Werror) complain about
> the above line:
> 
>   CC sha256/block/sha256.o
>   sha256/block/sha256.c: In function ‘blk_SHA256_Final’:
>   sha256/block/sha256.c:174:2: error: dereferencing type-punned pointer will 
> break strict-aliasing rules [-Werror=strict-aliasing]
> put_be64(ctx->buf + trip, ctx->length);
> ^
>   cc1: all warnings being treated as errors
>   make: *** [sha256/block/sha256.o] Error 1
> 
> Something like this makes it compile:
> 
>   void *ptr = ctx->buf + trip;
>   put_be64(ptr, ctx->length);
> 
> However, it's not immediately obvious to me why the compiler
> complains, or why that intermediate void* variable makes any
> difference, but now it's not the time to put on my language lawyer
> hat.
> 
> Perhaps an old compiler bug?  Clang in general, newer GCC versions, or
> gcc-4.8 with -Wall -Werror but without -O2 don't seem to be affected.

...or this code path.  Presumably GCC 4.8 and macOS are happy with the
code that we already have in block-sha1.
-- 
brian m. carlson: Houston, Texas, US
OpenPGP: https://keybase.io/bk2204


signature.asc
Description: PGP signature


Re: [PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-17 Thread SZEDER Gábor
On Mon, Oct 15, 2018 at 02:18:57AM +, brian m. carlson wrote:
> diff --git a/sha256/block/sha256.c b/sha256/block/sha256.c
> new file mode 100644
> index 00..18350c161a
> --- /dev/null
> +++ b/sha256/block/sha256.c
> @@ -0,0 +1,180 @@
> +#include "git-compat-util.h"
> +#include "./sha256.h"
> +
> +#define BLKSIZE blk_SHA256_BLKSIZE
> +
> +void blk_SHA256_Init(blk_SHA256_CTX *ctx)
> +{
> + ctx->offset = 0;
> + ctx->length = 0;
> + ctx->state[0] = 0x6A09E667UL;
> + ctx->state[1] = 0xBB67AE85UL;
> + ctx->state[2] = 0x3C6EF372UL;
> + ctx->state[3] = 0xA54FF53AUL;
> + ctx->state[4] = 0x510E527FUL;
> + ctx->state[5] = 0x9B05688CUL;
> + ctx->state[6] = 0x1F83D9ABUL;
> + ctx->state[7] = 0x5BE0CD19UL;
> +}
> +
> +static inline uint32_t ror(uint32_t x, unsigned n)
> +{
> + return (x >> n) | (x << (32 - n));
> +}
> +
> +#define Ch(x,y,z)   (z ^ (x & (y ^ z)))
> +#define Maj(x,y,z)  (((x | y) & z) | (x & y))
> +#define S(x, n) ror((x),(n))
> +#define R(x, n) ((x)>>(n))
> +#define Sigma0(x)   (S(x, 2) ^ S(x, 13) ^ S(x, 22))
> +#define Sigma1(x)   (S(x, 6) ^ S(x, 11) ^ S(x, 25))
> +#define Gamma0(x)   (S(x, 7) ^ S(x, 18) ^ R(x, 3))
> +#define Gamma1(x)   (S(x, 17) ^ S(x, 19) ^ R(x, 10))

[...]

> +#define RND(a,b,c,d,e,f,g,h,i,ki)\
> + t0 = h + Sigma1(e) + Ch(e, f, g) + ki + W[i];   \
> + t1 = Sigma0(a) + Maj(a, b, c);  \
> + d += t0;\
> + h  = t0 + t1;
> +
> + RND(S[0],S[1],S[2],S[3],S[4],S[5],S[6],S[7],0,0x428a2f98);

[...]

> +#undef RND
> +
> + for (i = 0; i < 8; i++) {
> + ctx->state[i] = ctx->state[i] + S[i];
> + }
> +}
> +
> +#define MIN(x, y) ((x) < (y) ? (x) : (y))

On macOS there is a MIN macro already defined in the system headers,
resulting in the following error:

  CC sha256/block/sha256.o
  sha256/block/sha256.c:133:9: error: 'MIN' macro redefined 
[-Werror,-Wmacro-redefined]
  #define MIN(x, y) ((x) < (y) ? (x) : (y))
  ^
  /usr/include/sys/param.h:215:9: note: previous definition is here
  #define MIN(a,b) (((a)<(b))?(a):(b))
  ^
  1 error generated.
  make: *** [sha256/block/sha256.o] Error 1

A simple "#undef MIN" solves this issue.  However, I wonder whether we
should #undef the other #define directives as well, just to be sure
(and perhaps overly cautious).

> +void blk_SHA256_Update(blk_SHA256_CTX *ctx, const void *data, size_t len)
> +{
> + const unsigned char *in = data;
> + size_t n;
> + ctx->length += len;
> + while (len > 0) {
> + if (!ctx->offset && len >= BLKSIZE) {
> + blk_SHA256_Transform(ctx, in);
> + in += BLKSIZE;
> + len -= BLKSIZE;
> + } else {
> + n = MIN(len, (BLKSIZE - ctx->offset));
> + memcpy(ctx->buf + ctx->offset, in, n);
> + ctx->offset += n;
> + in += n;
> + len -= n;
> + if (ctx->offset == BLKSIZE) {
> + blk_SHA256_Transform(ctx, ctx->buf);
> + ctx->offset = 0;
> + }
> + }
> + }
> +}
> +
> +void blk_SHA256_Final(unsigned char *digest, blk_SHA256_CTX *ctx)
> +{
> + const unsigned trip = BLKSIZE - sizeof(ctx->length);
> + int i;
> +
> + ctx->length <<= 3;
> + ctx->buf[ctx->offset++] = 0x80;
> +
> + if (ctx->offset > trip) {
> + memset(ctx->buf + ctx->offset, 0, BLKSIZE - ctx->offset);
> + blk_SHA256_Transform(ctx, ctx->buf);
> + ctx->offset = 0;
> + }
> +
> + memset(ctx->buf + ctx->offset, 0, BLKSIZE - ctx->offset - 
> sizeof(ctx->length));
> +
> + put_be64(ctx->buf + trip, ctx->length);

Some GCC versions (e.g. gcc-4.8 with -O2 -Wall -Werror) complain about
the above line:

  CC sha256/block/sha256.o
  sha256/block/sha256.c: In function ‘blk_SHA256_Final’:
  sha256/block/sha256.c:174:2: error: dereferencing type-punned pointer will 
break strict-aliasing rules [-Werror=strict-aliasing]
put_be64(ctx->buf + trip, ctx->length);
^
  cc1: all warnings being treated as errors
  make: *** [sha256/block/sha256.o] Error 1

Something like this makes it compile:

  void *ptr = ctx->buf + trip;
  put_be64(ptr, ctx->length);

However, it's not immediately obvious to me why the compiler
complains, or why that intermediate void* variable makes any
difference, but now it's not the time to put on my language lawyer
hat.

Perhaps an old compiler bug?  Clang in general, newer GCC versions, or
gcc-4.8 with -Wall -Werror but without -O2 don't seem to be affected.


> + blk_SHA256_Transform(ctx, ctx->buf);
> +
> + /* copy output */
> + for (i = 0; i < 8; i++, digest += sizeof(uint32_t))
> + put_be32(digest, ctx->state[i]);
> +}


Re: [PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-16 Thread Duy Nguyen
On Tue, Oct 16, 2018 at 1:31 AM brian m. carlson
 wrote:
>
> On Mon, Oct 15, 2018 at 04:59:12PM +0200, Duy Nguyen wrote:
> >  On Mon, Oct 15, 2018 at 4:23 AM brian m. carlson
> >  wrote:
> > >
> > > SHA-1 is weak and we need to transition to a new hash function.  For
> > > some time, we have referred to this new function as NewHash.  Recently,
> > > we decided to pick SHA-256 as NewHash.
> > >
> > > Add a basic implementation of SHA-256 based off libtomcrypt, which is in
> > > the public domain.  Optimize it and restructure it to meet our coding
> > > standards.  Place it in a directory called "sha256" where it and any
> > > future implementations can live so as to avoid a proliferation of
> > > implementation directories.
> > >
> > > Wire up SHA-256 in the list of hash algorithms, and add a test that the
> > > algorithm works correctly.
> > >
> > > Note that with this patch, it is still not possible to switch to using
> > > SHA-256 in Git.  Additional patches are needed to prepare the code to
> > > handle a larger hash algorithm and further test fixes are needed.
> >
> > At some point I assume SHA-256 will become functional and be part of a
> > git release without all file formats updated to support multiple
> > hashes. Should we somehow discourage the user from using it because it
> > will break when all file formats are finally updated?
>
> In order to activate SHA-256 in the codebase, currently you need a patch
> to force it on.  Otherwise, the code is simply inert and does nothing
> (other than in the test-tool).  I've included the patch below so you can
> see what it does (or if you want to play around with it).
>
> Without this patch, Git remains fully SHA-1 and can't access any of the
> SHA-256 code.  I have some very preliminary patches that do wire up
> extensions.objectFormat (branch object-id-part15 [sic]) but I haven't
> picked them up in a while.  (I need to finish test fixes first.)

Ah, I thought that extensions.objectFormat and setup changes already
landed (I think I saw that series on this list). Sorry for the noise.

-- 
Duy


Re: [PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-15 Thread brian m. carlson
On Mon, Oct 15, 2018 at 04:59:12PM +0200, Duy Nguyen wrote:
>  On Mon, Oct 15, 2018 at 4:23 AM brian m. carlson
>  wrote:
> >
> > SHA-1 is weak and we need to transition to a new hash function.  For
> > some time, we have referred to this new function as NewHash.  Recently,
> > we decided to pick SHA-256 as NewHash.
> >
> > Add a basic implementation of SHA-256 based off libtomcrypt, which is in
> > the public domain.  Optimize it and restructure it to meet our coding
> > standards.  Place it in a directory called "sha256" where it and any
> > future implementations can live so as to avoid a proliferation of
> > implementation directories.
> >
> > Wire up SHA-256 in the list of hash algorithms, and add a test that the
> > algorithm works correctly.
> >
> > Note that with this patch, it is still not possible to switch to using
> > SHA-256 in Git.  Additional patches are needed to prepare the code to
> > handle a larger hash algorithm and further test fixes are needed.
> 
> At some point I assume SHA-256 will become functional and be part of a
> git release without all file formats updated to support multiple
> hashes. Should we somehow discourage the user from using it because it
> will break when all file formats are finally updated?

In order to activate SHA-256 in the codebase, currently you need a patch
to force it on.  Otherwise, the code is simply inert and does nothing
(other than in the test-tool).  I've included the patch below so you can
see what it does (or if you want to play around with it).

Without this patch, Git remains fully SHA-1 and can't access any of the
SHA-256 code.  I have some very preliminary patches that do wire up
extensions.objectFormat (branch object-id-part15 [sic]) but I haven't
picked them up in a while.  (I need to finish test fixes first.)

Making the codebase run in SHA-256 mode with a Git that supports both
algorithms but defaults to SHA-1 is currently seriously broken, even
with the object-id-part15 branch.  When those patches go in, they will
be behind a developer flag to prevent wholesale chaos and segfaults.

So in summary, no, I don't think a developer flag is necessary at this
point.

-- >% --
From  Mon Sep 17 00:00:00 2001
From: "brian m. carlson" 
Date: Wed, 25 Jul 2018 23:48:30 +
Subject: [PATCH] Switch default hash algorithm to SHA-256 for testing

Set the default hash algorithm to SHA-256 for testing purposes.

This is a test commit and should not be used in production.

Signed-off-by: brian m. carlson 
---
 repository.c|  2 +-
 setup.c |  2 +-
 t/test-lib-functions.sh |  4 +---
 t/test-lib.sh   | 13 -
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/repository.c b/repository.c
index 5dd1486718..4ce50ea670 100644
--- a/repository.c
+++ b/repository.c
@@ -17,7 +17,7 @@ void initialize_the_repository(void)
the_repo.objects = raw_object_store_new();
the_repo.parsed_objects = parsed_object_pool_new();
 
-   repo_set_hash_algo(_repo, GIT_HASH_SHA1);
+   repo_set_hash_algo(_repo, GIT_HASH_SHA256);
 }
 
 static void expand_base_dir(char **out, const char *in,
diff --git a/setup.c b/setup.c
index b24c811c1c..2b4cc4e5a4 100644
--- a/setup.c
+++ b/setup.c
@@ -490,7 +490,7 @@ int read_repository_format(struct repository_format 
*format, const char *path)
memset(format, 0, sizeof(*format));
format->version = -1;
format->is_bare = -1;
-   format->hash_algo = GIT_HASH_SHA1;
+   format->hash_algo = GIT_HASH_SHA256;
string_list_init(>unknown_extensions, 1);
git_config_from_file(check_repo_format, path, format);
return format->version;
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index dfa1bebb46..91cf2b9d40 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1165,9 +1165,7 @@ test_set_hash () {
 
 # Detect the hash algorithm in use.
 test_detect_hash () {
-   # Currently we only support SHA-1, but in the future this function will
-   # actually detect the algorithm in use.
-   test_hash_algo='sha1'
+   test_hash_algo='sha256'
 }
 
 # Load common hash metadata and common placeholder object IDs for use with
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 3f95bfda60..bb507f2d4f 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -176,18 +176,21 @@ esac
 
 # Convenience
 #
-# A regexp to match 5, 35 and 40 hexdigits
+# A regexp to match 4, 5, 35, 40, and 64 hexdigits
+_x04='[0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
 _x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
 _x35="$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
 _x40="$_x35$_x05"
+_x64="$_x40$_x05$_x05$_x05$_x05$_x04"
 
 # Zero SHA-1
 _z40=
+_z64=
 
-OID_REGEX="$_x40"
-ZERO_OID=$_z40
-EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904
-EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391

Re: [PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-15 Thread Duy Nguyen
 On Mon, Oct 15, 2018 at 4:23 AM brian m. carlson
 wrote:
>
> SHA-1 is weak and we need to transition to a new hash function.  For
> some time, we have referred to this new function as NewHash.  Recently,
> we decided to pick SHA-256 as NewHash.
>
> Add a basic implementation of SHA-256 based off libtomcrypt, which is in
> the public domain.  Optimize it and restructure it to meet our coding
> standards.  Place it in a directory called "sha256" where it and any
> future implementations can live so as to avoid a proliferation of
> implementation directories.
>
> Wire up SHA-256 in the list of hash algorithms, and add a test that the
> algorithm works correctly.
>
> Note that with this patch, it is still not possible to switch to using
> SHA-256 in Git.  Additional patches are needed to prepare the code to
> handle a larger hash algorithm and further test fixes are needed.

At some point I assume SHA-256 will become functional and be part of a
git release without all file formats updated to support multiple
hashes. Should we somehow discourage the user from using it because it
will break when all file formats are finally updated?

The simplest way is to just not register "sha256" in hash_algos unless
some developer flag is set. Or rename sha256 to sha256-experimental or
something to make it more obvious (but then we may need to fix up the
test suite after renaming it back to sha256, not great)
-- 
Duy


[PATCH v2 10/13] Add a base implementation of SHA-256 support

2018-10-14 Thread brian m. carlson
SHA-1 is weak and we need to transition to a new hash function.  For
some time, we have referred to this new function as NewHash.  Recently,
we decided to pick SHA-256 as NewHash.

Add a basic implementation of SHA-256 based off libtomcrypt, which is in
the public domain.  Optimize it and restructure it to meet our coding
standards.  Place it in a directory called "sha256" where it and any
future implementations can live so as to avoid a proliferation of
implementation directories.

Wire up SHA-256 in the list of hash algorithms, and add a test that the
algorithm works correctly.

Note that with this patch, it is still not possible to switch to using
SHA-256 in Git.  Additional patches are needed to prepare the code to
handle a larger hash algorithm and further test fixes are needed.

Signed-off-by: brian m. carlson 
---
 Makefile   |   4 +
 cache.h|  12 ++-
 hash.h |  19 -
 sha1-file.c|  45 +++
 sha256/block/sha256.c  | 180 +
 sha256/block/sha256.h  |  26 ++
 t/helper/test-sha256.c |   7 ++
 t/helper/test-tool.c   |   1 +
 t/helper/test-tool.h   |   1 +
 t/t0014-hash.sh|  25 ++
 10 files changed, 316 insertions(+), 4 deletions(-)
 create mode 100644 sha256/block/sha256.c
 create mode 100644 sha256/block/sha256.h
 create mode 100644 t/helper/test-sha256.c

diff --git a/Makefile b/Makefile
index 1c43bf9aa8..76d378c7ba 100644
--- a/Makefile
+++ b/Makefile
@@ -739,6 +739,7 @@ TEST_BUILTINS_OBJS += test-run-command.o
 TEST_BUILTINS_OBJS += test-scrap-cache-tree.o
 TEST_BUILTINS_OBJS += test-sha1.o
 TEST_BUILTINS_OBJS += test-sha1-array.o
+TEST_BUILTINS_OBJS += test-sha256.o
 TEST_BUILTINS_OBJS += test-sigchain.o
 TEST_BUILTINS_OBJS += test-strcmp-offset.o
 TEST_BUILTINS_OBJS += test-string-list.o
@@ -1633,6 +1634,9 @@ endif
 endif
 endif
 
+LIB_OBJS += sha256/block/sha256.o
+BASIC_CFLAGS += -DSHA256_BLK
+
 ifdef SHA1_MAX_BLOCK_SIZE
LIB_OBJS += compat/sha1-chunked.o
BASIC_CFLAGS += -DSHA1_MAX_BLOCK_SIZE="$(SHA1_MAX_BLOCK_SIZE)"
diff --git a/cache.h b/cache.h
index 48e736b0d5..81ece0360e 100644
--- a/cache.h
+++ b/cache.h
@@ -48,11 +48,17 @@ unsigned long git_deflate_bound(git_zstream *, unsigned 
long);
 /* The block size of SHA-1. */
 #define GIT_SHA1_BLKSZ 64
 
+/* The length in bytes and in hex digits of an object name (SHA-256 value). */
+#define GIT_SHA256_RAWSZ 32
+#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
+/* The block size of SHA-256. */
+#define GIT_SHA256_BLKSZ 64
+
 /* The length in byte and in hex digits of the largest possible hash value. */
-#define GIT_MAX_RAWSZ GIT_SHA1_RAWSZ
-#define GIT_MAX_HEXSZ GIT_SHA1_HEXSZ
+#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
+#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
 /* The largest possible block size for any supported hash. */
-#define GIT_MAX_BLKSZ GIT_SHA1_BLKSZ
+#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
 
 struct object_id {
unsigned char hash[GIT_MAX_RAWSZ];
diff --git a/hash.h b/hash.h
index 46dff69eb3..88d18896d7 100644
--- a/hash.h
+++ b/hash.h
@@ -15,6 +15,8 @@
 #include "block-sha1/sha1.h"
 #endif
 
+#include "sha256/block/sha256.h"
+
 #ifndef platform_SHA_CTX
 /*
  * platform's underlying implementation of SHA-1; could be OpenSSL,
@@ -34,6 +36,18 @@
 #define git_SHA1_Updateplatform_SHA1_Update
 #define git_SHA1_Final platform_SHA1_Final
 
+#ifndef platform_SHA256_CTX
+#define platform_SHA256_CTXSHA256_CTX
+#define platform_SHA256_Init   SHA256_Init
+#define platform_SHA256_Update SHA256_Update
+#define platform_SHA256_Final  SHA256_Final
+#endif
+
+#define git_SHA256_CTX platform_SHA256_CTX
+#define git_SHA256_Initplatform_SHA256_Init
+#define git_SHA256_Update  platform_SHA256_Update
+#define git_SHA256_Final   platform_SHA256_Final
+
 #ifdef SHA1_MAX_BLOCK_SIZE
 #include "compat/sha1-chunked.h"
 #undef git_SHA1_Update
@@ -52,12 +66,15 @@
 #define GIT_HASH_UNKNOWN 0
 /* SHA-1 */
 #define GIT_HASH_SHA1 1
+/* SHA-256  */
+#define GIT_HASH_SHA256 2
 /* Number of algorithms supported (including unknown). */
-#define GIT_HASH_NALGOS (GIT_HASH_SHA1 + 1)
+#define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1)
 
 /* A suitably aligned type for stack allocations of hash contexts. */
 union git_hash_ctx {
git_SHA_CTX sha1;
+   git_SHA256_CTX sha256;
 };
 typedef union git_hash_ctx git_hash_ctx;
 
diff --git a/sha1-file.c b/sha1-file.c
index 9aadaf0c8d..66ba3dadb9 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -40,10 +40,20 @@
 #define EMPTY_TREE_SHA1_BIN_LITERAL \
 "\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60" \
 "\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04"
+#define EMPTY_TREE_SHA256_BIN_LITERAL \
+   "\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1" \
+   "\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5" \
+   "\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc" \
+   "\x53\x21"
 
 #define EMPTY_BLOB_SHA1_BIN_LITERAL \