Linus Torvalds writes:
> I've just integrated the Mozilla SHA1 library implementation that Adgar
> Toernig sent me into the standard git archive (but I did the integration
> differently).
Here is a new PPC SHA1 patch that integrates better with this...
> Interestingly, the Mozilla SHA1 code is about twice as fast as the openssl
> code on my G5, and judging by the disassembly, it's because it's much
> simpler. I think the openssl people have unrolled all the loops totally,
> which tends to be a disaster on any half-way modern CPU. But hey, it could
> be something as simple as optimization flags too.
Very interesting. On my G4 powerbook (since I am at LCA), for a
fsck-cache on a linux-2.6 tree, it takes 6.6 seconds with the openssl
SHA1, 10.7 seconds with the Mozilla SHA1, and ~5.8 seconds with my
SHA1. I'll test it on a G5 tonight, hopefully.
Paul.
diff -urN git.orig/Makefile git/Makefile
--- git.orig/Makefile 2005-04-22 16:23:44.0 +1000
+++ git/Makefile2005-04-22 16:43:31.0 +1000
@@ -34,9 +34,14 @@
SHA1_HEADER="mozilla-sha1/sha1.h"
LIB_OBJS += mozilla-sha1/sha1.o
else
+ifdef PPC_SHA1
+ SHA1_HEADER="ppc/sha1.h"
+ LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o
+else
SHA1_HEADER=
LIBS += -lssl
endif
+endif
CFLAGS += '-DSHA1_HEADER=$(SHA1_HEADER)'
@@ -77,7 +82,7 @@
write-tree.o: $(LIB_H)
clean:
- rm -f *.o mozilla-sha1/*.o $(PROG) $(LIB_FILE)
+ rm -f *.o mozilla-sha1/*.o ppc/*.o $(PROG) $(LIB_FILE)
backup: clean
cd .. ; tar czvf dircache.tar.gz dir-cache
diff -urN git.orig/ppc/sha1.c git/ppc/sha1.c
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1.c 2005-04-22 16:29:19.0 +1000
@@ -0,0 +1,72 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras <[EMAIL PROTECTED]>
+ *
+ * This version assumes we are running on a big-endian machine.
+ * It calls an external sha1_core() to process blocks of 64 bytes.
+ */
+#include
+#include
+#include "sha1.h"
+
+extern void sha1_core(uint32_t *hash, const unsigned char *p,
+ unsigned int nblocks);
+
+int SHA1_Init(SHA_CTX *c)
+{
+ c->hash[0] = 0x67452301;
+ c->hash[1] = 0xEFCDAB89;
+ c->hash[2] = 0x98BADCFE;
+ c->hash[3] = 0x10325476;
+ c->hash[4] = 0xC3D2E1F0;
+ c->len = 0;
+ c->cnt = 0;
+ return 0;
+}
+
+int SHA1_Update(SHA_CTX *c, const void *ptr, unsigned long n)
+{
+ unsigned long nb;
+ const unsigned char *p = ptr;
+
+ c->len += n << 3;
+ while (n != 0) {
+ if (c->cnt || n < 64) {
+ nb = 64 - c->cnt;
+ if (nb > n)
+ nb = n;
+ memcpy(&c->buf.b[c->cnt], p, nb);
+ if ((c->cnt += nb) == 64) {
+ sha1_core(c->hash, c->buf.b, 1);
+ c->cnt = 0;
+ }
+ } else {
+ nb = n >> 6;
+ sha1_core(c->hash, p, nb);
+ nb <<= 6;
+ }
+ n -= nb;
+ p += nb;
+ }
+ return 0;
+}
+
+int SHA1_Final(unsigned char *hash, SHA_CTX *c)
+{
+ unsigned int cnt = c->cnt;
+
+ c->buf.b[cnt++] = 0x80;
+ if (cnt > 56) {
+ if (cnt < 64)
+ memset(&c->buf.b[cnt], 0, 64 - cnt);
+ sha1_core(c->hash, c->buf.b, 1);
+ cnt = 0;
+ }
+ if (cnt < 56)
+ memset(&c->buf.b[cnt], 0, 56 - cnt);
+ c->buf.l[7] = c->len;
+ sha1_core(c->hash, c->buf.b, 1);
+ memcpy(hash, c->hash, 20);
+ return 0;
+}
diff -urN git.orig/ppc/sha1.h git/ppc/sha1.h
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1.h 2005-04-22 16:45:28.0 +1000
@@ -0,0 +1,20 @@
+/*
+ * SHA-1 implementation.
+ *
+ * Copyright (C) 2005 Paul Mackerras <[EMAIL PROTECTED]>
+ */
+#include
+
+typedef struct sha_context {
+ uint32_t hash[5];
+ uint32_t cnt;
+ uint64_t len;
+ union {
+ unsigned char b[64];
+ uint64_t l[8];
+ } buf;
+} SHA_CTX;
+
+int SHA1_Init(SHA_CTX *c);
+int SHA1_Update(SHA_CTX *c, const void *p, unsigned long n);
+int SHA1_Final(unsigned char *hash, SHA_CTX *c);
diff -urN git.orig/ppc/sha1ppc.S git/ppc/sha1ppc.S
--- /dev/null 2005-04-04 12:56:19.0 +1000
+++ git/ppc/sha1ppc.S 2005-04-22 16:29:19.0 +1000
@@ -0,0 +1,185 @@
+/*
+ * SHA-1 implementation for PowerPC.
+ *
+ * Copyright (C) 2005 Paul Mackerras.
+ */
+#define FS 80
+
+/*
+ * We roll the registers for T, A, B, C, D, E around on each
+ * iteration; T on iteration t is A on iteration t+1, and so on.
+ * We use registers 7 - 12 for this.
+ */
+#define RT(t) t)+5)%6)+7)
+#define RA(t) t)+4)%6)+7)
+#define RB(t) t)+3)%6)+7)
+#define RC(t) t)+2)%6)+7)
+#define RD(t) t)+1)%6)+7)
+#defin