[Monotone-devel] Re: sha1/botan stuff

2006-08-12 Thread Jack Lloyd
On Thu, Aug 10, 2006 at 12:03:09AM -0700, Nathaniel Smith wrote:

> Umm... anyone want to test out their asm chops?

1.3 GHz Athlon Thunderbird:

Botan mainline:     71.01 Mbytes/sec
Botan w/attached:  103.80 Mbytes/sec
Botan w/OpenSSL:   133.14 Mbytes/sec

2 GHz P4-M:

Botan mainline:    49.78 Mbytes/sec
Botan w/attached:  63.98 Mbytes/sec
Botan w/OpenSSL:   180.77 Mbytes/sec

Obviously this could be scheduled much better for the P4 (and the
Athlon, for that matter), however I don't know much about Netburst
instruction scheduling. Was only testing on the Athlon until the very
end, so it's possible this is a completely wrong approach for P4
performance, dunno.

-Jack
   .file "sha1core.S"

/*
 * SHA-1 (SHA-160) compression function for 32-bit x86.
 * Syntax: GNU as, AT&T operand order. The file must go through the C
 * preprocessor (hence .S) because the round functions are #define macros.
 *
 * C-equivalent signature, inferred from the argument loads below and from
 * how each pointer is used (confirm against the caller's prototype):
 *
 *    void sha160_core(u32bit digest[5], const byte input[64], u32bit W[80]);
 *
 * ABI:    all three arguments are read from the stack (cdecl style);
 *         %ebx, %esi, %edi and %ebp are saved and restored here.
 * In:     digest - five 32-bit chaining words, updated in place
 *         input  - one 64-byte message block, read as big-endian words
 *         W      - 80-word scratch buffer that receives the message schedule
 * Out:    each digest word has the matching working variable added to it
 *         after the 80 rounds
 * Clobb:  %eax, %ecx, %edx, flags, and all of W (W[0] is additionally used
 *         as a spill slot by rounds 40-59, see F3)
 */

/*
 * Argument offsets from %esp after the prologue:
 * 4 pushed registers * 4 bytes + 4-byte return address = 20.
 */
#define DIGEST_ARG 20
#define INPUT_ARG  24
#define W_ARG      28

/* Additive round constants, one per group of 20 rounds. */
#define MAGIC1 0x5A827999
#define MAGIC2 0x6ED9EBA1
#define MAGIC3 0x8F1BBCDC
#define MAGIC4 0xCA62C1D6

/*
 * Round macros. Common contract for F1..F4:
 *
 *    E += W[MSG] + rol(A, 5) + f(B, C, D) + MAGICn
 *    B  = ror(B, 2)
 *
 * A is rotated left by 5 so it can be added directly and is rotated back
 * by the last instruction, so it leaves the macro unchanged. TEMP is a
 * scratch register. %edi must point at W.
 *
 * Every macro ends with a ';' statement separator because F_BLOCK pastes
 * five expansions onto one logical line.
 */

/* Rounds 0-19: f = D ^ (B & (C ^ D)), i.e. "B ? C : D" bit by bit. */
#define F1(A, B, C, D, E, TEMP, MSG) \
   addl 4*MSG(%edi), E       ; \
   movl C, TEMP              ; \
   roll $5, A                ; \
   xorl D, TEMP              ; \
   addl A, E                 ; \
   andl B, TEMP              ; \
   rorl $2, B                ; \
   xorl D, TEMP              ; \
   leal MAGIC1(E,TEMP,1), E  ; \
   rorl $5, A                ;

/* Rounds 20-39 and 60-79: f = B ^ C ^ D; only the constant differs. */
#define F2_OR_F4(A, B, C, D, E, TEMP, MSG, MAGIC) \
   addl 4*MSG(%edi), E       ; \
   movl B, TEMP              ; \
   roll $5, A                ; \
   xorl D, TEMP              ; \
   addl A, E                 ; \
   xorl C, TEMP              ; \
   rorl $2, B                ; \
   leal MAGIC(E,TEMP,1), E   ; \
   rorl $5, A                ;

/*
 * Rounds 40-59: f = ((B | C) & D) | (B & C), the bitwise majority.
 * All registers are taken, so (B & C) is built in memory at (%edi), which
 * is W[0]. That word is overwritten; this is harmless to the hash because
 * F3 and the F4 rounds that follow only read W[40..79].
 */
#define F3(A, B, C, D, E, TEMP, MSG) \
   addl 4*MSG(%edi), E       ; \
   movl B, TEMP              ; \
   roll $5, A                ; \
   orl  C, TEMP              ; \
   movl B, (%edi)            ; \
   andl D, TEMP              ; \
   andl C, (%edi)            ; \
   orl  (%edi), TEMP         ; \
   addl A, E                 ; \
   leal MAGIC3(E,TEMP,1), E  ; \
   rorl $2, B                ; \
   rorl $5, A                ;

#define F2(A, B, C, D, E, TEMP, MSG) \
   F2_OR_F4(A, B, C, D, E, TEMP, MSG, MAGIC2)

#define F4(A, B, C, D, E, TEMP, MSG) \
   F2_OR_F4(A, B, C, D, E, TEMP, MSG, MAGIC4)

/*
 * Five consecutive rounds starting at schedule word MSG. Instead of moving
 * values between registers after each round, the register roles are rotated
 * by one position per round; after five rounds they are back to
 * A=%eax B=%ebx C=%ecx D=%edx E=%esi. %ebp is the scratch register.
 */
#define F_BLOCK(F, MSG) \
   F(%eax, %ebx, %ecx, %edx, %esi, %ebp, (MSG+0)) \
   F(%esi, %eax, %ebx, %ecx, %edx, %ebp, (MSG+1)) \
   F(%edx, %esi, %eax, %ebx, %ecx, %ebp, (MSG+2)) \
   F(%ecx, %edx, %esi, %eax, %ebx, %ebp, (MSG+3)) \
   F(%ebx, %ecx, %edx, %esi, %eax, %ebp, (MSG+4))

   .text
   .p2align 4,,15

   .global sha160_core
   .type   sha160_core, @function
sha160_core:
   pushl %ebp
   pushl %edi
   pushl %esi
   pushl %ebx

   movl INPUT_ARG(%esp), %ebp      /* %ebp = byte input[64] */
   movl W_ARG(%esp), %edi          /* %edi = u32bit W[80]   */

   xorl %esi, %esi                 /* %esi = index of the next W word */

   /*
    * W[0..15] = the message block as big-endian 32-bit words.
    * Four words per iteration; each store is placed after the next load
    * and byte swap.
    */
   .p2align 4,,7
.Lload_input_loop:
   movl   0(%ebp), %eax
   bswapl %eax

   movl   4(%ebp), %ebx
   bswapl %ebx
   movl   %eax, 0(%edi,%esi,4)

   movl   8(%ebp), %ecx
   bswapl %ecx
   movl   %ebx, 4(%edi,%esi,4)

   movl   12(%ebp), %edx
   bswapl %edx
   movl   %ecx, 8(%edi,%esi,4)

   movl   %edx, 12(%edi,%esi,4)

   addl   $4, %esi
   addl   $16, %ebp
   cmpl   $16, %esi
   jne    .Lload_input_loop

   leal   64(%edi), %ebp           /* %ebp = &W[16], first word to expand */

   /*
    * Message schedule: W[t] = rol(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16], 1).
    * Each iteration produces four words, with %ebp = &W[t]:
    *    %edx -> W[t], %ecx -> W[t+1], %ebx -> W[t+2], %eax -> W[t+3]
    * W[t+3] needs W[t] (its "t-3" term), which does not exist in memory
    * until this iteration computes it, so %eax starts at zero and the
    * rotated %edx is folded in at the end.
    * %esi enters as 16 and is bumped first, so the loop exits at 80.
    */
   .p2align 4,,7
.Lexpansion_loop:
   addl $4, %esi

   xorl %eax, %eax
   movl  -4(%ebp), %ebx
   movl  -8(%ebp), %ecx
   movl -12(%ebp), %edx

   xorl -20(%ebp), %eax
   xorl -24(%ebp), %ebx
   xorl -28(%ebp), %ecx
   xorl -32(%ebp), %edx

   xorl -44(%ebp), %eax
   xorl -48(%ebp), %ebx
   xorl -52(%ebp), %ecx
   xorl -52(%ebp), %eax

   xorl -56(%ebp), %edx
   xorl -56(%ebp), %ebx
   xorl -60(%ebp), %ecx
   xorl -64(%ebp), %edx

   roll $1, %edx

   roll $1, %ecx
   movl %edx, (%ebp)

   roll $1, %ebx
   movl %ecx, 4(%ebp)

   xorl %edx, %eax                 /* fold in the freshly computed W[t] */
   movl %ebx, 8(%ebp)
   roll $1, %eax
   movl %eax, 12(%ebp)

   addl $16, %ebp
   cmpl $80, %esi
   jne  .Lexpansion_loop

   /* Load the five chaining words into the working registers A..E. */
   movl DIGEST_ARG(%esp), %ebp
   movl  0(%ebp), %eax
   movl  4(%ebp), %ebx
   movl  8(%ebp), %ecx
   movl 12(%ebp), %edx
   movl 16(%ebp), %esi

   /* 80 rounds, fully unrolled: 4 groups of 20, 5 rounds per F_BLOCK. */
   F_BLOCK(F1, 0)
   F_BLOCK(F1, 5)
   F_BLOCK(F1, 10)
   F_BLOCK(F1, 15)

   F_BLOCK(F2, 20)
   F_BLOCK(F2, 25)
   F_BLOCK(F2, 30)
   F_BLOCK(F2, 35)

   F_BLOCK(F3, 40)
   F_BLOCK(F3, 45)
   F_BLOCK(F3, 50)
   F_BLOCK(F3, 55)

   F_BLOCK(F4, 60)
   F_BLOCK(F4, 65)
   F_BLOCK(F4, 70)
   F_BLOCK(F4, 75)

   /* %ebp was the round scratch register, so reload the digest pointer. */
   movl DIGEST_ARG(%esp), %ebp
   addl %eax,  0(%ebp)
   addl %ebx,  4(%ebp)
   addl %ecx,  8(%ebp)
   addl %edx, 12(%ebp)
   addl %esi, 16(%ebp)

   popl %ebx
   popl %esi
   popl %edi
   popl %ebp
   ret
   .size sha160_core, .-sha160_core
/*
* SHA-160 Source File  

[Monotone-devel] Re: sha1/botan stuff

2006-08-11 Thread Florian Weimer
* Nathaniel Smith:

   mozilla  :  36.9 MiB/s
   openssl (no-asm debian-i386)   :  56.5 MiB/s
   openssl (no-asm debian-i386-i686/cmov) :  56.5 MiB/s
   botan:  64.1 MiB/s
   beecrypt (from debian)   :  87.0 MiB/s
   nettle (lsh's library, from debian)  : 147.1 MiB/s
   openssl (with asm)   : 263.2 MiB/s

Have you tried libgcrypt?  I suppose it's in the Mozilla range,
though.


___
Monotone-devel mailing list
Monotone-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/monotone-devel


Re: [Monotone-devel] Re: sha1/botan stuff

2006-08-11 Thread Nathaniel Smith
On Fri, Aug 11, 2006 at 08:15:41AM +0200, Florian Weimer wrote:
> * Nathaniel Smith:
> 
> > mozilla  :  36.9 MiB/s
> > openssl (no-asm debian-i386)   :  56.5 MiB/s
> > openssl (no-asm debian-i386-i686/cmov) :  56.5 MiB/s
> > botan:  64.1 MiB/s
> > beecrypt (from debian)   :  87.0 MiB/s
> > nettle (lsh's library, from debian)  : 147.1 MiB/s
> > openssl (with asm)   : 263.2 MiB/s
> 
> Have you tried libgcrypt?  I suppose it's in the Mozilla range,
> though.

Huh.  I tried a few more, but gcrypt is indeed the surprise:

gcrypt :  82.3 MiB/s
dean gaudet's SIMD impl[1] : 115.1 MiB/s

[1] http://www.arctic.org/~dean/crypto/sha1.html ; not using the same
harness as the others, but just the timing script included with the
code.  Since the code isn't even a full sha1 implementation, I
wasn't all that interested in spending time on it :-).

The mozilla line above refers to the C code that git extracted from
mozilla; mozilla itself appears to have two sha1 implementations, one
called sha1 and one called sha1_fast.  I tried installing libnss
(the mozilla crypto library) and benchmarking it, but I totally failed
to get it to actually work, so no new benchmark for that.


At a cursory glance, gcrypt's implementation looks just the same as
all the others; I don't know what makes it so fast.

-- Nathaniel

-- 
Lull'd in the countless chambers of the brain,
Our thoughts are link'd by many a hidden chain:
Awake but one, and lo! what myriads rise!
Each stamps its image as the other flies
  -- Ann Ward Radcliffe, The Mysteries of Udolpho


___
Monotone-devel mailing list
Monotone-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/monotone-devel