On Wed, Dec 06, 2000 at 05:16:41AM +0100, [EMAIL PROTECTED] wrote:
> Intel assembler version for bn_sub_part_words(). I haven't got
> reliable timings yet, please try it out!
Looks like it is about the same speed. This is the interesting part;
how would one improve that?
# ----------------------------------------------------------------------
# Word-copy section (excerpt; i386, AT&T syntax).
#
# Copies 32-bit words from the buffer at %esi to the buffer at %ebx,
# using %ecx as scratch.  First an 8-way unrolled loop, then a tail
# that handles the remaining (count & 7) words.
#
# Registers as used in this excerpt:
#   %esi  source pointer      (advanced by 32 bytes per loop pass)
#   %ebx  destination pointer (advanced by 32 bytes per loop pass)
#   %ebp  words remaining
#   %ecx  scratch for each load/store pair
#
# NOTE(review): the setup code is not visible here.  The loop only
# terminates cleanly if %ebp is a non-zero multiple of 8 on entry
# (subl $8 / jnz) -- confirm against the code preceding this excerpt.
# NOTE(review): the .L0xxpw_nc<N> / .L0xxpw_tail_nc<N> labels are not
# referenced anywhere in this excerpt; presumably they are entry points
# for jumps from code outside it.  Do not reorder or fuse the
# instruction pairs around them without checking.
# ----------------------------------------------------------------------
.L041pw_nc_loop:
# word 0
movl (%esi), %ecx
movl %ecx, (%ebx)
.L026pw_nc0:
# word 1
movl 4(%esi), %ecx
movl %ecx, 4(%ebx)
.L027pw_nc1:
# word 2
movl 8(%esi), %ecx
movl %ecx, 8(%ebx)
.L028pw_nc2:
# word 3
movl 12(%esi), %ecx
movl %ecx, 12(%ebx)
.L029pw_nc3:
# word 4
movl 16(%esi), %ecx
movl %ecx, 16(%ebx)
.L030pw_nc4:
# word 5
movl 20(%esi), %ecx
movl %ecx, 20(%ebx)
.L031pw_nc5:
# word 6
movl 24(%esi), %ecx
movl %ecx, 24(%ebx)
.L032pw_nc6:
# word 7
movl 28(%esi), %ecx
movl %ecx, 28(%ebx)
.L033pw_nc7:
# Advance both pointers past the 8 words just copied and loop while
# words remain.
addl $32, %esi
addl $32, %ebx
subl $8, %ebp
jnz .L041pw_nc_loop
# Tail: reload a count from the stack and keep only its low three bits.
# NOTE(review): 36(%esp) is presumably the original word-count argument;
# the stack layout is not visible here -- confirm.
movl 36(%esp), %ebp
andl $7, %ebp
jz .L042pw_nc_end
# 1..7 words remain.  The pointers are not advanced in the tail; each
# step uses a fixed offset, decrements %ebp and leaves once it hits 0.
movl (%esi), %ecx
movl %ecx, (%ebx)
.L034pw_tail_nc0:
decl %ebp
jz .L042pw_nc_end
movl 4(%esi), %ecx
movl %ecx, 4(%ebx)
.L035pw_tail_nc1:
decl %ebp
jz .L042pw_nc_end
movl 8(%esi), %ecx
movl %ecx, 8(%ebx)
.L036pw_tail_nc2:
decl %ebp
jz .L042pw_nc_end
movl 12(%esi), %ecx
movl %ecx, 12(%ebx)
.L037pw_tail_nc3:
decl %ebp
jz .L042pw_nc_end
movl 16(%esi), %ecx
movl %ecx, 16(%ebx)
.L038pw_tail_nc4:
decl %ebp
jz .L042pw_nc_end
movl 20(%esi), %ecx
movl %ecx, 20(%ebx)
.L039pw_tail_nc5:
decl %ebp
jz .L042pw_nc_end
# Seventh tail word: the remainder is at most 7 (masked with $7 above),
# so no further count check is needed before falling through.
movl 24(%esi), %ecx
movl %ecx, 24(%ebx)
.L040pw_tail_nc6:
.L042pw_nc_end:
# %eax = 0.
# NOTE(review): presumably the function's return value -- the epilogue
# is outside this excerpt.  movl leaves EFLAGS untouched; an xorl would
# be shorter but modifies flags, so check what follows before changing.
movl $0, %eax
______________________________________________________________________
OpenSSL Project http://www.openssl.org
Development Mailing List [EMAIL PROTECTED]
Automated List Manager [EMAIL PROTECTED]