On Wed, Dec 06, 2000 at 05:16:41AM +0100, [EMAIL PROTECTED] wrote:

>   Intel assembler version for bn_sub_part_words().  I haven't got
>   reliable timings yet, please try it out!

Looks like it is about the same speed. This is the interesting part;
how would one improve that?

# ----------------------------------------------------------------------
# Word-copy section (excerpt; the enclosing function's prologue, register
# setup and epilogue are not shown here).
#
# Register roles as used by the visible code:
#   %esi  source pointer      (read at offsets 0..28)
#   %ebx  destination pointer (written at offsets 0..28)
#   %ecx  scratch for each 32-bit word being moved
#   %ebp  remaining-word counter
#
# Main loop: copies eight 32-bit words per iteration, then advances both
# pointers by 32 bytes and subtracts 8 from %ebp. The loop exits only
# when %ebp reaches exactly zero, so it relies on %ebp being a nonzero
# multiple of 8 on entry.
# NOTE(review): presumably the code before this excerpt guarantees that
# (e.g. by masking the count) -- confirm against the full function.
#
# NOTE(review): the numbered labels between the word copies are not
# referenced anywhere in this excerpt; they may be targets of jumps from
# elsewhere in the function -- verify before removing or reordering.
# ----------------------------------------------------------------------
.L041pw_nc_loop:
        movl    (%esi),         %ecx
        movl    %ecx,           (%ebx)
.L026pw_nc0:
        movl    4(%esi),        %ecx
        movl    %ecx,           4(%ebx)
.L027pw_nc1:
        movl    8(%esi),        %ecx
        movl    %ecx,           8(%ebx)
.L028pw_nc2:
        movl    12(%esi),       %ecx
        movl    %ecx,           12(%ebx)
.L029pw_nc3:
        movl    16(%esi),       %ecx
        movl    %ecx,           16(%ebx)
.L030pw_nc4:
        movl    20(%esi),       %ecx
        movl    %ecx,           20(%ebx)
.L031pw_nc5:
        movl    24(%esi),       %ecx
        movl    %ecx,           24(%ebx)
.L032pw_nc6:
        movl    28(%esi),       %ecx
        movl    %ecx,           28(%ebx)
.L033pw_nc7:

        # Step both pointers past the 8 words (8 * 4 = 32 bytes) just
        # copied and account for them in the counter.
        addl    $32,            %esi
        addl    $32,            %ebx
        subl    $8,             %ebp
        jnz     .L041pw_nc_loop
        # Tail: reload a value from the stack slot at 36(%esp) and keep
        # only its low three bits, i.e. the 0..7 words left over after
        # the 8-word blocks.
        # NOTE(review): 36(%esp) is presumably the caller-supplied word
        # count argument -- confirm against the function's stack layout.
        movl    36(%esp),       %ebp
        andl    $7,             %ebp
        jz      .L042pw_nc_end
        # Copy the leftover words one at a time; after each copy the
        # counter is decremented and the sequence exits once it hits 0.
        movl    (%esi),         %ecx
        movl    %ecx,           (%ebx)
.L034pw_tail_nc0:
        decl    %ebp
        jz      .L042pw_nc_end
        movl    4(%esi),        %ecx
        movl    %ecx,           4(%ebx)
.L035pw_tail_nc1:
        decl    %ebp
        jz      .L042pw_nc_end
        movl    8(%esi),        %ecx
        movl    %ecx,           8(%ebx)
.L036pw_tail_nc2:
        decl    %ebp
        jz      .L042pw_nc_end
        movl    12(%esi),       %ecx
        movl    %ecx,           12(%ebx)
.L037pw_tail_nc3:
        decl    %ebp
        jz      .L042pw_nc_end
        movl    16(%esi),       %ecx
        movl    %ecx,           16(%ebx)
.L038pw_tail_nc4:
        decl    %ebp
        jz      .L042pw_nc_end
        movl    20(%esi),       %ecx
        movl    %ecx,           20(%ebx)
.L039pw_tail_nc5:
        decl    %ebp
        jz      .L042pw_nc_end
        # Seventh and last possible tail word: the masked count is at
        # most 7, so no further decrement/test is needed before falling
        # through to the end label.
        movl    24(%esi),       %ecx
        movl    %ecx,           24(%ebx)
.L040pw_tail_nc6:
.L042pw_nc_end:
        # Clear %eax.
        # NOTE(review): presumably the function's return value (no
        # carry/borrow out of this path) -- the epilogue is not visible.
        # movl leaves EFLAGS untouched; an xorl-based zeroing would be
        # shorter but modifies flags, so check what follows before
        # changing it.
        movl    $0,             %eax
______________________________________________________________________
OpenSSL Project                                 http://www.openssl.org
Development Mailing List                       [EMAIL PROTECTED]
Automated List Manager                           [EMAIL PROTECTED]

Reply via email to