The branch master has been updated
       via  c74aea8d6ccdf07ce826a9451887739b8aa64096 (commit)
       via  e3057a57caf4274ea1fb074518e4714059dfcabf (commit)
      from  dfde4219fdebbb5a8a17602fea036f7690e517ea (commit)


- Log -----------------------------------------------------------------
commit c74aea8d6ccdf07ce826a9451887739b8aa64096
Author: Andy Polyakov <ap...@openssl.org>
Date:   Fri Aug 19 23:18:35 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
    
    RT#4625
    
    Reviewed-by: Rich Salz <rs...@openssl.org>

commit e3057a57caf4274ea1fb074518e4714059dfcabf
Author: Andy Polyakov <ap...@openssl.org>
Date:   Fri Aug 19 23:16:04 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
    
    RT#4625
    
    Reviewed-by: Rich Salz <rs...@openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/ec/asm/ecp_nistz256-armv4.pl   |  58 ++++---------
 crypto/ec/asm/ecp_nistz256-armv8.pl   |  76 ++++++-----------
 crypto/ec/asm/ecp_nistz256-sparcv9.pl | 150 ++++++++++------------------------
 crypto/ec/asm/ecp_nistz256-x86.pl     |  30 +++----
 crypto/ec/asm/ecp_nistz256-x86_64.pl  |  24 +++---
 crypto/ec/ecp_nistz256.c              |  57 +++++++++----
 6 files changed, 152 insertions(+), 243 deletions(-)

diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl 
b/crypto/ec/asm/ecp_nistz256-armv4.pl
index de3cd5c..2314b75 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1405,27 +1405,19 @@ ecp_nistz256_point_add:
        stmdb   sp!,{r0-r12,lr}         @ push from r0, unusual, but intentional
        sub     sp,sp,#32*18+16
 
-       ldmia   $b_ptr!,{r4-r11}        @ copy in2
+       ldmia   $b_ptr!,{r4-r11}        @ copy in2_x
        add     r3,sp,#$in2_x
-       orr     r12,r4,r5
-       orr     r12,r12,r6
-       orr     r12,r12,r7
-       orr     r12,r12,r8
-       orr     r12,r12,r9
-       orr     r12,r12,r10
-       orr     r12,r12,r11
        stmia   r3!,{r4-r11}
-       ldmia   $b_ptr!,{r4-r11}
-       orr     r12,r12,r4
-       orr     r12,r12,r5
+       ldmia   $b_ptr!,{r4-r11}        @ copy in2_y
+       stmia   r3!,{r4-r11}
+       ldmia   $b_ptr,{r4-r11}         @ copy in2_z
+       orr     r12,r4,r5
        orr     r12,r12,r6
        orr     r12,r12,r7
        orr     r12,r12,r8
        orr     r12,r12,r9
        orr     r12,r12,r10
        orr     r12,r12,r11
-       stmia   r3!,{r4-r11}
-       ldmia   $b_ptr,{r4-r11}
        cmp     r12,#0
 #ifdef __thumb2__
        it      ne
@@ -1434,27 +1426,19 @@ ecp_nistz256_point_add:
        stmia   r3,{r4-r11}
        str     r12,[sp,#32*18+8]       @ !in2infty
 
-       ldmia   $a_ptr!,{r4-r11}        @ copy in1
+       ldmia   $a_ptr!,{r4-r11}        @ copy in1_x
        add     r3,sp,#$in1_x
-       orr     r12,r4,r5
-       orr     r12,r12,r6
-       orr     r12,r12,r7
-       orr     r12,r12,r8
-       orr     r12,r12,r9
-       orr     r12,r12,r10
-       orr     r12,r12,r11
        stmia   r3!,{r4-r11}
-       ldmia   $a_ptr!,{r4-r11}
-       orr     r12,r12,r4
-       orr     r12,r12,r5
+       ldmia   $a_ptr!,{r4-r11}        @ copy in1_y
+       stmia   r3!,{r4-r11}
+       ldmia   $a_ptr,{r4-r11}         @ copy in1_z
+       orr     r12,r4,r5
        orr     r12,r12,r6
        orr     r12,r12,r7
        orr     r12,r12,r8
        orr     r12,r12,r9
        orr     r12,r12,r10
        orr     r12,r12,r11
-       stmia   r3!,{r4-r11}
-       ldmia   $a_ptr,{r4-r11}
        cmp     r12,#0
 #ifdef __thumb2__
        it      ne
@@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine:
        stmdb   sp!,{r0-r12,lr}         @ push from r0, unusual, but intentional
        sub     sp,sp,#32*15
 
-       ldmia   $a_ptr!,{r4-r11}        @ copy in1
+       ldmia   $a_ptr!,{r4-r11}        @ copy in1_x
        add     r3,sp,#$in1_x
-       orr     r12,r4,r5
-       orr     r12,r12,r6
-       orr     r12,r12,r7
-       orr     r12,r12,r8
-       orr     r12,r12,r9
-       orr     r12,r12,r10
-       orr     r12,r12,r11
        stmia   r3!,{r4-r11}
-       ldmia   $a_ptr!,{r4-r11}
-       orr     r12,r12,r4
-       orr     r12,r12,r5
+       ldmia   $a_ptr!,{r4-r11}        @ copy in1_y
+       stmia   r3!,{r4-r11}
+       ldmia   $a_ptr,{r4-r11}         @ copy in1_z
+       orr     r12,r4,r5
        orr     r12,r12,r6
        orr     r12,r12,r7
        orr     r12,r12,r8
        orr     r12,r12,r9
        orr     r12,r12,r10
        orr     r12,r12,r11
-       stmia   r3!,{r4-r11}
-       ldmia   $a_ptr,{r4-r11}
        cmp     r12,#0
 #ifdef __thumb2__
        it      ne
@@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
        stmia   r3,{r4-r11}
        str     r12,[sp,#32*15+4]       @ !in1infty
 
-       ldmia   $b_ptr!,{r4-r11}        @ copy in2
+       ldmia   $b_ptr!,{r4-r11}        @ copy in2_x
        add     r3,sp,#$in2_x
        orr     r12,r4,r5
        orr     r12,r12,r6
@@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
        orr     r12,r12,r10
        orr     r12,r12,r11
        stmia   r3!,{r4-r11}
-       ldmia   $b_ptr!,{r4-r11}
+       ldmia   $b_ptr!,{r4-r11}        @ copy in2_y
        orr     r12,r12,r4
        orr     r12,r12,r5
        orr     r12,r12,r6
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl 
b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 1362586..cdc9161 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -862,46 +862,28 @@ ecp_nistz256_point_add:
        stp     x25,x26,[sp,#64]
        sub     sp,sp,#32*12
 
-       ldp     $a0,$a1,[$bp]
-       ldp     $a2,$a3,[$bp,#16]
-       ldp     $t0,$t1,[$bp,#32]
-       ldp     $t2,$t3,[$bp,#48]
+       ldp     $a0,$a1,[$bp,#64]       // in2_z
+       ldp     $a2,$a3,[$bp,#64+16]
         mov    $rp_real,$rp
         mov    $ap_real,$ap
         mov    $bp_real,$bp
-       orr     $a0,$a0,$a1
-       orr     $a2,$a2,$a3
-        ldp    $acc0,$acc1,[$ap]
-       orr     $t0,$t0,$t1
-       orr     $t2,$t2,$t3
-        ldp    $acc2,$acc3,[$ap,#16]
-       orr     $a0,$a0,$a2
-       orr     $t2,$t0,$t2
-        ldp    $t0,$t1,[$ap,#32]
-       orr     $in2infty,$a0,$t2
-       cmp     $in2infty,#0
-        ldp    $t2,$t3,[$ap,#48]
-       csetm   $in2infty,ne            // !in2infty
-
-        ldp    $a0,$a1,[$bp_real,#64]  // forward load for p256_sqr_mont
-       orr     $acc0,$acc0,$acc1
-       orr     $acc2,$acc2,$acc3
-        ldp    $a2,$a3,[$bp_real,#64+16]
-       orr     $t0,$t0,$t1
-       orr     $t2,$t2,$t3
-       orr     $acc0,$acc0,$acc2
-       orr     $t0,$t0,$t2
-       orr     $in1infty,$acc0,$t0
-       cmp     $in1infty,#0
         ldr    $poly1,.Lpoly+8
         ldr    $poly3,.Lpoly+24
-       csetm   $in1infty,ne            // !in1infty
-
+       orr     $t0,$a0,$a1
+       orr     $t2,$a2,$a3
+       orr     $in2infty,$t0,$t2
+       cmp     $in2infty,#0
+       csetm   $in2infty,ne            // !in2infty
        add     $rp,sp,#$Z2sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
 
-       ldp     $a0,$a1,[$ap_real,#64]
+       ldp     $a0,$a1,[$ap_real,#64]  // in1_z
        ldp     $a2,$a3,[$ap_real,#64+16]
+       orr     $t0,$a0,$a1
+       orr     $t2,$a2,$a3
+       orr     $in1infty,$t0,$t2
+       cmp     $in1infty,#0
+       csetm   $in1infty,ne            // !in1infty
        add     $rp,sp,#$Z1sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
 
@@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
        ldr     $poly1,.Lpoly+8
        ldr     $poly3,.Lpoly+24
 
-       ldp     $a0,$a1,[$ap]
-       ldp     $a2,$a3,[$ap,#16]
-       ldp     $t0,$t1,[$ap,#32]
-       ldp     $t2,$t3,[$ap,#48]
-       orr     $a0,$a0,$a1
-       orr     $a2,$a2,$a3
-       orr     $t0,$t0,$t1
-       orr     $t2,$t2,$t3
-       orr     $a0,$a0,$a2
-       orr     $t0,$t0,$t2
-       orr     $in1infty,$a0,$t0
+       ldp     $a0,$a1,[$ap,#64]       // in1_z
+       ldp     $a2,$a3,[$ap,#64+16]
+       orr     $t0,$a0,$a1
+       orr     $t2,$a2,$a3
+       orr     $in1infty,$t0,$t2
        cmp     $in1infty,#0
        csetm   $in1infty,ne            // !in1infty
 
-       ldp     $a0,$a1,[$bp]
-       ldp     $a2,$a3,[$bp,#16]
-       ldp     $t0,$t1,[$bp,#32]
+       ldp     $acc0,$acc1,[$bp]       // in2_x
+       ldp     $acc2,$acc3,[$bp,#16]
+       ldp     $t0,$t1,[$bp,#32]       // in2_y
        ldp     $t2,$t3,[$bp,#48]
-       orr     $a0,$a0,$a1
-       orr     $a2,$a2,$a3
+       orr     $acc0,$acc0,$acc1
+       orr     $acc2,$acc2,$acc3
        orr     $t0,$t0,$t1
        orr     $t2,$t2,$t3
-       orr     $a0,$a0,$a2
+       orr     $acc0,$acc0,$acc2
        orr     $t0,$t0,$t2
-       orr     $in2infty,$a0,$t0
+       orr     $in2infty,$acc0,$t0
        cmp     $in2infty,#0
        csetm   $in2infty,ne            // !in2infty
 
-       ldp     $a0,$a1,[$ap_real,#64]
-       ldp     $a2,$a3,[$ap_real,#64+16]
        add     $rp,sp,#$Z1sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
 
diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl 
b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
index 3c7ff50..97201cb 100755
--- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl
+++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
@@ -899,71 +899,39 @@ ecp_nistz256_point_add:
        mov     $ap,$ap_real
        mov     $bp,$bp_real
 
-       ld      [$bp],@acc[0]           ! in2_x
-       ld      [$bp+4],@acc[1]
-       ld      [$bp+8],@acc[2]
-       ld      [$bp+12],@acc[3]
-       ld      [$bp+16],@acc[4]
-       ld      [$bp+20],@acc[5]
-       ld      [$bp+24],@acc[6]
-       ld      [$bp+28],@acc[7]
-       ld      [$bp+32],$t0            ! in2_y
-       ld      [$bp+32+4],$t1
-       ld      [$bp+32+8],$t2
-       ld      [$bp+32+12],$t3
-       ld      [$bp+32+16],$t4
-       ld      [$bp+32+20],$t5
-       ld      [$bp+32+24],$t6
-       ld      [$bp+32+28],$t7
-       or      @acc[1],@acc[0],@acc[0]
-       or      @acc[3],@acc[2],@acc[2]
-       or      @acc[5],@acc[4],@acc[4]
-       or      @acc[7],@acc[6],@acc[6]
-       or      @acc[2],@acc[0],@acc[0]
-       or      @acc[6],@acc[4],@acc[4]
-       or      @acc[4],@acc[0],@acc[0]
+       ld      [$bp+64],$t0            ! in2_z
+       ld      [$bp+64+4],$t1
+       ld      [$bp+64+8],$t2
+       ld      [$bp+64+12],$t3
+       ld      [$bp+64+16],$t4
+       ld      [$bp+64+20],$t5
+       ld      [$bp+64+24],$t6
+       ld      [$bp+64+28],$t7
        or      $t1,$t0,$t0
        or      $t3,$t2,$t2
        or      $t5,$t4,$t4
        or      $t7,$t6,$t6
        or      $t2,$t0,$t0
        or      $t6,$t4,$t4
-       or      $t4,$t0,$t0
-       or      @acc[0],$t0,$t0         ! !in2infty
+       or      $t4,$t0,$t0             ! !in2infty
        movrnz  $t0,-1,$t0
        st      $t0,[%fp+STACK_BIAS-12]
 
-       ld      [$ap],@acc[0]           ! in1_x
-       ld      [$ap+4],@acc[1]
-       ld      [$ap+8],@acc[2]
-       ld      [$ap+12],@acc[3]
-       ld      [$ap+16],@acc[4]
-       ld      [$ap+20],@acc[5]
-       ld      [$ap+24],@acc[6]
-       ld      [$ap+28],@acc[7]
-       ld      [$ap+32],$t0            ! in1_y
-       ld      [$ap+32+4],$t1
-       ld      [$ap+32+8],$t2
-       ld      [$ap+32+12],$t3
-       ld      [$ap+32+16],$t4
-       ld      [$ap+32+20],$t5
-       ld      [$ap+32+24],$t6
-       ld      [$ap+32+28],$t7
-       or      @acc[1],@acc[0],@acc[0]
-       or      @acc[3],@acc[2],@acc[2]
-       or      @acc[5],@acc[4],@acc[4]
-       or      @acc[7],@acc[6],@acc[6]
-       or      @acc[2],@acc[0],@acc[0]
-       or      @acc[6],@acc[4],@acc[4]
-       or      @acc[4],@acc[0],@acc[0]
+       ld      [$ap+64],$t0            ! in1_z
+       ld      [$ap+64+4],$t1
+       ld      [$ap+64+8],$t2
+       ld      [$ap+64+12],$t3
+       ld      [$ap+64+16],$t4
+       ld      [$ap+64+20],$t5
+       ld      [$ap+64+24],$t6
+       ld      [$ap+64+28],$t7
        or      $t1,$t0,$t0
        or      $t3,$t2,$t2
        or      $t5,$t4,$t4
        or      $t7,$t6,$t6
        or      $t2,$t0,$t0
        or      $t6,$t4,$t4
-       or      $t4,$t0,$t0
-       or      @acc[0],$t0,$t0         ! !in1infty
+       or      $t4,$t0,$t0             ! !in1infty
        movrnz  $t0,-1,$t0
        st      $t0,[%fp+STACK_BIAS-16]
 
@@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
        mov     $ap,$ap_real
        mov     $bp,$bp_real
 
-       ld      [$ap],@acc[0]           ! in1_x
-       ld      [$ap+4],@acc[1]
-       ld      [$ap+8],@acc[2]
-       ld      [$ap+12],@acc[3]
-       ld      [$ap+16],@acc[4]
-       ld      [$ap+20],@acc[5]
-       ld      [$ap+24],@acc[6]
-       ld      [$ap+28],@acc[7]
-       ld      [$ap+32],$t0            ! in1_y
-       ld      [$ap+32+4],$t1
-       ld      [$ap+32+8],$t2
-       ld      [$ap+32+12],$t3
-       ld      [$ap+32+16],$t4
-       ld      [$ap+32+20],$t5
-       ld      [$ap+32+24],$t6
-       ld      [$ap+32+28],$t7
-       or      @acc[1],@acc[0],@acc[0]
-       or      @acc[3],@acc[2],@acc[2]
-       or      @acc[5],@acc[4],@acc[4]
-       or      @acc[7],@acc[6],@acc[6]
-       or      @acc[2],@acc[0],@acc[0]
-       or      @acc[6],@acc[4],@acc[4]
-       or      @acc[4],@acc[0],@acc[0]
+       ld      [$ap+64],$t0            ! in1_z
+       ld      [$ap+64+4],$t1
+       ld      [$ap+64+8],$t2
+       ld      [$ap+64+12],$t3
+       ld      [$ap+64+16],$t4
+       ld      [$ap+64+20],$t5
+       ld      [$ap+64+24],$t6
+       ld      [$ap+64+28],$t7
        or      $t1,$t0,$t0
        or      $t3,$t2,$t2
        or      $t5,$t4,$t4
        or      $t7,$t6,$t6
        or      $t2,$t0,$t0
        or      $t6,$t4,$t4
-       or      $t4,$t0,$t0
-       or      @acc[0],$t0,$t0         ! !in1infty
+       or      $t4,$t0,$t0             ! !in1infty
        movrnz  $t0,-1,$t0
        st      $t0,[%fp+STACK_BIAS-16]
 
@@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
        stx     $acc2,[%sp+LOCALS64+$in2_y+16]
        stx     $acc3,[%sp+LOCALS64+$in2_y+24]
 
-       or      $a1,$a0,$a0
-       or      $a3,$a2,$a2
-       or      $acc1,$acc0,$acc0
-       or      $acc3,$acc2,$acc2
-       or      $a2,$a0,$a0
-       or      $acc2,$acc0,$acc0
-       or      $acc0,$a0,$a0
-       movrnz  $a0,-1,$a0                      ! !in2infty
-       stx     $a0,[%fp+STACK_BIAS-8]
-
        ld      [$bp+64],$acc0                  ! in2_z
        ld      [$bp+64+4],$t0
        ld      [$bp+64+8],$acc1
@@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
        stx     $acc2,[%sp+LOCALS64+$in2_z+16]
        stx     $acc3,[%sp+LOCALS64+$in2_z+24]
 
+       or      $acc1,$acc0,$acc0
+       or      $acc3,$acc2,$acc2
+       or      $acc2,$acc0,$acc0
+       movrnz  $acc0,-1,$acc0                  ! !in2infty
+       stx     $acc0,[%fp+STACK_BIAS-8]
+
        or      $a0,$t0,$a0
        ld      [$ap+32],$acc0                  ! in1_y
        or      $a1,$t1,$a1
@@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
        stx     $acc2,[%sp+LOCALS64+$in1_y+16]
        stx     $acc3,[%sp+LOCALS64+$in1_y+24]
 
-       or      $a1,$a0,$a0
-       or      $a3,$a2,$a2
-       or      $acc1,$acc0,$acc0
-       or      $acc3,$acc2,$acc2
-       or      $a2,$a0,$a0
-       or      $acc2,$acc0,$acc0
-       or      $acc0,$a0,$a0
-       movrnz  $a0,-1,$a0                      ! !in1infty
-       stx     $a0,[%fp+STACK_BIAS-16]
-
        ldx     [%sp+LOCALS64+$in2_z],$a0       ! forward load
        ldx     [%sp+LOCALS64+$in2_z+8],$a1
        ldx     [%sp+LOCALS64+$in2_z+16],$a2
@@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
        stx     $acc2,[%sp+LOCALS64+$in1_z+16]
        stx     $acc3,[%sp+LOCALS64+$in1_z+24]
 
+       or      $acc1,$acc0,$acc0
+       or      $acc3,$acc2,$acc2
+       or      $acc2,$acc0,$acc0
+       movrnz  $acc0,-1,$acc0                  ! !in1infty
+       stx     $acc0,[%fp+STACK_BIAS-16]
+
        call    __ecp_nistz256_sqr_mont_vis3    ! p256_sqr_mont(Z2sqr, in2_z);
        add     %sp,LOCALS64+$Z2sqr,$rp
 
@@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
        stx     $acc2,[%sp+LOCALS64+$in1_y+16]
        stx     $acc3,[%sp+LOCALS64+$in1_y+24]
 
-       or      $a1,$a0,$a0
-       or      $a3,$a2,$a2
-       or      $acc1,$acc0,$acc0
-       or      $acc3,$acc2,$acc2
-       or      $a2,$a0,$a0
-       or      $acc2,$acc0,$acc0
-       or      $acc0,$a0,$a0
-       movrnz  $a0,-1,$a0                      ! !in1infty
-       stx     $a0,[%fp+STACK_BIAS-16]
-
        ld      [$ap+64],$a0                    ! in1_z
        ld      [$ap+64+4],$t0
        ld      [$ap+64+8],$a1
@@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
        stx     $a2,[%sp+LOCALS64+$in1_z+16]
        stx     $a3,[%sp+LOCALS64+$in1_z+24]
 
+       or      $a1,$a0,$t0
+       or      $a3,$a2,$t2
+       or      $t2,$t0,$t0
+       movrnz  $t0,-1,$t0                      ! !in1infty
+       stx     $t0,[%fp+STACK_BIAS-16]
+
        call    __ecp_nistz256_sqr_mont_vis3    ! p256_sqr_mont(Z1sqr, in1_z);
        add     %sp,LOCALS64+$Z1sqr,$rp
 
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl 
b/crypto/ec/asm/ecp_nistz256-x86.pl
index b96b1aa..1d9e006 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
        &mov    ("edx",&DWP($i+12,"esi"));
        &mov    (&DWP($i+0,"edi"),"eax");
        &mov    (&DWP(32*18+12,"esp"),"ebp")    if ($i==0);
-       &mov    ("ebp","eax")                   if ($i==0);
-       &or     ("ebp","eax")                   if ($i!=0 && $i<64);
+       &mov    ("ebp","eax")                   if ($i==64);
+       &or     ("ebp","eax")                   if ($i>64);
        &mov    (&DWP($i+4,"edi"),"ebx");
-       &or     ("ebp","ebx")                   if ($i<64);
+       &or     ("ebp","ebx")                   if ($i>=64);
        &mov    (&DWP($i+8,"edi"),"ecx");
-       &or     ("ebp","ecx")                   if ($i<64);
+       &or     ("ebp","ecx")                   if ($i>=64);
        &mov    (&DWP($i+12,"edi"),"edx");
-       &or     ("ebp","edx")                   if ($i<64);
+       &or     ("ebp","edx")                   if ($i>=64);
     }
        &xor    ("eax","eax");
        &mov    ("esi",&wparam(1));
@@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
        &mov    ("ecx",&DWP($i+8,"esi"));
        &mov    ("edx",&DWP($i+12,"esi"));
        &mov    (&DWP($i+0,"edi"),"eax");
-       &mov    ("ebp","eax")                   if ($i==0);
-       &or     ("ebp","eax")                   if ($i!=0 && $i<64);
+       &mov    ("ebp","eax")                   if ($i==64);
+       &or     ("ebp","eax")                   if ($i>64);
        &mov    (&DWP($i+4,"edi"),"ebx");
-       &or     ("ebp","ebx")                   if ($i<64);
+       &or     ("ebp","ebx")                   if ($i>=64);
        &mov    (&DWP($i+8,"edi"),"ecx");
-       &or     ("ebp","ecx")                   if ($i<64);
+       &or     ("ebp","ecx")                   if ($i>=64);
        &mov    (&DWP($i+12,"edi"),"edx");
-       &or     ("ebp","edx")                   if ($i<64);
+       &or     ("ebp","edx")                   if ($i>=64);
     }
        &xor    ("eax","eax");
        &sub    ("eax","ebp");
@@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
        &mov    ("edx",&DWP($i+12,"esi"));
        &mov    (&DWP($i+0,"edi"),"eax");
        &mov    (&DWP(32*15+8,"esp"),"ebp")     if ($i==0);
-       &mov    ("ebp","eax")                   if ($i==0);
-       &or     ("ebp","eax")                   if ($i!=0 && $i<64);
+       &mov    ("ebp","eax")                   if ($i==64);
+       &or     ("ebp","eax")                   if ($i>64);
        &mov    (&DWP($i+4,"edi"),"ebx");
-       &or     ("ebp","ebx")                   if ($i<64);
+       &or     ("ebp","ebx")                   if ($i>=64);
        &mov    (&DWP($i+8,"edi"),"ecx");
-       &or     ("ebp","ecx")                   if ($i<64);
+       &or     ("ebp","ecx")                   if ($i>=64);
        &mov    (&DWP($i+12,"edi"),"edx");
-       &or     ("ebp","edx")                   if ($i<64);
+       &or     ("ebp","edx")                   if ($i>=64);
     }
        &xor    ("eax","eax");
        &mov    ("esi",&wparam(2));
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl 
b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index cc7b976..ddbbedf 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2294,16 +2294,14 @@ $code.=<<___;
        mov     $b_org, $a_ptr                  # reassign
        movdqa  %xmm0, $in1_x(%rsp)
        movdqa  %xmm1, $in1_x+0x10(%rsp)
-       por     %xmm0, %xmm1
        movdqa  %xmm2, $in1_y(%rsp)
        movdqa  %xmm3, $in1_y+0x10(%rsp)
-       por     %xmm2, %xmm3
        movdqa  %xmm4, $in1_z(%rsp)
        movdqa  %xmm5, $in1_z+0x10(%rsp)
-       por     %xmm1, %xmm3
+       por     %xmm4, %xmm5
 
        movdqu  0x00($a_ptr), %xmm0             # copy  *(P256_POINT *)$b_ptr
-        pshufd \$0xb1, %xmm3, %xmm5
+        pshufd \$0xb1, %xmm5, %xmm3
        movdqu  0x10($a_ptr), %xmm1
        movdqu  0x20($a_ptr), %xmm2
         por    %xmm3, %xmm5
@@ -2315,14 +2313,14 @@ $code.=<<___;
        movdqa  %xmm0, $in2_x(%rsp)
         pshufd \$0x1e, %xmm5, %xmm4
        movdqa  %xmm1, $in2_x+0x10(%rsp)
-       por     %xmm0, %xmm1
-        movq   $r_ptr, %xmm0                   # save $r_ptr
+       movdqu  0x40($a_ptr),%xmm0              # in2_z again
+       movdqu  0x50($a_ptr),%xmm1
        movdqa  %xmm2, $in2_y(%rsp)
        movdqa  %xmm3, $in2_y+0x10(%rsp)
-       por     %xmm2, %xmm3
         por    %xmm4, %xmm5
         pxor   %xmm4, %xmm4
-       por     %xmm1, %xmm3
+       por     %xmm0, %xmm1
+        movq   $r_ptr, %xmm0                   # save $r_ptr
 
        lea     0x40-$bias($a_ptr), $a_ptr      # $a_ptr is still valid
         mov    $src0, $in2_z+8*0(%rsp)         # make in2_z copy
@@ -2333,8 +2331,8 @@ $code.=<<___;
        call    __ecp_nistz256_sqr_mont$x       # p256_sqr_mont(Z2sqr, in2_z);
 
        pcmpeqd %xmm4, %xmm5
-       pshufd  \$0xb1, %xmm3, %xmm4
-       por     %xmm3, %xmm4
+       pshufd  \$0xb1, %xmm1, %xmm4
+       por     %xmm1, %xmm4
        pshufd  \$0, %xmm5, %xmm5               # in1infty
        pshufd  \$0x1e, %xmm4, %xmm3
        por     %xmm3, %xmm4
@@ -2666,16 +2664,14 @@ $code.=<<___;
         mov    0x40+8*3($a_ptr), $acc0
        movdqa  %xmm0, $in1_x(%rsp)
        movdqa  %xmm1, $in1_x+0x10(%rsp)
-       por     %xmm0, %xmm1
        movdqa  %xmm2, $in1_y(%rsp)
        movdqa  %xmm3, $in1_y+0x10(%rsp)
-       por     %xmm2, %xmm3
        movdqa  %xmm4, $in1_z(%rsp)
        movdqa  %xmm5, $in1_z+0x10(%rsp)
-       por     %xmm1, %xmm3
+       por     %xmm4, %xmm5
 
        movdqu  0x00($b_ptr), %xmm0     # copy  *(P256_POINT_AFFINE *)$b_ptr
-        pshufd \$0xb1, %xmm3, %xmm5
+        pshufd \$0xb1, %xmm5, %xmm3
        movdqu  0x10($b_ptr), %xmm1
        movdqu  0x20($b_ptr), %xmm2
         por    %xmm3, %xmm5
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 564a889..dca3a2d 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -335,19 +335,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;
     const BN_ULONG *in2_z = b->Z;
 
-    /* We encode infinity as (0,0), which is not on the curve,
-     * so it is OK. */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    /*
+     * Infinity is encoded as (,,0)
+     */
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 
-    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
-                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+    in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
     if (P256_LIMBS == 8)
-        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
-                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+        in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
 
     in1infty = is_zero(in1infty);
     in2infty = is_zero(in2infty);
@@ -436,15 +433,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;
 
     /*
-     * In affine representation we encode infty as (0,0), which is not on the
-     * curve, so it is OK
+     * Infinity is encoded as (,,0)
      */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 
+    /*
+     * In affine representation we encode infinity as (0,0), which is
+     * not on the curve, so it is OK
+     */
     in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
                 in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
     if (P256_LIMBS == 8)
@@ -1273,6 +1271,8 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP 
*group,
             } else
 #endif
             {
+                BN_ULONG infty;
+
                 /* First window */
                 wvalue = (p_str[0] << 1) & mask;
                 idx += window_size;
@@ -1285,7 +1285,30 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP 
*group,
                 ecp_nistz256_neg(p.p.Z, p.p.Y);
                 copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
 
-                memcpy(p.p.Z, ONE, sizeof(ONE));
+                /*
+                 * Since affine infinity is encoded as (0,0) and
+                 * Jacobian is (,,0), we need to harmonize them
+                 * by assigning "one" or zero to Z.
+                 */
+                infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
+                         p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
+                if (P256_LIMBS == 8)
+                    infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
+                              p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
+
+                infty = 0 - is_zero(infty);
+                infty = ~infty;
+
+                p.p.Z[0] = ONE[0] & infty;
+                p.p.Z[1] = ONE[1] & infty;
+                p.p.Z[2] = ONE[2] & infty;
+                p.p.Z[3] = ONE[3] & infty;
+                if (P256_LIMBS == 8) {
+                    p.p.Z[4] = ONE[4] & infty;
+                    p.p.Z[5] = ONE[5] & infty;
+                    p.p.Z[6] = ONE[6] & infty;
+                    p.p.Z[7] = ONE[7] & infty;
+                }
 
                 for (i = 1; i < 37; i++) {
                     unsigned int off = (idx - 1) / 8;
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits

Reply via email to