The branch master has been updated
       via  3b5a5f995e023593bf3e576f3043107378456bb9 (commit)
       via  45a405382bd99187bc90399e61bf33a720e27610 (commit)
       via  5578ad1f0b9e92f9c4637d254270bf25f5a19723 (commit)
       via  1c5bc7b85abf831beb58375ab2652cdcdf31a0fc (commit)
       via  1645df672f2f8bf01000ace4fa1897927375b0d0 (commit)
       via  90b797f00e825c45739a8c2cf0364eb8bf5513c0 (commit)
       via  0164bf812f8d4781be383b829d29b698cc8eb8d7 (commit)
      from  75e571b59298c868763508d60027e4e666dee1c1 (commit)


- Log -----------------------------------------------------------------
commit 3b5a5f995e023593bf3e576f3043107378456bb9
Author: Fangming.Fang <[email protected]>
Date:   Mon Dec 30 12:15:37 2019 +0000

    Fix side channel in ecp_nistz256-armv8.pl
    
    This change addresses a potential side-channel vulnerability in
    the internals of nistz256 low level operations for armv8.
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    Reviewed-by: Bernd Edlinger <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 45a405382bd99187bc90399e61bf33a720e27610
Author: Bernd Edlinger <[email protected]>
Date:   Thu Aug 29 22:45:18 2019 +0200

    Fix side channel in the ecp_nistz256.c reference implementation
    
    This is only used if configured with
    ./config -DECP_NISTZ256_REFERENCE_IMPLEMENTATION
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 5578ad1f0b9e92f9c4637d254270bf25f5a19723
Author: Bernd Edlinger <[email protected]>
Date:   Sun Aug 25 03:47:01 2019 +0200

    Improve side channel fix in ecp_nistz256-x86_64.pl
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 1c5bc7b85abf831beb58375ab2652cdcdf31a0fc
Author: Bernd Edlinger <[email protected]>
Date:   Sun Aug 25 03:45:31 2019 +0200

    Fix side channel in ecp_nistz256-armv4.pl
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 1645df672f2f8bf01000ace4fa1897927375b0d0
Author: Bernd Edlinger <[email protected]>
Date:   Sun Aug 25 03:03:55 2019 +0200

    Fix side channel in ecp_nistz256-x86.pl
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 90b797f00e825c45739a8c2cf0364eb8bf5513c0
Author: David Benjamin <[email protected]>
Date:   Fri Jun 14 17:06:52 2019 -0400

    Avoid leaking intermediate states in point doubling special case.
    
    Cherry picked from
    
    https://github.com/google/boringssl/commit/12d9ed670da3edd64ce8175cfe0e091982989c18
    
    Reviewed-by: Nicola Tuveri <[email protected]>
    Reviewed-by: Matt Caswell <[email protected]>
    Reviewed-by: Bernd Edlinger <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

commit 0164bf812f8d4781be383b829d29b698cc8eb8d7
Author: Nicola Tuveri <[email protected]>
Date:   Sat Jun 8 12:48:47 2019 +0300

    Fix potential SCA vulnerability in some EC_METHODs
    
    This commit addresses a potential side-channel vulnerability in the
    internals of some elliptic curve low level operations.
    The side-channel leakage appears to be tiny, so the severity of this
    issue is rather low.
    
    The issue was reported by David Schrammel and Samuel Weiser.
    
    Reviewed-by: Matt Caswell <[email protected]>
    Reviewed-by: Bernd Edlinger <[email protected]>
    (Merged from https://github.com/openssl/openssl/pull/9239)

-----------------------------------------------------------------------

Summary of changes:
 crypto/ec/asm/ecp_nistz256-armv4.pl  | 79 +++++++++++++++---------------------
 crypto/ec/asm/ecp_nistz256-armv8.pl  | 65 +++++++++++++----------------
 crypto/ec/asm/ecp_nistz256-x86.pl    | 76 ++++++++++++++++------------------
 crypto/ec/asm/ecp_nistz256-x86_64.pl | 26 ++++--------
 crypto/ec/ecp_nistp224.c             | 35 ++++++++++++++--
 crypto/ec/ecp_nistp256.c             | 22 +++++++++-
 crypto/ec/ecp_nistp521.c             | 20 ++++++++-
 crypto/ec/ecp_nistz256.c             | 49 ++++++++++++++++++----
 8 files changed, 216 insertions(+), 156 deletions(-)

diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index 4e6bfef4b5..4809360ac4 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1398,7 +1398,7 @@ my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
 # 256-bit vectors on top. Then note that we push
 # starting from r0, which means that we have copy of
 # input arguments just below these temporary vectors.
-# We use three of them for !in1infty, !in2intfy and
+# We use three of them for ~in1infty, ~in2infty and
 # result of check for zero.
 
 $code.=<<___;
@@ -1428,7 +1428,7 @@ ecp_nistz256_point_add:
 #endif
        movne   r12,#-1
        stmia   r3,{r4-r11}
-       str     r12,[sp,#32*18+8]       @ !in2infty
+       str     r12,[sp,#32*18+8]       @ ~in2infty
 
        ldmia   $a_ptr!,{r4-r11}        @ copy in1_x
        add     r3,sp,#$in1_x
@@ -1449,7 +1449,7 @@ ecp_nistz256_point_add:
 #endif
        movne   r12,#-1
        stmia   r3,{r4-r11}
-       str     r12,[sp,#32*18+4]       @ !in1infty
+       str     r12,[sp,#32*18+4]       @ ~in1infty
 
        add     $a_ptr,sp,#$in2_z
        add     $b_ptr,sp,#$in2_z
@@ -1514,33 +1514,20 @@ ecp_nistz256_point_add:
        orr     $a0,$a0,$a2
        orr     $a4,$a4,$a6
        orr     $a0,$a0,$a7
-       orrs    $a0,$a0,$a4
+       orr     $a0,$a0,$a4             @ ~is_equal(U1,U2)
 
-       bne     .Ladd_proceed           @ is_equal(U1,U2)?
+       ldr     $t0,[sp,#32*18+4]       @ ~in1infty
+       ldr     $t1,[sp,#32*18+8]       @ ~in2infty
+       ldr     $t2,[sp,#32*18+12]      @ ~is_equal(S1,S2)
+       mvn     $t0,$t0                 @ -1/0 -> 0/-1
+       mvn     $t1,$t1                 @ -1/0 -> 0/-1
+       orr     $a0,$t0
+       orr     $a0,$t1
+       orrs    $a0,$t2                 @ set flags
 
-       ldr     $t0,[sp,#32*18+4]
-       ldr     $t1,[sp,#32*18+8]
-       ldr     $t2,[sp,#32*18+12]
-       tst     $t0,$t1
-       beq     .Ladd_proceed           @ (in1infty || in2infty)?
-       tst     $t2,$t2
-       beq     .Ladd_double            @ is_equal(S1,S2)?
+       @ if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
+       bne     .Ladd_proceed
 
-       ldr     $r_ptr,[sp,#32*18+16]
-       eor     r4,r4,r4
-       eor     r5,r5,r5
-       eor     r6,r6,r6
-       eor     r7,r7,r7
-       eor     r8,r8,r8
-       eor     r9,r9,r9
-       eor     r10,r10,r10
-       eor     r11,r11,r11
-       stmia   $r_ptr!,{r4-r11}
-       stmia   $r_ptr!,{r4-r11}
-       stmia   $r_ptr!,{r4-r11}
-       b       .Ladd_done
-
-.align 4
 .Ladd_double:
        ldr     $a_ptr,[sp,#32*18+20]
        add     sp,sp,#32*(18-5)+16     @ difference in frame sizes
@@ -1605,15 +1592,15 @@ ecp_nistz256_point_add:
        add     $b_ptr,sp,#$S2
        bl      __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
 
-       ldr     r11,[sp,#32*18+4]       @ !in1intfy
-       ldr     r12,[sp,#32*18+8]       @ !in2intfy
+       ldr     r11,[sp,#32*18+4]       @ ~in1infty
+       ldr     r12,[sp,#32*18+8]       @ ~in2infty
        add     r1,sp,#$res_x
        add     r2,sp,#$in2_x
-       and     r10,r11,r12
+       and     r10,r11,r12             @ ~in1infty & ~in2infty
        mvn     r11,r11
        add     r3,sp,#$in1_x
-       and     r11,r11,r12
-       mvn     r12,r12
+       and     r11,r11,r12             @ in1infty & ~in2infty
+       mvn     r12,r12                 @ in2infty
        ldr     $r_ptr,[sp,#32*18+16]
 ___
 for($i=0;$i<96;$i+=8) {                        # conditional moves
@@ -1621,11 +1608,11 @@ $code.=<<___;
        ldmia   r1!,{r4-r5}             @ res_x
        ldmia   r2!,{r6-r7}             @ in2_x
        ldmia   r3!,{r8-r9}             @ in1_x
-       and     r4,r4,r10
+       and     r4,r4,r10               @ ~in1infty & ~in2infty
        and     r5,r5,r10
-       and     r6,r6,r11
+       and     r6,r6,r11               @ in1infty & ~in2infty
        and     r7,r7,r11
-       and     r8,r8,r12
+       and     r8,r8,r12               @ in2infty
        and     r9,r9,r12
        orr     r4,r4,r6
        orr     r5,r5,r7
@@ -1660,7 +1647,7 @@ my $Z1sqr = $S2;
 # 256-bit vectors on top. Then note that we push
 # starting from r0, which means that we have copy of
 # input arguments just below these temporary vectors.
-# We use two of them for !in1infty, !in2intfy.
+# We use two of them for ~in1infty, ~in2infty.
 
 my @ONE_mont=(1,0,0,-1,-1,-1,-2,0);
 
@@ -1691,7 +1678,7 @@ ecp_nistz256_point_add_affine:
 #endif
        movne   r12,#-1
        stmia   r3,{r4-r11}
-       str     r12,[sp,#32*15+4]       @ !in1infty
+       str     r12,[sp,#32*15+4]       @ ~in1infty
 
        ldmia   $b_ptr!,{r4-r11}        @ copy in2_x
        add     r3,sp,#$in2_x
@@ -1718,7 +1705,7 @@ ecp_nistz256_point_add_affine:
        it      ne
 #endif
        movne   r12,#-1
-       str     r12,[sp,#32*15+8]       @ !in2infty
+       str     r12,[sp,#32*15+8]       @ ~in2infty
 
        add     $a_ptr,sp,#$in1_z
        add     $b_ptr,sp,#$in1_z
@@ -1800,15 +1787,15 @@ ecp_nistz256_point_add_affine:
        add     $b_ptr,sp,#$S2
        bl      __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
 
-       ldr     r11,[sp,#32*15+4]       @ !in1intfy
-       ldr     r12,[sp,#32*15+8]       @ !in2intfy
+       ldr     r11,[sp,#32*15+4]       @ ~in1infty
+       ldr     r12,[sp,#32*15+8]       @ ~in2infty
        add     r1,sp,#$res_x
        add     r2,sp,#$in2_x
-       and     r10,r11,r12
+       and     r10,r11,r12             @ ~in1infty & ~in2infty
        mvn     r11,r11
        add     r3,sp,#$in1_x
-       and     r11,r11,r12
-       mvn     r12,r12
+       and     r11,r11,r12             @ in1infty & ~in2infty
+       mvn     r12,r12                 @ in2infty
        ldr     $r_ptr,[sp,#32*15]
 ___
 for($i=0;$i<64;$i+=8) {                        # conditional moves
@@ -1816,11 +1803,11 @@ $code.=<<___;
        ldmia   r1!,{r4-r5}             @ res_x
        ldmia   r2!,{r6-r7}             @ in2_x
        ldmia   r3!,{r8-r9}             @ in1_x
-       and     r4,r4,r10
+       and     r4,r4,r10               @ ~in1infty & ~in2infty
        and     r5,r5,r10
-       and     r6,r6,r11
+       and     r6,r6,r11               @ in1infty & ~in2infty
        and     r7,r7,r11
-       and     r8,r8,r12
+       and     r8,r8,r12               @ in2infty
        and     r9,r9,r12
        orr     r4,r4,r6
        orr     r5,r5,r7
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 2bcf130bb6..3be0c1c026 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -725,7 +725,7 @@ $code.=<<___;
 .align 5
 ecp_nistz256_point_double:
        .inst   0xd503233f              // paciasp
-       stp     x29,x30,[sp,#-80]!
+       stp     x29,x30,[sp,#-96]!
        add     x29,sp,#0
        stp     x19,x20,[sp,#16]
        stp     x21,x22,[sp,#32]
@@ -858,7 +858,7 @@ ecp_nistz256_point_double:
        add     sp,x29,#0               // destroy frame
        ldp     x19,x20,[x29,#16]
        ldp     x21,x22,[x29,#32]
-       ldp     x29,x30,[sp],#80
+       ldp     x29,x30,[sp],#96
        .inst   0xd50323bf              // autiasp
        ret
 .size  ecp_nistz256_point_double,.-ecp_nistz256_point_double
@@ -875,7 +875,7 @@ my ($res_x,$res_y,$res_z,
 my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
 # above map() describes stack layout with 12 temporary
 # 256-bit vectors on top.
-my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp)=map("x$_",(21..26));
+my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp0,$temp1,$temp2)=map("x$_",(21..28));
 
 $code.=<<___;
 .globl ecp_nistz256_point_add
@@ -883,12 +883,13 @@ $code.=<<___;
 .align 5
 ecp_nistz256_point_add:
        .inst   0xd503233f              // paciasp
-       stp     x29,x30,[sp,#-80]!
+       stp     x29,x30,[sp,#-96]!
        add     x29,sp,#0
        stp     x19,x20,[sp,#16]
        stp     x21,x22,[sp,#32]
        stp     x23,x24,[sp,#48]
        stp     x25,x26,[sp,#64]
+       stp     x27,x28,[sp,#80]
        sub     sp,sp,#32*12
 
        ldp     $a0,$a1,[$bp,#64]       // in2_z
@@ -902,7 +903,7 @@ ecp_nistz256_point_add:
        orr     $t2,$a2,$a3
        orr     $in2infty,$t0,$t2
        cmp     $in2infty,#0
-       csetm   $in2infty,ne            // !in2infty
+       csetm   $in2infty,ne            // ~in2infty
        add     $rp,sp,#$Z2sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
 
@@ -912,7 +913,7 @@ ecp_nistz256_point_add:
        orr     $t2,$a2,$a3
        orr     $in1infty,$t0,$t2
        cmp     $in1infty,#0
-       csetm   $in1infty,ne            // !in1infty
+       csetm   $in1infty,ne            // ~in1infty
        add     $rp,sp,#$Z1sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
 
@@ -953,7 +954,7 @@ ecp_nistz256_point_add:
 
        orr     $acc0,$acc0,$acc1       // see if result is zero
        orr     $acc2,$acc2,$acc3
-       orr     $temp,$acc0,$acc2
+       orr     $temp0,$acc0,$acc2      // ~is_equal(S1,S2)
 
        add     $bp,sp,#$Z2sqr
        add     $rp,sp,#$U1
@@ -974,32 +975,21 @@ ecp_nistz256_point_add:
 
        orr     $acc0,$acc0,$acc1       // see if result is zero
        orr     $acc2,$acc2,$acc3
-       orr     $acc0,$acc0,$acc2
-       tst     $acc0,$acc0
-       b.ne    .Ladd_proceed           // is_equal(U1,U2)?
+       orr     $acc0,$acc0,$acc2       // ~is_equal(U1,U2)
 
-       tst     $in1infty,$in2infty
-       b.eq    .Ladd_proceed           // (in1infty || in2infty)?
+       mvn     $temp1,$in1infty        // -1/0 -> 0/-1
+       mvn     $temp2,$in2infty        // -1/0 -> 0/-1
+       orr     $acc0,$acc0,$temp1
+       orr     $acc0,$acc0,$temp2
+       orr     $acc0,$acc0,$temp0
+       cbnz    $acc0,.Ladd_proceed     // if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
 
-       tst     $temp,$temp
-       b.eq    .Ladd_double            // is_equal(S1,S2)?
-
-       eor     $a0,$a0,$a0
-       eor     $a1,$a1,$a1
-       stp     $a0,$a1,[$rp_real]
-       stp     $a0,$a1,[$rp_real,#16]
-       stp     $a0,$a1,[$rp_real,#32]
-       stp     $a0,$a1,[$rp_real,#48]
-       stp     $a0,$a1,[$rp_real,#64]
-       stp     $a0,$a1,[$rp_real,#80]
-       b       .Ladd_done
-
-.align 4
 .Ladd_double:
        mov     $ap,$ap_real
        mov     $rp,$rp_real
        ldp     x23,x24,[x29,#48]
        ldp     x25,x26,[x29,#64]
+       ldp     x27,x28,[x29,#80]
        add     sp,sp,#32*(12-4)        // difference in stack frames
        b       .Ldouble_shortcut
 
@@ -1084,14 +1074,14 @@ ___
 for($i=0;$i<64;$i+=32) {               # conditional moves
 $code.=<<___;
        ldp     $acc0,$acc1,[$ap_real,#$i]      // in1
-       cmp     $in1infty,#0                    // !$in1intfy, remember?
+       cmp     $in1infty,#0                    // ~$in1intfy, remember?
        ldp     $acc2,$acc3,[$ap_real,#$i+16]
        csel    $t0,$a0,$t0,ne
        csel    $t1,$a1,$t1,ne
        ldp     $a0,$a1,[sp,#$res_x+$i+32]      // res
        csel    $t2,$a2,$t2,ne
        csel    $t3,$a3,$t3,ne
-       cmp     $in2infty,#0                    // !$in2intfy, remember?
+       cmp     $in2infty,#0                    // ~$in2intfy, remember?
        ldp     $a2,$a3,[sp,#$res_x+$i+48]
        csel    $acc0,$t0,$acc0,ne
        csel    $acc1,$t1,$acc1,ne
@@ -1105,13 +1095,13 @@ ___
 }
 $code.=<<___;
        ldp     $acc0,$acc1,[$ap_real,#$i]      // in1
-       cmp     $in1infty,#0                    // !$in1intfy, remember?
+       cmp     $in1infty,#0                    // ~$in1intfy, remember?
        ldp     $acc2,$acc3,[$ap_real,#$i+16]
        csel    $t0,$a0,$t0,ne
        csel    $t1,$a1,$t1,ne
        csel    $t2,$a2,$t2,ne
        csel    $t3,$a3,$t3,ne
-       cmp     $in2infty,#0                    // !$in2intfy, remember?
+       cmp     $in2infty,#0                    // ~$in2intfy, remember?
        csel    $acc0,$t0,$acc0,ne
        csel    $acc1,$t1,$acc1,ne
        csel    $acc2,$t2,$acc2,ne
@@ -1125,7 +1115,8 @@ $code.=<<___;
        ldp     x21,x22,[x29,#32]
        ldp     x23,x24,[x29,#48]
        ldp     x25,x26,[x29,#64]
-       ldp     x29,x30,[sp],#80
+       ldp     x27,x28,[x29,#80]
+       ldp     x29,x30,[sp],#96
        .inst   0xd50323bf              // autiasp
        ret
 .size  ecp_nistz256_point_add,.-ecp_nistz256_point_add
@@ -1169,7 +1160,7 @@ ecp_nistz256_point_add_affine:
        orr     $t2,$a2,$a3
        orr     $in1infty,$t0,$t2
        cmp     $in1infty,#0
-       csetm   $in1infty,ne            // !in1infty
+       csetm   $in1infty,ne            // ~in1infty
 
        ldp     $acc0,$acc1,[$bp]       // in2_x
        ldp     $acc2,$acc3,[$bp,#16]
@@ -1183,7 +1174,7 @@ ecp_nistz256_point_add_affine:
        orr     $t0,$t0,$t2
        orr     $in2infty,$acc0,$t0
        cmp     $in2infty,#0
-       csetm   $in2infty,ne            // !in2infty
+       csetm   $in2infty,ne            // ~in2infty
 
        add     $rp,sp,#$Z1sqr
        bl      __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -1293,14 +1284,14 @@ ___
 for($i=0;$i<64;$i+=32) {               # conditional moves
 $code.=<<___;
        ldp     $acc0,$acc1,[$ap_real,#$i]      // in1
-       cmp     $in1infty,#0                    // !$in1intfy, remember?
+       cmp     $in1infty,#0                    // ~$in1intfy, remember?
        ldp     $acc2,$acc3,[$ap_real,#$i+16]
        csel    $t0,$a0,$t0,ne
        csel    $t1,$a1,$t1,ne
        ldp     $a0,$a1,[sp,#$res_x+$i+32]      // res
        csel    $t2,$a2,$t2,ne
        csel    $t3,$a3,$t3,ne
-       cmp     $in2infty,#0                    // !$in2intfy, remember?
+       cmp     $in2infty,#0                    // ~$in2intfy, remember?
        ldp     $a2,$a3,[sp,#$res_x+$i+48]
        csel    $acc0,$t0,$acc0,ne
        csel    $acc1,$t1,$acc1,ne
@@ -1317,13 +1308,13 @@ ___
 }
 $code.=<<___;
        ldp     $acc0,$acc1,[$ap_real,#$i]      // in1
-       cmp     $in1infty,#0                    // !$in1intfy, remember?
+       cmp     $in1infty,#0                    // ~$in1intfy, remember?
        ldp     $acc2,$acc3,[$ap_real,#$i+16]
        csel    $t0,$a0,$t0,ne
        csel    $t1,$a1,$t1,ne
        csel    $t2,$a2,$t2,ne
        csel    $t3,$a3,$t3,ne
-       cmp     $in2infty,#0                    // !$in2intfy, remember?
+       cmp     $in2infty,#0                    // ~$in2intfy, remember?
        csel    $acc0,$t0,$acc0,ne
        csel    $acc1,$t1,$acc1,ne
        csel    $acc2,$t2,$acc2,ne
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl
index b1b0b6bc47..bf8c4db5a4 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1387,7 +1387,7 @@ for ($i=0;$i<7;$i++) {
 
        # above map() describes stack layout with 18 temporary
        # 256-bit vectors on top, then we take extra words for
-       # !in1infty, !in2infty, result of check for zero and
+       # ~in1infty, ~in2infty, result of check for zero and
        # OPENSSL_ia32cap_P copy. [one unused word for padding]
        &stack_push(8*18+5);
                                                if ($sse2) {
@@ -1418,7 +1418,7 @@ for ($i=0;$i<7;$i++) {
        &sub    ("eax","ebp");
        &or     ("ebp","eax");
        &sar    ("ebp",31);
-       &mov    (&DWP(32*18+4,"esp"),"ebp");    # !in2infty
+       &mov    (&DWP(32*18+4,"esp"),"ebp");    # ~in2infty
 
        &lea    ("edi",&DWP($in1_x,"esp"));
     for($i=0;$i<96;$i+=16) {
@@ -1440,7 +1440,7 @@ for ($i=0;$i<7;$i++) {
        &sub    ("eax","ebp");
        &or     ("ebp","eax");
        &sar    ("ebp",31);
-       &mov    (&DWP(32*18+0,"esp"),"ebp");    # !in1infty
+       &mov    (&DWP(32*18+0,"esp"),"ebp");    # ~in1infty
 
        &mov    ("eax",&DWP(32*18+12,"esp"));   # OPENSSL_ia32cap_P copy
        &lea    ("esi",&DWP($in2_z,"esp"));
@@ -1515,23 +1515,19 @@ for ($i=0;$i<7;$i++) {
        &or     ("eax",&DWP(0,"edi"));
        &or     ("eax",&DWP(4,"edi"));
        &or     ("eax",&DWP(8,"edi"));
-       &or     ("eax",&DWP(12,"edi"));
+       &or     ("eax",&DWP(12,"edi"));         # ~is_equal(U1,U2)
 
-       &data_byte(0x3e);                       # predict taken
-       &jnz    (&label("add_proceed"));        # is_equal(U1,U2)?
-
-       &mov    ("eax",&DWP(32*18+0,"esp"));
-       &and    ("eax",&DWP(32*18+4,"esp"));
-       &mov    ("ebx",&DWP(32*18+8,"esp"));
-       &jz     (&label("add_proceed"));        # (in1infty || in2infty)?
-       &test   ("ebx","ebx");
-       &jz     (&label("add_double"));         # is_equal(S1,S2)?
+       &mov    ("ebx",&DWP(32*18+0,"esp"));    # ~in1infty
+       &not    ("ebx");                        # -1/0 -> 0/-1
+       &or     ("eax","ebx");
+       &mov    ("ebx",&DWP(32*18+4,"esp"));    # ~in2infty
+       &not    ("ebx");                        # -1/0 -> 0/-1
+       &or     ("eax","ebx");
+       &or     ("eax",&DWP(32*18+8,"esp"));    # ~is_equal(S1,S2)
 
-       &mov    ("edi",&wparam(0));
-       &xor    ("eax","eax");
-       &mov    ("ecx",96/4);
-       &data_byte(0xfc,0xf3,0xab);             # cld; stosd
-       &jmp    (&label("add_done"));
+       # if (~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
+       &data_byte(0x3e);                       # predict taken
+       &jnz    (&label("add_proceed"));
 
 &set_label("add_double",16);
        &mov    ("esi",&wparam(1));
@@ -1613,34 +1609,34 @@ for ($i=0;$i<7;$i++) {
        &lea    ("edi",&DWP($res_y,"esp"));
        &call   ("_ecp_nistz256_sub");          # p256_sub(res_y, res_y, S2);
 
-       &mov    ("ebp",&DWP(32*18+0,"esp"));    # !in1infty
-       &mov    ("esi",&DWP(32*18+4,"esp"));    # !in2infty
+       &mov    ("ebp",&DWP(32*18+0,"esp"));    # ~in1infty
+       &mov    ("esi",&DWP(32*18+4,"esp"));    # ~in2infty
        &mov    ("edi",&wparam(0));
        &mov    ("edx","ebp");
        &not    ("ebp");
-       &and    ("edx","esi");
-       &and    ("ebp","esi");
-       &not    ("esi");
+       &and    ("edx","esi");                  # ~in1infty & ~in2infty
+       &and    ("ebp","esi");                  # in1infty & ~in2infty
+       &not    ("esi");                        # in2infty
 
        ########################################
        # conditional moves
     for($i=64;$i<96;$i+=4) {
-       &mov    ("eax","edx");
+       &mov    ("eax","edx");                  # ~in1infty & ~in2infty
        &and    ("eax",&DWP($res_x+$i,"esp"));
-       &mov    ("ebx","ebp");
+       &mov    ("ebx","ebp");                  # in1infty & ~in2infty
        &and    ("ebx",&DWP($in2_x+$i,"esp"));
-       &mov    ("ecx","esi");
+       &mov    ("ecx","esi");                  # in2infty
        &and    ("ecx",&DWP($in1_x+$i,"esp"));
        &or     ("eax","ebx");
        &or     ("eax","ecx");
        &mov    (&DWP($i,"edi"),"eax");
     }
     for($i=0;$i<64;$i+=4) {
-       &mov    ("eax","edx");
+       &mov    ("eax","edx");                  # ~in1infty & ~in2infty
        &and    ("eax",&DWP($res_x+$i,"esp"));
-       &mov    ("ebx","ebp");
+       &mov    ("ebx","ebp");                  # in1infty & ~in2infty
        &and    ("ebx",&DWP($in2_x+$i,"esp"));
-       &mov    ("ecx","esi");
+       &mov    ("ecx","esi");                  # in2infty
        &and    ("ecx",&DWP($in1_x+$i,"esp"));
        &or     ("eax","ebx");
        &or     ("eax","ecx");
@@ -1667,7 +1663,7 @@ for ($i=0;$i<7;$i++) {
 
        # above map() describes stack layout with 15 temporary
        # 256-bit vectors on top, then we take extra words for
-       # !in1infty, !in2infty, and OPENSSL_ia32cap_P copy.
+       # ~in1infty, ~in2infty, and OPENSSL_ia32cap_P copy.
        &stack_push(8*15+3);
                                                if ($sse2) {
        &call   ("_picup_eax");
@@ -1697,7 +1693,7 @@ for ($i=0;$i<7;$i++) {
        &sub    ("eax","ebp");
        &or     ("ebp","eax");
        &sar    ("ebp",31);
-       &mov    (&DWP(32*15+0,"esp"),"ebp");    # !in1infty
+       &mov    (&DWP(32*15+0,"esp"),"ebp");    # ~in1infty
 
        &lea    ("edi",&DWP($in2_x,"esp"));
     for($i=0;$i<64;$i+=16) {
@@ -1723,7 +1719,7 @@ for ($i=0;$i<7;$i++) {
         &lea   ("ebp",&DWP($in1_z,"esp"));
        &sar    ("ebx",31);
         &lea   ("edi",&DWP($Z1sqr,"esp"));
-       &mov    (&DWP(32*15+4,"esp"),"ebx");    # !in2infty
+       &mov    (&DWP(32*15+4,"esp"),"ebx");    # ~in2infty
 
        &call   ("_ecp_nistz256_mul_mont");     # p256_sqr_mont(Z1sqr, in1_z);
 
@@ -1822,14 +1818,14 @@ for ($i=0;$i<7;$i++) {
        &lea    ("edi",&DWP($res_y,"esp"));
        &call   ("_ecp_nistz256_sub");          # p256_sub(res_y, res_y, S2);
 
-       &mov    ("ebp",&DWP(32*15+0,"esp"));    # !in1infty
-       &mov    ("esi",&DWP(32*15+4,"esp"));    # !in2infty
+       &mov    ("ebp",&DWP(32*15+0,"esp"));    # ~in1infty
+       &mov    ("esi",&DWP(32*15+4,"esp"));    # ~in2infty
        &mov    ("edi",&wparam(0));
        &mov    ("edx","ebp");
        &not    ("ebp");
-       &and    ("edx","esi");
-       &and    ("ebp","esi");
-       &not    ("esi");
+       &and    ("edx","esi");                  # ~in1infty & ~in2infty
+       &and    ("ebp","esi");                  # in1infty & ~in2infty
+       &not    ("esi");                        # in2infty
 
        ########################################
        # conditional moves
@@ -1847,11 +1843,11 @@ for ($i=0;$i<7;$i++) {
        &mov    (&DWP($i,"edi"),"eax");
     }
     for($i=0;$i<64;$i+=4) {
-       &mov    ("eax","edx");
+       &mov    ("eax","edx");                  # ~in1infty & ~in2infty
        &and    ("eax",&DWP($res_x+$i,"esp"));
-       &mov    ("ebx","ebp");
+       &mov    ("ebx","ebp");                  # in1infty & ~in2infty
        &and    ("ebx",&DWP($in2_x+$i,"esp"));
-       &mov    ("ecx","esi");
+       &mov    ("ecx","esi");                  # in2infty
        &and    ("ecx",&DWP($in1_x+$i,"esp"));
        &or     ("eax","ebx");
        &or     ("eax","ecx");
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 97ff6fb659..787cbc7f53 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -3627,29 +3627,19 @@ $code.=<<___;
        call    __ecp_nistz256_sub_from$x       # p256_sub(H, U2, U1);
 
        or      $acc5, $acc4                    # see if result is zero
+       or      $acc0, $acc4
+       or      $acc1, $acc4                    # !is_equal(U1, U2)
+
+       movq    %xmm2, $acc0                    # in1infty | in2infty
+       movq    %xmm3, $acc1                    # !is_equal(S1, S2)
+
        or      $acc0, $acc4
        or      $acc1, $acc4
 
+       # if (!is_equal(U1, U2) | in1infty | in2infty | !is_equal(S1, S2))
        .byte   0x3e                            # predict taken
-       jnz     .Ladd_proceed$x                 # is_equal(U1,U2)?
-       movq    %xmm2, $acc0
-       movq    %xmm3, $acc1
-       test    $acc0, $acc0
-       jnz     .Ladd_proceed$x                 # (in1infty || in2infty)?
-       test    $acc1, $acc1
-       jz      .Ladd_double$x                  # is_equal(S1,S2)?
+       jnz     .Ladd_proceed$x
 
-       movq    %xmm0, $r_ptr                   # restore $r_ptr
-       pxor    %xmm0, %xmm0
-       movdqu  %xmm0, 0x00($r_ptr)
-       movdqu  %xmm0, 0x10($r_ptr)
-       movdqu  %xmm0, 0x20($r_ptr)
-       movdqu  %xmm0, 0x30($r_ptr)
-       movdqu  %xmm0, 0x40($r_ptr)
-       movdqu  %xmm0, 0x50($r_ptr)
-       jmp     .Ladd_done$x
-
-.align 32
 .Ladd_double$x:
        movq    %xmm1, $a_ptr                   # restore $a_ptr
        movq    %xmm0, $r_ptr                   # restore $r_ptr
diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c
index a726f8e249..6777d32244 100644
--- a/crypto/ec/ecp_nistp224.c
+++ b/crypto/ec/ecp_nistp224.c
@@ -912,6 +912,7 @@ static void point_add(felem x3, felem y3, felem z3,
     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
     widefelem tmp, tmp2;
     limb z1_is_zero, z2_is_zero, x_equal, y_equal;
+    limb points_equal;
 
     if (!mixed) {
         /* ftmp2 = z2^2 */
@@ -968,15 +969,41 @@ static void point_add(felem x3, felem y3, felem z3,
     felem_reduce(ftmp, tmp);
 
     /*
-     * the formulae are incorrect if the points are equal so we check for
-     * this and do doubling if this happens
+     * The formulae are incorrect if the points are equal, in affine coordinates
+     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+     * happens.
+     *
+     * We use bitwise operations to avoid potential side-channels introduced by
+     * the short-circuiting behaviour of boolean operators.
      */
     x_equal = felem_is_zero(ftmp);
     y_equal = felem_is_zero(ftmp3);
+    /*
+     * The special case of either point being the point at infinity (z1 and/or
+     * z2 are zero), is handled separately later on in this function, so we
+     * avoid jumping to point_double here in those special cases.
+     */
     z1_is_zero = felem_is_zero(z1);
     z2_is_zero = felem_is_zero(z2);
-    /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
-    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+
+    /*
+     * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
+     * specific implementation `felem_is_zero()` returns truth as `0x1`
+     * (rather than `0xff..ff`).
+     *
+     * This implies that `~true` in this implementation becomes
+     * `0xff..fe` (rather than `0x0`): for this reason, to be used in
+     * the if expression, we mask out only the last bit in the next
+     * line.
+     */
+    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
+
+    if (points_equal) {
+        /*
+         * This is obviously not constant-time but, as mentioned before, this
+         * case never happens during single point multiplication, so there is no
+         * timing leak for ECDH or ECDSA signing.
+         */
         point_double(x3, y3, z3, x1, y1, z1);
         return;
     }
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index 4cbac522cd..954263c960 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -1241,6 +1241,7 @@ static void point_add(felem x3, felem y3, felem z3,
     longfelem tmp, tmp2;
     smallfelem small1, small2, small3, small4, small5;
     limb x_equal, y_equal, z1_is_zero, z2_is_zero;
+    limb points_equal;
 
     felem_shrink(small3, z1);
 
@@ -1340,7 +1341,26 @@ static void point_add(felem x3, felem y3, felem z3,
     felem_shrink(small1, ftmp5);
     y_equal = smallfelem_is_zero(small1);
 
-    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+    /*
+     * The formulae are incorrect if the points are equal, in affine coordinates
+     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+     * happens.
+     *
+     * We use bitwise operations to avoid potential side-channels introduced by
+     * the short-circuiting behaviour of boolean operators.
+     *
+     * The special case of either point being the point at infinity (z1 and/or
+     * z2 are zero), is handled separately later on in this function, so we
+     * avoid jumping to point_double here in those special cases.
+     */
+    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
+
+    if (points_equal) {
+        /*
+         * This is obviously not constant-time but, as mentioned before, this
+         * case never happens during single point multiplication, so there is no
+         * timing leak for ECDH or ECDSA signing.
+         */
         point_double(x3, y3, z3, x1, y1, z1);
         return;
     }
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
index 6b5dc8eb99..78a98c7187 100644
--- a/crypto/ec/ecp_nistp521.c
+++ b/crypto/ec/ecp_nistp521.c
@@ -1158,6 +1158,7 @@ static void point_add(felem x3, felem y3, felem z3,
     felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
     largefelem tmp, tmp2;
     limb x_equal, y_equal, z1_is_zero, z2_is_zero;
+    limb points_equal;
 
     z1_is_zero = felem_is_zero(z1);
     z2_is_zero = felem_is_zero(z2);
@@ -1242,7 +1243,24 @@ static void point_add(felem x3, felem y3, felem z3,
     felem_scalar64(ftmp5, 2);
     /* ftmp5[i] < 2^61 */
 
-    if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+    /*
+     * The formulae are incorrect if the points are equal, in affine coordinates
+     * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+     * happens.
+     *
+     * We use bitwise operations to avoid potential side-channels introduced by
+     * the short-circuiting behaviour of boolean operators.
+     *
+     * The special case of either point being the point at infinity (z1 and/or
+     * z2 are zero), is handled separately later on in this function, so we
+     * avoid jumping to point_double here in those special cases.
+     *
+     * Notice the comment below on the implications of this branching for timing
+     * leaks and why it is considered practically irrelevant.
+     */
+    points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
+
+    if (points_equal) {
         /*
          * This is obviously not constant-time but it will almost-never happen
          * for ECDH / ECDSA. The case where it can happen is during scalar-mult
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 603557ced5..1609c4bbf7 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -358,16 +358,47 @@ static void ecp_nistz256_point_add(P256_POINT *r,
     ecp_nistz256_sub(H, U2, U1);                /* H = U2 - U1 */
 
     /*
-     * This should not happen during sign/ecdh, so no constant time violation
+     * The formulae are incorrect if the points are equal so we check for
+     * this and do doubling if this happens.
+     *
+     * Points here are in Jacobian projective coordinates (Xi, Yi, Zi)
+     * that are bound to the affine coordinates (xi, yi) by the following
+     * equations:
+     *     - xi = Xi / (Zi)^2
+     *     - yi = Yi / (Zi)^3
+     *
+     * For the sake of optimization, the algorithm operates over
+     * intermediate variables U1, U2 and S1, S2 that are derived from
+     * the projective coordinates:
+     *     - U1 = X1 * (Z2)^2 ; U2 = X2 * (Z1)^2
+     *     - S1 = Y1 * (Z2)^3 ; S2 = Y2 * (Z1)^3
+     *
+     * It is easy to prove that is_equal(U1, U2) implies that the affine
+     * x-coordinates are equal, or either point is at infinity.
+     * Likewise is_equal(S1, S2) implies that the affine y-coordinates are
+     * equal, or either point is at infinity.
+     *
+     * The special case of either point being the point at infinity (Z1 or Z2
+     * is zero), is handled separately later on in this function, so we avoid
+     * jumping to point_double here in those special cases.
+     *
+     * When both points are inverse of each other, we know that the affine
+     * x-coordinates are equal, and the y-coordinates have different sign.
+     * Therefore since U1 = U2, we know H = 0, and therefore Z3 = H*Z1*Z2
+     * will equal 0, thus the result is infinity, if we simply let this
+     * function continue normally.
+     *
+     * We use bitwise operations to avoid potential side-channels introduced by
+     * the short-circuiting behaviour of boolean operators.
      */
-    if (is_equal(U1, U2) && !in1infty && !in2infty) {
-        if (is_equal(S1, S2)) {
-            ecp_nistz256_point_double(r, a);
-            return;
-        } else {
-            memset(r, 0, sizeof(*r));
-            return;
-        }
+    if (is_equal(U1, U2) & ~in1infty & ~in2infty & is_equal(S1, S2)) {
+        /*
+         * This is obviously not constant-time but it should never happen during
+         * single point multiplication, so there is no timing leak for ECDH or
+         * ECDSA signing.
+         */
+        ecp_nistz256_point_double(r, a);
+        return;
     }
 
     ecp_nistz256_sqr_mont(Rsqr, R);             /* R^2 */

Reply via email to