The branch master has been updated
via 3b5a5f995e023593bf3e576f3043107378456bb9 (commit)
via 45a405382bd99187bc90399e61bf33a720e27610 (commit)
via 5578ad1f0b9e92f9c4637d254270bf25f5a19723 (commit)
via 1c5bc7b85abf831beb58375ab2652cdcdf31a0fc (commit)
via 1645df672f2f8bf01000ace4fa1897927375b0d0 (commit)
via 90b797f00e825c45739a8c2cf0364eb8bf5513c0 (commit)
via 0164bf812f8d4781be383b829d29b698cc8eb8d7 (commit)
from 75e571b59298c868763508d60027e4e666dee1c1 (commit)
- Log -----------------------------------------------------------------
commit 3b5a5f995e023593bf3e576f3043107378456bb9
Author: Fangming.Fang <[email protected]>
Date: Mon Dec 30 12:15:37 2019 +0000
Fix side channel in ecp_nistz256-armv8.pl
This change addresses a potential side-channel vulnerability in
the internals of nistz256 low level operations for armv8.
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
Reviewed-by: Bernd Edlinger <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 45a405382bd99187bc90399e61bf33a720e27610
Author: Bernd Edlinger <[email protected]>
Date: Thu Aug 29 22:45:18 2019 +0200
Fix side channel in the ecp_nistz256.c reference implementation
This is only used if configured with
./config -DECP_NISTZ256_REFERENCE_IMPLEMENTATION
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 5578ad1f0b9e92f9c4637d254270bf25f5a19723
Author: Bernd Edlinger <[email protected]>
Date: Sun Aug 25 03:47:01 2019 +0200
Improve side channel fix in ecp_nistz256-x86_64.pl
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 1c5bc7b85abf831beb58375ab2652cdcdf31a0fc
Author: Bernd Edlinger <[email protected]>
Date: Sun Aug 25 03:45:31 2019 +0200
Fix side channel in ecp_nistz256-armv4.pl
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 1645df672f2f8bf01000ace4fa1897927375b0d0
Author: Bernd Edlinger <[email protected]>
Date: Sun Aug 25 03:03:55 2019 +0200
Fix side channel in ecp_nistz256-x86.pl
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 90b797f00e825c45739a8c2cf0364eb8bf5513c0
Author: David Benjamin <[email protected]>
Date: Fri Jun 14 17:06:52 2019 -0400
Avoid leaking intermediate states in point doubling special case.
Cherry picked from
https://github.com/google/boringssl/commit/12d9ed670da3edd64ce8175cfe0e091982989c18
Reviewed-by: Nicola Tuveri <[email protected]>
Reviewed-by: Matt Caswell <[email protected]>
Reviewed-by: Bernd Edlinger <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
commit 0164bf812f8d4781be383b829d29b698cc8eb8d7
Author: Nicola Tuveri <[email protected]>
Date: Sat Jun 8 12:48:47 2019 +0300
Fix potential SCA vulnerability in some EC_METHODs
This commit addresses a potential side-channel vulnerability in the
internals of some elliptic curve low level operations.
The side-channel leakage appears to be tiny, so the severity of this
issue is rather low.
The issue was reported by David Schrammel and Samuel Weiser.
Reviewed-by: Matt Caswell <[email protected]>
Reviewed-by: Bernd Edlinger <[email protected]>
(Merged from https://github.com/openssl/openssl/pull/9239)
-----------------------------------------------------------------------
Summary of changes:
crypto/ec/asm/ecp_nistz256-armv4.pl | 79 +++++++++++++++---------------------
crypto/ec/asm/ecp_nistz256-armv8.pl | 65 +++++++++++++----------------
crypto/ec/asm/ecp_nistz256-x86.pl | 76 ++++++++++++++++------------------
crypto/ec/asm/ecp_nistz256-x86_64.pl | 26 ++++--------
crypto/ec/ecp_nistp224.c | 35 ++++++++++++++--
crypto/ec/ecp_nistp256.c | 22 +++++++++-
crypto/ec/ecp_nistp521.c | 20 ++++++++-
crypto/ec/ecp_nistz256.c | 49 ++++++++++++++++++----
8 files changed, 216 insertions(+), 156 deletions(-)
diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl
b/crypto/ec/asm/ecp_nistz256-armv4.pl
index 4e6bfef4b5..4809360ac4 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1398,7 +1398,7 @@ my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# 256-bit vectors on top. Then note that we push
# starting from r0, which means that we have copy of
# input arguments just below these temporary vectors.
-# We use three of them for !in1infty, !in2intfy and
+# We use three of them for ~in1infty, ~in2infty and
# result of check for zero.
$code.=<<___;
@@ -1428,7 +1428,7 @@ ecp_nistz256_point_add:
#endif
movne r12,#-1
stmia r3,{r4-r11}
- str r12,[sp,#32*18+8] @ !in2infty
+ str r12,[sp,#32*18+8] @ ~in2infty
ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
@@ -1449,7 +1449,7 @@ ecp_nistz256_point_add:
#endif
movne r12,#-1
stmia r3,{r4-r11}
- str r12,[sp,#32*18+4] @ !in1infty
+ str r12,[sp,#32*18+4] @ ~in1infty
add $a_ptr,sp,#$in2_z
add $b_ptr,sp,#$in2_z
@@ -1514,33 +1514,20 @@ ecp_nistz256_point_add:
orr $a0,$a0,$a2
orr $a4,$a4,$a6
orr $a0,$a0,$a7
- orrs $a0,$a0,$a4
+ orr $a0,$a0,$a4 @ ~is_equal(U1,U2)
- bne .Ladd_proceed @ is_equal(U1,U2)?
+ ldr $t0,[sp,#32*18+4] @ ~in1infty
+ ldr $t1,[sp,#32*18+8] @ ~in2infty
+ ldr $t2,[sp,#32*18+12] @ ~is_equal(S1,S2)
+ mvn $t0,$t0 @ -1/0 -> 0/-1
+ mvn $t1,$t1 @ -1/0 -> 0/-1
+ orr $a0,$t0
+ orr $a0,$t1
+ orrs $a0,$t2 @ set flags
- ldr $t0,[sp,#32*18+4]
- ldr $t1,[sp,#32*18+8]
- ldr $t2,[sp,#32*18+12]
- tst $t0,$t1
- beq .Ladd_proceed @ (in1infty || in2infty)?
- tst $t2,$t2
- beq .Ladd_double @ is_equal(S1,S2)?
+ @ if(~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
+ bne .Ladd_proceed
- ldr $r_ptr,[sp,#32*18+16]
- eor r4,r4,r4
- eor r5,r5,r5
- eor r6,r6,r6
- eor r7,r7,r7
- eor r8,r8,r8
- eor r9,r9,r9
- eor r10,r10,r10
- eor r11,r11,r11
- stmia $r_ptr!,{r4-r11}
- stmia $r_ptr!,{r4-r11}
- stmia $r_ptr!,{r4-r11}
- b .Ladd_done
-
-.align 4
.Ladd_double:
ldr $a_ptr,[sp,#32*18+20]
add sp,sp,#32*(18-5)+16 @ difference in frame sizes
@@ -1605,15 +1592,15 @@ ecp_nistz256_point_add:
add $b_ptr,sp,#$S2
bl __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
- ldr r11,[sp,#32*18+4] @ !in1intfy
- ldr r12,[sp,#32*18+8] @ !in2intfy
+ ldr r11,[sp,#32*18+4] @ ~in1infty
+ ldr r12,[sp,#32*18+8] @ ~in2infty
add r1,sp,#$res_x
add r2,sp,#$in2_x
- and r10,r11,r12
+ and r10,r11,r12 @ ~in1infty & ~in2infty
mvn r11,r11
add r3,sp,#$in1_x
- and r11,r11,r12
- mvn r12,r12
+ and r11,r11,r12 @ in1infty & ~in2infty
+ mvn r12,r12 @ in2infty
ldr $r_ptr,[sp,#32*18+16]
___
for($i=0;$i<96;$i+=8) { # conditional moves
@@ -1621,11 +1608,11 @@ $code.=<<___;
ldmia r1!,{r4-r5} @ res_x
ldmia r2!,{r6-r7} @ in2_x
ldmia r3!,{r8-r9} @ in1_x
- and r4,r4,r10
+ and r4,r4,r10 @ ~in1infty & ~in2infty
and r5,r5,r10
- and r6,r6,r11
+ and r6,r6,r11 @ in1infty & ~in2infty
and r7,r7,r11
- and r8,r8,r12
+ and r8,r8,r12 @ in2infty
and r9,r9,r12
orr r4,r4,r6
orr r5,r5,r7
@@ -1660,7 +1647,7 @@ my $Z1sqr = $S2;
# 256-bit vectors on top. Then note that we push
# starting from r0, which means that we have copy of
# input arguments just below these temporary vectors.
-# We use two of them for !in1infty, !in2intfy.
+# We use two of them for ~in1infty, ~in2infty.
my @ONE_mont=(1,0,0,-1,-1,-1,-2,0);
@@ -1691,7 +1678,7 @@ ecp_nistz256_point_add_affine:
#endif
movne r12,#-1
stmia r3,{r4-r11}
- str r12,[sp,#32*15+4] @ !in1infty
+ str r12,[sp,#32*15+4] @ ~in1infty
ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
@@ -1718,7 +1705,7 @@ ecp_nistz256_point_add_affine:
it ne
#endif
movne r12,#-1
- str r12,[sp,#32*15+8] @ !in2infty
+ str r12,[sp,#32*15+8] @ ~in2infty
add $a_ptr,sp,#$in1_z
add $b_ptr,sp,#$in1_z
@@ -1800,15 +1787,15 @@ ecp_nistz256_point_add_affine:
add $b_ptr,sp,#$S2
bl __ecp_nistz256_sub_from @ p256_sub(res_y, res_y, S2);
- ldr r11,[sp,#32*15+4] @ !in1intfy
- ldr r12,[sp,#32*15+8] @ !in2intfy
+ ldr r11,[sp,#32*15+4] @ ~in1infty
+ ldr r12,[sp,#32*15+8] @ ~in2infty
add r1,sp,#$res_x
add r2,sp,#$in2_x
- and r10,r11,r12
+ and r10,r11,r12 @ ~in1infty & ~in2infty
mvn r11,r11
add r3,sp,#$in1_x
- and r11,r11,r12
- mvn r12,r12
+ and r11,r11,r12 @ in1infty & ~in2infty
+ mvn r12,r12 @ in2infty
ldr $r_ptr,[sp,#32*15]
___
for($i=0;$i<64;$i+=8) { # conditional moves
@@ -1816,11 +1803,11 @@ $code.=<<___;
ldmia r1!,{r4-r5} @ res_x
ldmia r2!,{r6-r7} @ in2_x
ldmia r3!,{r8-r9} @ in1_x
- and r4,r4,r10
+ and r4,r4,r10 @ ~in1infty & ~in2infty
and r5,r5,r10
- and r6,r6,r11
+ and r6,r6,r11 @ in1infty & ~in2infty
and r7,r7,r11
- and r8,r8,r12
+ and r8,r8,r12 @ in2infty
and r9,r9,r12
orr r4,r4,r6
orr r5,r5,r7
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl
b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 2bcf130bb6..3be0c1c026 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -725,7 +725,7 @@ $code.=<<___;
.align 5
ecp_nistz256_point_double:
.inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
+ stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
@@ -858,7 +858,7 @@ ecp_nistz256_point_double:
add sp,x29,#0 // destroy frame
ldp x19,x20,[x29,#16]
ldp x21,x22,[x29,#32]
- ldp x29,x30,[sp],#80
+ ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
@@ -875,7 +875,7 @@ my ($res_x,$res_y,$res_z,
my ($Z1sqr, $Z2sqr) = ($Hsqr, $Rsqr);
# above map() describes stack layout with 12 temporary
# 256-bit vectors on top.
-my ($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp)=map("x$_",(21..26));
+my
($rp_real,$ap_real,$bp_real,$in1infty,$in2infty,$temp0,$temp1,$temp2)=map("x$_",(21..28));
$code.=<<___;
.globl ecp_nistz256_point_add
@@ -883,12 +883,13 @@ $code.=<<___;
.align 5
ecp_nistz256_point_add:
.inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
+ stp x29,x30,[sp,#-96]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
stp x23,x24,[sp,#48]
stp x25,x26,[sp,#64]
+ stp x27,x28,[sp,#80]
sub sp,sp,#32*12
ldp $a0,$a1,[$bp,#64] // in2_z
@@ -902,7 +903,7 @@ ecp_nistz256_point_add:
orr $t2,$a2,$a3
orr $in2infty,$t0,$t2
cmp $in2infty,#0
- csetm $in2infty,ne // !in2infty
+ csetm $in2infty,ne // ~in2infty
add $rp,sp,#$Z2sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
@@ -912,7 +913,7 @@ ecp_nistz256_point_add:
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
- csetm $in1infty,ne // !in1infty
+ csetm $in1infty,ne // ~in1infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -953,7 +954,7 @@ ecp_nistz256_point_add:
orr $acc0,$acc0,$acc1 // see if result is zero
orr $acc2,$acc2,$acc3
- orr $temp,$acc0,$acc2
+ orr $temp0,$acc0,$acc2 // ~is_equal(S1,S2)
add $bp,sp,#$Z2sqr
add $rp,sp,#$U1
@@ -974,32 +975,21 @@ ecp_nistz256_point_add:
orr $acc0,$acc0,$acc1 // see if result is zero
orr $acc2,$acc2,$acc3
- orr $acc0,$acc0,$acc2
- tst $acc0,$acc0
- b.ne .Ladd_proceed // is_equal(U1,U2)?
+ orr $acc0,$acc0,$acc2 // ~is_equal(U1,U2)
- tst $in1infty,$in2infty
- b.eq .Ladd_proceed // (in1infty || in2infty)?
+ mvn $temp1,$in1infty // -1/0 -> 0/-1
+ mvn $temp2,$in2infty // -1/0 -> 0/-1
+ orr $acc0,$acc0,$temp1
+ orr $acc0,$acc0,$temp2
+ orr $acc0,$acc0,$temp0
+ cbnz $acc0,.Ladd_proceed // if(~is_equal(U1,U2) | in1infty |
in2infty | ~is_equal(S1,S2))
- tst $temp,$temp
- b.eq .Ladd_double // is_equal(S1,S2)?
-
- eor $a0,$a0,$a0
- eor $a1,$a1,$a1
- stp $a0,$a1,[$rp_real]
- stp $a0,$a1,[$rp_real,#16]
- stp $a0,$a1,[$rp_real,#32]
- stp $a0,$a1,[$rp_real,#48]
- stp $a0,$a1,[$rp_real,#64]
- stp $a0,$a1,[$rp_real,#80]
- b .Ladd_done
-
-.align 4
.Ladd_double:
mov $ap,$ap_real
mov $rp,$rp_real
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
+ ldp x27,x28,[x29,#80]
add sp,sp,#32*(12-4) // difference in stack frames
b .Ldouble_shortcut
@@ -1084,14 +1074,14 @@ ___
for($i=0;$i<64;$i+=32) { # conditional moves
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
- cmp $in1infty,#0 // !$in1intfy, remember?
+ cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
ldp $a0,$a1,[sp,#$res_x+$i+32] // res
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
- cmp $in2infty,#0 // !$in2intfy, remember?
+ cmp $in2infty,#0 // ~$in2intfy, remember?
ldp $a2,$a3,[sp,#$res_x+$i+48]
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
@@ -1105,13 +1095,13 @@ ___
}
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
- cmp $in1infty,#0 // !$in1intfy, remember?
+ cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
- cmp $in2infty,#0 // !$in2intfy, remember?
+ cmp $in2infty,#0 // ~$in2intfy, remember?
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
csel $acc2,$t2,$acc2,ne
@@ -1125,7 +1115,8 @@ $code.=<<___;
ldp x21,x22,[x29,#32]
ldp x23,x24,[x29,#48]
ldp x25,x26,[x29,#64]
- ldp x29,x30,[sp],#80
+ ldp x27,x28,[x29,#80]
+ ldp x29,x30,[sp],#96
.inst 0xd50323bf // autiasp
ret
.size ecp_nistz256_point_add,.-ecp_nistz256_point_add
@@ -1169,7 +1160,7 @@ ecp_nistz256_point_add_affine:
orr $t2,$a2,$a3
orr $in1infty,$t0,$t2
cmp $in1infty,#0
- csetm $in1infty,ne // !in1infty
+ csetm $in1infty,ne // ~in1infty
ldp $acc0,$acc1,[$bp] // in2_x
ldp $acc2,$acc3,[$bp,#16]
@@ -1183,7 +1174,7 @@ ecp_nistz256_point_add_affine:
orr $t0,$t0,$t2
orr $in2infty,$acc0,$t0
cmp $in2infty,#0
- csetm $in2infty,ne // !in2infty
+ csetm $in2infty,ne // ~in2infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -1293,14 +1284,14 @@ ___
for($i=0;$i<64;$i+=32) { # conditional moves
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
- cmp $in1infty,#0 // !$in1intfy, remember?
+ cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
ldp $a0,$a1,[sp,#$res_x+$i+32] // res
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
- cmp $in2infty,#0 // !$in2intfy, remember?
+ cmp $in2infty,#0 // ~$in2intfy, remember?
ldp $a2,$a3,[sp,#$res_x+$i+48]
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
@@ -1317,13 +1308,13 @@ ___
}
$code.=<<___;
ldp $acc0,$acc1,[$ap_real,#$i] // in1
- cmp $in1infty,#0 // !$in1intfy, remember?
+ cmp $in1infty,#0 // ~$in1intfy, remember?
ldp $acc2,$acc3,[$ap_real,#$i+16]
csel $t0,$a0,$t0,ne
csel $t1,$a1,$t1,ne
csel $t2,$a2,$t2,ne
csel $t3,$a3,$t3,ne
- cmp $in2infty,#0 // !$in2intfy, remember?
+ cmp $in2infty,#0 // ~$in2intfy, remember?
csel $acc0,$t0,$acc0,ne
csel $acc1,$t1,$acc1,ne
csel $acc2,$t2,$acc2,ne
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl
b/crypto/ec/asm/ecp_nistz256-x86.pl
index b1b0b6bc47..bf8c4db5a4 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1387,7 +1387,7 @@ for ($i=0;$i<7;$i++) {
# above map() describes stack layout with 18 temporary
# 256-bit vectors on top, then we take extra words for
- # !in1infty, !in2infty, result of check for zero and
+ # ~in1infty, ~in2infty, result of check for zero and
# OPENSSL_ia32cap_P copy. [one unused word for padding]
&stack_push(8*18+5);
if ($sse2) {
@@ -1418,7 +1418,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
- &mov (&DWP(32*18+4,"esp"),"ebp"); # !in2infty
+ &mov (&DWP(32*18+4,"esp"),"ebp"); # ~in2infty
&lea ("edi",&DWP($in1_x,"esp"));
for($i=0;$i<96;$i+=16) {
@@ -1440,7 +1440,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
- &mov (&DWP(32*18+0,"esp"),"ebp"); # !in1infty
+ &mov (&DWP(32*18+0,"esp"),"ebp"); # ~in1infty
&mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
&lea ("esi",&DWP($in2_z,"esp"));
@@ -1515,23 +1515,19 @@ for ($i=0;$i<7;$i++) {
&or ("eax",&DWP(0,"edi"));
&or ("eax",&DWP(4,"edi"));
&or ("eax",&DWP(8,"edi"));
- &or ("eax",&DWP(12,"edi"));
+ &or ("eax",&DWP(12,"edi")); # ~is_equal(U1,U2)
- &data_byte(0x3e); # predict taken
- &jnz (&label("add_proceed")); # is_equal(U1,U2)?
-
- &mov ("eax",&DWP(32*18+0,"esp"));
- &and ("eax",&DWP(32*18+4,"esp"));
- &mov ("ebx",&DWP(32*18+8,"esp"));
- &jz (&label("add_proceed")); # (in1infty || in2infty)?
- &test ("ebx","ebx");
- &jz (&label("add_double")); # is_equal(S1,S2)?
+ &mov ("ebx",&DWP(32*18+0,"esp")); # ~in1infty
+ ¬ ("ebx"); # -1/0 -> 0/-1
+ &or ("eax","ebx");
+ &mov ("ebx",&DWP(32*18+4,"esp")); # ~in2infty
+ ¬ ("ebx"); # -1/0 -> 0/-1
+ &or ("eax","ebx");
+ &or ("eax",&DWP(32*18+8,"esp")); # ~is_equal(S1,S2)
- &mov ("edi",&wparam(0));
- &xor ("eax","eax");
- &mov ("ecx",96/4);
- &data_byte(0xfc,0xf3,0xab); # cld; stosd
- &jmp (&label("add_done"));
+ # if (~is_equal(U1,U2) | in1infty | in2infty | ~is_equal(S1,S2))
+ &data_byte(0x3e); # predict taken
+ &jnz (&label("add_proceed"));
&set_label("add_double",16);
&mov ("esi",&wparam(1));
@@ -1613,34 +1609,34 @@ for ($i=0;$i<7;$i++) {
&lea ("edi",&DWP($res_y,"esp"));
&call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2);
- &mov ("ebp",&DWP(32*18+0,"esp")); # !in1infty
- &mov ("esi",&DWP(32*18+4,"esp")); # !in2infty
+ &mov ("ebp",&DWP(32*18+0,"esp")); # ~in1infty
+ &mov ("esi",&DWP(32*18+4,"esp")); # ~in2infty
&mov ("edi",&wparam(0));
&mov ("edx","ebp");
¬ ("ebp");
- &and ("edx","esi");
- &and ("ebp","esi");
- ¬ ("esi");
+ &and ("edx","esi"); # ~in1infty & ~in2infty
+ &and ("ebp","esi"); # in1infty & ~in2infty
+ ¬ ("esi"); # in2infty
########################################
# conditional moves
for($i=64;$i<96;$i+=4) {
- &mov ("eax","edx");
+ &mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
- &mov ("ebx","ebp");
+ &mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
- &mov ("ecx","esi");
+ &mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
&mov (&DWP($i,"edi"),"eax");
}
for($i=0;$i<64;$i+=4) {
- &mov ("eax","edx");
+ &mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
- &mov ("ebx","ebp");
+ &mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
- &mov ("ecx","esi");
+ &mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
@@ -1667,7 +1663,7 @@ for ($i=0;$i<7;$i++) {
# above map() describes stack layout with 15 temporary
# 256-bit vectors on top, then we take extra words for
- # !in1infty, !in2infty, and OPENSSL_ia32cap_P copy.
+ # ~in1infty, ~in2infty, and OPENSSL_ia32cap_P copy.
&stack_push(8*15+3);
if ($sse2) {
&call ("_picup_eax");
@@ -1697,7 +1693,7 @@ for ($i=0;$i<7;$i++) {
&sub ("eax","ebp");
&or ("ebp","eax");
&sar ("ebp",31);
- &mov (&DWP(32*15+0,"esp"),"ebp"); # !in1infty
+ &mov (&DWP(32*15+0,"esp"),"ebp"); # ~in1infty
&lea ("edi",&DWP($in2_x,"esp"));
for($i=0;$i<64;$i+=16) {
@@ -1723,7 +1719,7 @@ for ($i=0;$i<7;$i++) {
&lea ("ebp",&DWP($in1_z,"esp"));
&sar ("ebx",31);
&lea ("edi",&DWP($Z1sqr,"esp"));
- &mov (&DWP(32*15+4,"esp"),"ebx"); # !in2infty
+ &mov (&DWP(32*15+4,"esp"),"ebx"); # ~in2infty
&call ("_ecp_nistz256_mul_mont"); # p256_sqr_mont(Z1sqr, in1_z);
@@ -1822,14 +1818,14 @@ for ($i=0;$i<7;$i++) {
&lea ("edi",&DWP($res_y,"esp"));
&call ("_ecp_nistz256_sub"); # p256_sub(res_y, res_y, S2);
- &mov ("ebp",&DWP(32*15+0,"esp")); # !in1infty
- &mov ("esi",&DWP(32*15+4,"esp")); # !in2infty
+ &mov ("ebp",&DWP(32*15+0,"esp")); # ~in1infty
+ &mov ("esi",&DWP(32*15+4,"esp")); # ~in2infty
&mov ("edi",&wparam(0));
&mov ("edx","ebp");
¬ ("ebp");
- &and ("edx","esi");
- &and ("ebp","esi");
- ¬ ("esi");
+ &and ("edx","esi"); # ~in1infty & ~in2infty
+ &and ("ebp","esi"); # in1infty & ~in2infty
+ ¬ ("esi"); # in2infty
########################################
# conditional moves
@@ -1847,11 +1843,11 @@ for ($i=0;$i<7;$i++) {
&mov (&DWP($i,"edi"),"eax");
}
for($i=0;$i<64;$i+=4) {
- &mov ("eax","edx");
+ &mov ("eax","edx"); # ~in1infty & ~in2infty
&and ("eax",&DWP($res_x+$i,"esp"));
- &mov ("ebx","ebp");
+ &mov ("ebx","ebp"); # in1infty & ~in2infty
&and ("ebx",&DWP($in2_x+$i,"esp"));
- &mov ("ecx","esi");
+ &mov ("ecx","esi"); # in2infty
&and ("ecx",&DWP($in1_x+$i,"esp"));
&or ("eax","ebx");
&or ("eax","ecx");
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl
b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index 97ff6fb659..787cbc7f53 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -3627,29 +3627,19 @@ $code.=<<___;
call __ecp_nistz256_sub_from$x # p256_sub(H, U2, U1);
or $acc5, $acc4 # see if result is zero
+ or $acc0, $acc4
+ or $acc1, $acc4 # !is_equal(U1, U2)
+
+ movq %xmm2, $acc0 # in1infty | in2infty
+ movq %xmm3, $acc1 # !is_equal(S1, S2)
+
or $acc0, $acc4
or $acc1, $acc4
+ # if (!is_equal(U1, U2) | in1infty | in2infty | !is_equal(S1, S2))
.byte 0x3e # predict taken
- jnz .Ladd_proceed$x # is_equal(U1,U2)?
- movq %xmm2, $acc0
- movq %xmm3, $acc1
- test $acc0, $acc0
- jnz .Ladd_proceed$x # (in1infty || in2infty)?
- test $acc1, $acc1
- jz .Ladd_double$x # is_equal(S1,S2)?
+ jnz .Ladd_proceed$x
- movq %xmm0, $r_ptr # restore $r_ptr
- pxor %xmm0, %xmm0
- movdqu %xmm0, 0x00($r_ptr)
- movdqu %xmm0, 0x10($r_ptr)
- movdqu %xmm0, 0x20($r_ptr)
- movdqu %xmm0, 0x30($r_ptr)
- movdqu %xmm0, 0x40($r_ptr)
- movdqu %xmm0, 0x50($r_ptr)
- jmp .Ladd_done$x
-
-.align 32
.Ladd_double$x:
movq %xmm1, $a_ptr # restore $a_ptr
movq %xmm0, $r_ptr # restore $r_ptr
diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c
index a726f8e249..6777d32244 100644
--- a/crypto/ec/ecp_nistp224.c
+++ b/crypto/ec/ecp_nistp224.c
@@ -912,6 +912,7 @@ static void point_add(felem x3, felem y3, felem z3,
felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, x_out, y_out, z_out;
widefelem tmp, tmp2;
limb z1_is_zero, z2_is_zero, x_equal, y_equal;
+ limb points_equal;
if (!mixed) {
/* ftmp2 = z2^2 */
@@ -968,15 +969,41 @@ static void point_add(felem x3, felem y3, felem z3,
felem_reduce(ftmp, tmp);
/*
- * the formulae are incorrect if the points are equal so we check for
- * this and do doubling if this happens
+ * The formulae are incorrect if the points are equal, in affine
coordinates
+ * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+ * happens.
+ *
+ * We use bitwise operations to avoid potential side-channels introduced by
+ * the short-circuiting behaviour of boolean operators.
*/
x_equal = felem_is_zero(ftmp);
y_equal = felem_is_zero(ftmp3);
+ /*
+ * The special case of either point being the point at infinity (z1 and/or
+ * z2 are zero), is handled separately later on in this function, so we
+ * avoid jumping to point_double here in those special cases.
+ */
z1_is_zero = felem_is_zero(z1);
z2_is_zero = felem_is_zero(z2);
- /* In affine coordinates, (X_1, Y_1) == (X_2, Y_2) */
- if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+
+ /*
+ * Compared to `ecp_nistp256.c` and `ecp_nistp521.c`, in this
+ * specific implementation `felem_is_zero()` returns truth as `0x1`
+ * (rather than `0xff..ff`).
+ *
+ * This implies that `~true` in this implementation becomes
+ * `0xff..fe` (rather than `0x0`): for this reason, to be used in
+ * the if expression, we mask out only the last bit in the next
+ * line.
+ */
+ points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero)) & 1;
+
+ if (points_equal) {
+ /*
+ * This is obviously not constant-time but, as mentioned before, this
+ * case never happens during single point multiplication, so there is
no
+ * timing leak for ECDH or ECDSA signing.
+ */
point_double(x3, y3, z3, x1, y1, z1);
return;
}
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index 4cbac522cd..954263c960 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -1241,6 +1241,7 @@ static void point_add(felem x3, felem y3, felem z3,
longfelem tmp, tmp2;
smallfelem small1, small2, small3, small4, small5;
limb x_equal, y_equal, z1_is_zero, z2_is_zero;
+ limb points_equal;
felem_shrink(small3, z1);
@@ -1340,7 +1341,26 @@ static void point_add(felem x3, felem y3, felem z3,
felem_shrink(small1, ftmp5);
y_equal = smallfelem_is_zero(small1);
- if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+ /*
+ * The formulae are incorrect if the points are equal, in affine
coordinates
+ * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+ * happens.
+ *
+ * We use bitwise operations to avoid potential side-channels introduced by
+ * the short-circuiting behaviour of boolean operators.
+ *
+ * The special case of either point being the point at infinity (z1 and/or
+ * z2 are zero), is handled separately later on in this function, so we
+ * avoid jumping to point_double here in those special cases.
+ */
+ points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
+
+ if (points_equal) {
+ /*
+ * This is obviously not constant-time but, as mentioned before, this
+ * case never happens during single point multiplication, so there is
no
+ * timing leak for ECDH or ECDSA signing.
+ */
point_double(x3, y3, z3, x1, y1, z1);
return;
}
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
index 6b5dc8eb99..78a98c7187 100644
--- a/crypto/ec/ecp_nistp521.c
+++ b/crypto/ec/ecp_nistp521.c
@@ -1158,6 +1158,7 @@ static void point_add(felem x3, felem y3, felem z3,
felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
largefelem tmp, tmp2;
limb x_equal, y_equal, z1_is_zero, z2_is_zero;
+ limb points_equal;
z1_is_zero = felem_is_zero(z1);
z2_is_zero = felem_is_zero(z2);
@@ -1242,7 +1243,24 @@ static void point_add(felem x3, felem y3, felem z3,
felem_scalar64(ftmp5, 2);
/* ftmp5[i] < 2^61 */
- if (x_equal && y_equal && !z1_is_zero && !z2_is_zero) {
+ /*
+ * The formulae are incorrect if the points are equal, in affine
coordinates
+ * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
+ * happens.
+ *
+ * We use bitwise operations to avoid potential side-channels introduced by
+ * the short-circuiting behaviour of boolean operators.
+ *
+ * The special case of either point being the point at infinity (z1 and/or
+ * z2 are zero), is handled separately later on in this function, so we
+ * avoid jumping to point_double here in those special cases.
+ *
+ * Notice the comment below on the implications of this branching for
timing
+ * leaks and why it is considered practically irrelevant.
+ */
+ points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
+
+ if (points_equal) {
/*
* This is obviously not constant-time but it will almost-never happen
* for ECDH / ECDSA. The case where it can happen is during scalar-mult
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 603557ced5..1609c4bbf7 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -358,16 +358,47 @@ static void ecp_nistz256_point_add(P256_POINT *r,
ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */
/*
- * This should not happen during sign/ecdh, so no constant time violation
+ * The formulae are incorrect if the points are equal so we check for
+ * this and do doubling if this happens.
+ *
+ * Points here are in Jacobian projective coordinates (Xi, Yi, Zi)
+ * that are bound to the affine coordinates (xi, yi) by the following
+ * equations:
+ * - xi = Xi / (Zi)^2
+ * - y1 = Yi / (Zi)^3
+ *
+ * For the sake of optimization, the algorithm operates over
+ * intermediate variables U1, U2 and S1, S2 that are derived from
+ * the projective coordinates:
+ * - U1 = X1 * (Z2)^2 ; U2 = X2 * (Z1)^2
+ * - S1 = Y1 * (Z2)^3 ; S2 = Y2 * (Z1)^3
+ *
+ * It is easy to prove that is_equal(U1, U2) implies that the affine
+ * x-coordinates are equal, or either point is at infinity.
+ * Likewise is_equal(S1, S2) implies that the affine y-coordinates are
+ * equal, or either point is at infinity.
+ *
+ * The special case of either point being the point at infinity (Z1 or Z2
+ * is zero), is handled separately later on in this function, so we avoid
+ * jumping to point_double here in those special cases.
+ *
+ * When both points are inverse of each other, we know that the affine
+ * x-coordinates are equal, and the y-coordinates have different sign.
+ * Therefore since U1 = U2, we know H = 0, and therefore Z3 = H*Z1*Z2
+ * will equal 0, thus the result is infinity, if we simply let this
+ * function continue normally.
+ *
+ * We use bitwise operations to avoid potential side-channels introduced by
+ * the short-circuiting behaviour of boolean operators.
*/
- if (is_equal(U1, U2) && !in1infty && !in2infty) {
- if (is_equal(S1, S2)) {
- ecp_nistz256_point_double(r, a);
- return;
- } else {
- memset(r, 0, sizeof(*r));
- return;
- }
+ if (is_equal(U1, U2) & ~in1infty & ~in2infty & is_equal(S1, S2)) {
+ /*
+ * This is obviously not constant-time but it should never happen
during
+ * single point multiplication, so there is no timing leak for ECDH or
+ * ECDSA signing.
+ */
+ ecp_nistz256_point_double(r, a);
+ return;
}
ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */