The branch master has been updated
       via  c74aea8d6ccdf07ce826a9451887739b8aa64096 (commit)
       via  e3057a57caf4274ea1fb074518e4714059dfcabf (commit)
      from  dfde4219fdebbb5a8a17602fea036f7690e517ea (commit)
- Log -----------------------------------------------------------------
commit c74aea8d6ccdf07ce826a9451887739b8aa64096
Author: Andy Polyakov <ap...@openssl.org>
Date:   Fri Aug 19 23:18:35 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.

    RT#4625

    Reviewed-by: Rich Salz <rs...@openssl.org>

commit e3057a57caf4274ea1fb074518e4714059dfcabf
Author: Andy Polyakov <ap...@openssl.org>
Date:   Fri Aug 19 23:16:04 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.

    RT#4625

    Reviewed-by: Rich Salz <rs...@openssl.org>
-----------------------------------------------------------------------

Summary of changes:
 crypto/ec/asm/ecp_nistz256-armv4.pl   |  58 ++++---------
 crypto/ec/asm/ecp_nistz256-armv8.pl   |  76 ++++++-----------
 crypto/ec/asm/ecp_nistz256-sparcv9.pl | 150 ++++++++++------------------
 crypto/ec/asm/ecp_nistz256-x86.pl     |  30 +++----
 crypto/ec/asm/ecp_nistz256-x86_64.pl  |  24 +++---
 crypto/ec/ecp_nistz256.c              |  57 +++++++----
 6 files changed, 152 insertions(+), 243 deletions(-)

diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index de3cd5c..2314b75 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1405,27 +1405,19 @@ ecp_nistz256_point_add:
 	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
 	sub	sp,sp,#32*18+16

-	ldmia	$b_ptr!,{r4-r11}	@ copy in2
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
 	add	r3,sp,#$in2_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
+	stmia	r3!,{r4-r11}
+	ldmia	$b_ptr,{r4-r11}		@ copy in2_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr,{r4-r11}
 	cmp	r12,#0
 #ifdef	__thumb2__
 	it	ne
@@ -1434,27 +1426,19 @@ ecp_nistz256_point_add:
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*18+8]	@ !in2infty

-	ldmia	$a_ptr!,{r4-r11}	@ copy in1
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
 	add	r3,sp,#$in1_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
+	stmia	r3!,{r4-r11}
+	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
 #ifdef	__thumb2__
 	it	ne
@@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine:
 	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
 	sub	sp,sp,#32*15

-	ldmia	$a_ptr!,{r4-r11}	@ copy in1
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
 	add	r3,sp,#$in1_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
+	stmia	r3!,{r4-r11}
+	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
#ifdef	__thumb2__
 	it	ne
@@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*15+4]	@ !in1infty

-	ldmia	$b_ptr!,{r4-r11}	@ copy in2
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
 	add	r3,sp,#$in2_x
 	orr	r12,r4,r5
 	orr	r12,r12,r6
@@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
 	orr	r12,r12,r10
 	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr!,{r4-r11}
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
 	orr	r12,r12,r4
 	orr	r12,r12,r5
 	orr	r12,r12,r6
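Every implementation touched by this patch makes the same switch: the
point-at-infinity flag is now derived from the Z coordinate alone, matching
ec_GFp_simple_is_at_infinity(), rather than from the (0,0) encoding of X and
Y. As a reading aid, here is a minimal standalone C sketch of the branch-free
test the armv4 hunks above accumulate into r12 (hypothetical function name,
32-bit limbs as in the armv4 code; not the library code itself):

    #include <stdint.h>

    /*
     * All-ones when Z != 0 (finite point), all-zero when Z == 0 (point
     * at infinity) -- the value the assembly stores as !in2infty/!in1infty.
     */
    static uint32_t not_infinity_mask(const uint32_t z[8])
    {
        uint32_t acc = z[0] | z[1] | z[2] | z[3]
                     | z[4] | z[5] | z[6] | z[7];

        /* for any nonzero acc, (acc | -acc) has its top bit set */
        return (uint32_t)0 - ((acc | (0u - acc)) >> 31);
    }

In the assembly, the `cmp r12,#0` followed by the conditional instruction
guarded by `it ne` on Thumb-2 plays the role of the final mask step.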
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 1362586..cdc9161 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -862,46 +862,28 @@ ecp_nistz256_point_add:
 	stp	x25,x26,[sp,#64]
 	sub	sp,sp,#32*12

-	ldp	$a0,$a1,[$bp]
-	ldp	$a2,$a3,[$bp,#16]
-	ldp	$t0,$t1,[$bp,#32]
-	ldp	$t2,$t3,[$bp,#48]
+	ldp	$a0,$a1,[$bp,#64]	// in2_z
+	ldp	$a2,$a3,[$bp,#64+16]
 	mov	$rp_real,$rp
 	mov	$ap_real,$ap
 	mov	$bp_real,$bp
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
-	ldp	$acc0,$acc1,[$ap]
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	ldp	$acc2,$acc3,[$ap,#16]
-	orr	$a0,$a0,$a2
-	orr	$t2,$t0,$t2
-	ldp	$t0,$t1,[$ap,#32]
-	orr	$in2infty,$a0,$t2
-	cmp	$in2infty,#0
-	ldp	$t2,$t3,[$ap,#48]
-	csetm	$in2infty,ne		// !in2infty
-
-	ldp	$a0,$a1,[$bp_real,#64]	// forward load for p256_sqr_mont
-	orr	$acc0,$acc0,$acc1
-	orr	$acc2,$acc2,$acc3
-	ldp	$a2,$a3,[$bp_real,#64+16]
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	orr	$acc0,$acc0,$acc2
-	orr	$t0,$t0,$t2
-	orr	$in1infty,$acc0,$t0
-	cmp	$in1infty,#0
 	ldr	$poly1,.Lpoly+8
 	ldr	$poly3,.Lpoly+24
-	csetm	$in1infty,ne		// !in1infty
-
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in2infty,$t0,$t2
+	cmp	$in2infty,#0
+	csetm	$in2infty,ne		// !in2infty
 	add	$rp,sp,#$Z2sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z2sqr, in2_z);

-	ldp	$a0,$a1,[$ap_real,#64]
+	ldp	$a0,$a1,[$ap_real,#64]	// in1_z
 	ldp	$a2,$a3,[$ap_real,#64+16]
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in1infty,$t0,$t2
+	cmp	$in1infty,#0
+	csetm	$in1infty,ne		// !in1infty
 	add	$rp,sp,#$Z1sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
@@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
 	ldr	$poly1,.Lpoly+8
 	ldr	$poly3,.Lpoly+24

-	ldp	$a0,$a1,[$ap]
-	ldp	$a2,$a3,[$ap,#16]
-	ldp	$t0,$t1,[$ap,#32]
-	ldp	$t2,$t3,[$ap,#48]
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	orr	$a0,$a0,$a2
-	orr	$t0,$t0,$t2
-	orr	$in1infty,$a0,$t0
+	ldp	$a0,$a1,[$ap,#64]	// in1_z
+	ldp	$a2,$a3,[$ap,#64+16]
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in1infty,$t0,$t2
 	cmp	$in1infty,#0
 	csetm	$in1infty,ne		// !in1infty

-	ldp	$a0,$a1,[$bp]
-	ldp	$a2,$a3,[$bp,#16]
-	ldp	$t0,$t1,[$bp,#32]
+	ldp	$acc0,$acc1,[$bp]	// in2_x
+	ldp	$acc2,$acc3,[$bp,#16]
+	ldp	$t0,$t1,[$bp,#32]	// in2_y
 	ldp	$t2,$t3,[$bp,#48]
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
+	orr	$acc0,$acc0,$acc1
+	orr	$acc2,$acc2,$acc3
 	orr	$t0,$t0,$t1
 	orr	$t2,$t2,$t3
-	orr	$a0,$a0,$a2
+	orr	$acc0,$acc0,$acc2
 	orr	$t0,$t0,$t2
-	orr	$in2infty,$a0,$t0
+	orr	$in2infty,$acc0,$t0
 	cmp	$in2infty,#0
 	csetm	$in2infty,ne		// !in2infty

-	ldp	$a0,$a1,[$ap_real,#64]
-	ldp	$a2,$a3,[$ap_real,#64+16]
 	add	$rp,sp,#$Z1sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
index 3c7ff50..97201cb 100755
--- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl
+++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
@@ -899,71 +899,39 @@ ecp_nistz256_point_add:
 	mov	$ap,$ap_real
 	mov	$bp,$bp_real

-	ld	[$bp],@acc[0]		! in2_x
-	ld	[$bp+4],@acc[1]
-	ld	[$bp+8],@acc[2]
-	ld	[$bp+12],@acc[3]
-	ld	[$bp+16],@acc[4]
-	ld	[$bp+20],@acc[5]
-	ld	[$bp+24],@acc[6]
-	ld	[$bp+28],@acc[7]
-	ld	[$bp+32],$t0		! in2_y
-	ld	[$bp+32+4],$t1
-	ld	[$bp+32+8],$t2
-	ld	[$bp+32+12],$t3
-	ld	[$bp+32+16],$t4
-	ld	[$bp+32+20],$t5
-	ld	[$bp+32+24],$t6
-	ld	[$bp+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$bp+64],$t0		! in2_z
+	ld	[$bp+64+4],$t1
+	ld	[$bp+64+8],$t2
+	ld	[$bp+64+12],$t3
+	ld	[$bp+64+16],$t4
+	ld	[$bp+64+20],$t5
+	ld	[$bp+64+24],$t6
+	ld	[$bp+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in2infty
+	or	$t4,$t0,$t0		! !in2infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-12]

-	ld	[$ap],@acc[0]		! in1_x
-	ld	[$ap+4],@acc[1]
-	ld	[$ap+8],@acc[2]
-	ld	[$ap+12],@acc[3]
-	ld	[$ap+16],@acc[4]
-	ld	[$ap+20],@acc[5]
-	ld	[$ap+24],@acc[6]
-	ld	[$ap+28],@acc[7]
-	ld	[$ap+32],$t0		! in1_y
-	ld	[$ap+32+4],$t1
-	ld	[$ap+32+8],$t2
-	ld	[$ap+32+12],$t3
-	ld	[$ap+32+16],$t4
-	ld	[$ap+32+20],$t5
-	ld	[$ap+32+24],$t6
-	ld	[$ap+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$ap+64],$t0		! in1_z
+	ld	[$ap+64+4],$t1
+	ld	[$ap+64+8],$t2
+	ld	[$ap+64+12],$t3
+	ld	[$ap+64+16],$t4
+	ld	[$ap+64+20],$t5
+	ld	[$ap+64+24],$t6
+	ld	[$ap+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in1infty
+	or	$t4,$t0,$t0		! !in1infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-16]
@@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
 	mov	$ap,$ap_real
 	mov	$bp,$bp_real

-	ld	[$ap],@acc[0]		! in1_x
-	ld	[$ap+4],@acc[1]
-	ld	[$ap+8],@acc[2]
-	ld	[$ap+12],@acc[3]
-	ld	[$ap+16],@acc[4]
-	ld	[$ap+20],@acc[5]
-	ld	[$ap+24],@acc[6]
-	ld	[$ap+28],@acc[7]
-	ld	[$ap+32],$t0		! in1_y
-	ld	[$ap+32+4],$t1
-	ld	[$ap+32+8],$t2
-	ld	[$ap+32+12],$t3
-	ld	[$ap+32+16],$t4
-	ld	[$ap+32+20],$t5
-	ld	[$ap+32+24],$t6
-	ld	[$ap+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$ap+64],$t0		! in1_z
+	ld	[$ap+64+4],$t1
+	ld	[$ap+64+8],$t2
+	ld	[$ap+64+12],$t3
+	ld	[$ap+64+16],$t4
+	ld	[$ap+64+20],$t5
+	ld	[$ap+64+24],$t6
+	ld	[$ap+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in1infty
+	or	$t4,$t0,$t0		! !in1infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-16]
@@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in2_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in2_y+24]

-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0		! !in2infty
-	stx	$a0,[%fp+STACK_BIAS-8]
-
 	ld	[$bp+64],$acc0		! in2_z
 	ld	[$bp+64+4],$t0
 	ld	[$bp+64+8],$acc1
@@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in2_z+16]
 	stx	$acc3,[%sp+LOCALS64+$in2_z+24]
+	or	$acc1,$acc0,$acc0
+	or	$acc3,$acc2,$acc2
+	or	$acc2,$acc0,$acc0
+	movrnz	$acc0,-1,$acc0		! !in2infty
+	stx	$acc0,[%fp+STACK_BIAS-8]
+
 	or	$a0,$t0,$a0
 	ld	[$ap+32],$acc0		! in1_y
 	or	$a1,$t1,$a1
@@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_y+24]

-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0		! !in1infty
-	stx	$a0,[%fp+STACK_BIAS-16]
-
 	ldx	[%sp+LOCALS64+$in2_z],$a0	! forward load
 	ldx	[%sp+LOCALS64+$in2_z+8],$a1
 	ldx	[%sp+LOCALS64+$in2_z+16],$a2
@@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_z+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_z+24]
+	or	$acc1,$acc0,$acc0
+	or	$acc3,$acc2,$acc2
+	or	$acc2,$acc0,$acc0
+	movrnz	$acc0,-1,$acc0		! !in1infty
+	stx	$acc0,[%fp+STACK_BIAS-16]
+
 	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z2sqr, in2_z);
 	add	%sp,LOCALS64+$Z2sqr,$rp
@@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_y+24]

-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0		! !in1infty
-	stx	$a0,[%fp+STACK_BIAS-16]
-
 	ld	[$ap+64],$a0		! in1_z
 	ld	[$ap+64+4],$t0
 	ld	[$ap+64+8],$a1
@@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
 	stx	$a2,[%sp+LOCALS64+$in1_z+16]
 	stx	$a3,[%sp+LOCALS64+$in1_z+24]
+	or	$a1,$a0,$t0
+	or	$a3,$a2,$t2
+	or	$t2,$t0,$t0
+	movrnz	$t0,-1,$t0		! !in1infty
+	stx	$t0,[%fp+STACK_BIAS-16]
+
 	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z1sqr, in1_z);
 	add	%sp,LOCALS64+$Z1sqr,$rp
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl
index b96b1aa..1d9e006 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
 	&mov	(&DWP(32*18+12,"esp"),"ebp")	if ($i==0);
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
 }
 	&xor	("eax","eax");
 	&mov	("esi",&wparam(1));
@@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("ecx",&DWP($i+8,"esi"));
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
 }
 	&xor	("eax","eax");
 	&sub	("eax","ebp");
@@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
 	&mov	(&DWP(32*15+8,"esp"),"ebp")	if ($i==0);
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
 }
 	&xor	("eax","eax");
 	&mov	("esi",&wparam(2));
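The x86_64 code below performs the same Z-only test in SIMD registers: the
two xmm halves of Z are folded together with por, pshufd rotations spread any
nonzero bits across all four dwords, and pcmpeqd against zero yields the
in1infty/in2infty masks. A rough standalone sketch with SSE2 intrinsics
(hypothetical helper name; the real code interleaves these steps with the
point copies and the p256_sqr_mont calls, and orders them slightly
differently):

    #include <emmintrin.h>          /* SSE2 */

    /* All-ones 128-bit mask iff the 256-bit Z (two xmm halves) is zero. */
    static __m128i infinity_mask(__m128i z_lo, __m128i z_hi)
    {
        __m128i t = _mm_or_si128(z_lo, z_hi);            /* fold 256 -> 128 bits */
        t = _mm_or_si128(t, _mm_shuffle_epi32(t, 0xb1)); /* OR dword pairs */
        t = _mm_or_si128(t, _mm_shuffle_epi32(t, 0x1e)); /* every dword = OR of all */
        return _mm_cmpeq_epi32(t, _mm_setzero_si128());  /* all-ones iff Z == 0 */
    }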
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index cc7b976..ddbbedf 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2294,16 +2294,14 @@ $code.=<<___;
 	mov	$b_org, $a_ptr			# reassign
 	movdqa	%xmm0, $in1_x(%rsp)
 	movdqa	%xmm1, $in1_x+0x10(%rsp)
-	por	%xmm0, %xmm1
 	movdqa	%xmm2, $in1_y(%rsp)
 	movdqa	%xmm3, $in1_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	movdqa	%xmm4, $in1_z(%rsp)
 	movdqa	%xmm5, $in1_z+0x10(%rsp)
-	por	%xmm1, %xmm3
+	por	%xmm4, %xmm5

 	movdqu	0x00($a_ptr), %xmm0		# copy *(P256_POINT *)$b_ptr
-	pshufd	\$0xb1, %xmm3, %xmm5
+	pshufd	\$0xb1, %xmm5, %xmm3
 	movdqu	0x10($a_ptr), %xmm1
 	movdqu	0x20($a_ptr), %xmm2
 	por	%xmm3, %xmm5
@@ -2315,14 +2313,14 @@ $code.=<<___;
 	movdqa	%xmm0, $in2_x(%rsp)
 	pshufd	\$0x1e, %xmm5, %xmm4
 	movdqa	%xmm1, $in2_x+0x10(%rsp)
-	por	%xmm0, %xmm1
-	movq	$r_ptr, %xmm0			# save $r_ptr
+	movdqu	0x40($a_ptr),%xmm0		# in2_z again
+	movdqu	0x50($a_ptr),%xmm1
 	movdqa	%xmm2, $in2_y(%rsp)
 	movdqa	%xmm3, $in2_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	por	%xmm4, %xmm5
 	pxor	%xmm4, %xmm4
-	por	%xmm1, %xmm3
+	por	%xmm0, %xmm1
+	movq	$r_ptr, %xmm0			# save $r_ptr

 	lea	0x40-$bias($a_ptr), $a_ptr	# $a_ptr is still valid
 	mov	$src0, $in2_z+8*0(%rsp)		# make in2_z copy
@@ -2333,8 +2331,8 @@ $code.=<<___;
 	call	__ecp_nistz256_sqr_mont$x	# p256_sqr_mont(Z2sqr, in2_z);

 	pcmpeqd	%xmm4, %xmm5
-	pshufd	\$0xb1, %xmm3, %xmm4
-	por	%xmm3, %xmm4
+	pshufd	\$0xb1, %xmm1, %xmm4
+	por	%xmm1, %xmm4
 	pshufd	\$0, %xmm5, %xmm5		# in1infty
 	pshufd	\$0x1e, %xmm4, %xmm3
 	por	%xmm3, %xmm4
@@ -2666,16 +2664,14 @@ $code.=<<___;
 	mov	0x40+8*3($a_ptr), $acc0
 	movdqa	%xmm0, $in1_x(%rsp)
 	movdqa	%xmm1, $in1_x+0x10(%rsp)
-	por	%xmm0, %xmm1
 	movdqa	%xmm2, $in1_y(%rsp)
 	movdqa	%xmm3, $in1_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	movdqa	%xmm4, $in1_z(%rsp)
 	movdqa	%xmm5, $in1_z+0x10(%rsp)
-	por	%xmm1, %xmm3
+	por	%xmm4, %xmm5

 	movdqu	0x00($b_ptr), %xmm0	# copy *(P256_POINT_AFFINE *)$b_ptr
-	pshufd	\$0xb1, %xmm3, %xmm5
+	pshufd	\$0xb1, %xmm5, %xmm3
 	movdqu	0x10($b_ptr), %xmm1
 	movdqu	0x20($b_ptr), %xmm2
 	por	%xmm3, %xmm5
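The C reference implementation below makes the same switch, and also has to
patch up ecp_nistz256_points_mul(): points fetched from the precomputed
affine table still encode infinity as (0,0), so instead of unconditionally
copying ONE into p.p.Z the code now assigns ONE for finite points and zero
for infinity. A compact sketch of both pieces (4 x 64-bit limbs assumed, the
8-limb 32-bit case is analogous; is_zero() mirrors the constant-time helper
the file already uses, the other names are hypothetical):

    #include <stdint.h>

    typedef uint64_t BN_ULONG;
    #define P256_LIMBS 4

    /* constant-time: 1 if a == 0, else 0 */
    static BN_ULONG is_zero(BN_ULONG a)
    {
        a |= (0 - a);
        a = ~a;
        return a >> (8 * sizeof(a) - 1);
    }

    /* Jacobian convention: a point is at infinity iff Z == 0,
     * whatever X and Y happen to hold. */
    static BN_ULONG is_infinity(const BN_ULONG z[P256_LIMBS])
    {
        return is_zero(z[0] | z[1] | z[2] | z[3]);
    }

    /* Harmonize an affine (0,0)-encoded point with the Jacobian
     * convention: Z := ONE for finite points, 0 for infinity,
     * without branching on secret data. */
    static void set_z_conditional(BN_ULONG z_out[P256_LIMBS],
                                  const BN_ULONG one[P256_LIMBS],
                                  const BN_ULONG x[P256_LIMBS],
                                  const BN_ULONG y[P256_LIMBS])
    {
        BN_ULONG infty, mask;
        int i;

        infty = x[0] | x[1] | x[2] | x[3] | y[0] | y[1] | y[2] | y[3];
        mask = ~((BN_ULONG)0 - is_zero(infty));   /* 0 iff (x,y) == (0,0) */

        for (i = 0; i < P256_LIMBS; i++)
            z_out[i] = one[i] & mask;
    }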
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 564a889..dca3a2d 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -335,19 +335,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;
     const BN_ULONG *in2_z = b->Z;

-    /* We encode infinity as (0,0), which is not on the curve,
-     * so it is OK. */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    /*
+     * Infinity is encoded as (,,0)
+     */
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);

-    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
-                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+    in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
     if (P256_LIMBS == 8)
-        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
-                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+        in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);

     in1infty = is_zero(in1infty);
     in2infty = is_zero(in2infty);
@@ -436,15 +433,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;

     /*
-     * In affine representation we encode infty as (0,0), which is not on the
-     * curve, so it is OK
+     * Infinity is encoded as (,,0)
      */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);

+    /*
+     * In affine representation we encode infinity as (0,0), which is
+     * not on the curve, so it is OK
+     */
     in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
                 in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
     if (P256_LIMBS == 8)
@@ -1273,6 +1271,8 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
     } else
 #endif
     {
+        BN_ULONG infty;
+
         /* First window */
         wvalue = (p_str[0] << 1) & mask;
         idx += window_size;
@@ -1285,7 +1285,30 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
         ecp_nistz256_neg(p.p.Z, p.p.Y);
         copy_conditional(p.p.Y, p.p.Z, wvalue & 1);

-        memcpy(p.p.Z, ONE, sizeof(ONE));
+        /*
+         * Since affine infinity is encoded as (0,0) and
+         * Jacobian as (,,0), we need to harmonize them
+         * by assigning "one" or zero to Z.
+         */
+        infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
+                 p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
+        if (P256_LIMBS == 8)
+            infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
+                      p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
+
+        infty = 0 - is_zero(infty);
+        infty = ~infty;
+
+        p.p.Z[0] = ONE[0] & infty;
+        p.p.Z[1] = ONE[1] & infty;
+        p.p.Z[2] = ONE[2] & infty;
+        p.p.Z[3] = ONE[3] & infty;
+        if (P256_LIMBS == 8) {
+            p.p.Z[4] = ONE[4] & infty;
+            p.p.Z[5] = ONE[5] & infty;
+            p.p.Z[6] = ONE[6] & infty;
+            p.p.Z[7] = ONE[7] & infty;
+        }

         for (i = 1; i < 37; i++) {
             unsigned int off = (idx - 1) / 8;
_____
openssl-commits mailing list
To unsubscribe: https://mta.openssl.org/mailman/listinfo/openssl-commits