http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ecp.c ---------------------------------------------------------------------- diff --git a/version22/c/ecp.c b/version22/c/ecp.c new file mode 100644 index 0000000..a6dcfad --- /dev/null +++ b/version22/c/ecp.c @@ -0,0 +1,1176 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL Elliptic Curve Functions */ +/* SU=m, SU is Stack Usage (Weierstrass Curves) */ + +//#define HAS_MAIN + +#include "amcl.h" + +/* test for P=O point-at-infinity */ +int ECP_isinf(ECP *P) +{ +#if CURVETYPE==EDWARDS + FP_reduce(P->x); + FP_reduce(P->y); + FP_reduce(P->z); + return (BIG_iszilch(P->x) && BIG_comp(P->y,P->z)==0); +#else + return P->inf; +#endif +} + +/* Conditional swap of P and Q dependant on d */ +static void ECP_cswap(ECP *P,ECP *Q,int d) +{ + BIG_cswap(P->x,Q->x,d); +#if CURVETYPE!=MONTGOMERY + BIG_cswap(P->y,Q->y,d); +#endif + BIG_cswap(P->z,Q->z,d); +#if CURVETYPE!=EDWARDS + d=~(d-1); + d=d&(P->inf^Q->inf); + P->inf^=d; + Q->inf^=d; +#endif +} + +#if CURVETYPE!=MONTGOMERY +/* Conditional move Q to P dependant on d */ +static void ECP_cmove(ECP *P,ECP *Q,int d) +{ + BIG_cmove(P->x,Q->x,d); +#if CURVETYPE!=MONTGOMERY + BIG_cmove(P->y,Q->y,d); +#endif + BIG_cmove(P->z,Q->z,d); +#if CURVETYPE!=EDWARDS + d=~(d-1); + P->inf^=(P->inf^Q->inf)&d; +#endif +} + +/* return 1 if b==c, no branching */ +static int teq(sign32 b,sign32 c) +{ + sign32 x=b^c; + x-=1; // if x=0, x now -1 + return (int)((x>>31)&1); +} +#endif // CURVETYPE!=MONTGOMERY + +#if CURVETYPE!=MONTGOMERY +/* Constant time select from pre-computed table */ +static void ECP_select(ECP *P,ECP W[],sign32 b) +{ + ECP MP; + sign32 m=b>>31; + sign32 babs=(b^m)-m; + + babs=(babs-1)/2; + + ECP_cmove(P,&W[0],teq(babs,0)); // conditional move + ECP_cmove(P,&W[1],teq(babs,1)); + ECP_cmove(P,&W[2],teq(babs,2)); + ECP_cmove(P,&W[3],teq(babs,3)); + ECP_cmove(P,&W[4],teq(babs,4)); + ECP_cmove(P,&W[5],teq(babs,5)); + ECP_cmove(P,&W[6],teq(babs,6)); + ECP_cmove(P,&W[7],teq(babs,7)); + + ECP_copy(&MP,P); + ECP_neg(&MP); // minus P + ECP_cmove(P,&MP,(int)(m&1)); +} +#endif + +/* Test P == Q */ +/* SU=168 */ +int ECP_equals(ECP *P,ECP *Q) +{ +#if CURVETYPE==WEIERSTRASS + BIG pz2,qz2,a,b; + if (ECP_isinf(P) && ECP_isinf(Q)) return 1; + if (ECP_isinf(P) || ECP_isinf(Q)) return 0; + + FP_sqr(pz2,P->z); + FP_sqr(qz2,Q->z); + + FP_mul(a,P->x,qz2); + FP_mul(b,Q->x,pz2); + FP_reduce(a); + FP_reduce(b); + if (BIG_comp(a,b)!=0) return 0; + + FP_mul(a,P->y,qz2); + FP_mul(a,a,Q->z); + FP_mul(b,Q->y,pz2); + FP_mul(b,b,P->z); + FP_reduce(a); + FP_reduce(b); + if (BIG_comp(a,b)!=0) return 0; + return 1; +#else + BIG a,b; + if (ECP_isinf(P) && ECP_isinf(Q)) return 1; + if (ECP_isinf(P) || ECP_isinf(Q)) return 0; + + FP_mul(a,P->x,Q->z); + FP_mul(b,Q->x,P->z); + FP_reduce(a); + FP_reduce(b); + if (BIG_comp(a,b)!=0) return 0; + +#if CURVETYPE==EDWARDS + FP_mul(a,P->y,Q->z); + FP_mul(b,Q->y,P->z); + FP_reduce(a); + FP_reduce(b); + if (BIG_comp(a,b)!=0) return 0; +#endif + + return 1; +#endif +} + +/* Set P=Q */ +/* SU=16 */ +void ECP_copy(ECP *P,ECP *Q) +{ +#if CURVETYPE!=EDWARDS + P->inf=Q->inf; +#endif + BIG_copy(P->x,Q->x); +#if CURVETYPE!=MONTGOMERY + BIG_copy(P->y,Q->y); +#endif + BIG_copy(P->z,Q->z); +} + +/* Set P=-Q */ +#if CURVETYPE!=MONTGOMERY +/* SU=8 */ +void ECP_neg(ECP *P) +{ + if (ECP_isinf(P)) return; +#if CURVETYPE==WEIERSTRASS + FP_neg(P->y,P->y); + BIG_norm(P->y); +#else + FP_neg(P->x,P->x); + BIG_norm(P->x); +#endif + +} +#endif + +/* Set P=O */ +void ECP_inf(ECP *P) +{ +#if CURVETYPE==EDWARDS + BIG_zero(P->x); + FP_one(P->y); + FP_one(P->z); +#else + P->inf=1; +#endif +} + +/* Calculate right Hand Side of curve equation y^2=RHS */ +/* SU=56 */ +void ECP_rhs(BIG v,BIG x) +{ +#if CURVETYPE==WEIERSTRASS + /* x^3+Ax+B */ + BIG t; + FP_sqr(t,x); + FP_mul(t,t,x); + + if (CURVE_A==-3) + { + FP_neg(v,x); + BIG_norm(v); + BIG_imul(v,v,-CURVE_A); + BIG_norm(v); + FP_add(v,t,v); + } + else BIG_copy(v,t); + + BIG_rcopy(t,CURVE_B); + FP_nres(t); + FP_add(v,t,v); + FP_reduce(v); +#endif + +#if CURVETYPE==EDWARDS + /* (Ax^2-1)/(Bx^2-1) */ + BIG t,m,one; + BIG_rcopy(m,Modulus); + FP_sqr(v,x); + FP_one(one); + BIG_rcopy(t,CURVE_B); + FP_nres(t); + FP_mul(t,v,t); + FP_sub(t,t,one); + if (CURVE_A==1) FP_sub(v,v,one); + + if (CURVE_A==-1) + { + FP_add(v,v,one); + FP_neg(v,v); + } + FP_redc(v); + FP_redc(t); + BIG_moddiv(v,v,t,m); + FP_nres(v); +#endif + +#if CURVETYPE==MONTGOMERY + /* x^3+Ax^2+x */ + BIG x2,x3; + FP_sqr(x2,x); + FP_mul(x3,x2,x); + BIG_copy(v,x); + FP_imul(x2,x2,CURVE_A); + FP_add(v,v,x2); + FP_add(v,v,x3); + FP_reduce(v); +#endif +} + +/* Set P=(x,y) */ + +#if CURVETYPE==MONTGOMERY + +/* Set P=(x,{y}) */ + +int ECP_set(ECP *P,BIG x) +{ + BIG m,rhs; + BIG_rcopy(m,Modulus); + BIG_copy(rhs,x); + FP_nres(rhs); + ECP_rhs(rhs,rhs); + FP_redc(rhs); + + if (BIG_jacobi(rhs,m)!=1) + { + ECP_inf(P); + return 0; + } + P->inf=0; + BIG_copy(P->x,x); + FP_nres(P->x); + FP_one(P->z); + return 1; +} + +/* Extract x coordinate as BIG */ +int ECP_get(BIG x,ECP *P) +{ + if (ECP_isinf(P)) return -1; + ECP_affine(P); + BIG_copy(x,P->x); + FP_redc(x); + return 0; +} + + +#else +/* Extract (x,y) and return sign of y. If x and y are the same return only x */ +/* SU=16 */ +int ECP_get(BIG x,BIG y,ECP *P) +{ + int s; +#if CURVETYPE!=EDWARDS + if (ECP_isinf(P)) return -1; +#endif + ECP_affine(P); + + BIG_copy(y,P->y); + FP_redc(y); + + s=BIG_parity(y); + + BIG_copy(x,P->x); + FP_redc(x); + + return s; +} + +/* Set P=(x,{y}) */ +/* SU=96 */ +int ECP_set(ECP *P,BIG x,BIG y) +{ + BIG rhs,y2; + BIG_copy(y2,y); + + FP_nres(y2); + FP_sqr(y2,y2); + FP_reduce(y2); + + + + BIG_copy(rhs,x); + FP_nres(rhs); + + ECP_rhs(rhs,rhs); + + if (BIG_comp(y2,rhs)!=0) + { + ECP_inf(P); + return 0; + } +#if CURVETYPE==WEIERSTRASS + P->inf=0; +#endif + BIG_copy(P->x,x); + FP_nres(P->x); + BIG_copy(P->y,y); + FP_nres(P->y); + FP_one(P->z); + return 1; +} + +/* Set P=(x,y), where y is calculated from x with sign s */ +/* SU=136 */ +int ECP_setx(ECP *P,BIG x,int s) +{ + BIG t,rhs,m; + BIG_rcopy(m,Modulus); + + BIG_copy(rhs,x); + FP_nres(rhs); + ECP_rhs(rhs,rhs); + BIG_copy(t,rhs); + FP_redc(t); + if (BIG_jacobi(t,m)!=1) + { + ECP_inf(P); + return 0; + } +#if CURVETYPE==WEIERSTRASS + P->inf=0; +#endif + BIG_copy(P->x,x); + FP_nres(P->x); + + FP_sqrt(P->y,rhs); + BIG_copy(rhs,P->y); + FP_redc(rhs); + if (BIG_parity(rhs)!=s) + FP_neg(P->y,P->y); + FP_reduce(P->y); + FP_one(P->z); + return 1; +} + +#endif + +/* Convert P to Affine, from (x,y,z) to (x,y) */ +/* SU=160 */ +void ECP_affine(ECP *P) +{ + BIG one,iz,m; +#if CURVETYPE==WEIERSTRASS + BIG izn; + if (ECP_isinf(P)) return; + FP_one(one); + if (BIG_comp(P->z,one)==0) return; + BIG_rcopy(m,Modulus); + + FP_redc(P->z); + BIG_invmodp(iz,P->z,m); + FP_nres(iz); + + FP_sqr(izn,iz); + FP_mul(P->x,P->x,izn); + FP_mul(izn,izn,iz); + FP_mul(P->y,P->y,izn); + FP_reduce(P->y); + +#endif +#if CURVETYPE==EDWARDS + FP_one(one); + if (BIG_comp(P->z,one)==0) return; + BIG_rcopy(m,Modulus); + + FP_redc(P->z); + BIG_invmodp(iz,P->z,m); + FP_nres(iz); + + FP_mul(P->x,P->x,iz); + FP_mul(P->y,P->y,iz); + FP_reduce(P->y); + +#endif +#if CURVETYPE==MONTGOMERY + if (ECP_isinf(P)) return; + FP_one(one); + if (BIG_comp(P->z,one)==0) return; + + BIG_rcopy(m,Modulus); + + FP_redc(P->z); + BIG_invmodp(iz,P->z,m); + FP_nres(iz); + + FP_mul(P->x,P->x,iz); + +#endif + FP_reduce(P->x); + BIG_copy(P->z,one); +} + +/* SU=120 */ +void ECP_outputxyz(ECP *P) +{ + BIG x,z; + if (ECP_isinf(P)) + { + printf("Infinity\n"); + return; + } + BIG_copy(x,P->x); + FP_reduce(x); + FP_redc(x); + BIG_copy(z,P->z); + FP_reduce(z); + FP_redc(z); + +#if CURVETYPE!=MONTGOMERY + BIG y; + BIG_copy(y,P->y); + FP_reduce(y); + FP_redc(y); + printf("("); + BIG_output(x); + printf(","); + BIG_output(y); + printf(","); + BIG_output(z); + printf(")\n"); + +#else + printf("("); + BIG_output(x); + printf(","); + BIG_output(z); + printf(")\n"); +#endif +} + +/* SU=16 */ +/* Output point P */ +void ECP_output(ECP *P) +{ + if (ECP_isinf(P)) + { + printf("Infinity\n"); + return; + } + ECP_affine(P); +#if CURVETYPE!=MONTGOMERY + FP_redc(P->x); + FP_redc(P->y); + printf("("); + BIG_output(P->x); + printf(","); + BIG_output(P->y); + printf(")\n"); + FP_nres(P->x); + FP_nres(P->y); +#else + FP_redc(P->x); + printf("("); + BIG_output(P->x); + printf(")\n"); + FP_nres(P->x); +#endif +} + + +/* SU=88 */ +/* Convert P to octet string */ +void ECP_toOctet(octet *W,ECP *P) +{ +#if CURVETYPE==MONTGOMERY + BIG x; + ECP_get(x,P); + W->len=MODBYTES+1; + W->val[0]=6; + BIG_toBytes(&(W->val[1]),x); +#else + BIG x,y; + ECP_get(x,y,P); + W->len=2*MODBYTES+1; + W->val[0]=4; + BIG_toBytes(&(W->val[1]),x); + BIG_toBytes(&(W->val[MODBYTES+1]),y); +#endif +} + +/* SU=88 */ +/* Restore P from octet string */ +int ECP_fromOctet(ECP *P,octet *W) +{ +#if CURVETYPE==MONTGOMERY + BIG x; + BIG_fromBytes(x,&(W->val[1])); + if (ECP_set(P,x)) return 1; + return 0; +#else + BIG x,y; + BIG_fromBytes(x,&(W->val[1])); + BIG_fromBytes(y,&(W->val[MODBYTES+1])); + if (ECP_set(P,x,y)) return 1; + return 0; +#endif +} + + +/* Set P=2P */ +/* SU=272 */ +void ECP_dbl(ECP *P) +{ +#if CURVETYPE==WEIERSTRASS + BIG one; + BIG w1,w7,w8,w2,w3,w6; + if (ECP_isinf(P)) return; + + if (BIG_iszilch(P->y)) + { + P->inf=1; + return; + } + FP_one(one); + BIG_zero(w6); + + if (CURVE_A==-3) + { + if (BIG_comp(P->z,one)==0) BIG_copy(w6,one); + else FP_sqr(w6,P->z); + FP_neg(w1,w6); + FP_add(w3,P->x,w1); + FP_add(w8,P->x,w6); + FP_mul(w3,w3,w8); + BIG_imul(w8,w3,3); + } + else + { + /* assuming A=0 */ + FP_sqr(w1,P->x); + BIG_imul(w8,w1,3); + } + + FP_sqr(w2,P->y); + FP_mul(w3,P->x,w2); + + BIG_imul(w3,w3,4); + FP_neg(w1,w3); + + BIG_norm(w1); + + FP_sqr(P->x,w8); + FP_add(P->x,P->x,w1); + FP_add(P->x,P->x,w1); + + BIG_norm(P->x); + + if (BIG_comp(P->z,one)==0) BIG_copy(P->z,P->y); + else FP_mul(P->z,P->z,P->y); + FP_add(P->z,P->z,P->z); + + + FP_add(w7,w2,w2); + FP_sqr(w2,w7); + + FP_add(w2,w2,w2); + FP_sub(w3,w3,P->x); + FP_mul(P->y,w8,w3); + + FP_sub(P->y,P->y,w2); + + BIG_norm(P->y); + BIG_norm(P->z); + +#endif + +#if CURVETYPE==EDWARDS + /* Not using square for multiplication swap, as (1) it needs more adds, and (2) it triggers more reductions */ + BIG B,C,D,E,F,H,J; + + FP_mul(B,P->x,P->y); + FP_add(B,B,B); + FP_sqr(C,P->x); + FP_sqr(D,P->y); + if (CURVE_A==1) BIG_copy(E,C); + if (CURVE_A==-1) FP_neg(E,C); + FP_add(F,E,D); + + BIG_norm(F); + + FP_sqr(H,P->z); + FP_add(H,H,H); + FP_sub(J,F,H); + FP_mul(P->x,B,J); + FP_sub(E,E,D); + FP_mul(P->y,F,E); + FP_mul(P->z,F,J); + + BIG_norm(P->x); + BIG_norm(P->y); + BIG_norm(P->z); + +#endif + +#if CURVETYPE==MONTGOMERY + BIG A,B,AA,BB,C; + if (ECP_isinf(P)) return; + + FP_add(A,P->x,P->z); + FP_sqr(AA,A); + FP_sub(B,P->x,P->z); + FP_sqr(BB,B); + FP_sub(C,AA,BB); + + FP_mul(P->x,AA,BB); + FP_imul(A,C,(CURVE_A+2)/4); + FP_add(BB,BB,A); + FP_mul(P->z,BB,C); + + BIG_norm(P->x); + BIG_norm(P->z); +#endif +} + +#if CURVETYPE==MONTGOMERY + +/* Set P+=Q. W is difference between P and Q and is affine */ +void ECP_add(ECP *P,ECP *Q,ECP *W) +{ + BIG A,B,C,D,DA,CB; + + FP_add(A,P->x,P->z); + FP_sub(B,P->x,P->z); + + FP_add(C,Q->x,Q->z); + FP_sub(D,Q->x,Q->z); + + FP_mul(DA,D,A); + FP_mul(CB,C,B); + + FP_add(A,DA,CB); + FP_sqr(A,A); + FP_sub(B,DA,CB); + FP_sqr(B,B); + + BIG_copy(P->x,A); + FP_mul(P->z,W->x,B); + + FP_reduce(P->z); + if (BIG_iszilch(P->z)) P->inf=1; + else P->inf=0; + + BIG_norm(P->x); +} + + +#else + +/* Set P+=Q */ +/* SU=248 */ +void ECP_add(ECP *P,ECP *Q) +{ +#if CURVETYPE==WEIERSTRASS + int aff; + BIG one,B,D,E,C,A; + if (ECP_isinf(Q)) return; + if (ECP_isinf(P)) + { + ECP_copy(P,Q); + return; + } + + FP_one(one); + aff=1; + if (BIG_comp(Q->z,one)!=0) aff=0; + + if (!aff) + { + FP_sqr(A,Q->z); + FP_mul(C,A,Q->z); + + FP_sqr(B,P->z); + FP_mul(D,B,P->z); + + FP_mul(A,P->x,A); + FP_mul(C,P->y,C); + } + else + { + BIG_copy(A,P->x); + BIG_copy(C,P->y); + + FP_sqr(B,P->z); + FP_mul(D,B,P->z); + } + + FP_mul(B,Q->x,B); + FP_sub(B,B,A); /* B=Qx.z^2-x.Qz^2 */ + FP_mul(D,Q->y,D); + FP_sub(D,D,C); /* D=Qy.z^3-y.Qz^3 */ + + FP_reduce(B); + if (BIG_iszilch(B)) + { + FP_reduce(D); + if (BIG_iszilch(D)) + { + ECP_dbl(P); + return; + } + else + { + ECP_inf(P); + return; + } + } + if (!aff) FP_mul(P->z,P->z,Q->z); + FP_mul(P->z,P->z,B); + + FP_sqr(E,B); + FP_mul(B,B,E); + FP_mul(A,A,E); + + FP_add(E,A,A); + FP_add(E,E,B); + + FP_sqr(P->x,D); + FP_sub(P->x,P->x,E); + + FP_sub(A,A,P->x); + FP_mul(P->y,A,D); + FP_mul(C,C,B); + FP_sub(P->y,P->y,C); + + BIG_norm(P->x); + BIG_norm(P->y); + BIG_norm(P->z); + +#else + BIG b,A,B,C,D,E,F,G; + + BIG_rcopy(b,CURVE_B); + FP_nres(b); + FP_mul(A,P->z,Q->z); + + FP_sqr(B,A); + FP_mul(C,P->x,Q->x); + FP_mul(D,P->y,Q->y); + FP_mul(E,C,D); + FP_mul(E,E,b); + + FP_sub(F,B,E); + FP_add(G,B,E); + + if (CURVE_A==1) FP_sub(E,D,C); + FP_add(C,C,D); + + FP_add(B,P->x,P->y); + FP_add(D,Q->x,Q->y); + FP_mul(B,B,D); + FP_sub(B,B,C); + FP_mul(B,B,F); + FP_mul(P->x,A,B); + + + if (CURVE_A==1) FP_mul(C,E,G); + if (CURVE_A==-1)FP_mul(C,C,G); + + FP_mul(P->y,A,C); + FP_mul(P->z,F,G); + + BIG_norm(P->x); + BIG_norm(P->y); + BIG_norm(P->z); + +#endif +} + +/* Set P-=Q */ +/* SU=16 */ +void ECP_sub(ECP *P,ECP *Q) +{ + ECP_neg(Q); + ECP_add(P,Q); + ECP_neg(Q); +} + +#endif + + +#if CURVETYPE==WEIERSTRASS +/* normalises array of points. Assumes P[0] is normalised already */ + +static void ECP_multiaffine(int m,ECP P[],BIG work[]) +{ + int i; + BIG t1,t2; + + FP_one(work[0]); + BIG_copy(work[1],P[0].z); + for (i=2; i<m; i++) + FP_mul(work[i],work[i-1],P[i-1].z); + + FP_mul(t1,work[m-1],P[m-1].z); + FP_inv(t1,t1); + + BIG_copy(t2,P[m-1].z); + FP_mul(work[m-1],work[m-1],t1); + + for (i=m-2;; i--) + { + if (i==0) + { + FP_mul(work[0],t1,t2); + break; + } + FP_mul(work[i],work[i],t2); + FP_mul(work[i],work[i],t1); + FP_mul(t2,P[i].z,t2); + } + /* now work[] contains inverses of all Z coordinates */ + + for (i=0; i<m; i++) + { + FP_one(P[i].z); + FP_sqr(t1,work[i]); + FP_mul(P[i].x,P[i].x,t1); + FP_mul(t1,work[i],t1); + FP_mul(P[i].y,P[i].y,t1); + } +} + +#endif + +#if CURVETYPE!=MONTGOMERY +/* constant time multiply by small integer of length bts - use ladder */ +void ECP_pinmul(ECP *P,int e,int bts) +{ + int i,b; + ECP R0,R1; + + ECP_affine(P); + ECP_inf(&R0); + ECP_copy(&R1,P); + + for (i=bts-1; i>=0; i--) + { + b=(e>>i)&1; + ECP_copy(P,&R1); + ECP_add(P,&R0); + ECP_cswap(&R0,&R1,b); + ECP_copy(&R1,P); + ECP_dbl(&R0); + ECP_cswap(&R0,&R1,b); + } + ECP_copy(P,&R0); + ECP_affine(P); +} +#endif + +/* Set P=r*P */ +/* SU=424 */ +void ECP_mul(ECP *P,BIG e) +{ +#if CURVETYPE==MONTGOMERY + /* Montgomery ladder */ + int nb,i,b; + ECP R0,R1,D; + if (ECP_isinf(P)) return; + if (BIG_iszilch(e)) + { + ECP_inf(P); + return; + } + ECP_affine(P); + + ECP_copy(&R0,P); + ECP_copy(&R1,P); + ECP_dbl(&R1); + ECP_copy(&D,P); + + nb=BIG_nbits(e); + for (i=nb-2; i>=0; i--) + { + b=BIG_bit(e,i); + ECP_copy(P,&R1); + ECP_add(P,&R0,&D); + ECP_cswap(&R0,&R1,b); + ECP_copy(&R1,P); + ECP_dbl(&R0); + ECP_cswap(&R0,&R1,b); + } + ECP_copy(P,&R0); + +#else + /* fixed size windows */ + int i,nb,s,ns; + BIG mt,t; + ECP Q,W[8],C; + sign8 w[1+(NLEN*BASEBITS+3)/4]; +#if CURVETYPE==WEIERSTRASS + BIG work[8]; +#endif + if (ECP_isinf(P)) return; + if (BIG_iszilch(e)) + { + ECP_inf(P); + return; + } + + ECP_affine(P); + + /* precompute table */ + + ECP_copy(&Q,P); + ECP_dbl(&Q); + +//printf("Q= ");ECP_output(&Q); printf("\n"); + + ECP_copy(&W[0],P); + + for (i=1; i<8; i++) + { + ECP_copy(&W[i],&W[i-1]); + ECP_add(&W[i],&Q); + } + +//printf("W[1]= ");ECP_output(&W[1]); printf("\n"); + + /* convert the table to affine */ +#if CURVETYPE==WEIERSTRASS + ECP_multiaffine(8,W,work); +#endif + + /* make exponent odd - add 2P if even, P if odd */ + BIG_copy(t,e); + s=BIG_parity(t); + BIG_inc(t,1); + BIG_norm(t); + ns=BIG_parity(t); + BIG_copy(mt,t); + BIG_inc(mt,1); + BIG_norm(mt); + BIG_cmove(t,mt,s); + ECP_cmove(&Q,P,ns); + ECP_copy(&C,&Q); + + nb=1+(BIG_nbits(t)+3)/4; + + /* convert exponent to signed 4-bit window */ + for (i=0; i<nb; i++) + { + w[i]=BIG_lastbits(t,5)-16; + BIG_dec(t,w[i]); + BIG_norm(t); + BIG_fshr(t,4); + } + w[nb]=BIG_lastbits(t,5); + + ECP_copy(P,&W[(w[nb]-1)/2]); + for (i=nb-1; i>=0; i--) + { + ECP_select(&Q,W,w[i]); + ECP_dbl(P); + ECP_dbl(P); + ECP_dbl(P); + ECP_dbl(P); + ECP_add(P,&Q); + } + ECP_sub(P,&C); /* apply correction */ +#endif + ECP_affine(P); +} + +#if CURVETYPE!=MONTGOMERY +/* Set P=eP+fQ double multiplication */ +/* constant time - as useful for GLV method in pairings */ +/* SU=456 */ + +void ECP_mul2(ECP *P,ECP *Q,BIG e,BIG f) +{ + BIG te,tf,mt; + ECP S,T,W[8],C; + sign8 w[1+(NLEN*BASEBITS+1)/2]; + int i,a,b,s,ns,nb; +#if CURVETYPE==WEIERSTRASS + BIG work[8]; +#endif + + ECP_affine(P); + ECP_affine(Q); + + BIG_copy(te,e); + BIG_copy(tf,f); + + /* precompute table */ + ECP_copy(&W[1],P); + ECP_sub(&W[1],Q); /* P+Q */ + ECP_copy(&W[2],P); + ECP_add(&W[2],Q); /* P-Q */ + ECP_copy(&S,Q); + ECP_dbl(&S); /* S=2Q */ + ECP_copy(&W[0],&W[1]); + ECP_sub(&W[0],&S); + ECP_copy(&W[3],&W[2]); + ECP_add(&W[3],&S); + ECP_copy(&T,P); + ECP_dbl(&T); /* T=2P */ + ECP_copy(&W[5],&W[1]); + ECP_add(&W[5],&T); + ECP_copy(&W[6],&W[2]); + ECP_add(&W[6],&T); + ECP_copy(&W[4],&W[5]); + ECP_sub(&W[4],&S); + ECP_copy(&W[7],&W[6]); + ECP_add(&W[7],&S); + +#if CURVETYPE==WEIERSTRASS + ECP_multiaffine(8,W,work); +#endif + + /* if multiplier is odd, add 2, else add 1 to multiplier, and add 2P or P to correction */ + + s=BIG_parity(te); + BIG_inc(te,1); + BIG_norm(te); + ns=BIG_parity(te); + BIG_copy(mt,te); + BIG_inc(mt,1); + BIG_norm(mt); + BIG_cmove(te,mt,s); + ECP_cmove(&T,P,ns); + ECP_copy(&C,&T); + + s=BIG_parity(tf); + BIG_inc(tf,1); + BIG_norm(tf); + ns=BIG_parity(tf); + BIG_copy(mt,tf); + BIG_inc(mt,1); + BIG_norm(mt); + BIG_cmove(tf,mt,s); + ECP_cmove(&S,Q,ns); + ECP_add(&C,&S); + + BIG_add(mt,te,tf); + BIG_norm(mt); + nb=1+(BIG_nbits(mt)+1)/2; + + /* convert exponent to signed 2-bit window */ + for (i=0; i<nb; i++) + { + a=BIG_lastbits(te,3)-4; + BIG_dec(te,a); + BIG_norm(te); + BIG_fshr(te,2); + b=BIG_lastbits(tf,3)-4; + BIG_dec(tf,b); + BIG_norm(tf); + BIG_fshr(tf,2); + w[i]=4*a+b; + } + w[nb]=(4*BIG_lastbits(te,3)+BIG_lastbits(tf,3)); + + ECP_copy(P,&W[(w[nb]-1)/2]); + for (i=nb-1; i>=0; i--) + { + ECP_select(&T,W,w[i]); + ECP_dbl(P); + ECP_dbl(P); + ECP_add(P,&T); + } + ECP_sub(P,&C); /* apply correction */ + ECP_affine(P); +} + +#endif + + +#ifdef HAS_MAIN + +int main() +{ + int i; + ECP G,P; + csprng RNG; + BIG r,s,x,y,b,m,w,q; + BIG_rcopy(x,CURVE_Gx); +#if CURVETYPE!=MONTGOMERY + BIG_rcopy(y,CURVE_Gy); +#endif + BIG_rcopy(m,Modulus); + + printf("x= "); + BIG_output(x); + printf("\n"); +#if CURVETYPE!=MONTGOMERY + printf("y= "); + BIG_output(y); + printf("\n"); +#endif + RNG_seed(&RNG,3,"abc"); + +#if CURVETYPE!=MONTGOMERY + ECP_set(&G,x,y); +#else + ECP_set(&G,x); +#endif + if (ECP_isinf(&G)) printf("Failed to set - point not on curve\n"); + else printf("set success\n"); + + ECP_output(&G); + + BIG_rcopy(r,CURVE_Order); //BIG_dec(r,7); + printf("r= "); + BIG_output(r); + printf("\n"); + + ECP_copy(&P,&G); + + ECP_mul(&P,r); + + ECP_output(&P); +//exit(0); + BIG_randomnum(w,&RNG); + BIG_mod(w,r); + + ECP_copy(&P,&G); + ECP_mul(&P,w); + + ECP_output(&P); + + return 0; +} + +#endif
http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ecp2.c ---------------------------------------------------------------------- diff --git a/version22/c/ecp2.c b/version22/c/ecp2.c new file mode 100644 index 0000000..4808569 --- /dev/null +++ b/version22/c/ecp2.c @@ -0,0 +1,696 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL Weierstrass elliptic curve functions over FP2 */ +/* SU=m, m is Stack Usage */ + +#include "amcl.h" + +int ECP2_isinf(ECP2 *P) +{ + return P->inf; +} + +/* Set P=Q */ +/* SU= 16 */ +void ECP2_copy(ECP2 *P,ECP2 *Q) +{ + P->inf=Q->inf; + FP2_copy(&(P->x),&(Q->x)); + FP2_copy(&(P->y),&(Q->y)); + FP2_copy(&(P->z),&(Q->z)); +} + +/* set P to Infinity */ +/* SU= 8 */ +void ECP2_inf(ECP2 *P) +{ + P->inf=1; + FP2_zero(&(P->x)); + FP2_zero(&(P->y)); + FP2_zero(&(P->z)); +} + +/* Conditional move Q to P dependant on d */ +static void ECP2_cmove(ECP2 *P,ECP2 *Q,int d) +{ + FP2_cmove(&(P->x),&(Q->x),d); + FP2_cmove(&(P->y),&(Q->y),d); + FP2_cmove(&(P->z),&(Q->z),d); + d=~(d-1); + P->inf^=(P->inf^Q->inf)&d; +} + +/* return 1 if b==c, no branching */ +static int teq(sign32 b,sign32 c) +{ + sign32 x=b^c; + x-=1; // if x=0, x now -1 + return (int)((x>>31)&1); +} + +/* Constant time select from pre-computed table */ +static void ECP2_select(ECP2 *P,ECP2 W[],sign32 b) +{ + ECP2 MP; + sign32 m=b>>31; + sign32 babs=(b^m)-m; + + babs=(babs-1)/2; + + ECP2_cmove(P,&W[0],teq(babs,0)); // conditional move + ECP2_cmove(P,&W[1],teq(babs,1)); + ECP2_cmove(P,&W[2],teq(babs,2)); + ECP2_cmove(P,&W[3],teq(babs,3)); + ECP2_cmove(P,&W[4],teq(babs,4)); + ECP2_cmove(P,&W[5],teq(babs,5)); + ECP2_cmove(P,&W[6],teq(babs,6)); + ECP2_cmove(P,&W[7],teq(babs,7)); + + ECP2_copy(&MP,P); + ECP2_neg(&MP); // minus P + ECP2_cmove(P,&MP,(int)(m&1)); +} + +/* return 1 if P==Q, else 0 */ +/* SU= 312 */ +int ECP2_equals(ECP2 *P,ECP2 *Q) +{ + FP2 pz2,qz2,a,b; + if (P->inf && Q->inf) return 1; + if (P->inf || Q->inf) return 0; + + FP2_sqr(&pz2,&(P->z)); + FP2_sqr(&qz2,&(Q->z)); + + FP2_mul(&a,&(P->x),&qz2); + FP2_mul(&b,&(Q->x),&pz2); + if (!FP2_equals(&a,&b)) return 0; + + FP2_mul(&a,&(P->y),&qz2); + FP2_mul(&a,&a,&(Q->z)); + FP2_mul(&b,&(Q->y),&pz2); + FP2_mul(&b,&b,&(P->z)); + if (!FP2_equals(&a,&b)) return 0; + return 1; +} + +/* Make P affine (so z=1) */ +/* SU= 232 */ +void ECP2_affine(ECP2 *P) +{ + FP2 one,iz,izn; + if (P->inf) return; + + FP2_one(&one); + if (FP2_isunity(&(P->z))) + { + FP2_reduce(&(P->x)); + FP2_reduce(&(P->y)); + return; + } + + FP2_inv(&iz,&(P->z)); + FP2_sqr(&izn,&iz); + FP2_mul(&(P->x),&(P->x),&izn); + FP2_mul(&izn,&izn,&iz); + FP2_mul(&(P->y),&(P->y),&izn); + + FP2_reduce(&(P->x)); + FP2_reduce(&(P->y)); + FP2_copy(&(P->z),&one); +} + +/* extract x, y from point P */ +/* SU= 16 */ +int ECP2_get(FP2 *x,FP2 *y,ECP2 *P) +{ + if (P->inf) return -1; + ECP2_affine(P); + FP2_copy(y,&(P->y)); + FP2_copy(x,&(P->x)); + return 0; +} + +/* SU= 152 */ +/* Output point P */ +void ECP2_output(ECP2 *P) +{ + FP2 x,y; + if (P->inf) + { + printf("Infinity\n"); + return; + } + ECP2_get(&x,&y,P); + printf("("); + FP2_output(&x); + printf(","); + FP2_output(&y); + printf(")\n"); +} + +/* SU= 232 */ +void ECP2_outputxyz(ECP2 *P) +{ + ECP2 Q; + if (P->inf) + { + printf("Infinity\n"); + return; + } + ECP2_copy(&Q,P); + printf("("); + FP2_output(&(Q.x)); + printf(","); + FP2_output(&(Q.y)); + printf(","); + FP2_output(&(Q.z)); + printf(")\n"); +} + +/* SU= 168 */ +/* Convert Q to octet string */ +void ECP2_toOctet(octet *W,ECP2 *Q) +{ + FP2 qx,qy; + ECP2_get(&qx,&qy,Q); + FP_redc(qx.a); + FP_redc(qx.b); + FP_redc(qy.a); + FP_redc(qy.b); + W->len=4*MODBYTES; + + BIG_toBytes(&(W->val[0]),qx.a); + BIG_toBytes(&(W->val[MODBYTES]),qx.b); + BIG_toBytes(&(W->val[2*MODBYTES]),qy.a); + BIG_toBytes(&(W->val[3*MODBYTES]),qy.b); +} + +/* SU= 176 */ +/* restore Q from octet string */ +int ECP2_fromOctet(ECP2 *Q,octet *W) +{ + FP2 qx,qy; + BIG_fromBytes(qx.a,&(W->val[0])); + BIG_fromBytes(qx.b,&(W->val[MODBYTES])); + BIG_fromBytes(qy.a,&(W->val[2*MODBYTES])); + BIG_fromBytes(qy.b,&(W->val[3*MODBYTES])); + FP_nres(qx.a); + FP_nres(qx.b); + FP_nres(qy.a); + FP_nres(qy.b); + + if (ECP2_set(Q,&qx,&qy)) return 1; + return 0; +} + +/* SU= 128 */ +/* Calculate RHS of twisted curve equation x^3+B/i */ +void ECP2_rhs(FP2 *rhs,FP2 *x) +{ + /* calculate RHS of elliptic curve equation */ + FP2 t; + BIG b; + FP2_sqr(&t,x); + + FP2_mul(rhs,&t,x); + + /* Assuming CURVE_A=0 */ + + BIG_rcopy(b,CURVE_B); + + FP2_from_BIG(&t,b); + + FP2_div_ip(&t); /* IMPORTANT - here we use the SEXTIC twist of the curve */ + + FP2_add(rhs,&t,rhs); + FP2_reduce(rhs); +} + + +/* Set P=(x,y). Return 1 if (x,y) is on the curve, else return 0*/ +/* SU= 232 */ +int ECP2_set(ECP2 *P,FP2 *x,FP2 *y) +{ + FP2 one,rhs,y2; + FP2_copy(&y2,y); + + FP2_sqr(&y2,&y2); + ECP2_rhs(&rhs,x); + + if (!FP2_equals(&y2,&rhs)) + { + + P->inf=1; + return 0; + } + + P->inf=0; + FP2_copy(&(P->x),x); + FP2_copy(&(P->y),y); + + FP2_one(&one); + FP2_copy(&(P->z),&one); + return 1; +} + +/* Set P=(x,y). Return 1 if (x,.) is on the curve, else return 0 */ +/* SU= 232 */ +int ECP2_setx(ECP2 *P,FP2 *x) +{ + FP2 y; + ECP2_rhs(&y,x); + + if (!FP2_sqrt(&y,&y)) + { + P->inf=1; + return 0; + } + + P->inf=0; + FP2_copy(&(P->x),x); + FP2_copy(&(P->y),&y); + FP2_one(&(P->z)); + return 1; +} + +/* Set P=-P */ +/* SU= 8 */ +void ECP2_neg(ECP2 *P) +{ + FP2_neg(&(P->y),&(P->y)); + FP2_norm(&(P->y)); +} + +/* R+=R */ +/* return -1 for Infinity, 0 for addition, 1 for doubling */ +/* SU= 448 */ +int ECP2_dbl(ECP2 *P) +{ + FP2 w1,w7,w8,w2,w3; + if (P->inf) return -1; + + if (FP2_iszilch(&(P->y))) + { + P->inf=1; + return -1; + } + + /* Assuming A=0 */ + FP2_sqr(&w1,&(P->x)); + FP2_imul(&w8,&w1,3); + + FP2_sqr(&w2,&(P->y)); + FP2_mul(&w3,&(P->x),&w2); + FP2_imul(&w3,&w3,4); + + FP2_neg(&w1,&w3); + + FP2_norm(&w1); + + FP2_sqr(&(P->x),&w8); + FP2_add(&(P->x),&(P->x),&w1); + FP2_add(&(P->x),&(P->x),&w1); + + FP2_norm(&(P->x)); + + if (FP2_isunity(&(P->z))) FP2_copy(&(P->z),&(P->y)); + else FP2_mul(&(P->z),&(P->z),&(P->y)); + FP2_add(&(P->z),&(P->z),&(P->z)); + + FP2_add(&w7,&w2,&w2); + FP2_sqr(&w2,&w7); + + FP2_add(&w2,&w2,&w2); + FP2_sub(&w3,&w3,&(P->x)); + FP2_mul(&(P->y),&w8,&w3); + FP2_sub(&(P->y),&(P->y),&w2); + + + FP2_norm(&(P->y)); + FP2_norm(&(P->z)); + + return 1; +} + +/* Set P+=Q */ +/* SU= 400 */ +int ECP2_add(ECP2 *P,ECP2 *Q) +{ + int aff; + FP2 B,D,E,C,A; + if (Q->inf) return 0; + if (P->inf) + { + ECP2_copy(P,Q); + return 0; + } + + aff=1; + if (!FP2_isunity(&(Q->z))) aff=0; + + if (!aff) + { + FP2_sqr(&A,&(Q->z)); + FP2_mul(&C,&A,&(Q->z)); + + FP2_sqr(&B,&(P->z)); + FP2_mul(&D,&B,&(P->z)); + + FP2_mul(&A,&(P->x),&A); + FP2_mul(&C,&(P->y),&C); + } + else + { + FP2_copy(&A,&(P->x)); + FP2_copy(&C,&(P->y)); + + FP2_sqr(&B,&(P->z)); + FP2_mul(&D,&B,&(P->z)); + } + + FP2_mul(&B,&(Q->x),&B); + FP2_sub(&B,&B,&A); /* B=Qx.z^2-x.Qz^2 */ + FP2_mul(&D,&(Q->y),&D); + FP2_sub(&D,&D,&C); /* D=Qy.z^3-y.Qz^3 */ + + if (FP2_iszilch(&B)) + { + if (FP2_iszilch(&D)) + { + ECP2_dbl(P); + return 1; + } + else + { + ECP2_inf(P); + return -1; + } + } + if (!aff) FP2_mul(&(P->z),&(P->z),&(Q->z)); + FP2_mul(&(P->z),&(P->z),&B); + + FP2_sqr(&E,&B); + FP2_mul(&B,&B,&E); + FP2_mul(&A,&A,&E); + + FP2_add(&E,&A,&A); + FP2_add(&E,&E,&B); + + FP2_sqr(&(P->x),&D); + FP2_sub(&(P->x),&(P->x),&E); + + FP2_sub(&A,&A,&(P->x)); + FP2_mul(&(P->y),&A,&D); + FP2_mul(&C,&C,&B); + FP2_sub(&(P->y),&(P->y),&C); + + FP2_norm(&(P->x)); + FP2_norm(&(P->y)); + FP2_norm(&(P->z)); + + return 0; +} + +/* Set P-=Q */ +/* SU= 16 */ +void ECP2_sub(ECP2 *P,ECP2 *Q) +{ + ECP2_neg(Q); + ECP2_add(P,Q); + ECP2_neg(Q); +} + +/* normalises m-array of ECP2 points. Requires work vector of m FP2s */ +/* SU= 200 */ +static void ECP2_multiaffine(int m,ECP2 *P,FP2 *work) +{ + int i; + FP2 t1,t2; + + FP2_one(&work[0]); + FP2_copy(&work[1],&(P[0].z)); + for (i=2; i<m; i++) + FP2_mul(&work[i],&work[i-1],&(P[i-1].z)); + FP2_mul(&t1,&work[m-1],&(P[m-1].z)); + + FP2_inv(&t1,&t1); + + FP2_copy(&t2,&(P[m-1].z)); + FP2_mul(&work[m-1],&work[m-1],&t1); + + for (i=m-2;; i--) + { + if (i==0) + { + FP2_mul(&work[0],&t1,&t2); + break; + } + FP2_mul(&work[i],&work[i],&t2); + FP2_mul(&work[i],&work[i],&t1); + FP2_mul(&t2,&(P[i].z),&t2); + } + /* now work[] contains inverses of all Z coordinates */ + + for (i=0; i<m; i++) + { + FP2_one(&(P[i].z)); + FP2_sqr(&t1,&work[i]); + FP2_mul(&(P[i].x),&(P[i].x),&t1); + FP2_mul(&t1,&work[i],&t1); + FP2_mul(&(P[i].y),&(P[i].y),&t1); + } +} + +/* P*=e */ +/* SU= 280 */ +void ECP2_mul(ECP2 *P,BIG e) +{ + /* fixed size windows */ + int i,nb,s,ns; + BIG mt,t; + ECP2 Q,W[8],C; + sign8 w[1+(NLEN*BASEBITS+3)/4]; + FP2 work[8]; + + if (ECP2_isinf(P)) return; + ECP2_affine(P); + + + /* precompute table */ + + ECP2_copy(&Q,P); + ECP2_dbl(&Q); + ECP2_copy(&W[0],P); + + for (i=1; i<8; i++) + { + ECP2_copy(&W[i],&W[i-1]); + ECP2_add(&W[i],&Q); + } + + /* convert the table to affine */ + + ECP2_multiaffine(8,W,work); + + /* make exponent odd - add 2P if even, P if odd */ + BIG_copy(t,e); + s=BIG_parity(t); + BIG_inc(t,1); + BIG_norm(t); + ns=BIG_parity(t); + BIG_copy(mt,t); + BIG_inc(mt,1); + BIG_norm(mt); + BIG_cmove(t,mt,s); + ECP2_cmove(&Q,P,ns); + ECP2_copy(&C,&Q); + + nb=1+(BIG_nbits(t)+3)/4; + + /* convert exponent to signed 4-bit window */ + for (i=0; i<nb; i++) + { + w[i]=BIG_lastbits(t,5)-16; + BIG_dec(t,w[i]); + BIG_norm(t); + BIG_fshr(t,4); + } + w[nb]=BIG_lastbits(t,5); + + ECP2_copy(P,&W[(w[nb]-1)/2]); + for (i=nb-1; i>=0; i--) + { + ECP2_select(&Q,W,w[i]); + ECP2_dbl(P); + ECP2_dbl(P); + ECP2_dbl(P); + ECP2_dbl(P); + ECP2_add(P,&Q); + } + ECP2_sub(P,&C); /* apply correction */ + ECP2_affine(P); +} + +/* Calculates q.P using Frobenius constant X */ +/* SU= 96 */ +void ECP2_frob(ECP2 *P,FP2 *X) +{ + FP2 X2; + if (P->inf) return; + FP2_sqr(&X2,X); + FP2_conj(&(P->x),&(P->x)); + FP2_conj(&(P->y),&(P->y)); + FP2_conj(&(P->z),&(P->z)); + FP2_reduce(&(P->z)); + + FP2_mul(&(P->x),&X2,&(P->x)); + FP2_mul(&(P->y),&X2,&(P->y)); + FP2_mul(&(P->y),X,&(P->y)); +} + +void ECP2_mul4(ECP2 *P,ECP2 Q[4],BIG u[4]) +{ + int i,j,a[4],nb; + ECP2 W[8],T,C; + BIG mt,t[4]; + FP2 work[8]; + sign8 w[NLEN*BASEBITS+1]; + + for (i=0; i<4; i++) + { + BIG_copy(t[i],u[i]); + ECP2_affine(&Q[i]); + } + + /* precompute table */ + + ECP2_copy(&W[0],&Q[0]); + ECP2_sub(&W[0],&Q[1]); /* P-Q */ + ECP2_copy(&W[1],&W[0]); + ECP2_copy(&W[2],&W[0]); + ECP2_copy(&W[3],&W[0]); + ECP2_copy(&W[4],&Q[0]); + ECP2_add(&W[4],&Q[1]); /* P+Q */ + ECP2_copy(&W[5],&W[4]); + ECP2_copy(&W[6],&W[4]); + ECP2_copy(&W[7],&W[4]); + + ECP2_copy(&T,&Q[2]); + ECP2_sub(&T,&Q[3]); /* R-S */ + ECP2_sub(&W[1],&T); + ECP2_add(&W[2],&T); + ECP2_sub(&W[5],&T); + ECP2_add(&W[6],&T); + ECP2_copy(&T,&Q[2]); + ECP2_add(&T,&Q[3]); /* R+S */ + ECP2_sub(&W[0],&T); + ECP2_add(&W[3],&T); + ECP2_sub(&W[4],&T); + ECP2_add(&W[7],&T); + + ECP2_multiaffine(8,W,work); + + /* if multiplier is even add 1 to multiplier, and add P to correction */ + ECP2_inf(&C); + + BIG_zero(mt); + for (i=0; i<4; i++) + { + if (BIG_parity(t[i])==0) + { + BIG_inc(t[i],1); + BIG_norm(t[i]); + ECP2_add(&C,&Q[i]); + } + BIG_add(mt,mt,t[i]); + BIG_norm(mt); + } + + nb=1+BIG_nbits(mt); + + /* convert exponent to signed 1-bit window */ + for (j=0; j<nb; j++) + { + for (i=0; i<4; i++) + { + a[i]=BIG_lastbits(t[i],2)-2; + BIG_dec(t[i],a[i]); + BIG_norm(t[i]); + BIG_fshr(t[i],1); + } + w[j]=8*a[0]+4*a[1]+2*a[2]+a[3]; + } + w[nb]=8*BIG_lastbits(t[0],2)+4*BIG_lastbits(t[1],2)+2*BIG_lastbits(t[2],2)+BIG_lastbits(t[3],2); + + ECP2_copy(P,&W[(w[nb]-1)/2]); + for (i=nb-1; i>=0; i--) + { + ECP2_select(&T,W,w[i]); + ECP2_dbl(P); + ECP2_add(P,&T); + } + ECP2_sub(P,&C); /* apply correction */ + + ECP2_affine(P); +} + +/* + +int main() +{ + int i; + ECP2 G,P; + ECP2 *W; + FP2 x,y,w,z,f; + BIG r,xa,xb,ya,yb; + + BIG_rcopy(xa,CURVE_Pxa); + BIG_rcopy(xb,CURVE_Pxb); + BIG_rcopy(ya,CURVE_Pya); + BIG_rcopy(yb,CURVE_Pyb); + + FP2_from_BIGs(&x,xa,xb); + FP2_from_BIGs(&y,ya,yb); + ECP2_set(&G,&x,&y); + if (G.inf) printf("Failed to set - point not on curve\n"); + else printf("set success\n"); + + ECP2_output(&G); + +// BIG_copy(r,CURVE_Order); + BIG_rcopy(r,Modulus); + + ECP2_copy(&P,&G); + + ECP2_mul(&P,r); + + ECP2_output(&P); + + FP2_gfc(&f,12); + + ECP2_frob(&G,&f); + + ECP2_output(&G); + + return 0; +} + +*/ http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/faster.c ---------------------------------------------------------------------- diff --git a/version22/c/faster.c b/version22/c/faster.c new file mode 100644 index 0000000..7786880 --- /dev/null +++ b/version22/c/faster.c @@ -0,0 +1,98 @@ + +#include <stdio.h> +#include "amcl.h" + +#ifdef COMBA + +int main() +{ + int i,j,k,N; + + N=NLEN; + + + printf("Insert this code in BIG_mul() in file big.c between #define UNWOUND and #else \n\n"); + + for (i=0;i<N;i++) + printf("\td[%d]=(dchunk)a[%d]*b[%d];\n",i,i,i); + + printf("\n\ts=d[0];\n\tt = s; c[0]=(chunk)t&BMASK; co=t>>BASEBITS;\n"); + + for (k=1;k<N;k++) + { + printf("\ts+=d[%d]; t=co+s ",k); + for (i=k;i>=1+k/2;i--) + printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i); + printf("; c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n",k); + } + printf("\n"); + for (k=N;k<2*N-1;k++) + { + printf("\ts-=d[%d]; t=co+s ",k-N); + for (i=N-1;i>=1+k/2;i--) + printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i); + printf("; c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n",k); + } + printf("\tc[%d]=(chunk)co;\n",2*N-1); + + + + printf("\nInsert this code in BIG_sqr() in file big.c between #define UNWOUND and #else \n\n"); + + printf("\n\tt=(dchunk)a[0]*a[0]; c[0]=(chunk)t&BMASK; co=t>>BASEBITS;\n"); + + for (k=1;k<N;k++) + { + printf("\tt= ",k); + for (i=k;i>=1+k/2;i--) + printf("+(dchunk)a[%d]*a[%d]",i,k-i); + printf("; t+=t; t+=co;"); + if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2); + printf(" c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n", k); + } + printf("\n"); + + for (k=N;k<2*N-2;k++) + { + printf("\tt= ",k-N); + for (i=N-1;i>=1+k/2;i--) + printf("+(dchunk)a[%d]*a[%d]",i,k-i); + printf("; t+=t; t+=co;"); + if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2); + printf(" c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n", k); + } + printf("\tt=co; t+=(dchunk)a[%d]*a[%d]; c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n ",N-1,N-1,2*N-2); + + printf("\tc[%d]=(chunk)co;\n",2*N-1); + + +#if MODTYPE == NOT_SPECIAL + + printf("\nInsert this code in BIG_monty() in file big.c between #define UNWOUND and #else \n\n"); + + printf("\tt=d[0]; v[0]=((chunk)t*MC)&BMASK; t+=(dchunk)v[0]*md[0]; s=0; c=(t>>BASEBITS);\n\n"); + + for (k=1;k<N;k++) + { + printf("\tt=d[%d]+c+s+(dchunk)v[0]*md[%d]",k,k); + for (i=k-1;i>k/2;i--) printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i); + printf("; v[%d]=((chunk)t*MC)&BMASK; t+=(dchunk)v[%d]*md[0]; ",k,k); + printf(" dd[%d]=(dchunk)v[%d]*md[%d]; s+=dd[%d]; c=(t>>BASEBITS); \n",k,k,k,k); + } + printf("\n"); + for (k=N;k<2*N-1;k++) + { + printf("\tt=d[%d]+c+s",k); + for (i=N-1;i>=1+k/2;i--) printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i); + printf("; a[%d]=(chunk)t&BMASK; s-=dd[%d]; c=(t>>BASEBITS); \n",k-N,k-N+1); + } + printf("\ta[%d]=d[%d]+(chunk)c&BMASK;\n",N-1,2*N-1); + + +#endif + +} + +#endif + + http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/faster.txt ---------------------------------------------------------------------- diff --git a/version22/c/faster.txt b/version22/c/faster.txt new file mode 100644 index 0000000..6995eab --- /dev/null +++ b/version22/c/faster.txt @@ -0,0 +1,25 @@ +We assume than optimizing compilers will unwind loops at every opportunity. + +But sometimes they don't. So time-critical code will run faster if we step +in and unwind complex loops for the compiler. + +Once the architecture and ECC/RSA support is decided upon (that is amcl.h +and arch.h are settled), then compile and execute the program faster.c like +this (using MinGW port of GCC as an example), in the same directory as +arch.h and amcl.h + +gcc -O2 -std=c99 faster.c -o faster.exe +faster > t.txt + +Now extract the code fragments from t.txt and insert them where indicated +into big.c + +Finally make sure that + +#define UNWOUND + +appears somewhere in amcl.h + +Finally build the library as normal, and maybe get a 50% speed-up! +If there is no significant improvement, don't use this method! + http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ff.c ---------------------------------------------------------------------- diff --git a/version22/c/ff.c b/version22/c/ff.c new file mode 100644 index 0000000..3ae7029 --- /dev/null +++ b/version22/c/ff.c @@ -0,0 +1,1150 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL basic functions for Large Finite Field support */ + +#include "amcl.h" + +/* Arazi and Qi inversion mod 256 */ +static int invmod256(int a) +{ + int U,t1,t2,b,c; + t1=0; + c=(a>>1)&1; + t1+=c; + t1&=1; + t1=2-t1; + t1<<=1; + U=t1+1; + +// i=2 + b=a&3; + t1=U*b; + t1>>=2; + c=(a>>2)&3; + t2=(U*c)&3; + t1+=t2; + t1*=U; + t1&=3; + t1=4-t1; + t1<<=2; + U+=t1; + +// i=4 + b=a&15; + t1=U*b; + t1>>=4; + c=(a>>4)&15; + t2=(U*c)&15; + t1+=t2; + t1*=U; + t1&=15; + t1=16-t1; + t1<<=4; + U+=t1; + + return U; +} + +/* a=1/a mod 2^BIGBITS. This is very fast! */ +void BIG_invmod2m(BIG a) +{ + int i; + BIG U,t1,b,c; + BIG_zero(U); + BIG_inc(U,invmod256(BIG_lastbits(a,8))); + for (i=8; i<BIGBITS; i<<=1) + { + BIG_copy(b,a); + BIG_mod2m(b,i); // bottom i bits of a + + BIG_smul(t1,U,b); + BIG_shr(t1,i); // top i bits of U*b + + BIG_copy(c,a); + BIG_shr(c,i); + BIG_mod2m(c,i); // top i bits of a + + BIG_smul(b,U,c); + BIG_mod2m(b,i); // bottom i bits of U*c + + BIG_add(t1,t1,b); + BIG_smul(b,t1,U); + BIG_copy(t1,b); // (t1+b)*U + BIG_mod2m(t1,i); // bottom i bits of (t1+b)*U + + BIG_one(b); + BIG_shl(b,i); + BIG_sub(t1,b,t1); + BIG_norm(t1); + + BIG_shl(t1,i); + + BIG_add(U,U,t1); + } + BIG_copy(a,U); + BIG_norm(a); + BIG_mod2m(a,BIGBITS); +} + +/* +void FF_rcopy(BIG x[],const BIG y[],int n) +{ + int i; + for (i=0;i<n;i++) + BIG_rcopy(x[i],y[i]); +} +*/ + +/* x=y */ +void FF_copy(BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_copy(x[i],y[i]); +} + +/* x=y<<n */ +static void FF_dsucopy(BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_copy(x[n+i],y[i]); + BIG_zero(x[i]); + } +} + +/* x=y */ +static void FF_dscopy(BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_copy(x[i],y[i]); + BIG_zero(x[n+i]); + } +} + +/* x=y>>n */ +static void FF_sducopy(BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_copy(x[i],y[n+i]); +} + +/* set to zero */ +void FF_zero(BIG x[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_zero(x[i]); +} + +/* test equals 0 */ +int FF_iszilch(BIG x[],int n) +{ + int i; + for (i=0; i<n; i++) + if (!BIG_iszilch(x[i])) return 0; + return 1; +} + +/* shift right by BIGBITS-bit words */ +static void FF_shrw(BIG a[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_copy(a[i],a[i+n]); + BIG_zero(a[i+n]); + } +} + +/* shift left by BIGBITS-bit words */ +static void FF_shlw(BIG a[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_copy(a[i+n],a[i]); + BIG_zero(a[i]); + } +} + +/* extract last bit */ +int FF_parity(BIG x[]) +{ + return BIG_parity(x[0]); +} + +/* extract last m bits */ +int FF_lastbits(BIG x[],int m) +{ + return BIG_lastbits(x[0],m); +} + +/* x=1 */ +void FF_one(BIG x[],int n) +{ + int i; + BIG_one(x[0]); + for (i=1; i<n; i++) + BIG_zero(x[i]); +} + +/* x=m, where m is 32-bit int */ +void FF_init(BIG x[],sign32 m,int n) +{ + int i; + BIG_zero(x[0]); +#if CHUNK<64 + x[0][0]=(chunk)(m&BMASK); + x[0][1]=(chunk)(m>>BASEBITS); +#else + x[0][0]=(chunk)m; +#endif + for (i=1; i<n; i++) + BIG_zero(x[i]); +} + +/* compare x and y - must be normalised */ +int FF_comp(BIG x[],BIG y[],int n) +{ + int i,j; + for (i=n-1; i>=0; i--) + { + j=BIG_comp(x[i],y[i]); + if (j!=0) return j; + } + return 0; +} + +/* recursive add */ +static void FF_radd(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_add(z[zp+i],x[xp+i],y[yp+i]); +} + +/* recursive inc */ +static void FF_rinc(BIG z[],int zp,BIG y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_add(z[zp+i],z[zp+i],y[yp+i]); +} + +/* recursive sub */ +/* +static void FF_rsub(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,int n) +{ + int i; + for (i=0;i<n;i++) + BIG_sub(z[zp+i],x[xp+i],y[yp+i]); +} +*/ + +/* recursive dec */ +static void FF_rdec(BIG z[],int zp,BIG y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_sub(z[zp+i],z[zp+i],y[yp+i]); +} + +/* simple add */ +void FF_add(BIG z[],BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_add(z[i],x[i],y[i]); +} + +/* simple sub */ +void FF_sub(BIG z[],BIG x[],BIG y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_sub(z[i],x[i],y[i]); +} + +/* increment/decrement by a small integer */ +void FF_inc(BIG x[],int m,int n) +{ + BIG_inc(x[0],m); + FF_norm(x,n); +} + +void FF_dec(BIG x[],int m,int n) +{ + BIG_dec(x[0],m); + FF_norm(x,n); +} + +/* normalise - but hold any overflow in top part unless n<0 */ +static void FF_rnorm(BIG z[],int zp,int n) +{ + int i,trunc=0; + chunk carry; + if (n<0) + { + /* -v n signals to do truncation */ + n=-n; + trunc=1; + } + for (i=0; i<n-1; i++) + { + carry=BIG_norm(z[zp+i]); + + z[zp+i][NLEN-1]^=carry<<P_TBITS; /* remove it */ + z[zp+i+1][0]+=carry; + } + carry=BIG_norm(z[zp+n-1]); + if (trunc) z[zp+n-1][NLEN-1]^=carry<<P_TBITS; +} + +void FF_norm(BIG z[],int n) +{ + FF_rnorm(z,0,n); +} + +/* shift left by one bit */ +void FF_shl(BIG x[],int n) +{ + int i; + int carry,delay_carry=0; + for (i=0; i<n-1; i++) + { + carry=BIG_fshl(x[i],1); + x[i][0]|=delay_carry; + x[i][NLEN-1]^=(chunk)carry<<P_TBITS; + delay_carry=carry; + } + BIG_fshl(x[n-1],1); + x[n-1][0]|=delay_carry; +} + +/* shift right by one bit */ +void FF_shr(BIG x[],int n) +{ + int i; + int carry; + for (i=n-1; i>0; i--) + { + carry=BIG_fshr(x[i],1); + x[i-1][NLEN-1]|=(chunk)carry<<P_TBITS; + } + BIG_fshr(x[0],1); +} + +void FF_output(BIG x[],int n) +{ + int i; + FF_norm(x,n); + for (i=n-1; i>=0; i--) + { + BIG_output(x[i]); + printf(" "); + } +} + +void FF_rawoutput(BIG x[],int n) +{ + int i; + for (i=n-1; i>=0; i--) + { + BIG_rawoutput(x[i]); + printf(" "); + } +} + +/* Convert FFs to/from octet strings */ +void FF_toOctet(octet *w,BIG x[],int n) +{ + int i; + w->len=n*MODBYTES; + for (i=0; i<n; i++) + { + BIG_toBytes(&(w->val[(n-i-1)*MODBYTES]),x[i]); + } +} + +void FF_fromOctet(BIG x[],octet *w,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_fromBytes(x[i],&(w->val[(n-i-1)*MODBYTES])); + } +} + +/* in-place swapping using xor - side channel resistant */ +static void FF_cswap(BIG a[],BIG b[],int d,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_cswap(a[i],b[i],d); + return; +} + +/* z=x*y, t is workspace */ +static void FF_karmul(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,BIG t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_mul(t[tp],x[xp],y[yp]); + BIG_split(z[zp+1],z[zp],t[tp],BIGBITS); + return; + } + + nd2=n/2; + FF_radd(z,zp,x,xp,x,xp+nd2,nd2); + FF_rnorm(z,zp,nd2); /* needs this if recursion level too deep */ + + FF_radd(z,zp+nd2,y,yp,y,yp+nd2,nd2); + FF_rnorm(z,zp+nd2,nd2); + FF_karmul(t,tp,z,zp,z,zp+nd2,t,tp+n,nd2); + FF_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_karmul(z,zp+n,x,xp+nd2,y,yp+nd2,t,tp+n,nd2); + FF_rdec(t,tp,z,zp,n); + FF_rdec(t,tp,z,zp+n,n); + FF_rinc(z,zp+nd2,t,tp,n); + FF_rnorm(z,zp,2*n); +} + +static void FF_karsqr(BIG z[],int zp,BIG x[],int xp,BIG t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_sqr(t[tp],x[xp]); + BIG_split(z[zp+1],z[zp],t[tp],BIGBITS); + return; + } + nd2=n/2; + FF_karsqr(z,zp,x,xp,t,tp+n,nd2); + FF_karsqr(z,zp+n,x,xp+nd2,t,tp+n,nd2); + FF_karmul(t,tp,x,xp,x,xp+nd2,t,tp+n,nd2); + FF_rinc(z,zp+nd2,t,tp,n); + FF_rinc(z,zp+nd2,t,tp,n); + + FF_rnorm(z,zp+nd2,n); /* was FF_rnorm(z,zp,2*n) */ +} + +static void FF_karmul_lower(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,BIG t[],int tp,int n) +{ + /* Calculates Least Significant bottom half of x*y */ + int nd2; + if (n==1) + { + /* only calculate bottom half of product */ + BIG_smul(z[zp],x[xp],y[yp]); + return; + } + nd2=n/2; + FF_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_karmul_lower(t,tp,x,xp+nd2,y,yp,t,tp+n,nd2); + FF_rinc(z,zp+nd2,t,tp,nd2); + FF_karmul_lower(t,tp,x,xp,y,yp+nd2,t,tp+n,nd2); + FF_rinc(z,zp+nd2,t,tp,nd2); + FF_rnorm(z,zp+nd2,-nd2); /* truncate it */ +} + +static void FF_karmul_upper(BIG z[],BIG x[],BIG y[],BIG t[],int n) +{ + /* Calculates Most Significant upper half of x*y, given lower part */ + int nd2; + + nd2=n/2; + FF_radd(z,n,x,0,x,nd2,nd2); + FF_radd(z,n+nd2,y,0,y,nd2,nd2); + FF_rnorm(z,n,nd2); + FF_rnorm(z,n+nd2,nd2); + + FF_karmul(t,0,z,n+nd2,z,n,t,n,nd2); /* t = (a0+a1)(b0+b1) */ + FF_karmul(z,n,x,nd2,y,nd2,t,n,nd2); /* z[n]= a1*b1 */ + /* z[0-nd2]=l(a0b0) z[nd2-n]= h(a0b0)+l(t)-l(a0b0)-l(a1b1) */ + FF_rdec(t,0,z,n,n); /* t=t-a1b1 */ + FF_rinc(z,nd2,z,0,nd2); /* z[nd2-n]+=l(a0b0) = h(a0b0)+l(t)-l(a1b1) */ + FF_rdec(z,nd2,t,0,nd2); /* z[nd2-n]=h(a0b0)+l(t)-l(a1b1)-l(t-a1b1)=h(a0b0) */ + FF_rnorm(z,0,-n); /* a0b0 now in z - truncate it */ + FF_rdec(t,0,z,0,n); /* (a0+a1)(b0+b1) - a0b0 */ + FF_rinc(z,nd2,t,0,n); + + FF_rnorm(z,nd2,n); +} + +/* z=x*y */ +void FF_mul(BIG z[],BIG x[],BIG y[],int n) +{ +#ifndef C99 + BIG t[2*FFLEN]; +#else + BIG t[2*n]; +#endif +// FF_norm(x,n); /* change here */ +// FF_norm(y,n); /* change here */ + FF_karmul(z,0,x,0,y,0,t,0,n); +} + +/* return low part of product */ +static void FF_lmul(BIG z[],BIG x[],BIG y[],int n) +{ +#ifndef C99 + BIG t[2*FFLEN]; +#else + BIG t[2*n]; +#endif +// FF_norm(x,n); /* change here */ +// FF_norm(y,n); /* change here */ + FF_karmul_lower(z,0,x,0,y,0,t,0,n); +} + +/* Set b=b mod c */ +void FF_mod(BIG b[],BIG c[],int n) +{ + int k=0; + + FF_norm(b,n); + if (FF_comp(b,c,n)<0) + return; + do + { + FF_shl(c,n); + k++; + } + while (FF_comp(b,c,n)>=0); + + while (k>0) + { + FF_shr(c,n); + if (FF_comp(b,c,n)>=0) + { + FF_sub(b,b,c,n); + FF_norm(b,n); + } + k--; + } +} + +/* z=x^2 */ +void FF_sqr(BIG z[],BIG x[],int n) +{ +#ifndef C99 + BIG t[2*FFLEN]; +#else + BIG t[2*n]; +#endif +// FF_norm(x,n); /* change here */ + FF_karsqr(z,0,x,0,t,0,n); +} + +/* r=t mod modulus, N is modulus, ND is Montgomery Constant */ +static void FF_reduce(BIG r[],BIG T[],BIG N[],BIG ND[],int n) +{ + /* fast karatsuba Montgomery reduction */ +#ifndef C99 + BIG t[2*FFLEN]; + BIG m[FFLEN]; +#else + BIG t[2*n]; + BIG m[n]; +#endif + FF_sducopy(r,T,n); /* keep top half of T */ + //FF_norm(T,n); /* change here */ + FF_karmul_lower(m,0,T,0,ND,0,t,0,n); /* m=T.(1/N) mod R */ + + //FF_norm(N,n); /* change here */ + FF_karmul_upper(T,N,m,t,n); /* T=mN */ + FF_sducopy(m,T,n); + + FF_add(r,r,N,n); + FF_sub(r,r,m,n); + FF_norm(r,n); +} + + +/* Set r=a mod b */ +/* a is of length - 2*n */ +/* r,b is of length - n */ +void FF_dmod(BIG r[],BIG a[],BIG b[],int n) +{ + int k; +#ifndef C99 + BIG m[2*FFLEN]; + BIG x[2*FFLEN]; +#else + BIG m[2*n]; + BIG x[2*n]; +#endif + FF_copy(x,a,2*n); + FF_norm(x,2*n); + FF_dsucopy(m,b,n); + k=BIGBITS*n; + + while (FF_comp(x,m,2*n)>=0) + { + FF_sub(x,x,m,2*n); + FF_norm(x,2*n); + } + + while (k>0) + { + FF_shr(m,2*n); + + if (FF_comp(x,m,2*n)>=0) + { + FF_sub(x,x,m,2*n); + FF_norm(x,2*n); + } + + k--; + } + FF_copy(r,x,n); + FF_mod(r,b,n); +} + +/* Set r=1/a mod p. Binary method - a<p on entry */ + +void FF_invmodp(BIG r[],BIG a[],BIG p[],int n) +{ +#ifndef C99 + BIG u[FFLEN],v[FFLEN],x1[FFLEN],x2[FFLEN],t[FFLEN],one[FFLEN]; +#else + BIG u[n],v[n],x1[n],x2[n],t[n],one[n]; +#endif + FF_copy(u,a,n); + FF_copy(v,p,n); + FF_one(one,n); + FF_copy(x1,one,n); + FF_zero(x2,n); + +// reduce n in here as well! + while (FF_comp(u,one,n)!=0 && FF_comp(v,one,n)!=0) + { + while (FF_parity(u)==0) + { + FF_shr(u,n); + if (FF_parity(x1)!=0) + { + FF_add(x1,p,x1,n); + FF_norm(x1,n); + } + FF_shr(x1,n); + } + while (FF_parity(v)==0) + { + FF_shr(v,n); + if (FF_parity(x2)!=0) + { + FF_add(x2,p,x2,n); + FF_norm(x2,n); + } + FF_shr(x2,n); + } + if (FF_comp(u,v,n)>=0) + { + + FF_sub(u,u,v,n); + FF_norm(u,n); + if (FF_comp(x1,x2,n)>=0) FF_sub(x1,x1,x2,n); + else + { + FF_sub(t,p,x2,n); + FF_add(x1,x1,t,n); + } + FF_norm(x1,n); + } + else + { + FF_sub(v,v,u,n); + FF_norm(v,n); + if (FF_comp(x2,x1,n)>=0) FF_sub(x2,x2,x1,n); + else + { + FF_sub(t,p,x1,n); + FF_add(x2,x2,t,n); + } + FF_norm(x2,n); + } + } + if (FF_comp(u,one,n)==0) + FF_copy(r,x1,n); + else + FF_copy(r,x2,n); +} + +/* nesidue mod m */ +static void FF_nres(BIG a[],BIG m[],int n) +{ +#ifndef C99 + BIG d[2*FFLEN]; +#else + BIG d[2*n]; +#endif + + if (n==1) + { + BIG_dscopy(d[0],a[0]); + BIG_dshl(d[0],NLEN*BASEBITS); + BIG_dmod(a[0],d[0],m[0]); + } + else + { + FF_dsucopy(d,a,n); + FF_dmod(a,d,m,n); + } +} + +static void FF_redc(BIG a[],BIG m[],BIG ND[],int n) +{ +#ifndef C99 + BIG d[2*FFLEN]; +#else + BIG d[2*n]; +#endif + if (n==1) + { + BIG_dzero(d[0]); + BIG_dscopy(d[0],a[0]); + BIG_monty(a[0],m[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]); + } + else + { + FF_mod(a,m,n); + FF_dscopy(d,a,n); + FF_reduce(a,d,m,ND,n); + FF_mod(a,m,n); + } +} + +/* U=1/a mod 2^m - Arazi & Qi */ +static void FF_invmod2m(BIG U[],BIG a[],int n) +{ + int i; +#ifndef C99 + BIG t1[FFLEN],b[FFLEN],c[FFLEN]; +#else + BIG t1[2*n],b[n],c[n]; +#endif + + FF_zero(U,n); + FF_zero(b,n); + FF_zero(c,n); + FF_zero(t1,2*n); + + BIG_copy(U[0],a[0]); + BIG_invmod2m(U[0]); + for (i=1; i<n; i<<=1) + { + FF_copy(b,a,i); + FF_mul(t1,U,b,i); + FF_shrw(t1,i); // top half to bottom half, top half=0 + + FF_copy(c,a,2*i); + FF_shrw(c,i); // top half of c + FF_lmul(b,U,c,i); // should set top half of b=0 + FF_add(t1,t1,b,i); + FF_norm(t1,2*i); + FF_lmul(b,t1,U,i); + FF_copy(t1,b,i); + FF_one(b,i); + FF_shlw(b,i); + FF_sub(t1,b,t1,2*i); + FF_norm(t1,2*i); + FF_shlw(t1,i); + FF_add(U,U,t1,2*i); + } + + FF_norm(U,n); +} + +void FF_random(BIG x[],csprng *rng,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_random(x[i],rng); + } + /* make sure top bit is 1 */ + while (BIG_nbits(x[n-1])<MODBYTES*8) BIG_random(x[n-1],rng); +} + +/* generate random x mod p */ +void FF_randomnum(BIG x[],BIG p[],csprng *rng,int n) +{ + int i; +#ifndef C99 + BIG d[2*FFLEN]; +#else + BIG d[2*n]; +#endif + for (i=0; i<2*n; i++) + { + BIG_random(d[i],rng); + } + FF_dmod(x,d,p,n); +} + +static void FF_modmul(BIG z[],BIG x[],BIG y[],BIG p[],BIG ND[],int n) +{ +#ifndef C99 + BIG d[2*FFLEN]; +#else + BIG d[2*n]; +#endif + chunk ex=P_EXCESS(x[n-1]); + chunk ey=P_EXCESS(y[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ey+1)>(dchunk)P_FEXCESS) +#else + if ((ex+1)>P_FEXCESS/(ey+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d %d\n",ex,ey); +#endif + FF_mod(x,p,n); + } + + if (n==1) + { + BIG_mul(d[0],x[0],y[0]); + BIG_monty(z[0],p[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]); + } + else + { + FF_mul(d,x,y,n); + FF_reduce(z,d,p,ND,n); + } +} + +static void FF_modsqr(BIG z[],BIG x[],BIG p[],BIG ND[],int n) +{ +#ifndef C99 + BIG d[2*FFLEN]; +#else + BIG d[2*n]; +#endif + chunk ex=P_EXCESS(x[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ex+1)>(dchunk)P_FEXCESS) +#else + if ((ex+1)>P_FEXCESS/(ex+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d\n",ex); +#endif + FF_mod(x,p,n); + } + + if (n==1) + { + BIG_sqr(d[0],x[0]); + BIG_monty(z[0],p[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]); + } + else + { + FF_sqr(d,x,n); + FF_reduce(z,d,p,ND,n); + } +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for large e */ +void FF_skpow(BIG r[],BIG x[],BIG e[],BIG p[],int n) +{ + int i,b; +#ifndef C99 + BIG R0[FFLEN],R1[FFLEN],ND[FFLEN]; +#else + BIG R0[n],R1[n],ND[n]; +#endif + FF_invmod2m(ND,p,n); + + FF_one(R0,n); + FF_copy(R1,x,n); + FF_nres(R0,p,n); + FF_nres(R1,p,n); + + for (i=8*MODBYTES*n-1; i>=0; i--) + { + b=BIG_bit(e[i/BIGBITS],i%BIGBITS); + FF_modmul(r,R0,R1,p,ND,n); + + FF_cswap(R0,R1,b,n); + FF_modsqr(R0,R0,p,ND,n); + + FF_copy(R1,r,n); + FF_cswap(R0,R1,b,n); + } + FF_copy(r,R0,n); + FF_redc(r,p,ND,n); +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for short e */ +void FF_skspow(BIG r[],BIG x[],BIG e,BIG p[],int n) +{ + int i,b; +#ifndef C99 + BIG R0[FFLEN],R1[FFLEN],ND[FFLEN]; +#else + BIG R0[n],R1[n],ND[n]; +#endif + FF_invmod2m(ND,p,n); + FF_one(R0,n); + FF_copy(R1,x,n); + FF_nres(R0,p,n); + FF_nres(R1,p,n); + for (i=8*MODBYTES-1; i>=0; i--) + { + b=BIG_bit(e,i); + FF_modmul(r,R0,R1,p,ND,n); + FF_cswap(R0,R1,b,n); + FF_modsqr(R0,R0,p,ND,n); + FF_copy(R1,r,n); + FF_cswap(R0,R1,b,n); + } + FF_copy(r,R0,n); + FF_redc(r,p,ND,n); +} + +/* raise to an integer power - right-to-left method */ +void FF_power(BIG r[],BIG x[],int e,BIG p[],int n) +{ + int f=1; +#ifndef C99 + BIG w[FFLEN],ND[FFLEN]; +#else + BIG w[n],ND[n]; +#endif + FF_invmod2m(ND,p,n); + + FF_copy(w,x,n); + FF_nres(w,p,n); + + if (e==2) + { + FF_modsqr(r,w,p,ND,n); + } + else for (;;) + { + if (e%2==1) + { + if (f) FF_copy(r,w,n); + else FF_modmul(r,r,w,p,ND,n); + f=0; + } + e>>=1; + if (e==0) break; + FF_modsqr(w,w,p,ND,n); + } + + FF_redc(r,p,ND,n); +} + +/* r=x^e mod p, faster but not side channel resistant */ +void FF_pow(BIG r[],BIG x[],BIG e[],BIG p[],int n) +{ + int i,b; +#ifndef C99 + BIG w[FFLEN],ND[FFLEN]; +#else + BIG w[n],ND[n]; +#endif + FF_invmod2m(ND,p,n); + + FF_copy(w,x,n); + FF_one(r,n); + FF_nres(r,p,n); + FF_nres(w,p,n); + + for (i=8*MODBYTES*n-1; i>=0; i--) + { + FF_modsqr(r,r,p,ND,n); + b=BIG_bit(e[i/BIGBITS],i%BIGBITS); + if (b==1) FF_modmul(r,r,w,p,ND,n); + } + FF_redc(r,p,ND,n); +} + +/* double exponentiation r=x^e.y^f mod p */ +void FF_pow2(BIG r[],BIG x[],BIG e,BIG y[],BIG f,BIG p[],int n) +{ + int i,eb,fb; +#ifndef C99 + BIG xn[FFLEN],yn[FFLEN],xy[FFLEN],ND[FFLEN]; +#else + BIG xn[n],yn[n],xy[n],ND[n]; +#endif + + FF_invmod2m(ND,p,n); + + FF_copy(xn,x,n); + FF_copy(yn,y,n); + FF_nres(xn,p,n); + FF_nres(yn,p,n); + FF_modmul(xy,xn,yn,p,ND,n); + FF_one(r,n); + FF_nres(r,p,n); + + for (i=8*MODBYTES-1; i>=0; i--) + { + eb=BIG_bit(e,i); + fb=BIG_bit(f,i); + FF_modsqr(r,r,p,ND,n); + if (eb==1) + { + if (fb==1) FF_modmul(r,r,xy,p,ND,n); + else FF_modmul(r,r,xn,p,ND,n); + } + else + { + if (fb==1) FF_modmul(r,r,yn,p,ND,n); + } + } + FF_redc(r,p,ND,n); +} + +static sign32 igcd(sign32 x,sign32 y) +{ + /* integer GCD, returns GCD of x and y */ + sign32 r; + if (y==0) return x; + while ((r=x%y)!=0) + x=y,y=r; + return y; +} + +/* quick and dirty check for common factor with s */ +int FF_cfactor(BIG w[],sign32 s,int n) +{ + int r; + sign32 g; +#ifndef C99 + BIG x[FFLEN],y[FFLEN]; +#else + BIG x[n],y[n]; +#endif + FF_init(y,s,n); + FF_copy(x,w,n); + FF_norm(x,n); + +// if (FF_parity(x)==0) return 1; + do + { + FF_sub(x,x,y,n); + FF_norm(x,n); + while (!FF_iszilch(x,n) && FF_parity(x)==0) FF_shr(x,n); + } + while (FF_comp(x,y,n)>0); +#if CHUNK<32 + g=x[0][0]+((sign32)(x[0][1])<<BASEBITS); +#else + g=(sign32)x[0][0]; +#endif + r=igcd(s,g); + if (r>1) return 1; + return 0; +} + +/* Miller-Rabin test for primality. Slow. */ +int FF_prime(BIG p[],csprng *rng,int n) +{ + int i,j,loop,s=0; +#ifndef C99 + BIG d[FFLEN],x[FFLEN],unity[FFLEN],nm1[FFLEN]; +#else + BIG d[n],x[n],unity[n],nm1[n]; +#endif + sign32 sf=4849845;/* 3*5*.. *19 */ + + FF_norm(p,n); + + if (FF_cfactor(p,sf,n)) return 0; + + FF_one(unity,n); + FF_sub(nm1,p,unity,n); + FF_norm(nm1,n); + FF_copy(d,nm1,n); + while (FF_parity(d)==0) + { + FF_shr(d,n); + s++; + } + if (s==0) return 0; + + for (i=0; i<10; i++) + { + FF_randomnum(x,p,rng,n); + FF_pow(x,x,d,p,n); + if (FF_comp(x,unity,n)==0 || FF_comp(x,nm1,n)==0) continue; + loop=0; + for (j=1; j<s; j++) + { + FF_power(x,x,2,p,n); + if (FF_comp(x,unity,n)==0) return 0; + if (FF_comp(x,nm1,n)==0 ) + { + loop=1; + break; + } + } + if (loop) continue; + return 0; + } + + return 1; +} + +/* +BIG P[4]= {{0x1670957,0x1568CD3C,0x2595E5,0xEED4F38,0x1FC9A971,0x14EF7E62,0xA503883,0x9E1E05E,0xBF59E3},{0x1844C908,0x1B44A798,0x3A0B1E7,0xD1B5B4E,0x1836046F,0x87E94F9,0x1D34C537,0xF7183B0,0x46D07},{0x17813331,0x19E28A90,0x1473A4D6,0x1CACD01F,0x1EEA8838,0xAF2AE29,0x1F85292A,0x1632585E,0xD945E5},{0x919F5EF,0x1567B39F,0x19F6AD11,0x16CE47CF,0x9B36EB1,0x35B7D3,0x483B28C,0xCBEFA27,0xB5FC21}}; + +int main() +{ + int i; + BIG p[4],e[4],x[4],r[4]; + csprng rng; + char raw[100]; + for (i=0;i<100;i++) raw[i]=i; + RAND_seed(&rng,100,raw); + + + FF_init(x,3,4); + + FF_copy(p,P,4); + FF_copy(e,p,4); + FF_dec(e,1,4); + FF_norm(e,4); + + + + printf("p= ");FF_output(p,4); printf("\n"); + if (FF_prime(p,&rng,4)) printf("p is a prime\n"); + printf("e= ");FF_output(e,4); printf("\n"); + + FF_skpow(r,x,e,p,4); + printf("r= ");FF_output(r,4); printf("\n"); +} + +*/ http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/fp.c ---------------------------------------------------------------------- diff --git a/version22/c/fp.c b/version22/c/fp.c new file mode 100644 index 0000000..5d48f1c --- /dev/null +++ b/version22/c/fp.c @@ -0,0 +1,608 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL mod p functions */ +/* Small Finite Field arithmetic */ +/* SU=m, SU is Stack Usage (NOT_SPECIAL Modulus) */ + +#include "amcl.h" + +/* Fast Modular Reduction Methods */ + +/* r=d mod m */ +/* d MUST be normalised */ +/* Products must be less than pR in all cases !!! */ +/* So when multiplying two numbers, their product *must* be less than MODBITS+BASEBITS*NLEN */ +/* Results *may* be one bit bigger than MODBITS */ + +#if MODTYPE == PSEUDO_MERSENNE +/* r=d mod m */ + +/* Converts from BIG integer to n-residue form mod Modulus */ +void FP_nres(BIG a) +{ + BIG tmp; + BIG_rcopy(tmp,a); +} + +/* Converts from n-residue form back to BIG integer form */ +void FP_redc(BIG a) +{ + BIG tmp; + BIG_rcopy(tmp,a); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_mod(BIG r,DBIG d) +{ + BIG t,b; + chunk v,tw; + BIG_split(t,b,d,MODBITS); + + /* Note that all of the excess gets pushed into t. So if squaring a value with a 4-bit excess, this results in + t getting all 8 bits of the excess product! So products must be less than pR which is Montgomery compatible */ + + if (MConst < NEXCESS) + { + BIG_imul(t,t,MConst); + + BIG_norm(t); + tw=t[NLEN-1]; + t[NLEN-1]&=TMASK; + t[0]+=MConst*((tw>>TBITS)); + } + else + { + v=BIG_pmul(t,t,MConst); + tw=t[NLEN-1]; + t[NLEN-1]&=TMASK; +#if CHUNK == 16 + t[1]+=muladd(MConst,((tw>>TBITS)+(v<<(BASEBITS-TBITS))),0,&t[0]); +#else + t[0]+=MConst*((tw>>TBITS)+(v<<(BASEBITS-TBITS))); +#endif + } + BIG_add(r,t,b); + BIG_norm(r); +} +#endif + +/* This only applies to Curve C448, so specialised (for now) */ +#if MODTYPE == GENERALISED_MERSENNE + +/* Converts from BIG integer to n-residue form mod Modulus */ +void FP_nres(BIG a) +{ + BIG tmp; + BIG_rcopy(tmp,a); +} + +/* Converts from n-residue form back to BIG integer form */ +void FP_redc(BIG a) +{ + BIG tmp; + BIG_rcopy(tmp,a); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_mod(BIG r,DBIG d) +{ + BIG t,b; + chunk carry; + BIG_split(t,b,d,MBITS); + + BIG_add(r,t,b); + + BIG_dscopy(d,t); + BIG_dshl(d,MBITS/2); + + BIG_split(t,b,d,MBITS); + + BIG_add(r,r,t); + BIG_add(r,r,b); + BIG_norm(r); + BIG_shl(t,MBITS/2); + + BIG_add(r,r,t); + + carry=r[NLEN-1]>>TBITS; + + r[NLEN-1]&=TMASK; + r[0]+=carry; + + r[224/BASEBITS]+=carry<<(224%BASEBITS); /* need to check that this falls mid-word */ + BIG_norm(r); + +} + +#endif + +#if MODTYPE == MONTGOMERY_FRIENDLY + +/* convert to Montgomery n-residue form */ +void FP_nres(BIG a) +{ + DBIG d; + BIG m; + BIG_rcopy(m,Modulus); + BIG_dscopy(d,a); + BIG_dshl(d,NLEN*BASEBITS); + BIG_dmod(a,d,m); +} + +/* convert back to regular form */ +void FP_redc(BIG a) +{ + DBIG d; + BIG_dzero(d); + BIG_dscopy(d,a); + FP_mod(a,d); +} + +/* fast modular reduction from DBIG to BIG exploiting special form of the modulus */ +void FP_mod(BIG a,DBIG d) +{ + int i; + + for (i=0; i<NLEN; i++) + d[NLEN+i]+=muladd(d[i],MConst-1,d[i],&d[NLEN+i-1]); + + BIG_sducopy(a,d); + BIG_norm(a); +} + +#endif + +#if MODTYPE == NOT_SPECIAL + +/* convert BIG a to Montgomery n-residue form */ +/* SU= 120 */ +void FP_nres(BIG a) +{ + DBIG d; + BIG m; + BIG_rcopy(m,Modulus); + BIG_dscopy(d,a); + BIG_dshl(d,NLEN*BASEBITS); + BIG_dmod(a,d,m); +} + +/* SU= 80 */ +/* convert back to regular form */ +void FP_redc(BIG a) +{ + DBIG d; + BIG_dzero(d); + BIG_dscopy(d,a); + FP_mod(a,d); +} + +/* reduce a DBIG to a BIG using Montgomery's no trial division method */ +/* d is expected to be dnormed before entry */ +/* SU= 112 */ +void FP_mod(BIG a,DBIG d) +{ + BIG mdls; + BIG_rcopy(mdls,Modulus); + BIG_monty(a,mdls,MConst,d); +} + +#endif + +/* test x==0 ? */ +/* SU= 48 */ +int FP_iszilch(BIG x) +{ + BIG m; + BIG_rcopy(m,Modulus); + BIG_mod(x,m); + return BIG_iszilch(x); +} + +/* output FP */ +/* SU= 48 */ +void FP_output(BIG r) +{ + BIG c; + BIG_copy(c,r); + FP_redc(c); + BIG_output(c); +} + +void FP_rawoutput(BIG r) +{ + BIG_rawoutput(r); +} + +#ifdef GET_STATS +int tsqr=0,rsqr=0,tmul=0,rmul=0; +int tadd=0,radd=0,tneg=0,rneg=0; +int tdadd=0,rdadd=0,tdneg=0,rdneg=0; +#endif + +/* r=a*b mod Modulus */ +/* product must be less that p.R - and we need to know this in advance! */ +/* SU= 88 */ +void FP_mul(BIG r,BIG a,BIG b) +{ + DBIG d; + chunk ea,eb; + BIG_norm(a); + BIG_norm(b); + ea=EXCESS(a); + eb=EXCESS(b); + +#ifdef dchunk + if ((dchunk)(ea+1)*(eb+1)>(dchunk)FEXCESS) +#else + if ((ea+1)>FEXCESS/(eb+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d %d %d\n",ea,eb,FEXCESS); +#endif + FP_reduce(a); /* it is sufficient to fully reduce just one of them < p */ +#ifdef GET_STATS + rmul++; + } + + tmul++; +#else + } +#endif + + BIG_mul(d,a,b); + FP_mod(r,d); +} + +/* multiplication by an integer, r=a*c */ +/* SU= 136 */ +void FP_imul(BIG r,BIG a,int c) +{ + DBIG d; + BIG m; + int s=0; + chunk afx; + BIG_norm(a); + if (c<0) + { + c=-c; + s=1; + } + afx=(EXCESS(a)+1)*(c+1)+1; + if (c<NEXCESS && afx<FEXCESS) + BIG_imul(r,a,c); + else + { + if (afx<FEXCESS) + { + BIG_pmul(r,a,c); + } + else + { + BIG_rcopy(m,Modulus); + BIG_pxmul(d,a,c); + BIG_dmod(r,d,m); + } + } + if (s) FP_neg(r,r); + BIG_norm(r); +} + +/* Set r=a^2 mod m */ +/* SU= 88 */ +void FP_sqr(BIG r,BIG a) +{ + DBIG d; + chunk ea; + BIG_norm(a); + ea=EXCESS(a); +#ifdef dchunk + if ((dchunk)(ea+1)*(ea+1)>(dchunk)FEXCESS) +#else + if ((ea+1)>FEXCESS/(ea+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d\n",ea); +#endif + FP_reduce(a); +#ifdef GET_STATS + rsqr++; + } + tsqr++; +#else + } +#endif + + BIG_sqr(d,a); + FP_mod(r,d); +} + +/* SU= 16 */ +/* Set r=a+b */ +void FP_add(BIG r,BIG a,BIG b) +{ + BIG_add(r,a,b); + if (EXCESS(r)+2>=FEXCESS) /* +2 because a and b not normalised */ + { +#ifdef DEBUG_REDUCE + printf("Sum too large - reducing it %d\n",EXCESS(r)); +#endif + FP_reduce(r); +#ifdef GET_STATS + radd++; + } + tadd++; +#else + } +#endif +} + +/* Set r=a-b mod m */ +/* SU= 56 */ +void FP_sub(BIG r,BIG a,BIG b) +{ + BIG n; + FP_neg(n,b); + FP_add(r,a,n); +} + +/* SU= 48 */ +/* Fully reduce a mod Modulus */ +void FP_reduce(BIG a) +{ + BIG m; + BIG_rcopy(m,Modulus); + BIG_mod(a,m); +} + +// https://graphics.stanford.edu/~seander/bithacks.html +// constant time log to base 2 (or number of bits in) + +static int logb2(unsign32 v) +{ + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + r = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + return r+1; +} + +/* Set r=-a mod Modulus */ +/* SU= 64 */ +void FP_neg(BIG r,BIG a) +{ + int sb; +// chunk ov; + BIG m; + + BIG_rcopy(m,Modulus); + BIG_norm(a); + + sb=logb2((unsign32)EXCESS(a)); + /* + ov=EXCESS(a); + sb=1; + while(ov!=0) + { + sb++; // only unpredictable branch + ov>>=1; + } + */ + BIG_fshl(m,sb); + BIG_sub(r,m,a); + + if (EXCESS(r)>=FEXCESS) + { +#ifdef DEBUG_REDUCE + printf("Negation too large - reducing it %d\n",EXCESS(r)); +#endif + FP_reduce(r); +#ifdef GET_STATS + rneg++; + } + tneg++; +#else + } +#endif + +} + +/* Set r=a/2. */ +/* SU= 56 */ +void FP_div2(BIG r,BIG a) +{ + BIG m; + BIG_rcopy(m,Modulus); + BIG_norm(a); + if (BIG_parity(a)==0) + { + BIG_copy(r,a); + BIG_fshr(r,1); + } + else + { + BIG_add(r,a,m); + BIG_norm(r); + BIG_fshr(r,1); + } +} + +/* set w=1/x */ +void FP_inv(BIG w,BIG x) +{ + BIG m; + BIG_rcopy(m,Modulus); + BIG_copy(w,x); + FP_redc(w); + + BIG_invmodp(w,w,m); + FP_nres(w); +} + +/* SU=8 */ +/* set n=1 */ +void FP_one(BIG n) +{ + BIG_one(n); + FP_nres(n); +} + +/* Set r=a^b mod Modulus */ +/* SU= 136 */ +void FP_pow(BIG r,BIG a,BIG b) +{ + BIG w,z,zilch; + int bt; + BIG_zero(zilch); + + BIG_norm(b); + BIG_copy(z,b); + BIG_copy(w,a); + FP_one(r); + while(1) + { + bt=BIG_parity(z); + BIG_fshr(z,1); + if (bt) FP_mul(r,r,w); + if (BIG_comp(z,zilch)==0) break; + FP_sqr(w,w); + } + FP_reduce(r); +} + +/* is r a QR? */ +int FP_qr(BIG r) +{ + int j; + BIG m; + BIG_rcopy(m,Modulus); + FP_redc(r); + j=BIG_jacobi(r,m); + FP_nres(r); + if (j==1) return 1; + return 0; + +} + +/* Set a=sqrt(b) mod Modulus */ +/* SU= 160 */ +void FP_sqrt(BIG r,BIG a) +{ + BIG v,i,b; + BIG m; + BIG_rcopy(m,Modulus); + BIG_mod(a,m); + BIG_copy(b,m); + if (MOD8==5) + { + BIG_dec(b,5); + BIG_norm(b); + BIG_fshr(b,3); /* (p-5)/8 */ + BIG_copy(i,a); + BIG_fshl(i,1); + FP_pow(v,i,b); + FP_mul(i,i,v); + FP_mul(i,i,v); + BIG_dec(i,1); + FP_mul(r,a,v); + FP_mul(r,r,i); + BIG_mod(r,m); + } + if (MOD8==3 || MOD8==7) + { + BIG_inc(b,1); + BIG_norm(b); + BIG_fshr(b,2); /* (p+1)/4 */ + FP_pow(r,a,b); + } +} + +/* +int main() +{ + + BIG r; + + FP_one(r); + FP_sqr(r,r); + + BIG_output(r); + + int i,carry; + DBIG c={0,0,0,0,0,0,0,0}; + BIG a={1,2,3,4}; + BIG b={3,4,5,6}; + BIG r={11,12,13,14}; + BIG s={23,24,25,15}; + BIG w; + +// printf("NEXCESS= %d\n",NEXCESS); +// printf("MConst= %d\n",MConst); + + BIG_copy(b,Modulus); + BIG_dec(b,1); + BIG_norm(b); + + BIG_randomnum(r); BIG_norm(r); BIG_mod(r,Modulus); +// BIG_randomnum(s); norm(s); BIG_mod(s,Modulus); + +// BIG_output(r); +// BIG_output(s); + + BIG_output(r); + FP_nres(r); + BIG_output(r); + BIG_copy(a,r); + FP_redc(r); + BIG_output(r); + BIG_dscopy(c,a); + FP_mod(r,c); + BIG_output(r); + + +// exit(0); + +// copy(r,a); + printf("r= "); BIG_output(r); + BIG_modsqr(r,r,Modulus); + printf("r^2= "); BIG_output(r); + + FP_nres(r); + FP_sqrt(r,r); + FP_redc(r); + printf("r= "); BIG_output(r); + BIG_modsqr(r,r,Modulus); + printf("r^2= "); BIG_output(r); + + +// for (i=0;i<100000;i++) FP_sqr(r,r); +// for (i=0;i<100000;i++) + FP_sqrt(r,r); + + BIG_output(r); +} +*/
