http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ff.c ---------------------------------------------------------------------- diff --git a/version3/c/ff.c b/version3/c/ff.c new file mode 100644 index 0000000..104c461 --- /dev/null +++ b/version3/c/ff.c @@ -0,0 +1,1096 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL basic functions for Large Finite Field support */ + +#include "ff_WWW.h" + +/* Arazi and Qi inversion mod 256 */ +static int invmod256(int a) +{ + int U,t1,t2,b,c; + t1=0; + c=(a>>1)&1; + t1+=c; + t1&=1; + t1=2-t1; + t1<<=1; + U=t1+1; + +// i=2 + b=a&3; + t1=U*b; + t1>>=2; + c=(a>>2)&3; + t2=(U*c)&3; + t1+=t2; + t1*=U; + t1&=3; + t1=4-t1; + t1<<=2; + U+=t1; + +// i=4 + b=a&15; + t1=U*b; + t1>>=4; + c=(a>>4)&15; + t2=(U*c)&15; + t1+=t2; + t1*=U; + t1&=15; + t1=16-t1; + t1<<=4; + U+=t1; + + return U; +} + +/* a=1/a mod 2^BIGBITS. This is very fast! */ +void BIG_XXX_invmod2m(BIG_XXX a) +{ + int i; + BIG_XXX U,t1,b,c; + BIG_XXX_zero(U); + BIG_XXX_inc(U,invmod256(BIG_XXX_lastbits(a,8))); + for (i=8; i<BIGBITS_XXX; i<<=1) + { + BIG_XXX_norm(U); + BIG_XXX_copy(b,a); + BIG_XXX_mod2m(b,i); // bottom i bits of a + + BIG_XXX_smul(t1,U,b); + BIG_XXX_shr(t1,i); // top i bits of U*b + + BIG_XXX_copy(c,a); + BIG_XXX_shr(c,i); + BIG_XXX_mod2m(c,i); // top i bits of a + + BIG_XXX_smul(b,U,c); + BIG_XXX_mod2m(b,i); // bottom i bits of U*c + + BIG_XXX_add(t1,t1,b); + BIG_XXX_norm(t1); + BIG_XXX_smul(b,t1,U); + BIG_XXX_copy(t1,b); // (t1+b)*U + BIG_XXX_mod2m(t1,i); // bottom i bits of (t1+b)*U + + BIG_XXX_one(b); + BIG_XXX_shl(b,i); + BIG_XXX_sub(t1,b,t1); + BIG_XXX_norm(t1); + + BIG_XXX_shl(t1,i); + + BIG_XXX_add(U,U,t1); + } + BIG_XXX_copy(a,U); + BIG_XXX_norm(a); + BIG_XXX_mod2m(a,BIGBITS_XXX); +} + +/* x=y */ +void FF_WWW_copy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_copy(x[i],y[i]); +} + +/* x=y<<n */ +static void FF_WWW_dsucopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(x[n+i],y[i]); + BIG_XXX_zero(x[i]); + } +} + +/* x=y */ +static void FF_WWW_dscopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(x[i],y[i]); + BIG_XXX_zero(x[n+i]); + } +} + +/* x=y>>n */ +static void FF_WWW_sducopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_copy(x[i],y[n+i]); +} + +/* set to zero */ +void FF_WWW_zero(BIG_XXX x[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* test equals 0 */ +int FF_WWW_iszilch(BIG_XXX x[],int n) +{ + int i; + for (i=0; i<n; i++) + if (!BIG_XXX_iszilch(x[i])) return 0; + return 1; +} + +/* shift right by BIGBITS-bit words */ +static void FF_WWW_shrw(BIG_XXX a[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(a[i],a[i+n]); + BIG_XXX_zero(a[i+n]); + } +} + +/* shift left by BIGBITS-bit words */ +static void FF_WWW_shlw(BIG_XXX a[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(a[i+n],a[i]); + BIG_XXX_zero(a[i]); + } +} + +/* extract last bit */ +int FF_WWW_parity(BIG_XXX x[]) +{ + return BIG_XXX_parity(x[0]); +} + +/* extract last m bits */ +int FF_WWW_lastbits(BIG_XXX x[],int m) +{ + return BIG_XXX_lastbits(x[0],m); +} + +/* x=1 */ +void FF_WWW_one(BIG_XXX x[],int n) +{ + int i; + BIG_XXX_one(x[0]); + for (i=1; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* x=m, where m is 32-bit int */ +void FF_WWW_init(BIG_XXX x[],sign32 m,int n) +{ + int i; + BIG_XXX_zero(x[0]); +#if CHUNK<64 + x[0][0]=(chunk)(m&BMASK_XXX); + x[0][1]=(chunk)(m>>BASEBITS_XXX); +#else + x[0][0]=(chunk)m; +#endif + for (i=1; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* compare x and y - must be normalised */ +int FF_WWW_comp(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i,j; + for (i=n-1; i>=0; i--) + { + j=BIG_XXX_comp(x[i],y[i]); + if (j!=0) return j; + } + return 0; +} + +/* recursive add */ +static void FF_WWW_radd(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[zp+i],x[xp+i],y[yp+i]); +} + +/* recursive inc */ +static void FF_WWW_rinc(BIG_XXX z[],int zp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[zp+i],z[zp+i],y[yp+i]); +} + +/* recursive dec */ +static void FF_WWW_rdec(BIG_XXX z[],int zp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_sub(z[zp+i],z[zp+i],y[yp+i]); +} + +/* simple add */ +void FF_WWW_add(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[i],x[i],y[i]); +} + +/* simple sub */ +void FF_WWW_sub(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_sub(z[i],x[i],y[i]); +} + +/* increment/decrement by a small integer */ +void FF_WWW_inc(BIG_XXX x[],int m,int n) +{ + BIG_XXX_inc(x[0],m); + FF_WWW_norm(x,n); +} + +void FF_WWW_dec(BIG_XXX x[],int m,int n) +{ + BIG_XXX_dec(x[0],m); + FF_WWW_norm(x,n); +} + +/* normalise - but hold any overflow in top part unless n<0 */ +static void FF_WWW_rnorm(BIG_XXX z[],int zp,int n) +{ + int i,trunc=0; + chunk carry; + if (n<0) + { + /* -v n signals to do truncation */ + n=-n; + trunc=1; + } + for (i=0; i<n-1; i++) + { + carry=BIG_XXX_norm(z[zp+i]); + + z[zp+i][NLEN_XXX-1]^=carry<<P_TBITS_WWW; /* remove it */ + z[zp+i+1][0]+=carry; + } + carry=BIG_XXX_norm(z[zp+n-1]); + if (trunc) z[zp+n-1][NLEN_XXX-1]^=carry<<P_TBITS_WWW; +} + +void FF_WWW_norm(BIG_XXX z[],int n) +{ + FF_WWW_rnorm(z,0,n); +} + +/* shift left by one bit */ +void FF_WWW_shl(BIG_XXX x[],int n) +{ + int i; + int carry,delay_carry=0; + for (i=0; i<n-1; i++) + { + carry=BIG_XXX_fshl(x[i],1); + x[i][0]|=delay_carry; + x[i][NLEN_XXX-1]^=(chunk)carry<<P_TBITS_WWW; + delay_carry=carry; + } + BIG_XXX_fshl(x[n-1],1); + x[n-1][0]|=delay_carry; +} + +/* shift right by one bit */ +void FF_WWW_shr(BIG_XXX x[],int n) +{ + int i; + int carry; + for (i=n-1; i>0; i--) + { + carry=BIG_XXX_fshr(x[i],1); + x[i-1][NLEN_XXX-1]|=(chunk)carry<<P_TBITS_WWW; + } + BIG_XXX_fshr(x[0],1); +} + +void FF_WWW_output(BIG_XXX x[],int n) +{ + int i; + FF_WWW_norm(x,n); + for (i=n-1; i>=0; i--) + { + BIG_XXX_output(x[i]); + printf(" "); + } +} + +void FF_WWW_rawoutput(BIG_XXX x[],int n) +{ + int i; + for (i=n-1; i>=0; i--) + { + BIG_XXX_rawoutput(x[i]); + printf(" "); + } +} + +/* Convert FFs to/from octet strings */ +void FF_WWW_toOctet(octet *w,BIG_XXX x[],int n) +{ + int i; + w->len=n*MODBYTES_XXX; + for (i=0; i<n; i++) + { + BIG_XXX_toBytes(&(w->val[(n-i-1)*MODBYTES_XXX]),x[i]); + } +} + +void FF_WWW_fromOctet(BIG_XXX x[],octet *w,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_fromBytes(x[i],&(w->val[(n-i-1)*MODBYTES_XXX])); + } +} + +/* in-place swapping using xor - side channel resistant */ +static void FF_WWW_cswap(BIG_XXX a[],BIG_XXX b[],int d,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_cswap(a[i],b[i],d); + return; +} + +/* z=x*y, t is workspace */ +static void FF_WWW_karmul(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX y[],int yp,BIG_XXX t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_XXX_norm(x[xp]); + BIG_XXX_norm(y[yp]); + BIG_XXX_mul(t[tp],x[xp],y[yp]); + BIG_XXX_split(z[zp+1],z[zp],t[tp],BIGBITS_XXX); + return; + } + + nd2=n/2; + FF_WWW_radd(z,zp,x,xp,x,xp+nd2,nd2); + FF_WWW_rnorm(z,zp,nd2); /* needs this if recursion level too deep */ + + FF_WWW_radd(z,zp+nd2,y,yp,y,yp+nd2,nd2); + FF_WWW_rnorm(z,zp+nd2,nd2); + FF_WWW_karmul(t,tp,z,zp,z,zp+nd2,t,tp+n,nd2); + FF_WWW_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_WWW_karmul(z,zp+n,x,xp+nd2,y,yp+nd2,t,tp+n,nd2); + FF_WWW_rdec(t,tp,z,zp,n); + FF_WWW_rdec(t,tp,z,zp+n,n); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + FF_WWW_rnorm(z,zp,2*n); +} + +static void FF_WWW_karsqr(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_XXX_norm(x[xp]); + BIG_XXX_sqr(t[tp],x[xp]); + BIG_XXX_split(z[zp+1],z[zp],t[tp],BIGBITS_XXX); + return; + } + nd2=n/2; + FF_WWW_karsqr(z,zp,x,xp,t,tp+n,nd2); + FF_WWW_karsqr(z,zp+n,x,xp+nd2,t,tp+n,nd2); + FF_WWW_karmul(t,tp,x,xp,x,xp+nd2,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + + FF_WWW_rnorm(z,zp+nd2,n); /* was FF_rnorm(z,zp,2*n) */ +} + +static void FF_WWW_karmul_lower(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX y[],int yp,BIG_XXX t[],int tp,int n) +{ + /* Calculates Least Significant bottom half of x*y */ + int nd2; + if (n==1) + { + /* only calculate bottom half of product */ + BIG_XXX_norm(x[xp]); + BIG_XXX_norm(y[yp]); + BIG_XXX_smul(z[zp],x[xp],y[yp]); + return; + } + nd2=n/2; + FF_WWW_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_WWW_karmul_lower(t,tp,x,xp+nd2,y,yp,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,nd2); + FF_WWW_karmul_lower(t,tp,x,xp,y,yp+nd2,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,nd2); + FF_WWW_rnorm(z,zp+nd2,-nd2); /* truncate it */ +} + +static void FF_WWW_karmul_upper(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],BIG_XXX t[],int n) +{ + /* Calculates Most Significant upper half of x*y, given lower part */ + int nd2; + + nd2=n/2; + FF_WWW_radd(z,n,x,0,x,nd2,nd2); + FF_WWW_radd(z,n+nd2,y,0,y,nd2,nd2); + FF_WWW_rnorm(z,n,nd2); + FF_WWW_rnorm(z,n+nd2,nd2); + + FF_WWW_karmul(t,0,z,n+nd2,z,n,t,n,nd2); /* t = (a0+a1)(b0+b1) */ + FF_WWW_karmul(z,n,x,nd2,y,nd2,t,n,nd2); /* z[n]= a1*b1 */ + /* z[0-nd2]=l(a0b0) z[nd2-n]= h(a0b0)+l(t)-l(a0b0)-l(a1b1) */ + FF_WWW_rdec(t,0,z,n,n); /* t=t-a1b1 */ + FF_WWW_rinc(z,nd2,z,0,nd2); /* z[nd2-n]+=l(a0b0) = h(a0b0)+l(t)-l(a1b1) */ + FF_WWW_rdec(z,nd2,t,0,nd2); /* z[nd2-n]=h(a0b0)+l(t)-l(a1b1)-l(t-a1b1)=h(a0b0) */ + FF_WWW_rnorm(z,0,-n); /* a0b0 now in z - truncate it */ + FF_WWW_rdec(t,0,z,0,n); /* (a0+a1)(b0+b1) - a0b0 */ + FF_WWW_rinc(z,nd2,t,0,n); + + FF_WWW_rnorm(z,nd2,n); +} + +/* z=x*y */ +void FF_WWW_mul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + FF_WWW_karmul(z,0,x,0,y,0,t,0,n); +} + +/* return low part of product */ +static void FF_WWW_lmul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + FF_WWW_karmul_lower(z,0,x,0,y,0,t,0,n); +} + +/* Set b=b mod c */ +void FF_WWW_mod(BIG_XXX b[],BIG_XXX c[],int n) +{ + int k=0; + + FF_WWW_norm(b,n); + if (FF_WWW_comp(b,c,n)<0) + return; + do + { + FF_WWW_shl(c,n); + k++; + } + while (FF_WWW_comp(b,c,n)>=0); + + while (k>0) + { + FF_WWW_shr(c,n); + if (FF_WWW_comp(b,c,n)>=0) + { + FF_WWW_sub(b,b,c,n); + FF_WWW_norm(b,n); + } + k--; + } +} + +/* z=x^2 */ +void FF_WWW_sqr(BIG_XXX z[],BIG_XXX x[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + FF_WWW_karsqr(z,0,x,0,t,0,n); +} + +/* r=t mod modulus, N is modulus, ND is Montgomery Constant */ +static void FF_WWW_reduce(BIG_XXX r[],BIG_XXX T[],BIG_XXX N[],BIG_XXX ND[],int n) +{ + /* fast karatsuba Montgomery reduction */ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; + BIG_XXX m[FFLEN_WWW]; +#else + BIG_XXX t[2*n]; + BIG_XXX m[n]; +#endif + FF_WWW_sducopy(r,T,n); /* keep top half of T */ + FF_WWW_karmul_lower(m,0,T,0,ND,0,t,0,n); /* m=T.(1/N) mod R */ + + FF_WWW_karmul_upper(T,N,m,t,n); /* T=mN */ + FF_WWW_sducopy(m,T,n); + + FF_WWW_add(r,r,N,n); + FF_WWW_sub(r,r,m,n); + FF_WWW_norm(r,n); +} + + +/* Set r=a mod b */ +/* a is of length - 2*n */ +/* r,b is of length - n */ +void FF_WWW_dmod(BIG_XXX r[],BIG_XXX a[],BIG_XXX b[],int n) +{ + int k; +#ifndef C99 + BIG_XXX m[2*FFLEN_WWW]; + BIG_XXX x[2*FFLEN_WWW]; +#else + BIG_XXX m[2*n]; + BIG_XXX x[2*n]; +#endif + FF_WWW_copy(x,a,2*n); + FF_WWW_norm(x,2*n); + FF_WWW_dsucopy(m,b,n); + k=BIGBITS_XXX*n; + + while (FF_WWW_comp(x,m,2*n)>=0) + { + FF_WWW_sub(x,x,m,2*n); + FF_WWW_norm(x,2*n); + } + + while (k>0) + { + FF_WWW_shr(m,2*n); + + if (FF_WWW_comp(x,m,2*n)>=0) + { + FF_WWW_sub(x,x,m,2*n); + FF_WWW_norm(x,2*n); + } + + k--; + } + FF_WWW_copy(r,x,n); + FF_WWW_mod(r,b,n); +} + +/* Set r=1/a mod p. Binary method - a<p on entry */ + +void FF_WWW_invmodp(BIG_XXX r[],BIG_XXX a[],BIG_XXX p[],int n) +{ +#ifndef C99 + BIG_XXX u[FFLEN_WWW],v[FFLEN_WWW],x1[FFLEN_WWW],x2[FFLEN_WWW],t[FFLEN_WWW],one[FFLEN_WWW]; +#else + BIG_XXX u[n],v[n],x1[n],x2[n],t[n],one[n]; +#endif + FF_WWW_copy(u,a,n); + FF_WWW_copy(v,p,n); + FF_WWW_one(one,n); + FF_WWW_copy(x1,one,n); + FF_WWW_zero(x2,n); + +// reduce n in here as well! + while (FF_WWW_comp(u,one,n)!=0 && FF_WWW_comp(v,one,n)!=0) + { + while (FF_WWW_parity(u)==0) + { + FF_WWW_shr(u,n); + if (FF_WWW_parity(x1)!=0) + { + FF_WWW_add(x1,p,x1,n); + FF_WWW_norm(x1,n); + } + FF_WWW_shr(x1,n); + } + while (FF_WWW_parity(v)==0) + { + FF_WWW_shr(v,n); + if (FF_WWW_parity(x2)!=0) + { + FF_WWW_add(x2,p,x2,n); + FF_WWW_norm(x2,n); + } + FF_WWW_shr(x2,n); + } + if (FF_WWW_comp(u,v,n)>=0) + { + + FF_WWW_sub(u,u,v,n); + FF_WWW_norm(u,n); + if (FF_WWW_comp(x1,x2,n)>=0) FF_WWW_sub(x1,x1,x2,n); + else + { + FF_WWW_sub(t,p,x2,n); + FF_WWW_add(x1,x1,t,n); + } + FF_WWW_norm(x1,n); + } + else + { + FF_WWW_sub(v,v,u,n); + FF_WWW_norm(v,n); + if (FF_WWW_comp(x2,x1,n)>=0) FF_WWW_sub(x2,x2,x1,n); + else + { + FF_WWW_sub(t,p,x1,n); + FF_WWW_add(x2,x2,t,n); + } + FF_WWW_norm(x2,n); + } + } + if (FF_WWW_comp(u,one,n)==0) + FF_WWW_copy(r,x1,n); + else + FF_WWW_copy(r,x2,n); +} + +/* nesidue mod m */ +static void FF_WWW_nres(BIG_XXX a[],BIG_XXX m[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + if (n==1) + { + BIG_XXX_dscopy(d[0],a[0]); + BIG_XXX_dshl(d[0],NLEN_XXX*BASEBITS_XXX); + BIG_XXX_dmod(a[0],d[0],m[0]); + } + else + { + FF_WWW_dsucopy(d,a,n); + FF_WWW_dmod(a,d,m,n); + } +} + +static void FF_WWW_redc(BIG_XXX a[],BIG_XXX m[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + if (n==1) + { + BIG_XXX_dzero(d[0]); + BIG_XXX_dscopy(d[0],a[0]); + BIG_XXX_monty(a[0],m[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_mod(a,m,n); + FF_WWW_dscopy(d,a,n); + FF_WWW_reduce(a,d,m,ND,n); + FF_WWW_mod(a,m,n); + } +} + +/* U=1/a mod 2^m - Arazi & Qi */ +static void FF_WWW_invmod2m(BIG_XXX U[],BIG_XXX a[],int n) +{ + int i; +#ifndef C99 + BIG_XXX t1[FFLEN_WWW],b[FFLEN_WWW],c[FFLEN_WWW]; +#else + BIG_XXX t1[2*n],b[n],c[n]; +#endif + + FF_WWW_zero(U,n); + FF_WWW_zero(b,n); + FF_WWW_zero(c,n); + FF_WWW_zero(t1,2*n); + + BIG_XXX_copy(U[0],a[0]); + BIG_XXX_invmod2m(U[0]); + for (i=1; i<n; i<<=1) + { + FF_WWW_copy(b,a,i); + FF_WWW_mul(t1,U,b,i); + FF_WWW_shrw(t1,i); // top half to bottom half, top half=0 + + FF_WWW_copy(c,a,2*i); + FF_WWW_shrw(c,i); // top half of c + FF_WWW_lmul(b,U,c,i); // should set top half of b=0 + FF_WWW_add(t1,t1,b,i); + FF_WWW_norm(t1,2*i); + FF_WWW_lmul(b,t1,U,i); + FF_WWW_copy(t1,b,i); + FF_WWW_one(b,i); + FF_WWW_shlw(b,i); + FF_WWW_sub(t1,b,t1,2*i); + FF_WWW_norm(t1,2*i); + FF_WWW_shlw(t1,i); + FF_WWW_add(U,U,t1,2*i); + } + + FF_WWW_norm(U,n); +} + +void FF_WWW_random(BIG_XXX x[],csprng *rng,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_random(x[i],rng); + } + /* make sure top bit is 1 */ + while (BIG_XXX_nbits(x[n-1])<MODBYTES_XXX*8) BIG_XXX_random(x[n-1],rng); +} + +/* generate random x mod p */ +void FF_WWW_randomnum(BIG_XXX x[],BIG_XXX p[],csprng *rng,int n) +{ + int i; +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + for (i=0; i<2*n; i++) + { + BIG_XXX_random(d[i],rng); + } + FF_WWW_dmod(x,d,p,n); +} + +static void FF_WWW_modmul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],BIG_XXX p[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + chunk ex=P_EXCESS_WWW(x[n-1]); + chunk ey=P_EXCESS_WWW(y[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ey+1)>(dchunk)P_FEXCESS_WWW) +#else + if ((ex+1)>P_FEXCESS_WWW/(ey+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d %d\n",ex,ey); +#endif + FF_WWW_mod(x,p,n); + } + + if (n==1) + { + BIG_XXX_mul(d[0],x[0],y[0]); + BIG_XXX_monty(z[0],p[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_mul(d,x,y,n); + FF_WWW_reduce(z,d,p,ND,n); + } +} + +static void FF_WWW_modsqr(BIG_XXX z[],BIG_XXX x[],BIG_XXX p[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + chunk ex=P_EXCESS_WWW(x[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ex+1)>(dchunk)P_FEXCESS_WWW) +#else + if ((ex+1)>P_FEXCESS_WWW/(ex+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d\n",ex); +#endif + FF_WWW_mod(x,p,n); + } + if (n==1) + { + BIG_XXX_sqr(d[0],x[0]); + BIG_XXX_monty(z[0],p[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_sqr(d,x,n); + FF_WWW_reduce(z,d,p,ND,n); + } +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for large e */ +void FF_WWW_skpow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e[],BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX R0[FFLEN_WWW],R1[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX R0[n],R1[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_one(R0,n); + FF_WWW_copy(R1,x,n); + FF_WWW_nres(R0,p,n); + FF_WWW_nres(R1,p,n); + + for (i=8*MODBYTES_XXX*n-1; i>=0; i--) + { + b=BIG_XXX_bit(e[i/BIGBITS_XXX],i%BIGBITS_XXX); + FF_WWW_modmul(r,R0,R1,p,ND,n); + + FF_WWW_cswap(R0,R1,b,n); + FF_WWW_modsqr(R0,R0,p,ND,n); + + FF_WWW_copy(R1,r,n); + FF_WWW_cswap(R0,R1,b,n); + } + FF_WWW_copy(r,R0,n); + FF_WWW_redc(r,p,ND,n); +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for short e */ +void FF_WWW_skspow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e,BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX R0[FFLEN_WWW],R1[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX R0[n],R1[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + FF_WWW_one(R0,n); + FF_WWW_copy(R1,x,n); + FF_WWW_nres(R0,p,n); + FF_WWW_nres(R1,p,n); + for (i=8*MODBYTES_XXX-1; i>=0; i--) + { + b=BIG_XXX_bit(e,i); + FF_WWW_modmul(r,R0,R1,p,ND,n); + FF_WWW_cswap(R0,R1,b,n); + FF_WWW_modsqr(R0,R0,p,ND,n); + FF_WWW_copy(R1,r,n); + FF_WWW_cswap(R0,R1,b,n); + } + FF_WWW_copy(r,R0,n); + FF_WWW_redc(r,p,ND,n); +} + +/* raise to an integer power - right-to-left method */ +void FF_WWW_power(BIG_XXX r[],BIG_XXX x[],int e,BIG_XXX p[],int n) +{ + int f=1; +#ifndef C99 + BIG_XXX w[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX w[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(w,x,n); + FF_WWW_nres(w,p,n); + + if (e==2) + { + FF_WWW_modsqr(r,w,p,ND,n); + } + else for (;;) + { + if (e%2==1) + { + if (f) FF_WWW_copy(r,w,n); + else FF_WWW_modmul(r,r,w,p,ND,n); + f=0; + } + e>>=1; + if (e==0) break; + FF_WWW_modsqr(w,w,p,ND,n); + } + + FF_WWW_redc(r,p,ND,n); +} + +/* r=x^e mod p, faster but not side channel resistant */ +void FF_WWW_pow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e[],BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX w[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX w[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(w,x,n); + FF_WWW_one(r,n); + FF_WWW_nres(r,p,n); + FF_WWW_nres(w,p,n); + + for (i=8*MODBYTES_XXX*n-1; i>=0; i--) + { + FF_WWW_modsqr(r,r,p,ND,n); + b=BIG_XXX_bit(e[i/BIGBITS_XXX],i%BIGBITS_XXX); + if (b==1) FF_WWW_modmul(r,r,w,p,ND,n); + } + FF_WWW_redc(r,p,ND,n); +} + +/* double exponentiation r=x^e.y^f mod p */ +void FF_WWW_pow2(BIG_XXX r[],BIG_XXX x[],BIG_XXX e,BIG_XXX y[],BIG_XXX f,BIG_XXX p[],int n) +{ + int i,eb,fb; +#ifndef C99 + BIG_XXX xn[FFLEN_WWW],yn[FFLEN_WWW],xy[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX xn[n],yn[n],xy[n],ND[n]; +#endif + + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(xn,x,n); + FF_WWW_copy(yn,y,n); + FF_WWW_nres(xn,p,n); + FF_WWW_nres(yn,p,n); + FF_WWW_modmul(xy,xn,yn,p,ND,n); + FF_WWW_one(r,n); + FF_WWW_nres(r,p,n); + + for (i=8*MODBYTES_XXX-1; i>=0; i--) + { + eb=BIG_XXX_bit(e,i); + fb=BIG_XXX_bit(f,i); + FF_WWW_modsqr(r,r,p,ND,n); + if (eb==1) + { + if (fb==1) FF_WWW_modmul(r,r,xy,p,ND,n); + else FF_WWW_modmul(r,r,xn,p,ND,n); + } + else + { + if (fb==1) FF_WWW_modmul(r,r,yn,p,ND,n); + } + } + FF_WWW_redc(r,p,ND,n); +} + +static sign32 igcd(sign32 x,sign32 y) +{ + /* integer GCD, returns GCD of x and y */ + sign32 r; + if (y==0) return x; + while ((r=x%y)!=0) + x=y,y=r; + return y; +} + +/* quick and dirty check for common factor with s */ +int FF_WWW_cfactor(BIG_XXX w[],sign32 s,int n) +{ + int r; + sign32 g; +#ifndef C99 + BIG_XXX x[FFLEN_WWW],y[FFLEN_WWW]; +#else + BIG_XXX x[n],y[n]; +#endif + FF_WWW_init(y,s,n); + FF_WWW_copy(x,w,n); + FF_WWW_norm(x,n); + + do + { + FF_WWW_sub(x,x,y,n); + FF_WWW_norm(x,n); + while (!FF_WWW_iszilch(x,n) && FF_WWW_parity(x)==0) FF_WWW_shr(x,n); + } + while (FF_WWW_comp(x,y,n)>0); +#if CHUNK<32 + g=x[0][0]+((sign32)(x[0][1])<<BASEBITS_XXX); +#else + g=(sign32)x[0][0]; +#endif + r=igcd(s,g); + if (r>1) return 1; + return 0; +} + +/* Miller-Rabin test for primality. Slow. */ +int FF_WWW_prime(BIG_XXX p[],csprng *rng,int n) +{ + int i,j,loop,s=0; +#ifndef C99 + BIG_XXX d[FFLEN_WWW],x[FFLEN_WWW],unity[FFLEN_WWW],nm1[FFLEN_WWW]; +#else + BIG_XXX d[n],x[n],unity[n],nm1[n]; +#endif + sign32 sf=4849845;/* 3*5*.. *19 */ + + FF_WWW_norm(p,n); + + if (FF_WWW_cfactor(p,sf,n)) return 0; + + FF_WWW_one(unity,n); + FF_WWW_sub(nm1,p,unity,n); + FF_WWW_norm(nm1,n); + FF_WWW_copy(d,nm1,n); + while (FF_WWW_parity(d)==0) + { + FF_WWW_shr(d,n); + s++; + } + if (s==0) return 0; + + for (i=0; i<10; i++) + { + FF_WWW_randomnum(x,p,rng,n); + FF_WWW_pow(x,x,d,p,n); + if (FF_WWW_comp(x,unity,n)==0 || FF_WWW_comp(x,nm1,n)==0) continue; + loop=0; + for (j=1; j<s; j++) + { + FF_WWW_power(x,x,2,p,n); + if (FF_WWW_comp(x,unity,n)==0) return 0; + if (FF_WWW_comp(x,nm1,n)==0 ) + { + loop=1; + break; + } + } + if (loop) continue; + return 0; + } + + return 1; +}
http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ff.h ---------------------------------------------------------------------- diff --git a/version3/c/ff.h b/version3/c/ff.h new file mode 100644 index 0000000..a50e653 --- /dev/null +++ b/version3/c/ff.h @@ -0,0 +1,296 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +/** + * @file ff.h + * @author Mike Scott + * @brief FF Header File + * + */ + +#ifndef FF_WWW_H +#define FF_WWW_H + +#include "big_XXX.h" +#include "config_ff_WWW.h" + +#define HFLEN_WWW (FFLEN_WWW/2) /**< Useful for half-size RSA private key operations */ +#define P_MBITS_WWW (MODBYTES_XXX*8) /**< Number of bits in modulus */ +#define P_TBITS_WWW (P_MBITS_WWW%BASEBITS_XXX) /**< TODO */ +#define P_EXCESS_WWW(a) (((a[NLEN_XXX-1])>>(P_TBITS_WWW))+1) /**< TODO */ +#define P_FEXCESS_WWW ((chunk)1<<(BASEBITS_XXX*NLEN_XXX-P_MBITS_WWW-1)) /**< TODO */ + + +/* Finite Field Prototypes */ +/** @brief Copy one FF element of given length to another + * + @param x FF instance to be copied to, on exit = y + @param y FF instance to be copied from + @param n size of FF in BIGs + + */ +extern void FF_WWW_copy(BIG_XXX *x,BIG_XXX *y,int n); +/** @brief Initialize an FF element of given length from a 32-bit integer m + * + @param x FF instance to be copied to, on exit = m + @param m integer + @param n size of FF in BIGs + */ +extern void FF_WWW_init(BIG_XXX *x,sign32 m,int n); +/** @brief Set FF element of given size to zero + * + @param x FF instance to be set to zero + @param n size of FF in BIGs + */ +extern void FF_WWW_zero(BIG_XXX *x,int n); +/** @brief Tests for FF element equal to zero + * + @param x FF number to be tested + @param n size of FF in BIGs + @return 1 if zero, else returns 0 + */ +extern int FF_WWW_iszilch(BIG_XXX *x,int n); +/** @brief return parity of an FF, that is the least significant bit + * + @param x FF number + @return 0 or 1 + */ +extern int FF_WWW_parity(BIG_XXX *x); +/** @brief return least significant m bits of an FF + * + @param x FF number + @param m number of bits to return. Assumed to be less than BASEBITS. + @return least significant n bits as an integer + */ +extern int FF_WWW_lastbits(BIG_XXX *x,int m); +/** @brief Set FF element of given size to unity + * + @param x FF instance to be set to unity + @param n size of FF in BIGs + */ +extern void FF_WWW_one(BIG_XXX *x,int n); +/** @brief Compares two FF numbers. Inputs must be normalised externally + * + @param x first FF number to be compared + @param y second FF number to be compared + @param n size of FF in BIGs + @return -1 is x<y, 0 if x=y, 1 if x>y + */ +extern int FF_WWW_comp(BIG_XXX *x,BIG_XXX *y,int n); +/** @brief addition of two FFs + * + @param x FF instance, on exit = y+z + @param y FF instance + @param z FF instance + @param n size of FF in BIGs + */ +extern void FF_WWW_add(BIG_XXX *x,BIG_XXX *y,BIG_XXX *z,int n); +/** @brief subtraction of two FFs + * + @param x FF instance, on exit = y-z + @param y FF instance + @param z FF instance + @param n size of FF in BIGs + */ +extern void FF_WWW_sub(BIG_XXX *x,BIG_XXX *y,BIG_XXX *z,int n); +/** @brief increment an FF by an integer,and normalise + * + @param x FF instance, on exit = x+m + @param m an integer to be added to x + @param n size of FF in BIGs + */ +extern void FF_WWW_inc(BIG_XXX *x,int m,int n); +/** @brief Decrement an FF by an integer,and normalise + * + @param x FF instance, on exit = x-m + @param m an integer to be subtracted from x + @param n size of FF in BIGs + */ +extern void FF_WWW_dec(BIG_XXX *x,int m,int n); +/** @brief Normalises the components of an FF + * + @param x FF instance to be normalised + @param n size of FF in BIGs + */ +extern void FF_WWW_norm(BIG_XXX *x,int n); +/** @brief Shift left an FF by 1 bit + * + @param x FF instance to be shifted left + @param n size of FF in BIGs + */ +extern void FF_WWW_shl(BIG_XXX *x,int n); +/** @brief Shift right an FF by 1 bit + * + @param x FF instance to be shifted right + @param n size of FF in BIGs + */ +extern void FF_WWW_shr(BIG_XXX *x,int n); +/** @brief Formats and outputs an FF to the console + * + @param x FF instance to be printed + @param n size of FF in BIGs + */ +extern void FF_WWW_output(BIG_XXX *x,int n); +/** @brief Formats and outputs an FF to the console, in raw form + * + @param x FF instance to be printed + @param n size of FF in BIGs + */ +extern void FF_WWW_rawoutput(BIG_XXX *x,int n); +/** @brief Formats and outputs an FF instance to an octet string + * + Converts an FF to big-endian base 256 form. + @param S output octet string + @param x FF instance to be converted to an octet string + @param n size of FF in BIGs + */ +extern void FF_WWW_toOctet(octet *S,BIG_XXX *x,int n); +/** @brief Populates an FF instance from an octet string + * + Creates FF from big-endian base 256 form. + @param x FF instance to be created from an octet string + @param S input octet string + @param n size of FF in BIGs + */ +extern void FF_WWW_fromOctet(BIG_XXX *x,octet *S,int n); +/** @brief Multiplication of two FFs + * + Uses Karatsuba method internally + @param x FF instance, on exit = y*z + @param y FF instance + @param z FF instance + @param n size of FF in BIGs + */ +extern void FF_WWW_mul(BIG_XXX *x,BIG_XXX *y,BIG_XXX *z,int n); +/** @brief Reduce FF mod a modulus + * + This is slow + @param x FF instance to be reduced mod m - on exit = x mod m + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_mod(BIG_XXX *x,BIG_XXX *m,int n); +/** @brief Square an FF + * + Uses Karatsuba method internally + @param x FF instance, on exit = y^2 + @param y FF instance to be squared + @param n size of FF in BIGs + */ +extern void FF_WWW_sqr(BIG_XXX *x,BIG_XXX *y,int n); +/** @brief Reduces a double-length FF with respect to a given modulus + * + This is slow + @param x FF instance, on exit = y mod z + @param y FF instance, of double length 2*n + @param z FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_dmod(BIG_XXX *x,BIG_XXX *y,BIG_XXX *z,int n); +/** @brief Invert an FF mod a prime modulus + * + @param x FF instance, on exit = 1/y mod z + @param y FF instance + @param z FF prime modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_invmodp(BIG_XXX *x,BIG_XXX *y,BIG_XXX *z,int n); +/** @brief Create an FF from a random number generator + * + @param x FF instance, on exit x is a random number of length n BIGs with most significant bit a 1 + @param R an instance of a Cryptographically Secure Random Number Generator + @param n size of FF in BIGs + */ +extern void FF_WWW_random(BIG_XXX *x,csprng *R,int n); +/** @brief Create a random FF less than a given modulus from a random number generator + * + @param x FF instance, on exit x is a random number < y + @param y FF instance, the modulus + @param R an instance of a Cryptographically Secure Random Number Generator + @param n size of FF in BIGs + */ +extern void FF_WWW_randomnum(BIG_XXX *x,BIG_XXX *y,csprng *R,int n); +/** @brief Calculate r=x^e mod m, side channel resistant + * + @param r FF instance, on exit = x^e mod p + @param x FF instance + @param e FF exponent + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_skpow(BIG_XXX *r,BIG_XXX *x,BIG_XXX * e,BIG_XXX *m,int n); +/** @brief Calculate r=x^e mod m, side channel resistant + * + For short BIG exponent + @param r FF instance, on exit = x^e mod p + @param x FF instance + @param e BIG exponent + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_skspow(BIG_XXX *r,BIG_XXX *x,BIG_XXX e,BIG_XXX *m,int n); +/** @brief Calculate r=x^e mod m + * + For very short integer exponent + @param r FF instance, on exit = x^e mod p + @param x FF instance + @param e integer exponent + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_power(BIG_XXX *r,BIG_XXX *x,int e,BIG_XXX *m,int n); +/** @brief Calculate r=x^e mod m + * + @param r FF instance, on exit = x^e mod p + @param x FF instance + @param e FF exponent + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_pow(BIG_XXX *r,BIG_XXX *x,BIG_XXX *e,BIG_XXX *m,int n); +/** @brief Test if an FF has factor in common with integer s + * + @param x FF instance to be tested + @param s the supplied integer + @param n size of FF in BIGs + @return 1 if gcd(x,s)!=1, else return 0 + */ +extern int FF_WWW_cfactor(BIG_XXX *x,sign32 s,int n); +/** @brief Test if an FF is prime + * + Uses Miller-Rabin Method + @param x FF instance to be tested + @param R an instance of a Cryptographically Secure Random Number Generator + @param n size of FF in BIGs + @return 1 if x is (almost certainly) prime, else return 0 + */ +extern int FF_WWW_prime(BIG_XXX *x,csprng *R,int n); +/** @brief Calculate r=x^e.y^f mod m + * + @param r FF instance, on exit = x^e.y^f mod p + @param x FF instance + @param e BIG exponent + @param y FF instance + @param f BIG exponent + @param m FF modulus + @param n size of FF in BIGs + */ +extern void FF_WWW_pow2(BIG_XXX *r,BIG_XXX *x,BIG_XXX e,BIG_XXX *y,BIG_XXX f,BIG_XXX *m,int n); + +#endif http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp.c ---------------------------------------------------------------------- diff --git a/version3/c/fp.c b/version3/c/fp.c new file mode 100644 index 0000000..02a8521 --- /dev/null +++ b/version3/c/fp.c @@ -0,0 +1,772 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL mod p functions */ +/* Small Finite Field arithmetic */ +/* SU=m, SU is Stack Usage (NOT_SPECIAL Modulus) */ + +#include "fp_YYY.h" + +/* Fast Modular Reduction Methods */ + +/* r=d mod m */ +/* d MUST be normalised */ +/* Products must be less than pR in all cases !!! */ +/* So when multiplying two numbers, their product *must* be less than MODBITS+BASEBITS*NLEN */ +/* Results *may* be one bit bigger than MODBITS */ + +#if MODTYPE_YYY == PSEUDO_MERSENNE +/* r=d mod m */ + +/* Converts from BIG integer to residue form mod Modulus */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + BIG_XXX_copy(y->g,x); + y->XES=1; +} + +/* Converts from residue form back to BIG integer form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + BIG_XXX_copy(x,y->g); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_YYY_mod(BIG_XXX r,DBIG_XXX d) +{ + BIG_XXX t,b; + chunk v,tw; + BIG_XXX_split(t,b,d,MODBITS_YYY); + + /* Note that all of the excess gets pushed into t. So if squaring a value with a 4-bit excess, this results in + t getting all 8 bits of the excess product! So products must be less than pR which is Montgomery compatible */ + + if (MConst_YYY < NEXCESS_XXX) + { + BIG_XXX_imul(t,t,MConst_YYY); + BIG_XXX_norm(t); + BIG_XXX_add(r,t,b); + BIG_XXX_norm(r); + tw=r[NLEN_XXX-1]; + r[NLEN_XXX-1]&=TMASK_YYY; + r[0]+=MConst_YYY*((tw>>TBITS_YYY)); + } + else + { + v=BIG_XXX_pmul(t,t,MConst_YYY); + BIG_XXX_add(r,t,b); + BIG_XXX_norm(r); + tw=r[NLEN_XXX-1]; + r[NLEN_XXX-1]&=TMASK_YYY; +#if CHUNK == 16 + r[1]+=muladd_XXX(MConst_YYY,((tw>>TBITS_YYY)+(v<<(BASEBITS_XXX-TBITS_YYY))),0,&r[0]); +#else + r[0]+=MConst_YYY*((tw>>TBITS_YYY)+(v<<(BASEBITS_XXX-TBITS_YYY))); +#endif + } + BIG_XXX_norm(r); +} +#endif + +/* This only applies to Curve C448, so specialised (for now) */ +#if MODTYPE_YYY == GENERALISED_MERSENNE + +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + BIG_XXX_copy(y->g,x); + y->XES=1; +} + +/* Converts from residue form back to BIG integer form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + BIG_XXX_copy(x,y->g); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_YYY_mod(BIG_XXX r,DBIG_XXX d) +{ + BIG_XXX t,b; + chunk carry; + BIG_XXX_split(t,b,d,MBITS_YYY); + + BIG_XXX_add(r,t,b); + + BIG_XXX_dscopy(d,t); + BIG_XXX_dshl(d,MBITS_YYY/2); + + BIG_XXX_split(t,b,d,MBITS_YYY); + + BIG_XXX_add(r,r,t); + BIG_XXX_add(r,r,b); + BIG_XXX_norm(r); + BIG_XXX_shl(t,MBITS_YYY/2); + + BIG_XXX_add(r,r,t); + + carry=r[NLEN_XXX-1]>>TBITS_YYY; + + r[NLEN_XXX-1]&=TMASK_YYY; + r[0]+=carry; + + r[224/BASEBITS_XXX]+=carry<<(224%BASEBITS_XXX); /* need to check that this falls mid-word */ + BIG_XXX_norm(r); +} + +#endif + +#if MODTYPE_YYY == MONTGOMERY_FRIENDLY + +/* convert to Montgomery n-residue form */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + DBIG_XXX d; + BIG_XXX r; + BIG_XXX_rcopy(r,R2modp_YYY); + BIG_XXX_mul(d,x,r); + FP_YYY_mod(y->g,d); + y->XES=2; +} + +/* convert back to regular form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + DBIG_XXX d; + BIG_XXX_dzero(d); + BIG_XXX_dscopy(d,y->g); + FP_YYY_mod(x,d); +} + +/* fast modular reduction from DBIG to BIG exploiting special form of the modulus */ +void FP_YYY_mod(BIG_XXX a,DBIG_XXX d) +{ + int i; + + for (i=0; i<NLEN_XXX; i++) + d[NLEN_XXX+i]+=muladd_XXX(d[i],MConst_YYY-1,d[i],&d[NLEN_XXX+i-1]); + + BIG_XXX_sducopy(a,d); + BIG_XXX_norm(a); +} + +#endif + +#if MODTYPE_YYY == NOT_SPECIAL + +/* convert to Montgomery n-residue form */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + DBIG_XXX d; + BIG_XXX r; + BIG_XXX_rcopy(r,R2modp_YYY); + BIG_XXX_mul(d,x,r); + FP_YYY_mod(y->g,d); + y->XES=2; +} + +/* convert back to regular form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + DBIG_XXX d; + BIG_XXX_dzero(d); + BIG_XXX_dscopy(d,y->g); + FP_YYY_mod(x,d); +} + + +/* reduce a DBIG to a BIG using Montgomery's no trial division method */ +/* d is expected to be dnormed before entry */ +/* SU= 112 */ +void FP_YYY_mod(BIG_XXX a,DBIG_XXX d) +{ + BIG_XXX mdls; + BIG_XXX_rcopy(mdls,Modulus_YYY); + BIG_XXX_monty(a,mdls,MConst_YYY,d); +} + +#endif + +/* test x==0 ? */ +/* SU= 48 */ +int FP_YYY_iszilch(FP_YYY *x) +{ + BIG_XXX m,t; + BIG_XXX_rcopy(m,Modulus_YYY); + BIG_XXX_copy(t,x->g); + BIG_XXX_mod(t,m); + return BIG_XXX_iszilch(t); +} + +void FP_YYY_copy(FP_YYY *y,FP_YYY *x) +{ + BIG_XXX_copy(y->g,x->g); + y->XES=x->XES; +} + +void FP_YYY_rcopy(FP_YYY *y, const BIG_XXX c) +{ + BIG_XXX b; + BIG_XXX_rcopy(b,c); + FP_YYY_nres(y,b); +} + +/* Swap a and b if d=1 */ +void FP_YYY_cswap(FP_YYY *a,FP_YYY *b,int d) +{ + sign32 t,c=d; + BIG_XXX_cswap(a->g,b->g,d); + + c=~(c-1); + t=c&((a->XES)^(b->XES)); + a->XES^=t; + b->XES^=t; + +} + +/* Move b to a if d=1 */ +void FP_YYY_cmove(FP_YYY *a,FP_YYY *b,int d) +{ + sign32 c=-d; + + BIG_XXX_cmove(a->g,b->g,d); + a->XES^=(a->XES^b->XES)&c; +} + +void FP_YYY_zero(FP_YYY *x) +{ + BIG_XXX_zero(x->g); + x->XES=1; +} + +int FP_YYY_equals(FP_YYY *x,FP_YYY *y) +{ + FP_YYY xg,yg; + FP_YYY_copy(&xg,x); + FP_YYY_copy(&yg,y); + FP_YYY_reduce(&xg); + FP_YYY_reduce(&yg); + if (BIG_XXX_comp(xg.g,yg.g)==0) return 1; + return 0; +} + +/* output FP */ +/* SU= 48 */ +void FP_YYY_output(FP_YYY *r) +{ + BIG_XXX c; + FP_YYY_redc(c,r); + BIG_XXX_output(c); +} + +void FP_YYY_rawoutput(FP_YYY *r) +{ + BIG_XXX_rawoutput(r->g); +} + +#ifdef GET_STATS +int tsqr=0,rsqr=0,tmul=0,rmul=0; +int tadd=0,radd=0,tneg=0,rneg=0; +int tdadd=0,rdadd=0,tdneg=0,rdneg=0; +#endif + +#ifdef FUSED_MODMUL + +/* Insert fastest code here */ + +#endif + +/* r=a*b mod Modulus */ +/* product must be less that p.R - and we need to know this in advance! */ +/* SU= 88 */ +void FP_YYY_mul(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + DBIG_XXX d; + + if ((sign64)a->XES*b->XES>(sign64)FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it\n"); +#endif + FP_YYY_reduce(a); /* it is sufficient to fully reduce just one of them < p */ + } + +#ifdef FUSED_MODMUL + FP_YYY_modmul(r->g,a->g,b->g); +#else + BIG_XXX_mul(d,a->g,b->g); + FP_YYY_mod(r->g,d); +#endif + r->XES=2; +} + + +/* multiplication by an integer, r=a*c */ +/* SU= 136 */ +void FP_YYY_imul(FP_YYY *r,FP_YYY *a,int c) +{ + int s=0; + + if (c<0) + { + c=-c; + s=1; + } + +#if MODTYPE_YYY==PSEUDO_MERSENNE || MODTYPE_YYY==GENERALISED_MERSENNE + DBIG_XXX d; + BIG_XXX_pxmul(d,a->g,c); + FP_YYY_mod(r->g,d); + r->XES=2; + +#else + //Montgomery + BIG_XXX k; + FP_YYY f; + if (a->XES*c<=FEXCESS_YYY) + { + BIG_XXX_pmul(r->g,a->g,c); + r->XES=a->XES*c; // careful here - XES jumps! + } + else + { + // don't want to do this - only a problem for Montgomery modulus and larger constants + BIG_XXX_zero(k); + BIG_XXX_inc(k,c); + BIG_XXX_norm(k); + FP_YYY_nres(&f,k); + FP_YYY_mul(r,a,&f); + } +#endif + + if (s) + { + FP_YYY_neg(r,r); + FP_YYY_norm(r); + } +} + +/* Set r=a^2 mod m */ +/* SU= 88 */ +void FP_YYY_sqr(FP_YYY *r,FP_YYY *a) +{ + DBIG_XXX d; + + if ((sign64)a->XES*a->XES>(sign64)FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it\n"); +#endif + FP_YYY_reduce(a); + } + + BIG_XXX_sqr(d,a->g); + FP_YYY_mod(r->g,d); + r->XES=2; +} + +/* SU= 16 */ +/* Set r=a+b */ +void FP_YYY_add(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + BIG_XXX_add(r->g,a->g,b->g); + r->XES=a->XES+b->XES; + if (r->XES>FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Sum too large - reducing it \n"); +#endif + FP_YYY_reduce(r); + } +} + +/* Set r=a-b mod m */ +/* SU= 56 */ +void FP_YYY_sub(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + FP_YYY n; + FP_YYY_neg(&n,b); + FP_YYY_add(r,a,&n); +} + +// https://graphics.stanford.edu/~seander/bithacks.html +// constant time log to base 2 (or number of bits in) + +static int logb2(unsign32 v) +{ + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + r = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + return r; +} + +// find appoximation to quotient of a/m +// Out by at most 2. +// Note that MAXXES is bounded to be 2-bits less than half a word +static int quo(BIG_XXX n,BIG_XXX m) +{ + int sh; + chunk num,den; + int hb=CHUNK/2; + if (TBITS_YYY<hb) + { + sh=hb-TBITS_YYY; + num=(n[NLEN_XXX-1]<<sh)|(n[NLEN_XXX-2]>>(BASEBITS_XXX-sh)); + den=(m[NLEN_XXX-1]<<sh)|(m[NLEN_XXX-2]>>(BASEBITS_XXX-sh)); + } + else + { + num=n[NLEN_XXX-1]; + den=m[NLEN_XXX-1]; + } + return (int)(num/(den+1)); +} + +/* SU= 48 */ +/* Fully reduce a mod Modulus */ +void FP_YYY_reduce(FP_YYY *a) +{ + BIG_XXX m,r; + int sr,sb,q; + chunk carry; + + BIG_XXX_rcopy(m,Modulus_YYY); + + BIG_XXX_norm(a->g); + + if (a->XES>16) + { + q=quo(a->g,m); + carry=BIG_XXX_pmul(r,m,q); + r[NLEN_XXX-1]+=(carry<<BASEBITS_XXX); // correction - put any carry out back in again + BIG_XXX_sub(a->g,a->g,r); + BIG_XXX_norm(a->g); + sb=2; + } + else sb=logb2(a->XES-1); // sb does not depend on the actual data + + BIG_XXX_fshl(m,sb); + + while (sb>0) + { +// constant time... + sr=BIG_XXX_ssn(r,a->g,m); // optimized combined shift, subtract and norm + BIG_XXX_cmove(a->g,r,1-sr); + sb--; + } + + //BIG_XXX_mod(a->g,m); + a->XES=1; +} + +void FP_YYY_norm(FP_YYY *x) +{ + BIG_XXX_norm(x->g); +} + +/* Set r=-a mod Modulus */ +/* SU= 64 */ +void FP_YYY_neg(FP_YYY *r,FP_YYY *a) +{ + int sb; + BIG_XXX m; + + BIG_XXX_rcopy(m,Modulus_YYY); + + sb=logb2(a->XES-1); + BIG_XXX_fshl(m,sb); + BIG_XXX_sub(r->g,m,a->g); + r->XES=((sign32)1<<sb)+1; + + if (r->XES>FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Negation too large - reducing it \n"); +#endif + FP_YYY_reduce(r); + } + +} + +/* Set r=a/2. */ +/* SU= 56 */ +void FP_YYY_div2(FP_YYY *r,FP_YYY *a) +{ + BIG_XXX m; + BIG_XXX_rcopy(m,Modulus_YYY); + FP_YYY_copy(r,a); + + if (BIG_XXX_parity(a->g)==0) + { + + BIG_XXX_fshr(r->g,1); + } + else + { + BIG_XXX_add(r->g,r->g,m); + BIG_XXX_norm(r->g); + BIG_XXX_fshr(r->g,1); + } +} + +#if MODTYPE_YYY == PSEUDO_MERSENNE || MODTYPE_YYY==GENERALISED_MERSENNE + +// See eprint paper https://eprint.iacr.org/2018/1038 +// If p=3 mod 4 r= x^{(p-3)/4}, if p=5 mod 8 r=x^{(p-5)/8} + +static void FP_YYY_fpow(FP_YYY *r,FP_YYY *x) +{ + int i,j,k,bw,w,c,nw,lo,m,n; + FP_YYY xp[11],t,key; + const int ac[]={1,2,3,6,12,15,30,60,120,240,255}; +// phase 1 + FP_YYY_copy(&xp[0],x); // 1 + FP_YYY_sqr(&xp[1],x); // 2 + FP_YYY_mul(&xp[2],&xp[1],x); //3 + FP_YYY_sqr(&xp[3],&xp[2]); // 6 + FP_YYY_sqr(&xp[4],&xp[3]); // 12 + FP_YYY_mul(&xp[5],&xp[4],&xp[2]); // 15 + FP_YYY_sqr(&xp[6],&xp[5]); // 30 + FP_YYY_sqr(&xp[7],&xp[6]); // 60 + FP_YYY_sqr(&xp[8],&xp[7]); // 120 + FP_YYY_sqr(&xp[9],&xp[8]); // 240 + FP_YYY_mul(&xp[10],&xp[9],&xp[5]); // 255 + +#if MODTYPE_YYY==PSEUDO_MERSENNE + n=MODBITS_YYY; +#endif +#if MODTYPE_YYY==GENERALISED_MERSENNE // Goldilocks ONLY + n=MODBITS_YYY/2; +#endif + + if (MOD8_YYY==5) + { + n-=3; + c=(MConst_YYY+5)/8; + } else { + n-=2; + c=(MConst_YYY+3)/4; + } + + bw=0; w=1; while (w<c) {w*=2; bw+=1;} + k=w-c; + + if (k!=0) + { + i=10; while (ac[i]>k) i--; + FP_YYY_copy(&key,&xp[i]); + k-=ac[i]; + } + while (k!=0) + { + i--; + if (ac[i]>k) continue; + FP_YYY_mul(&key,&key,&xp[i]); + k-=ac[i]; + } + +// phase 2 + FP_YYY_copy(&xp[1],&xp[2]); + FP_YYY_copy(&xp[2],&xp[5]); + FP_YYY_copy(&xp[3],&xp[10]); + + j=3; m=8; + nw=n-bw; + while (2*m<nw) + { + FP_YYY_copy(&t,&xp[j++]); + for (i=0;i<m;i++) + FP_YYY_sqr(&t,&t); + FP_YYY_mul(&xp[j],&xp[j-1],&t); + m*=2; + } + + lo=nw-m; + FP_YYY_copy(r,&xp[j]); + + while (lo!=0) + { + m/=2; j--; + if (lo<m) continue; + lo-=m; + FP_YYY_copy(&t,r); + for (i=0;i<m;i++) + FP_YYY_sqr(&t,&t); + FP_YYY_mul(r,&t,&xp[j]); + } +// phase 3 + + if (bw!=0) + { + for (i=0;i<bw;i++ ) + FP_YYY_sqr(r,r); + FP_YYY_mul(r,r,&key); + } +#if MODTYPE_YYY==GENERALISED_MERSENNE // Goldilocks ONLY + FP_YYY_copy(&key,r); + FP_YYY_sqr(&t,&key); + FP_YYY_mul(r,&t,x); + for (i=0;i<n+1;i++) + FP_YYY_sqr(r,r); + FP_YYY_mul(r,r,&key); +#endif +} + +void FP_YYY_inv(FP_YYY *r,FP_YYY *x) +{ + FP_YYY y,t; + FP_YYY_fpow(&y,x); + if (MOD8_YYY==5) + { // r=x^3.y^8 + FP_YYY_sqr(&t,x); + FP_YYY_mul(&t,&t,x); + FP_YYY_sqr(&y,&y); + FP_YYY_sqr(&y,&y); + FP_YYY_sqr(&y,&y); + FP_YYY_mul(r,&t,&y); + } else { + FP_YYY_sqr(&y,&y); + FP_YYY_sqr(&y,&y); + FP_YYY_mul(r,&y,x); + } +} + +#else + +void FP_YYY_pow(FP_YYY *r,FP_YYY *a,BIG_XXX b) +{ + sign8 w[1+(NLEN_XXX*BASEBITS_XXX+3)/4]; + FP_YYY tb[16]; + BIG_XXX t; + int i,nb; + + FP_YYY_norm(a); + BIG_XXX_norm(b); + BIG_XXX_copy(t,b); + nb=1+(BIG_XXX_nbits(t)+3)/4; + /* convert exponent to 4-bit window */ + for (i=0; i<nb; i++) + { + w[i]=BIG_XXX_lastbits(t,4); + BIG_XXX_dec(t,w[i]); + BIG_XXX_norm(t); + BIG_XXX_fshr(t,4); + } + + FP_YYY_one(&tb[0]); + FP_YYY_copy(&tb[1],a); + for (i=2;i<16;i++) + FP_YYY_mul(&tb[i],&tb[i-1],a); + + FP_YYY_copy(r,&tb[w[nb-1]]); + for (i=nb-2; i>=0; i--) + { + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_mul(r,r,&tb[w[i]]); + } + FP_YYY_reduce(r); +} + +/* set w=1/x */ +void FP_YYY_inv(FP_YYY *w,FP_YYY *x) +{ + + BIG_XXX m2; + BIG_XXX_rcopy(m2,Modulus_YYY); + BIG_XXX_dec(m2,2); + BIG_XXX_norm(m2); + FP_YYY_pow(w,x,m2); +} +#endif + +/* SU=8 */ +/* set n=1 */ +void FP_YYY_one(FP_YYY *n) +{ + BIG_XXX b; + BIG_XXX_one(b); + FP_YYY_nres(n,b); +} + +/* is r a QR? */ +int FP_YYY_qr(FP_YYY *r) +{ + int j; + BIG_XXX m; + BIG_XXX b; + BIG_XXX_rcopy(m,Modulus_YYY); + FP_YYY_redc(b,r); + j=BIG_XXX_jacobi(b,m); + FP_YYY_nres(r,b); + if (j==1) return 1; + return 0; + +} + +/* Set a=sqrt(b) mod Modulus */ +/* SU= 160 */ +void FP_YYY_sqrt(FP_YYY *r,FP_YYY *a) +{ + FP_YYY v,i; + BIG_XXX b; + BIG_XXX m; + BIG_XXX_rcopy(m,Modulus_YYY); + BIG_XXX_mod(a->g,m); + BIG_XXX_copy(b,m); + if (MOD8_YYY==5) + { + FP_YYY_copy(&i,a); // i=x + BIG_XXX_fshl(i.g,1); // i=2x +#if MODTYPE_YYY == PSEUDO_MERSENNE || MODTYPE_YYY==GENERALISED_MERSENNE + FP_YYY_fpow(&v,&i); +#else + BIG_XXX_dec(b,5); + BIG_XXX_norm(b); + BIG_XXX_fshr(b,3); // (p-5)/8 + FP_YYY_pow(&v,&i,b); // v=(2x)^(p-5)/8 +#endif + FP_YYY_mul(&i,&i,&v); // i=(2x)^(p+3)/8 + FP_YYY_mul(&i,&i,&v); // i=(2x)^(p-1)/4 + BIG_XXX_dec(i.g,1); // i=(2x)^(p-1)/4 - 1 + FP_YYY_mul(r,a,&v); + FP_YYY_mul(r,r,&i); + FP_YYY_reduce(r); + } + if (MOD8_YYY==3 || MOD8_YYY==7) + { +#if MODTYPE_YYY == PSEUDO_MERSENNE || MODTYPE_YYY==GENERALISED_MERSENNE + FP_YYY_fpow(r,a); + FP_YYY_mul(r,r,a); +#else + BIG_XXX_inc(b,1); + BIG_XXX_norm(b); + BIG_XXX_fshr(b,2); /* (p+1)/4 */ + FP_YYY_pow(r,a,b); +#endif + } +} http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp.h ---------------------------------------------------------------------- diff --git a/version3/c/fp.h b/version3/c/fp.h new file mode 100644 index 0000000..a7883f2 --- /dev/null +++ b/version3/c/fp.h @@ -0,0 +1,245 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +/** + * @file fp.h + * @author Mike Scott + * @brief FP Header File + * + */ + +#ifndef FP_YYY_H +#define FP_YYY_H + +#include "big_XXX.h" +#include "config_field_YYY.h" + + +/** + @brief FP Structure - quadratic extension field +*/ + +typedef struct +{ + BIG_XXX g; /**< Big representation of field element */ + sign32 XES; /**< Excess */ +} FP_YYY; + + +/* Field Params - see rom.c */ +extern const BIG_XXX Modulus_YYY; /**< Actual Modulus set in romf_yyy.c */ +extern const BIG_XXX R2modp_YYY; /**< Montgomery constant */ +extern const chunk MConst_YYY; /**< Constant associated with Modulus - for Montgomery = 1/p mod 2^BASEBITS */ + + +#define MODBITS_YYY MBITS_YYY /**< Number of bits in Modulus for selected curve */ +#define TBITS_YYY (MBITS_YYY%BASEBITS_XXX) /**< Number of active bits in top word */ +#define TMASK_YYY (((chunk)1<<TBITS_YYY)-1) /**< Mask for active bits in top word */ +#define FEXCESS_YYY (((sign32)1<<MAXXES_YYY)-1) /**< 2^(BASEBITS*NLEN-MODBITS)-1 - normalised BIG can be multiplied by less than this before reduction */ +#define OMASK_YYY (-((chunk)(1)<<TBITS_YYY)) /**< for masking out overflow bits */ + +//#define FUSED_MODMUL +//#define DEBUG_REDUCE + +/* FP prototypes */ + +/** @brief Tests for FP equal to zero mod Modulus + * + @param x BIG number to be tested + @return 1 if zero, else returns 0 + */ +extern int FP_YYY_iszilch(FP_YYY *x); + + +/** @brief Set FP to zero + * + @param x FP number to be set to 0 + */ +extern void FP_YYY_zero(FP_YYY *x); + +/** @brief Copy an FP + * + @param y FP number to be copied to + @param x FP to be copied from + */ +extern void FP_YYY_copy(FP_YYY *y,FP_YYY *x); + +/** @brief Copy from ROM to an FP + * + @param y FP number to be copied to + @param x BIG to be copied from ROM + */ +extern void FP_YYY_rcopy(FP_YYY *y,const BIG_XXX x); + + +/** @brief Compares two FPs + * + @param x FP number + @param y FP number + @return 1 if equal, else returns 0 + */ +extern int FP_YYY_equals(FP_YYY *x,FP_YYY *y); + + +/** @brief Conditional constant time swap of two FP numbers + * + Conditionally swaps parameters in constant time (without branching) + @param x an FP number + @param y another FP number + @param s swap takes place if not equal to 0 + */ +extern void FP_YYY_cswap(FP_YYY *x,FP_YYY *y,int s); +/** @brief Conditional copy of FP number + * + Conditionally copies second parameter to the first (without branching) + @param x an FP number + @param y another FP number + @param s copy takes place if not equal to 0 + */ +extern void FP_YYY_cmove(FP_YYY *x,FP_YYY *y,int s); +/** @brief Converts from BIG integer to residue form mod Modulus + * + @param x BIG number to be converted + @param y FP result + */ +extern void FP_YYY_nres(FP_YYY *y,BIG_XXX x); +/** @brief Converts from residue form back to BIG integer form + * + @param y FP number to be converted to BIG + @param x BIG result + */ +extern void FP_YYY_redc(BIG_XXX x,FP_YYY *y); +/** @brief Sets FP to representation of unity in residue form + * + @param x FP number to be set equal to unity. + */ +extern void FP_YYY_one(FP_YYY *x); +/** @brief Reduces DBIG to BIG exploiting special form of the modulus + * + This function comes in different flavours depending on the form of Modulus that is currently in use. + @param r BIG number, on exit = d mod Modulus + @param d DBIG number to be reduced + */ +extern void FP_YYY_mod(BIG_XXX r,DBIG_XXX d); + +#ifdef FUSED_MODMUL +extern void FP_YYY_modmul(BIG_XXX,BIG_XXX,BIG_XXX); +#endif + +/** @brief Fast Modular multiplication of two FPs, mod Modulus + * + Uses appropriate fast modular reduction method + @param x FP number, on exit the modular product = y*z mod Modulus + @param y FP number, the multiplicand + @param z FP number, the multiplier + */ +extern void FP_YYY_mul(FP_YYY *x,FP_YYY *y,FP_YYY *z); +/** @brief Fast Modular multiplication of an FP, by a small integer, mod Modulus + * + @param x FP number, on exit the modular product = y*i mod Modulus + @param y FP number, the multiplicand + @param i a small number, the multiplier + */ +extern void FP_YYY_imul(FP_YYY *x,FP_YYY *y,int i); +/** @brief Fast Modular squaring of an FP, mod Modulus + * + Uses appropriate fast modular reduction method + @param x FP number, on exit the modular product = y^2 mod Modulus + @param y FP number, the number to be squared + + */ +extern void FP_YYY_sqr(FP_YYY *x,FP_YYY *y); +/** @brief Modular addition of two FPs, mod Modulus + * + @param x FP number, on exit the modular sum = y+z mod Modulus + @param y FP number + @param z FP number + */ +extern void FP_YYY_add(FP_YYY *x,FP_YYY *y,FP_YYY *z); +/** @brief Modular subtraction of two FPs, mod Modulus + * + @param x FP number, on exit the modular difference = y-z mod Modulus + @param y FP number + @param z FP number + */ +extern void FP_YYY_sub(FP_YYY *x,FP_YYY *y,FP_YYY *z); +/** @brief Modular division by 2 of an FP, mod Modulus + * + @param x FP number, on exit =y/2 mod Modulus + @param y FP number + */ +extern void FP_YYY_div2(FP_YYY *x,FP_YYY *y); +/** @brief Fast Modular exponentiation of an FP, to the power of a BIG, mod Modulus + * + @param x FP number, on exit = y^z mod Modulus + @param y FP number + @param z BIG number exponent + */ +extern void FP_YYY_pow(FP_YYY *x,FP_YYY *y,BIG_XXX z); +/** @brief Fast Modular square root of a an FP, mod Modulus + * + @param x FP number, on exit = sqrt(y) mod Modulus + @param y FP number, the number whose square root is calculated + + */ +extern void FP_YYY_sqrt(FP_YYY *x,FP_YYY *y); +/** @brief Modular negation of a an FP, mod Modulus + * + @param x FP number, on exit = -y mod Modulus + @param y FP number + */ +extern void FP_YYY_neg(FP_YYY *x,FP_YYY *y); +/** @brief Outputs an FP number to the console + * + Converts from residue form before output + @param x an FP number + */ +extern void FP_YYY_output(FP_YYY *x); +/** @brief Outputs an FP number to the console, in raw form + * + @param x a BIG number + */ +extern void FP_YYY_rawoutput(FP_YYY *x); +/** @brief Reduces possibly unreduced FP mod Modulus + * + @param x FP number, on exit reduced mod Modulus + */ +extern void FP_YYY_reduce(FP_YYY *x); +/** @brief normalizes FP + * + @param x FP number, on exit normalized + */ +extern void FP_YYY_norm(FP_YYY *x); +/** @brief Tests for FP a quadratic residue mod Modulus + * + @param x FP number to be tested + @return 1 if quadratic residue, else returns 0 if quadratic non-residue + */ +extern int FP_YYY_qr(FP_YYY *x); +/** @brief Modular inverse of a an FP, mod Modulus + * + @param x FP number, on exit = 1/y mod Modulus + @param y FP number + */ +extern void FP_YYY_inv(FP_YYY *x,FP_YYY *y); + + + + +#endif http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp12.c ---------------------------------------------------------------------- diff --git a/version3/c/fp12.c b/version3/c/fp12.c new file mode 100644 index 0000000..c103fb3 --- /dev/null +++ b/version3/c/fp12.c @@ -0,0 +1,726 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL Fp^12 functions */ +/* SU=m, m is Stack Usage (no lazy )*/ +/* FP12 elements are of the form a+i.b+i^2.c */ + +#include "fp12_YYY.h" + +/* return 1 if b==c, no branching */ +static int teq(sign32 b,sign32 c) +{ + sign32 x=b^c; + x-=1; // if x=0, x now -1 + return (int)((x>>31)&1); +} + + +/* Constant time select from pre-computed table */ +static void FP12_YYY_select(FP12_YYY *f,FP12_YYY g[],sign32 b) +{ + FP12_YYY invf; + sign32 m=b>>31; + sign32 babs=(b^m)-m; + + babs=(babs-1)/2; + + FP12_YYY_cmove(f,&g[0],teq(babs,0)); // conditional move + FP12_YYY_cmove(f,&g[1],teq(babs,1)); + FP12_YYY_cmove(f,&g[2],teq(babs,2)); + FP12_YYY_cmove(f,&g[3],teq(babs,3)); + FP12_YYY_cmove(f,&g[4],teq(babs,4)); + FP12_YYY_cmove(f,&g[5],teq(babs,5)); + FP12_YYY_cmove(f,&g[6],teq(babs,6)); + FP12_YYY_cmove(f,&g[7],teq(babs,7)); + + FP12_YYY_copy(&invf,f); + FP12_YYY_conj(&invf,&invf); // 1/f + FP12_YYY_cmove(f,&invf,(int)(m&1)); +} + + + +/* test x==0 ? */ +/* SU= 8 */ +int FP12_YYY_iszilch(FP12_YYY *x) +{ + if (FP4_YYY_iszilch(&(x->a)) && FP4_YYY_iszilch(&(x->b)) && FP4_YYY_iszilch(&(x->c))) return 1; + return 0; +} + +/* test x==1 ? */ +/* SU= 8 */ +int FP12_YYY_isunity(FP12_YYY *x) +{ + if (FP4_YYY_isunity(&(x->a)) && FP4_YYY_iszilch(&(x->b)) && FP4_YYY_iszilch(&(x->c))) return 1; + return 0; +} + +/* FP12 copy w=x */ +/* SU= 16 */ +void FP12_YYY_copy(FP12_YYY *w,FP12_YYY *x) +{ + if (x==w) return; + FP4_YYY_copy(&(w->a),&(x->a)); + FP4_YYY_copy(&(w->b),&(x->b)); + FP4_YYY_copy(&(w->c),&(x->c)); +} + +/* FP12 w=1 */ +/* SU= 8 */ +void FP12_YYY_one(FP12_YYY *w) +{ + FP4_YYY_one(&(w->a)); + FP4_YYY_zero(&(w->b)); + FP4_YYY_zero(&(w->c)); +} + +/* return 1 if x==y, else 0 */ +/* SU= 16 */ +int FP12_YYY_equals(FP12_YYY *x,FP12_YYY *y) +{ + if (FP4_YYY_equals(&(x->a),&(y->a)) && FP4_YYY_equals(&(x->b),&(y->b)) && FP4_YYY_equals(&(x->b),&(y->b))) + return 1; + return 0; +} + +/* Set w=conj(x) */ +/* SU= 8 */ +void FP12_YYY_conj(FP12_YYY *w,FP12_YYY *x) +{ + FP12_YYY_copy(w,x); + FP4_YYY_conj(&(w->a),&(w->a)); + FP4_YYY_nconj(&(w->b),&(w->b)); + FP4_YYY_conj(&(w->c),&(w->c)); +} + +/* Create FP12 from FP4 */ +/* SU= 8 */ +void FP12_YYY_from_FP4(FP12_YYY *w,FP4_YYY *a) +{ + FP4_YYY_copy(&(w->a),a); + FP4_YYY_zero(&(w->b)); + FP4_YYY_zero(&(w->c)); +} + +/* Create FP12 from 3 FP4's */ +/* SU= 16 */ +void FP12_YYY_from_FP4s(FP12_YYY *w,FP4_YYY *a,FP4_YYY *b,FP4_YYY *c) +{ + FP4_YYY_copy(&(w->a),a); + FP4_YYY_copy(&(w->b),b); + FP4_YYY_copy(&(w->c),c); +} + +/* Granger-Scott Unitary Squaring. This does not benefit from lazy reduction */ +/* SU= 600 */ +void FP12_YYY_usqr(FP12_YYY *w,FP12_YYY *x) +{ + FP4_YYY A,B,C,D; + + FP4_YYY_copy(&A,&(x->a)); + + FP4_YYY_sqr(&(w->a),&(x->a)); + FP4_YYY_add(&D,&(w->a),&(w->a)); + FP4_YYY_add(&(w->a),&D,&(w->a)); + + FP4_YYY_norm(&(w->a)); + FP4_YYY_nconj(&A,&A); + + FP4_YYY_add(&A,&A,&A); + FP4_YYY_add(&(w->a),&(w->a),&A); + FP4_YYY_sqr(&B,&(x->c)); + FP4_YYY_times_i(&B); + + FP4_YYY_add(&D,&B,&B); + FP4_YYY_add(&B,&B,&D); + FP4_YYY_norm(&B); + + FP4_YYY_sqr(&C,&(x->b)); + + FP4_YYY_add(&D,&C,&C); + FP4_YYY_add(&C,&C,&D); + + FP4_YYY_norm(&C); + FP4_YYY_conj(&(w->b),&(x->b)); + FP4_YYY_add(&(w->b),&(w->b),&(w->b)); + FP4_YYY_nconj(&(w->c),&(x->c)); + + FP4_YYY_add(&(w->c),&(w->c),&(w->c)); + FP4_YYY_add(&(w->b),&B,&(w->b)); + FP4_YYY_add(&(w->c),&C,&(w->c)); + + FP12_YYY_reduce(w); /* reduce here as in pow function repeated squarings would trigger multiple reductions */ +} + +/* FP12 squaring w=x^2 */ +/* SU= 600 */ +void FP12_YYY_sqr(FP12_YYY *w,FP12_YYY *x) +{ + /* Use Chung-Hasan SQR2 method from http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */ + + FP4_YYY A,B,C,D; + + FP4_YYY_sqr(&A,&(x->a)); + FP4_YYY_mul(&B,&(x->b),&(x->c)); + FP4_YYY_add(&B,&B,&B); + FP4_YYY_norm(&B); + FP4_YYY_sqr(&C,&(x->c)); + + FP4_YYY_mul(&D,&(x->a),&(x->b)); + FP4_YYY_add(&D,&D,&D); + FP4_YYY_add(&(w->c),&(x->a),&(x->c)); + FP4_YYY_add(&(w->c),&(x->b),&(w->c)); + FP4_YYY_norm(&(w->c)); + + FP4_YYY_sqr(&(w->c),&(w->c)); + + FP4_YYY_copy(&(w->a),&A); + FP4_YYY_add(&A,&A,&B); + + FP4_YYY_norm(&A); + + FP4_YYY_add(&A,&A,&C); + FP4_YYY_add(&A,&A,&D); + + FP4_YYY_norm(&A); + FP4_YYY_neg(&A,&A); + FP4_YYY_times_i(&B); + FP4_YYY_times_i(&C); + + FP4_YYY_add(&(w->a),&(w->a),&B); + FP4_YYY_add(&(w->b),&C,&D); + FP4_YYY_add(&(w->c),&(w->c),&A); + + FP12_YYY_norm(w); +} + +/* FP12 full multiplication w=w*y */ + + +/* SU= 896 */ +/* FP12 full multiplication w=w*y */ +void FP12_YYY_mul(FP12_YYY *w,FP12_YYY *y) +{ + FP4_YYY z0,z1,z2,z3,t0,t1; + + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + FP4_YYY_mul(&z2,&(w->b),&(y->b)); // + + FP4_YYY_add(&t0,&(w->a),&(w->b)); + FP4_YYY_add(&t1,&(y->a),&(y->b)); // + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&z1,&t0,&t1); + FP4_YYY_add(&t0,&(w->b),&(w->c)); + FP4_YYY_add(&t1,&(y->b),&(y->c)); // + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&z3,&t0,&t1); + + FP4_YYY_neg(&t0,&z0); + FP4_YYY_neg(&t1,&z2); + + FP4_YYY_add(&z1,&z1,&t0); // z1=z1-z0 + FP4_YYY_add(&(w->b),&z1,&t1); // z1=z1-z2 + FP4_YYY_add(&z3,&z3,&t1); // z3=z3-z2 + FP4_YYY_add(&z2,&z2,&t0); // z2=z2-z0 + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + FP4_YYY_add(&t1,&(y->a),&(y->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&t0,&t1,&t0); + FP4_YYY_add(&z2,&z2,&t0); + + FP4_YYY_mul(&t0,&(w->c),&(y->c)); + FP4_YYY_neg(&t1,&t0); + + FP4_YYY_add(&(w->c),&z2,&t1); + FP4_YYY_add(&z3,&z3,&t1); + FP4_YYY_times_i(&t0); + FP4_YYY_add(&(w->b),&(w->b),&t0); + FP4_YYY_norm(&z3); + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + + FP12_YYY_norm(w); +} + +/* FP12 multiplication w=w*y */ +/* SU= 744 */ +/* catering for special case that arises from special form of ATE pairing line function */ +void FP12_YYY_smul(FP12_YYY *w,FP12_YYY *y,int type) +{ + FP4_YYY z0,z1,z2,z3,t0,t1; + + if (type==D_TYPE) + { + // y->c is 0 + + FP4_YYY_copy(&z3,&(w->b)); + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + + FP4_YYY_pmul(&z2,&(w->b),&(y->b).a); + FP4_YYY_add(&(w->b),&(w->a),&(w->b)); + FP4_YYY_copy(&t1,&(y->a)); + FP2_YYY_add(&t1.a,&t1.a,&(y->b).a); + + FP4_YYY_norm(&t1); + FP4_YYY_norm(&(w->b)); + + FP4_YYY_mul(&(w->b),&(w->b),&t1); + FP4_YYY_add(&z3,&z3,&(w->c)); + FP4_YYY_norm(&z3); + FP4_YYY_pmul(&z3,&z3,&(y->b).a); + FP4_YYY_neg(&t0,&z0); + FP4_YYY_neg(&t1,&z2); + + FP4_YYY_add(&(w->b),&(w->b),&t0); // z1=z1-z0 + FP4_YYY_add(&(w->b),&(w->b),&t1); // z1=z1-z2 + + FP4_YYY_add(&z3,&z3,&t1); // z3=z3-z2 + FP4_YYY_add(&z2,&z2,&t0); // z2=z2-z0 + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&z3); + + FP4_YYY_mul(&t0,&(y->a),&t0); + FP4_YYY_add(&(w->c),&z2,&t0); + + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + } + + if (type==M_TYPE) + { + // y->b is zero + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + FP4_YYY_add(&t0,&(w->a),&(w->b)); + FP4_YYY_norm(&t0); + + FP4_YYY_mul(&z1,&t0,&(y->a)); + FP4_YYY_add(&t0,&(w->b),&(w->c)); + FP4_YYY_norm(&t0); + + FP4_YYY_pmul(&z3,&t0,&(y->c).b); + FP4_YYY_times_i(&z3); + + FP4_YYY_neg(&t0,&z0); + FP4_YYY_add(&z1,&z1,&t0); // z1=z1-z0 + + FP4_YYY_copy(&(w->b),&z1); + + FP4_YYY_copy(&z2,&t0); + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + FP4_YYY_add(&t1,&(y->a),&(y->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&t0,&t1,&t0); + FP4_YYY_add(&z2,&z2,&t0); + + FP4_YYY_pmul(&t0,&(w->c),&(y->c).b); + FP4_YYY_times_i(&t0); + FP4_YYY_neg(&t1,&t0); + FP4_YYY_times_i(&t0); + + FP4_YYY_add(&(w->c),&z2,&t1); + FP4_YYY_add(&z3,&z3,&t1); + + FP4_YYY_add(&(w->b),&(w->b),&t0); + FP4_YYY_norm(&z3); + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + } + FP12_YYY_norm(w); +} + +/* Set w=1/x */ +/* SU= 600 */ +void FP12_YYY_inv(FP12_YYY *w,FP12_YYY *x) +{ + FP4_YYY f0,f1,f2,f3; + + FP4_YYY_sqr(&f0,&(x->a)); + FP4_YYY_mul(&f1,&(x->b),&(x->c)); + FP4_YYY_times_i(&f1); + FP4_YYY_sub(&f0,&f0,&f1); /* y.a */ + FP4_YYY_norm(&f0); + + FP4_YYY_sqr(&f1,&(x->c)); + FP4_YYY_times_i(&f1); + FP4_YYY_mul(&f2,&(x->a),&(x->b)); + FP4_YYY_sub(&f1,&f1,&f2); /* y.b */ + FP4_YYY_norm(&f1); + + FP4_YYY_sqr(&f2,&(x->b)); + FP4_YYY_mul(&f3,&(x->a),&(x->c)); + FP4_YYY_sub(&f2,&f2,&f3); /* y.c */ + FP4_YYY_norm(&f2); + + FP4_YYY_mul(&f3,&(x->b),&f2); + FP4_YYY_times_i(&f3); + FP4_YYY_mul(&(w->a),&f0,&(x->a)); + FP4_YYY_add(&f3,&(w->a),&f3); + FP4_YYY_mul(&(w->c),&f1,&(x->c)); + FP4_YYY_times_i(&(w->c)); + + FP4_YYY_add(&f3,&(w->c),&f3); + FP4_YYY_norm(&f3); + + FP4_YYY_inv(&f3,&f3); + + FP4_YYY_mul(&(w->a),&f0,&f3); + FP4_YYY_mul(&(w->b),&f1,&f3); + FP4_YYY_mul(&(w->c),&f2,&f3); + +} + +/* constant time powering by small integer of max length bts */ + +void FP12_YYY_pinpow(FP12_YYY *r,int e,int bts) +{ + int i,b; + FP12_YYY R[2]; + + FP12_YYY_one(&R[0]); + FP12_YYY_copy(&R[1],r); + + for (i=bts-1; i>=0; i--) + { + b=(e>>i)&1; + FP12_YYY_mul(&R[1-b],&R[b]); + FP12_YYY_usqr(&R[b],&R[b]); + } + FP12_YYY_copy(r,&R[0]); +} + +/* Compressed powering of unitary elements y=x^(e mod r) */ + +void FP12_YYY_compow(FP4_YYY *c,FP12_YYY *x,BIG_XXX e,BIG_XXX r) +{ + FP12_YYY g1,g2; + FP4_YYY cp,cpm1,cpm2; + FP2_YYY f; + BIG_XXX q,a,b,m; + + BIG_XXX_rcopy(a,Fra_YYY); + BIG_XXX_rcopy(b,Frb_YYY); + FP2_YYY_from_BIGs(&f,a,b); + + BIG_XXX_rcopy(q,Modulus_YYY); + + FP12_YYY_copy(&g1,x); + FP12_YYY_copy(&g2,x); + + BIG_XXX_copy(m,q); + BIG_XXX_mod(m,r); + + BIG_XXX_copy(a,e); + BIG_XXX_mod(a,m); + + BIG_XXX_copy(b,e); + BIG_XXX_sdiv(b,m); + + FP12_YYY_trace(c,&g1); + + if (BIG_XXX_iszilch(b)) + { + FP4_YYY_xtr_pow(c,c,e); + return; + } + + FP12_YYY_frob(&g2,&f); + FP12_YYY_trace(&cp,&g2); + + FP12_YYY_conj(&g1,&g1); + FP12_YYY_mul(&g2,&g1); + FP12_YYY_trace(&cpm1,&g2); + FP12_YYY_mul(&g2,&g1); + FP12_YYY_trace(&cpm2,&g2); + + FP4_YYY_xtr_pow2(c,&cp,c,&cpm1,&cpm2,a,b); +} + + +/* SU= 528 */ +/* set r=a^b */ +/* Note this is simple square and multiply, so not side-channel safe */ + +void FP12_YYY_pow(FP12_YYY *r,FP12_YYY *a,BIG_XXX b) +{ + FP12_YYY w,sf; + BIG_XXX b1,b3; + int i,nb,bt; + BIG_XXX_copy(b1,b); + BIG_XXX_norm(b1); + BIG_XXX_pmul(b3,b1,3); + BIG_XXX_norm(b3); + + FP12_YYY_copy(&sf,a); + FP12_YYY_norm(&sf); + FP12_YYY_copy(&w,&sf); + + + nb=BIG_XXX_nbits(b3); + for (i=nb-2; i>=1; i--) + { + FP12_YYY_usqr(&w,&w); + bt=BIG_XXX_bit(b3,i)-BIG_XXX_bit(b1,i); + if (bt==1) + FP12_YYY_mul(&w,&sf); + if (bt==-1) + { + FP12_YYY_conj(&sf,&sf); + FP12_YYY_mul(&w,&sf); + FP12_YYY_conj(&sf,&sf); + } + } + + FP12_YYY_copy(r,&w); + FP12_YYY_reduce(r); +} + +/* p=q0^u0.q1^u1.q2^u2.q3^u3 */ +/* Side channel attack secure */ +// Bos & Costello https://eprint.iacr.org/2013/458.pdf +// Faz-Hernandez & Longa & Sanchez https://eprint.iacr.org/2013/158.pdf + +void FP12_YYY_pow4(FP12_YYY *p,FP12_YYY *q,BIG_XXX u[4]) +{ + int i,j,k,nb,pb,bt; + FP12_YYY g[8],r; + BIG_XXX t[4],mt; + sign8 w[NLEN_XXX*BASEBITS_XXX+1]; + sign8 s[NLEN_XXX*BASEBITS_XXX+1]; + + for (i=0; i<4; i++) + BIG_XXX_copy(t[i],u[i]); + + +// Precomputed table + FP12_YYY_copy(&g[0],&q[0]); // q[0] + FP12_YYY_copy(&g[1],&g[0]); + FP12_YYY_mul(&g[1],&q[1]); // q[0].q[1] + FP12_YYY_copy(&g[2],&g[0]); + FP12_YYY_mul(&g[2],&q[2]); // q[0].q[2] + FP12_YYY_copy(&g[3],&g[1]); + FP12_YYY_mul(&g[3],&q[2]); // q[0].q[1].q[2] + FP12_YYY_copy(&g[4],&g[0]); + FP12_YYY_mul(&g[4],&q[3]); // q[0].q[3] + FP12_YYY_copy(&g[5],&g[1]); + FP12_YYY_mul(&g[5],&q[3]); // q[0].q[1].q[3] + FP12_YYY_copy(&g[6],&g[2]); + FP12_YYY_mul(&g[6],&q[3]); // q[0].q[2].q[3] + FP12_YYY_copy(&g[7],&g[3]); + FP12_YYY_mul(&g[7],&q[3]); // q[0].q[1].q[2].q[3] + +// Make it odd + pb=1-BIG_XXX_parity(t[0]); + BIG_XXX_inc(t[0],pb); + BIG_XXX_norm(t[0]); + +// Number of bits + BIG_XXX_zero(mt); + for (i=0; i<4; i++) + { + BIG_XXX_or(mt,mt,t[i]); + } + nb=1+BIG_XXX_nbits(mt); + +// Sign pivot + s[nb-1]=1; + for (i=0;i<nb-1;i++) + { + BIG_XXX_fshr(t[0],1); + s[i]=2*BIG_XXX_parity(t[0])-1; + } + +// Recoded exponent + for (i=0; i<nb; i++) + { + w[i]=0; + k=1; + for (j=1; j<4; j++) + { + bt=s[i]*BIG_XXX_parity(t[j]); + BIG_XXX_fshr(t[j],1); + + BIG_XXX_dec(t[j],(bt>>1)); + BIG_XXX_norm(t[j]); + w[i]+=bt*k; + k*=2; + } + } + +// Main loop + FP12_YYY_select(p,g,2*w[nb-1]+1); + for (i=nb-2; i>=0; i--) + { + FP12_YYY_select(&r,g,2*w[i]+s[i]); + FP12_YYY_usqr(p,p); + FP12_YYY_mul(p,&r); + } +// apply correction + FP12_YYY_conj(&r,&q[0]); + FP12_YYY_mul(&r,p); + FP12_YYY_cmove(p,&r,pb); + + FP12_YYY_reduce(p); +} + +/* Set w=w^p using Frobenius */ +/* SU= 160 */ +void FP12_YYY_frob(FP12_YYY *w,FP2_YYY *f) +{ + FP2_YYY f2,f3; + FP2_YYY_sqr(&f2,f); /* f2=f^2 */ + FP2_YYY_mul(&f3,&f2,f); /* f3=f^3 */ + + FP4_YYY_frob(&(w->a),&f3); + FP4_YYY_frob(&(w->b),&f3); + FP4_YYY_frob(&(w->c),&f3); + + FP4_YYY_pmul(&(w->b),&(w->b),f); + FP4_YYY_pmul(&(w->c),&(w->c),&f2); +} + +/* SU= 8 */ +/* normalise all components of w */ +void FP12_YYY_norm(FP12_YYY *w) +{ + FP4_YYY_norm(&(w->a)); + FP4_YYY_norm(&(w->b)); + FP4_YYY_norm(&(w->c)); +} + +/* SU= 8 */ +/* reduce all components of w */ +void FP12_YYY_reduce(FP12_YYY *w) +{ + FP4_YYY_reduce(&(w->a)); + FP4_YYY_reduce(&(w->b)); + FP4_YYY_reduce(&(w->c)); +} + +/* trace function w=trace(x) */ +/* SU= 8 */ +void FP12_YYY_trace(FP4_YYY *w,FP12_YYY *x) +{ + FP4_YYY_imul(w,&(x->a),3); + FP4_YYY_reduce(w); +} + +/* SU= 8 */ +/* Output w in hex */ +void FP12_YYY_output(FP12_YYY *w) +{ + printf("["); + FP4_YYY_output(&(w->a)); + printf(","); + FP4_YYY_output(&(w->b)); + printf(","); + FP4_YYY_output(&(w->c)); + printf("]"); +} + +/* SU= 64 */ +/* Convert g to octet string w */ +void FP12_YYY_toOctet(octet *W,FP12_YYY *g) +{ + BIG_XXX a; + W->len=12*MODBYTES_XXX; + + FP_YYY_redc(a,&(g->a.a.a)); + BIG_XXX_toBytes(&(W->val[0]),a); + FP_YYY_redc(a,&(g->a.a.b)); + BIG_XXX_toBytes(&(W->val[MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->a.b.a)); + BIG_XXX_toBytes(&(W->val[2*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->a.b.b)); + BIG_XXX_toBytes(&(W->val[3*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.a.a)); + BIG_XXX_toBytes(&(W->val[4*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.a.b)); + BIG_XXX_toBytes(&(W->val[5*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.b.a)); + BIG_XXX_toBytes(&(W->val[6*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.b.b)); + BIG_XXX_toBytes(&(W->val[7*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.a.a)); + BIG_XXX_toBytes(&(W->val[8*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.a.b)); + BIG_XXX_toBytes(&(W->val[9*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.b.a)); + BIG_XXX_toBytes(&(W->val[10*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.b.b)); + BIG_XXX_toBytes(&(W->val[11*MODBYTES_XXX]),a); +} + +/* SU= 24 */ +/* Restore g from octet string w */ +void FP12_YYY_fromOctet(FP12_YYY *g,octet *W) +{ + BIG_XXX b; + BIG_XXX_fromBytes(b,&W->val[0]); + FP_YYY_nres(&(g->a.a.a),b); + BIG_XXX_fromBytes(b,&W->val[MODBYTES_XXX]); + FP_YYY_nres(&(g->a.a.b),b); + BIG_XXX_fromBytes(b,&W->val[2*MODBYTES_XXX]); + FP_YYY_nres(&(g->a.b.a),b); + BIG_XXX_fromBytes(b,&W->val[3*MODBYTES_XXX]); + FP_YYY_nres(&(g->a.b.b),b); + BIG_XXX_fromBytes(b,&W->val[4*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.a.a),b); + BIG_XXX_fromBytes(b,&W->val[5*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.a.b),b); + BIG_XXX_fromBytes(b,&W->val[6*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.b.a),b); + BIG_XXX_fromBytes(b,&W->val[7*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.b.b),b); + BIG_XXX_fromBytes(b,&W->val[8*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.a.a),b); + BIG_XXX_fromBytes(b,&W->val[9*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.a.b),b); + BIG_XXX_fromBytes(b,&W->val[10*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.b.a),b); + BIG_XXX_fromBytes(b,&W->val[11*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.b.b),b); +} + +/* Move b to a if d=1 */ +void FP12_YYY_cmove(FP12_YYY *f,FP12_YYY *g,int d) +{ + FP4_YYY_cmove(&(f->a),&(g->a),d); + FP4_YYY_cmove(&(f->b),&(g->b),d); + FP4_YYY_cmove(&(f->c),&(g->c),d); +} + http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp12.h ---------------------------------------------------------------------- diff --git a/version3/c/fp12.h b/version3/c/fp12.h new file mode 100644 index 0000000..99fed17 --- /dev/null +++ b/version3/c/fp12.h @@ -0,0 +1,216 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +*/ + +/** + * @file fp12.h + * @author Mike Scott + * @brief FP12 Header File + * + */ + +#ifndef FP12_YYY_H +#define FP12_YYY_H + +#include "fp4_YYY.h" + +/** + @brief FP12 Structure - towered over three FP4 +*/ + +typedef struct +{ + FP4_YYY a; /**< first part of FP12 */ + FP4_YYY b; /**< second part of FP12 */ + FP4_YYY c; /**< third part of FP12 */ +} FP12_YYY; + +extern const BIG_XXX Fra_YYY; /**< real part of BN curve Frobenius Constant */ +extern const BIG_XXX Frb_YYY; /**< imaginary part of BN curve Frobenius Constant */ + +/* FP12 prototypes */ +/** @brief Tests for FP12 equal to zero + * + @param x FP12 number to be tested + @return 1 if zero, else returns 0 + */ +extern int FP12_YYY_iszilch(FP12_YYY *x); +/** @brief Tests for FP12 equal to unity + * + @param x FP12 number to be tested + @return 1 if unity, else returns 0 + */ +extern int FP12_YYY_isunity(FP12_YYY *x); +/** @brief Copy FP12 to another FP12 + * + @param x FP12 instance, on exit = y + @param y FP12 instance to be copied + */ +extern void FP12_YYY_copy(FP12_YYY *x,FP12_YYY *y); +/** @brief Set FP12 to unity + * + @param x FP12 instance to be set to one + */ +extern void FP12_YYY_one(FP12_YYY *x); +/** @brief Tests for equality of two FP12s + * + @param x FP12 instance to be compared + @param y FP12 instance to be compared + @return 1 if x=y, else returns 0 + */ +extern int FP12_YYY_equals(FP12_YYY *x,FP12_YYY *y); +/** @brief Conjugation of FP12 + * + If y=(a,b,c) (where a,b,c are its three FP4 components) on exit x=(conj(a),-conj(b),conj(c)) + @param x FP12 instance, on exit = conj(y) + @param y FP12 instance + */ +extern void FP12_YYY_conj(FP12_YYY *x,FP12_YYY *y); +/** @brief Initialise FP12 from single FP4 + * + Sets first FP4 component of an FP12, other components set to zero + @param x FP12 instance to be initialised + @param a FP4 to form first part of FP4 + */ +extern void FP12_YYY_from_FP4(FP12_YYY *x,FP4_YYY *a); +/** @brief Initialise FP12 from three FP4s + * + @param x FP12 instance to be initialised + @param a FP4 to form first part of FP12 + @param b FP4 to form second part of FP12 + @param c FP4 to form third part of FP12 + */ +extern void FP12_YYY_from_FP4s(FP12_YYY *x,FP4_YYY *a,FP4_YYY* b,FP4_YYY *c); +/** @brief Fast Squaring of an FP12 in "unitary" form + * + @param x FP12 instance, on exit = y^2 + @param y FP4 instance, must be unitary + */ +extern void FP12_YYY_usqr(FP12_YYY *x,FP12_YYY *y); +/** @brief Squaring an FP12 + * + @param x FP12 instance, on exit = y^2 + @param y FP12 instance + */ +extern void FP12_YYY_sqr(FP12_YYY *x,FP12_YYY *y); +/** @brief Fast multiplication of an FP12 by an FP12 that arises from an ATE pairing line function + * + Here the multiplier has a special form that can be exploited + @param x FP12 instance, on exit = x*y + @param y FP12 instance, of special form + @param t D_TYPE or M_TYPE twist + */ +extern void FP12_YYY_smul(FP12_YYY *x,FP12_YYY *y,int t); +/** @brief Multiplication of two FP12s + * + @param x FP12 instance, on exit = x*y + @param y FP12 instance, the multiplier + */ +extern void FP12_YYY_mul(FP12_YYY *x,FP12_YYY *y); +/** @brief Inverting an FP12 + * + @param x FP12 instance, on exit = 1/y + @param y FP12 instance + */ +extern void FP12_YYY_inv(FP12_YYY *x,FP12_YYY *y); +/** @brief Raises an FP12 to the power of a BIG + * + @param r FP12 instance, on exit = y^b + @param x FP12 instance + @param b BIG number + */ +extern void FP12_YYY_pow(FP12_YYY *r,FP12_YYY *x,BIG_XXX b); +/** @brief Raises an FP12 instance x to a small integer power, side-channel resistant + * + @param x FP12 instance, on exit = x^i + @param i small integer exponent + @param b maximum number of bits in exponent + */ +extern void FP12_YYY_pinpow(FP12_YYY *x,int i,int b); + +/** @brief Raises an FP12 instance x to a BIG power, compressed to FP4 + * + @param c FP4 instance, on exit = x^(e mod r) as FP4 + @param x FP12 input + @param e BIG exponent + @param r BIG group order + */ +extern void FP12_YYY_compow(FP4_YYY *c,FP12_YYY *x,BIG_XXX e,BIG_XXX r); + +/** @brief Calculate x[0]^b[0].x[1]^b[1].x[2]^b[2].x[3]^b[3], side-channel resistant + * + @param r FP12 instance, on exit = x[0]^b[0].x[1]^b[1].x[2]^b[2].x[3]^b[3] + @param x FP12 array with 4 FP12s + @param b BIG array of 4 exponents + */ +extern void FP12_YYY_pow4(FP12_YYY *r,FP12_YYY *x,BIG_XXX *b); +/** @brief Raises an FP12 to the power of the internal modulus p, using the Frobenius + * + @param x FP12 instance, on exit = x^p + @param f FP2 precalculated Frobenius constant + */ +extern void FP12_YYY_frob(FP12_YYY *x,FP2_YYY *f); +/** @brief Reduces all components of possibly unreduced FP12 mod Modulus + * + @param x FP12 instance, on exit reduced mod Modulus + */ +extern void FP12_YYY_reduce(FP12_YYY *x); +/** @brief Normalises the components of an FP12 + * + @param x FP12 instance to be normalised + */ +extern void FP12_YYY_norm(FP12_YYY *x); +/** @brief Formats and outputs an FP12 to the console + * + @param x FP12 instance to be printed + */ +extern void FP12_YYY_output(FP12_YYY *x); +/** @brief Formats and outputs an FP12 instance to an octet string + * + Serializes the components of an FP12 to big-endian base 256 form. + @param S output octet string + @param x FP12 instance to be converted to an octet string + */ +extern void FP12_YYY_toOctet(octet *S,FP12_YYY *x); +/** @brief Creates an FP12 instance from an octet string + * + De-serializes the components of an FP12 to create an FP12 from big-endian base 256 components. + @param x FP12 instance to be created from an octet string + @param S input octet string + + */ +extern void FP12_YYY_fromOctet(FP12_YYY *x,octet *S); +/** @brief Calculate the trace of an FP12 + * + @param t FP4 trace of x, on exit = tr(x) + @param x FP12 instance + + */ +extern void FP12_YYY_trace(FP4_YYY *t,FP12_YYY *x); + +/** @brief Conditional copy of FP12 number + * + Conditionally copies second parameter to the first (without branching) + @param x FP12 instance, set to y if s!=0 + @param y another FP12 instance + @param s copy only takes place if not equal to 0 + */ +extern void FP12_YYY_cmove(FP12_YYY *x,FP12_YYY *y,int s); + + +#endif
