http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/ff.c.in ---------------------------------------------------------------------- diff --git a/src/ff.c.in b/src/ff.c.in new file mode 100644 index 0000000..f4569e7 --- /dev/null +++ b/src/ff.c.in @@ -0,0 +1,1135 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL basic functions for Large Finite Field support */ + +#include "ff_WWW.h" + +/* Arazi and Qi inversion mod 256 */ +static int invmod256(int a) +{ + int U,t1,t2,b,c; + t1=0; + c=(a>>1)&1; + t1+=c; + t1&=1; + t1=2-t1; + t1<<=1; + U=t1+1; + + // i=2 + b=a&3; + t1=U*b; + t1>>=2; + c=(a>>2)&3; + t2=(U*c)&3; + t1+=t2; + t1*=U; + t1&=3; + t1=4-t1; + t1<<=2; + U+=t1; + + // i=4 + b=a&15; + t1=U*b; + t1>>=4; + c=(a>>4)&15; + t2=(U*c)&15; + t1+=t2; + t1*=U; + t1&=15; + t1=16-t1; + t1<<=4; + U+=t1; + + return U; +} + +/* a=1/a mod 2^BIGBITS. This is very fast! 
*/ +void BIG_XXX_invmod2m(BIG_XXX a) +{ + int i; + BIG_XXX U,t1,b,c; + BIG_XXX_zero(U); + BIG_XXX_inc(U,invmod256(BIG_XXX_lastbits(a,8))); + for (i=8; i<BIGBITS_XXX; i<<=1) + { + BIG_XXX_norm(U); + BIG_XXX_copy(b,a); + BIG_XXX_mod2m(b,i); // bottom i bits of a + + BIG_XXX_smul(t1,U,b); + BIG_XXX_shr(t1,i); // top i bits of U*b + + BIG_XXX_copy(c,a); + BIG_XXX_shr(c,i); + BIG_XXX_mod2m(c,i); // top i bits of a + + BIG_XXX_smul(b,U,c); + BIG_XXX_mod2m(b,i); // bottom i bits of U*c + + BIG_XXX_add(t1,t1,b); + BIG_XXX_norm(t1); + BIG_XXX_smul(b,t1,U); + BIG_XXX_copy(t1,b); // (t1+b)*U + BIG_XXX_mod2m(t1,i); // bottom i bits of (t1+b)*U + + BIG_XXX_one(b); + BIG_XXX_shl(b,i); + BIG_XXX_sub(t1,b,t1); + BIG_XXX_norm(t1); + + BIG_XXX_shl(t1,i); + + BIG_XXX_add(U,U,t1); + } + BIG_XXX_copy(a,U); + BIG_XXX_norm(a); + BIG_XXX_mod2m(a,BIGBITS_XXX); +} + +/* x=y */ +void FF_WWW_copy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_copy(x[i],y[i]); +} + +/* x=y<<n */ +static void FF_WWW_dsucopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(x[n+i],y[i]); + BIG_XXX_zero(x[i]); + } +} + +/* x=y */ +static void FF_WWW_dscopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(x[i],y[i]); + BIG_XXX_zero(x[n+i]); + } +} + +/* x=y>>n */ +static void FF_WWW_sducopy(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_copy(x[i],y[n+i]); +} + +/* set to zero */ +void FF_WWW_zero(BIG_XXX x[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* test equals 0 */ +int FF_WWW_iszilch(BIG_XXX x[],int n) +{ + int i; + for (i=0; i<n; i++) + if (!BIG_XXX_iszilch(x[i])) return 0; + return 1; +} + +/* shift right by BIGBITS-bit words */ +static void FF_WWW_shrw(BIG_XXX a[],int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(a[i],a[i+n]); + BIG_XXX_zero(a[i+n]); + } +} + +/* shift left by BIGBITS-bit words */ +static void FF_WWW_shlw(BIG_XXX a[],int n) +{ + 
int i; + for (i=0; i<n; i++) + { + BIG_XXX_copy(a[i+n],a[i]); + BIG_XXX_zero(a[i]); + } +} + +/* extract last bit */ +int FF_WWW_parity(BIG_XXX x[]) +{ + return BIG_XXX_parity(x[0]); +} + +/* extract last m bits */ +int FF_WWW_lastbits(BIG_XXX x[],int m) +{ + return BIG_XXX_lastbits(x[0],m); +} + +/* x=1 */ +void FF_WWW_one(BIG_XXX x[],int n) +{ + int i; + BIG_XXX_one(x[0]); + for (i=1; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* x=m, where m is 32-bit int */ +void FF_WWW_init(BIG_XXX x[],sign32 m,int n) +{ + int i; + BIG_XXX_zero(x[0]); +#if CHUNK<64 + x[0][0]=(chunk)(m&BMASK_XXX); + x[0][1]=(chunk)(m>>BASEBITS_XXX); +#else + x[0][0]=(chunk)m; +#endif + for (i=1; i<n; i++) + BIG_XXX_zero(x[i]); +} + +/* compare x and y - must be normalised */ +int FF_WWW_comp(BIG_XXX x[],BIG_XXX y[],int n) +{ + int i,j; + for (i=n-1; i>=0; i--) + { + j=BIG_XXX_comp(x[i],y[i]); + if (j!=0) return j; + } + return 0; +} + +/* recursive add */ +static void FF_WWW_radd(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[zp+i],x[xp+i],y[yp+i]); +} + +/* recursive inc */ +static void FF_WWW_rinc(BIG_XXX z[],int zp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[zp+i],z[zp+i],y[yp+i]); +} + +/* recursive dec */ +static void FF_WWW_rdec(BIG_XXX z[],int zp,BIG_XXX y[],int yp,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_sub(z[zp+i],z[zp+i],y[yp+i]); +} + +/* simple add */ +void FF_WWW_add(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_add(z[i],x[i],y[i]); +} + +/* simple sub */ +void FF_WWW_sub(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_sub(z[i],x[i],y[i]); +} + +/* increment/decrement by a small integer */ +void FF_WWW_inc(BIG_XXX x[],int m,int n) +{ + BIG_XXX_inc(x[0],m); + FF_WWW_norm(x,n); +} + +void FF_WWW_dec(BIG_XXX x[],int m,int n) +{ + BIG_XXX_dec(x[0],m); + FF_WWW_norm(x,n); +} + +/* normalise - but hold any 
overflow in top part unless n<0 */ +static void FF_WWW_rnorm(BIG_XXX z[],int zp,int n) +{ + int i,trunc=0; + chunk carry; + if (n<0) + { + /* -v n signals to do truncation */ + n=-n; + trunc=1; + } + for (i=0; i<n-1; i++) + { + carry=BIG_XXX_norm(z[zp+i]); + + z[zp+i][NLEN_XXX-1]^=carry<<P_TBITS_WWW; /* remove it */ + z[zp+i+1][0]+=carry; + } + carry=BIG_XXX_norm(z[zp+n-1]); + if (trunc) z[zp+n-1][NLEN_XXX-1]^=carry<<P_TBITS_WWW; +} + +void FF_WWW_norm(BIG_XXX z[],int n) +{ + FF_WWW_rnorm(z,0,n); +} + +/* shift left by one bit */ +void FF_WWW_shl(BIG_XXX x[],int n) +{ + int i; + int carry,delay_carry=0; + for (i=0; i<n-1; i++) + { + carry=BIG_XXX_fshl(x[i],1); + x[i][0]|=delay_carry; + x[i][NLEN_XXX-1]^=(chunk)carry<<P_TBITS_WWW; + delay_carry=carry; + } + BIG_XXX_fshl(x[n-1],1); + x[n-1][0]|=delay_carry; +} + +/* shift right by one bit */ +void FF_WWW_shr(BIG_XXX x[],int n) +{ + int i; + int carry; + for (i=n-1; i>0; i--) + { + carry=BIG_XXX_fshr(x[i],1); + x[i-1][NLEN_XXX-1]|=(chunk)carry<<P_TBITS_WWW; + } + BIG_XXX_fshr(x[0],1); +} + +void FF_WWW_output(BIG_XXX x[],int n) +{ + int i; + FF_WWW_norm(x,n); + for (i=n-1; i>=0; i--) + { + BIG_XXX_output(x[i]); + printf(" "); + } +} + +void FF_WWW_rawoutput(BIG_XXX x[],int n) +{ + int i; + for (i=n-1; i>=0; i--) + { + BIG_XXX_rawoutput(x[i]); + printf(" "); + } +} + +/* Convert FFs to/from octet strings */ +void FF_WWW_toOctet(octet *w,BIG_XXX x[],int n) +{ + int i; + w->len=n*MODBYTES_XXX; + for (i=0; i<n; i++) + { + BIG_XXX_toBytes(&(w->val[(n-i-1)*MODBYTES_XXX]),x[i]); + } +} + +void FF_WWW_fromOctet(BIG_XXX x[],octet *w,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_fromBytes(x[i],&(w->val[(n-i-1)*MODBYTES_XXX])); + } +} + +/* in-place swapping using xor - side channel resistant */ +static void FF_WWW_cswap(BIG_XXX a[],BIG_XXX b[],int d,int n) +{ + int i; + for (i=0; i<n; i++) + BIG_XXX_cswap(a[i],b[i],d); + return; +} + +/* z=x*y, t is workspace */ +static void FF_WWW_karmul(BIG_XXX z[],int zp,BIG_XXX 
x[],int xp,BIG_XXX y[],int yp,BIG_XXX t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_XXX_norm(x[xp]); + BIG_XXX_norm(y[yp]); + BIG_XXX_mul(t[tp],x[xp],y[yp]); + BIG_XXX_split(z[zp+1],z[zp],t[tp],BIGBITS_XXX); + return; + } + + nd2=n/2; + FF_WWW_radd(z,zp,x,xp,x,xp+nd2,nd2); + FF_WWW_rnorm(z,zp,nd2); /* needs this if recursion level too deep */ + + FF_WWW_radd(z,zp+nd2,y,yp,y,yp+nd2,nd2); + FF_WWW_rnorm(z,zp+nd2,nd2); + FF_WWW_karmul(t,tp,z,zp,z,zp+nd2,t,tp+n,nd2); + FF_WWW_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_WWW_karmul(z,zp+n,x,xp+nd2,y,yp+nd2,t,tp+n,nd2); + FF_WWW_rdec(t,tp,z,zp,n); + FF_WWW_rdec(t,tp,z,zp+n,n); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + FF_WWW_rnorm(z,zp,2*n); +} + +static void FF_WWW_karsqr(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX t[],int tp,int n) +{ + int nd2; + if (n==1) + { + BIG_XXX_norm(x[xp]); + BIG_XXX_sqr(t[tp],x[xp]); + BIG_XXX_split(z[zp+1],z[zp],t[tp],BIGBITS_XXX); + return; + } + nd2=n/2; + FF_WWW_karsqr(z,zp,x,xp,t,tp+n,nd2); + FF_WWW_karsqr(z,zp+n,x,xp+nd2,t,tp+n,nd2); + FF_WWW_karmul(t,tp,x,xp,x,xp+nd2,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + FF_WWW_rinc(z,zp+nd2,t,tp,n); + + FF_WWW_rnorm(z,zp+nd2,n); /* was FF_rnorm(z,zp,2*n) */ +} + +static void FF_WWW_karmul_lower(BIG_XXX z[],int zp,BIG_XXX x[],int xp,BIG_XXX y[],int yp,BIG_XXX t[],int tp,int n) +{ + /* Calculates Least Significant bottom half of x*y */ + int nd2; + if (n==1) + { + /* only calculate bottom half of product */ + BIG_XXX_norm(x[xp]); + BIG_XXX_norm(y[yp]); + BIG_XXX_smul(z[zp],x[xp],y[yp]); + return; + } + nd2=n/2; + FF_WWW_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2); + FF_WWW_karmul_lower(t,tp,x,xp+nd2,y,yp,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,nd2); + FF_WWW_karmul_lower(t,tp,x,xp,y,yp+nd2,t,tp+n,nd2); + FF_WWW_rinc(z,zp+nd2,t,tp,nd2); + FF_WWW_rnorm(z,zp+nd2,-nd2); /* truncate it */ +} + +static void FF_WWW_karmul_upper(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],BIG_XXX t[],int n) +{ + /* Calculates Most Significant upper half of x*y, given lower part */ + int 
nd2; + + nd2=n/2; + FF_WWW_radd(z,n,x,0,x,nd2,nd2); + FF_WWW_radd(z,n+nd2,y,0,y,nd2,nd2); + FF_WWW_rnorm(z,n,nd2); + FF_WWW_rnorm(z,n+nd2,nd2); + + FF_WWW_karmul(t,0,z,n+nd2,z,n,t,n,nd2); /* t = (a0+a1)(b0+b1) */ + FF_WWW_karmul(z,n,x,nd2,y,nd2,t,n,nd2); /* z[n]= a1*b1 */ + /* z[0-nd2]=l(a0b0) z[nd2-n]= h(a0b0)+l(t)-l(a0b0)-l(a1b1) */ + FF_WWW_rdec(t,0,z,n,n); /* t=t-a1b1 */ + FF_WWW_rinc(z,nd2,z,0,nd2); /* z[nd2-n]+=l(a0b0) = h(a0b0)+l(t)-l(a1b1) */ + FF_WWW_rdec(z,nd2,t,0,nd2); /* z[nd2-n]=h(a0b0)+l(t)-l(a1b1)-l(t-a1b1)=h(a0b0) */ + FF_WWW_rnorm(z,0,-n); /* a0b0 now in z - truncate it */ + FF_WWW_rdec(t,0,z,0,n); /* (a0+a1)(b0+b1) - a0b0 */ + FF_WWW_rinc(z,nd2,t,0,n); + + FF_WWW_rnorm(z,nd2,n); +} + +/* z=x*y */ +void FF_WWW_mul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + // FF_norm(x,n); /* change here */ + // FF_norm(y,n); /* change here */ + FF_WWW_karmul(z,0,x,0,y,0,t,0,n); +} + +/* return low part of product */ +static void FF_WWW_lmul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + // FF_norm(x,n); /* change here */ + // FF_norm(y,n); /* change here */ + FF_WWW_karmul_lower(z,0,x,0,y,0,t,0,n); +} + +/* Set b=b mod c */ +void FF_WWW_mod(BIG_XXX b[],BIG_XXX c[],int n) +{ + int k=0; + + FF_WWW_norm(b,n); + if (FF_WWW_comp(b,c,n)<0) + return; + do + { + FF_WWW_shl(c,n); + k++; + } + while (FF_WWW_comp(b,c,n)>=0); + + while (k>0) + { + FF_WWW_shr(c,n); + if (FF_WWW_comp(b,c,n)>=0) + { + FF_WWW_sub(b,b,c,n); + FF_WWW_norm(b,n); + } + k--; + } +} + +/* z=x^2 */ +void FF_WWW_sqr(BIG_XXX z[],BIG_XXX x[],int n) +{ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; +#else + BIG_XXX t[2*n]; +#endif + // FF_norm(x,n); /* change here */ + FF_WWW_karsqr(z,0,x,0,t,0,n); +} + +/* r=t mod modulus, N is modulus, ND is Montgomery Constant */ +static void FF_WWW_reduce(BIG_XXX r[],BIG_XXX T[],BIG_XXX N[],BIG_XXX ND[],int n) +{ + /* fast 
karatsuba Montgomery reduction */ +#ifndef C99 + BIG_XXX t[2*FFLEN_WWW]; + BIG_XXX m[FFLEN_WWW]; +#else + BIG_XXX t[2*n]; + BIG_XXX m[n]; +#endif + FF_WWW_sducopy(r,T,n); /* keep top half of T */ + //FF_norm(T,n); /* change here */ + FF_WWW_karmul_lower(m,0,T,0,ND,0,t,0,n); /* m=T.(1/N) mod R */ + + //FF_norm(N,n); /* change here */ + FF_WWW_karmul_upper(T,N,m,t,n); /* T=mN */ + FF_WWW_sducopy(m,T,n); + + FF_WWW_add(r,r,N,n); + FF_WWW_sub(r,r,m,n); + FF_WWW_norm(r,n); +} + + +/* Set r=a mod b */ +/* a is of length - 2*n */ +/* r,b is of length - n */ +void FF_WWW_dmod(BIG_XXX r[],BIG_XXX a[],BIG_XXX b[],int n) +{ + int k; +#ifndef C99 + BIG_XXX m[2*FFLEN_WWW]; + BIG_XXX x[2*FFLEN_WWW]; +#else + BIG_XXX m[2*n]; + BIG_XXX x[2*n]; +#endif + FF_WWW_copy(x,a,2*n); + FF_WWW_norm(x,2*n); + FF_WWW_dsucopy(m,b,n); + k=BIGBITS_XXX*n; + + while (FF_WWW_comp(x,m,2*n)>=0) + { + FF_WWW_sub(x,x,m,2*n); + FF_WWW_norm(x,2*n); + } + + while (k>0) + { + FF_WWW_shr(m,2*n); + + if (FF_WWW_comp(x,m,2*n)>=0) + { + FF_WWW_sub(x,x,m,2*n); + FF_WWW_norm(x,2*n); + } + + k--; + } + FF_WWW_copy(r,x,n); + FF_WWW_mod(r,b,n); +} + +/* Set r=1/a mod p. Binary method - a<p on entry */ + +void FF_WWW_invmodp(BIG_XXX r[],BIG_XXX a[],BIG_XXX p[],int n) +{ +#ifndef C99 + BIG_XXX u[FFLEN_WWW],v[FFLEN_WWW],x1[FFLEN_WWW],x2[FFLEN_WWW],t[FFLEN_WWW],one[FFLEN_WWW]; +#else + BIG_XXX u[n],v[n],x1[n],x2[n],t[n],one[n]; +#endif + FF_WWW_copy(u,a,n); + FF_WWW_copy(v,p,n); + FF_WWW_one(one,n); + FF_WWW_copy(x1,one,n); + FF_WWW_zero(x2,n); + +// reduce n in here as well! 
+ while (FF_WWW_comp(u,one,n)!=0 && FF_WWW_comp(v,one,n)!=0) + { + while (FF_WWW_parity(u)==0) + { + FF_WWW_shr(u,n); + if (FF_WWW_parity(x1)!=0) + { + FF_WWW_add(x1,p,x1,n); + FF_WWW_norm(x1,n); + } + FF_WWW_shr(x1,n); + } + while (FF_WWW_parity(v)==0) + { + FF_WWW_shr(v,n); + if (FF_WWW_parity(x2)!=0) + { + FF_WWW_add(x2,p,x2,n); + FF_WWW_norm(x2,n); + } + FF_WWW_shr(x2,n); + } + if (FF_WWW_comp(u,v,n)>=0) + { + + FF_WWW_sub(u,u,v,n); + FF_WWW_norm(u,n); + if (FF_WWW_comp(x1,x2,n)>=0) FF_WWW_sub(x1,x1,x2,n); + else + { + FF_WWW_sub(t,p,x2,n); + FF_WWW_add(x1,x1,t,n); + } + FF_WWW_norm(x1,n); + } + else + { + FF_WWW_sub(v,v,u,n); + FF_WWW_norm(v,n); + if (FF_WWW_comp(x2,x1,n)>=0) FF_WWW_sub(x2,x2,x1,n); + else + { + FF_WWW_sub(t,p,x1,n); + FF_WWW_add(x2,x2,t,n); + } + FF_WWW_norm(x2,n); + } + } + if (FF_WWW_comp(u,one,n)==0) + FF_WWW_copy(r,x1,n); + else + FF_WWW_copy(r,x2,n); +} + +/* nesidue mod m */ +static void FF_WWW_nres(BIG_XXX a[],BIG_XXX m[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + if (n==1) + { + BIG_XXX_dscopy(d[0],a[0]); + BIG_XXX_dshl(d[0],NLEN_XXX*BASEBITS_XXX); + BIG_XXX_dmod(a[0],d[0],m[0]); + } + else + { + FF_WWW_dsucopy(d,a,n); + FF_WWW_dmod(a,d,m,n); + } +} + +static void FF_WWW_redc(BIG_XXX a[],BIG_XXX m[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + if (n==1) + { + BIG_XXX_dzero(d[0]); + BIG_XXX_dscopy(d[0],a[0]); + BIG_XXX_monty(a[0],m[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_mod(a,m,n); + FF_WWW_dscopy(d,a,n); + FF_WWW_reduce(a,d,m,ND,n); + FF_WWW_mod(a,m,n); + } +} + +/* U=1/a mod 2^m - Arazi & Qi */ +static void FF_WWW_invmod2m(BIG_XXX U[],BIG_XXX a[],int n) +{ + int i; +#ifndef C99 + BIG_XXX t1[FFLEN_WWW],b[FFLEN_WWW],c[FFLEN_WWW]; +#else + BIG_XXX t1[2*n],b[n],c[n]; +#endif + + FF_WWW_zero(U,n); + FF_WWW_zero(b,n); + FF_WWW_zero(c,n); + FF_WWW_zero(t1,2*n); + + BIG_XXX_copy(U[0],a[0]); + BIG_XXX_invmod2m(U[0]); + for 
(i=1; i<n; i<<=1) + { + FF_WWW_copy(b,a,i); + FF_WWW_mul(t1,U,b,i); + FF_WWW_shrw(t1,i); // top half to bottom half, top half=0 + + FF_WWW_copy(c,a,2*i); + FF_WWW_shrw(c,i); // top half of c + FF_WWW_lmul(b,U,c,i); // should set top half of b=0 + FF_WWW_add(t1,t1,b,i); + FF_WWW_norm(t1,2*i); + FF_WWW_lmul(b,t1,U,i); + FF_WWW_copy(t1,b,i); + FF_WWW_one(b,i); + FF_WWW_shlw(b,i); + FF_WWW_sub(t1,b,t1,2*i); + FF_WWW_norm(t1,2*i); + FF_WWW_shlw(t1,i); + FF_WWW_add(U,U,t1,2*i); + } + + FF_WWW_norm(U,n); +} + +void FF_WWW_random(BIG_XXX x[],csprng *rng,int n) +{ + int i; + for (i=0; i<n; i++) + { + BIG_XXX_random(x[i],rng); + } + /* make sure top bit is 1 */ + while (BIG_XXX_nbits(x[n-1])<MODBYTES_XXX*8) BIG_XXX_random(x[n-1],rng); +} + +/* generate random x mod p */ +void FF_WWW_randomnum(BIG_XXX x[],BIG_XXX p[],csprng *rng,int n) +{ + int i; +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + for (i=0; i<2*n; i++) + { + BIG_XXX_random(d[i],rng); + } + FF_WWW_dmod(x,d,p,n); +} + +static void FF_WWW_modmul(BIG_XXX z[],BIG_XXX x[],BIG_XXX y[],BIG_XXX p[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + chunk ex=P_EXCESS_WWW(x[n-1]); + chunk ey=P_EXCESS_WWW(y[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ey+1)>(dchunk)P_FEXCESS_WWW) +#else + if ((ex+1)>P_FEXCESS_WWW/(ey+1)) +#endif + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it %d %d\n",ex,ey); +#endif + FF_WWW_mod(x,p,n); + } + + if (n==1) + { + BIG_XXX_mul(d[0],x[0],y[0]); + BIG_XXX_monty(z[0],p[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_mul(d,x,y,n); + FF_WWW_reduce(z,d,p,ND,n); + } +} + +static void FF_WWW_modsqr(BIG_XXX z[],BIG_XXX x[],BIG_XXX p[],BIG_XXX ND[],int n) +{ +#ifndef C99 + BIG_XXX d[2*FFLEN_WWW]; +#else + BIG_XXX d[2*n]; +#endif + chunk ex=P_EXCESS_WWW(x[n-1]); +#ifdef dchunk + if ((dchunk)(ex+1)*(ex+1)>(dchunk)P_FEXCESS_WWW) +#else + if ((ex+1)>P_FEXCESS_WWW/(ex+1)) +#endif + { +#ifdef 
DEBUG_REDUCE + printf("Product too large - reducing it %d\n",ex); +#endif + FF_WWW_mod(x,p,n); + } + if (n==1) + { + BIG_XXX_sqr(d[0],x[0]); + BIG_XXX_monty(z[0],p[0],((chunk)1<<BASEBITS_XXX)-ND[0][0],d[0]); + } + else + { + FF_WWW_sqr(d,x,n); + FF_WWW_reduce(z,d,p,ND,n); + } +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for large e */ +void FF_WWW_skpow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e[],BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX R0[FFLEN_WWW],R1[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX R0[n],R1[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_one(R0,n); + FF_WWW_copy(R1,x,n); + FF_WWW_nres(R0,p,n); + FF_WWW_nres(R1,p,n); + + for (i=8*MODBYTES_XXX*n-1; i>=0; i--) + { + b=BIG_XXX_bit(e[i/BIGBITS_XXX],i%BIGBITS_XXX); + FF_WWW_modmul(r,R0,R1,p,ND,n); + + FF_WWW_cswap(R0,R1,b,n); + FF_WWW_modsqr(R0,R0,p,ND,n); + + FF_WWW_copy(R1,r,n); + FF_WWW_cswap(R0,R1,b,n); + } + FF_WWW_copy(r,R0,n); + FF_WWW_redc(r,p,ND,n); +} + +/* r=x^e mod p using side-channel resistant Montgomery Ladder, for short e */ +void FF_WWW_skspow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e,BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX R0[FFLEN_WWW],R1[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX R0[n],R1[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + FF_WWW_one(R0,n); + FF_WWW_copy(R1,x,n); + FF_WWW_nres(R0,p,n); + FF_WWW_nres(R1,p,n); + for (i=8*MODBYTES_XXX-1; i>=0; i--) + { + b=BIG_XXX_bit(e,i); + FF_WWW_modmul(r,R0,R1,p,ND,n); + FF_WWW_cswap(R0,R1,b,n); + FF_WWW_modsqr(R0,R0,p,ND,n); + FF_WWW_copy(R1,r,n); + FF_WWW_cswap(R0,R1,b,n); + } + FF_WWW_copy(r,R0,n); + FF_WWW_redc(r,p,ND,n); +} + +/* raise to an integer power - right-to-left method */ +void FF_WWW_power(BIG_XXX r[],BIG_XXX x[],int e,BIG_XXX p[],int n) +{ + int f=1; +#ifndef C99 + BIG_XXX w[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX w[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(w,x,n); + FF_WWW_nres(w,p,n); + + if (e==2) + { + FF_WWW_modsqr(r,w,p,ND,n); + } + else for (;;) + { + 
if (e%2==1) + { + if (f) FF_WWW_copy(r,w,n); + else FF_WWW_modmul(r,r,w,p,ND,n); + f=0; + } + e>>=1; + if (e==0) break; + FF_WWW_modsqr(w,w,p,ND,n); + } + + FF_WWW_redc(r,p,ND,n); +} + +/* r=x^e mod p, faster but not side channel resistant */ +void FF_WWW_pow(BIG_XXX r[],BIG_XXX x[],BIG_XXX e[],BIG_XXX p[],int n) +{ + int i,b; +#ifndef C99 + BIG_XXX w[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX w[n],ND[n]; +#endif + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(w,x,n); + FF_WWW_one(r,n); + FF_WWW_nres(r,p,n); + FF_WWW_nres(w,p,n); + + for (i=8*MODBYTES_XXX*n-1; i>=0; i--) + { + FF_WWW_modsqr(r,r,p,ND,n); + b=BIG_XXX_bit(e[i/BIGBITS_XXX],i%BIGBITS_XXX); + if (b==1) FF_WWW_modmul(r,r,w,p,ND,n); + } + FF_WWW_redc(r,p,ND,n); +} + +/* double exponentiation r=x^e.y^f mod p */ +void FF_WWW_pow2(BIG_XXX r[],BIG_XXX x[],BIG_XXX e,BIG_XXX y[],BIG_XXX f,BIG_XXX p[],int n) +{ + int i,eb,fb; +#ifndef C99 + BIG_XXX xn[FFLEN_WWW],yn[FFLEN_WWW],xy[FFLEN_WWW],ND[FFLEN_WWW]; +#else + BIG_XXX xn[n],yn[n],xy[n],ND[n]; +#endif + + FF_WWW_invmod2m(ND,p,n); + + FF_WWW_copy(xn,x,n); + FF_WWW_copy(yn,y,n); + FF_WWW_nres(xn,p,n); + FF_WWW_nres(yn,p,n); + FF_WWW_modmul(xy,xn,yn,p,ND,n); + FF_WWW_one(r,n); + FF_WWW_nres(r,p,n); + + for (i=8*MODBYTES_XXX-1; i>=0; i--) + { + eb=BIG_XXX_bit(e,i); + fb=BIG_XXX_bit(f,i); + FF_WWW_modsqr(r,r,p,ND,n); + if (eb==1) + { + if (fb==1) FF_WWW_modmul(r,r,xy,p,ND,n); + else FF_WWW_modmul(r,r,xn,p,ND,n); + } + else + { + if (fb==1) FF_WWW_modmul(r,r,yn,p,ND,n); + } + } + FF_WWW_redc(r,p,ND,n); +} + +static sign32 igcd(sign32 x,sign32 y) +{ + /* integer GCD, returns GCD of x and y */ + sign32 r; + if (y==0) return x; + while ((r=x%y)!=0) + x=y,y=r; + return y; +} + +/* quick and dirty check for common factor with s */ +int FF_WWW_cfactor(BIG_XXX w[],sign32 s,int n) +{ + int r; + sign32 g; +#ifndef C99 + BIG_XXX x[FFLEN_WWW],y[FFLEN_WWW]; +#else + BIG_XXX x[n],y[n]; +#endif + FF_WWW_init(y,s,n); + FF_WWW_copy(x,w,n); + FF_WWW_norm(x,n); + + do + { + 
FF_WWW_sub(x,x,y,n); + FF_WWW_norm(x,n); + while (!FF_WWW_iszilch(x,n) && FF_WWW_parity(x)==0) FF_WWW_shr(x,n); + } + while (FF_WWW_comp(x,y,n)>0); +#if CHUNK<32 + g=x[0][0]+((sign32)(x[0][1])<<BASEBITS_XXX); +#else + g=(sign32)x[0][0]; +#endif + r=igcd(s,g); + if (r>1) return 1; + return 0; +} + +/* Miller-Rabin test for primality. Slow. */ +int FF_WWW_prime(BIG_XXX p[],csprng *rng,int n) +{ + int i,j,loop,s=0; +#ifndef C99 + BIG_XXX d[FFLEN_WWW],x[FFLEN_WWW],unity[FFLEN_WWW],nm1[FFLEN_WWW]; +#else + BIG_XXX d[n],x[n],unity[n],nm1[n]; +#endif + sign32 sf=4849845;/* 3*5*.. *19 */ + + FF_WWW_norm(p,n); + + if (FF_WWW_cfactor(p,sf,n)) return 0; + + FF_WWW_one(unity,n); + FF_WWW_sub(nm1,p,unity,n); + FF_WWW_norm(nm1,n); + FF_WWW_copy(d,nm1,n); + while (FF_WWW_parity(d)==0) + { + FF_WWW_shr(d,n); + s++; + } + if (s==0) return 0; + + for (i=0; i<10; i++) + { + FF_WWW_randomnum(x,p,rng,n); + FF_WWW_pow(x,x,d,p,n); + if (FF_WWW_comp(x,unity,n)==0 || FF_WWW_comp(x,nm1,n)==0) continue; + loop=0; + for (j=1; j<s; j++) + { + FF_WWW_power(x,x,2,p,n); + if (FF_WWW_comp(x,unity,n)==0) return 0; + if (FF_WWW_comp(x,nm1,n)==0 ) + { + loop=1; + break; + } + } + if (loop) continue; + return 0; + } + + return 1; +} + +/* +BIG P[4]= {{0x1670957,0x1568CD3C,0x2595E5,0xEED4F38,0x1FC9A971,0x14EF7E62,0xA503883,0x9E1E05E,0xBF59E3},{0x1844C908,0x1B44A798,0x3A0B1E7,0xD1B5B4E,0x1836046F,0x87E94F9,0x1D34C537,0xF7183B0,0x46D07},{0x17813331,0x19E28A90,0x1473A4D6,0x1CACD01F,0x1EEA8838,0xAF2AE29,0x1F85292A,0x1632585E,0xD945E5},{0x919F5EF,0x1567B39F,0x19F6AD11,0x16CE47CF,0x9B36EB1,0x35B7D3,0x483B28C,0xCBEFA27,0xB5FC21}}; + +int main() +{ + int i; + BIG p[4],e[4],x[4],r[4]; + csprng rng; + char raw[100]; + for (i=0;i<100;i++) raw[i]=i; + RAND_seed(&rng,100,raw); + + + FF_init(x,3,4); + + FF_copy(p,P,4); + FF_copy(e,p,4); + FF_dec(e,1,4); + FF_norm(e,4); + + + + printf("p= ");FF_output(p,4); printf("\n"); + if (FF_prime(p,&rng,4)) printf("p is a prime\n"); + printf("e= ");FF_output(e,4); printf("\n"); 
+ + FF_skpow(r,x,e,p,4); + printf("r= ");FF_output(r,4); printf("\n"); +} + +*/
http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp.c.in ---------------------------------------------------------------------- diff --git a/src/fp.c.in b/src/fp.c.in new file mode 100644 index 0000000..9144fcf --- /dev/null +++ b/src/fp.c.in @@ -0,0 +1,650 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL mod p functions */ +/* Small Finite Field arithmetic */ +/* SU=m, SU is Stack Usage (NOT_SPECIAL Modulus) */ + +#include "fp_YYY.h" + +/* Fast Modular Reduction Methods */ + +/* r=d mod m */ +/* d MUST be normalised */ +/* Products must be less than pR in all cases !!! 
*/ +/* So when multiplying two numbers, their product *must* be less than MODBITS+BASEBITS*NLEN */ +/* Results *may* be one bit bigger than MODBITS */ + +#if MODTYPE_YYY == PSEUDO_MERSENNE +/* r=d mod m */ + +/* Converts from BIG integer to residue form mod Modulus */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + BIG_XXX_copy(y->g,x); + y->XES=1; +} + +/* Converts from residue form back to BIG integer form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + BIG_XXX_copy(x,y->g); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_YYY_mod(BIG_XXX r,DBIG_XXX d) +{ + BIG_XXX t,b; + chunk v,tw; + BIG_XXX_split(t,b,d,MODBITS_YYY); + + /* Note that all of the excess gets pushed into t. So if squaring a value with a 4-bit excess, this results in + t getting all 8 bits of the excess product! So products must be less than pR which is Montgomery compatible */ + + if (MConst_YYY < NEXCESS_XXX) + { + BIG_XXX_imul(t,t,MConst_YYY); + BIG_XXX_norm(t); + BIG_XXX_add(r,t,b); + BIG_XXX_norm(r); + tw=r[NLEN_XXX-1]; + r[NLEN_XXX-1]&=TMASK_YYY; + r[0]+=MConst_YYY*((tw>>TBITS_YYY)); + } + else + { + v=BIG_XXX_pmul(t,t,MConst_YYY); + BIG_XXX_add(r,t,b); + BIG_XXX_norm(r); + tw=r[NLEN_XXX-1]; + r[NLEN_XXX-1]&=TMASK_YYY; +#if CHUNK == 16 + r[1]+=muladd_XXX(MConst_YYY,((tw>>TBITS_YYY)+(v<<(BASEBITS_XXX-TBITS_YYY))),0,&r[0]); +#else + r[0]+=MConst_YYY*((tw>>TBITS_YYY)+(v<<(BASEBITS_XXX-TBITS_YYY))); +#endif + } + BIG_XXX_norm(r); +} +#endif + +/* This only applies to Curve C448, so specialised (for now) */ +#if MODTYPE_YYY == GENERALISED_MERSENNE + +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + BIG_XXX_copy(y->g,x); + y->XES=1; +} + +/* Converts from residue form back to BIG integer form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + BIG_XXX_copy(x,y->g); +} + +/* reduce a DBIG to a BIG exploiting the special form of the modulus */ +void FP_YYY_mod(BIG_XXX r,DBIG_XXX d) +{ + BIG_XXX t,b; + chunk carry; + BIG_XXX_split(t,b,d,MBITS_YYY); + + BIG_XXX_add(r,t,b); + + 
BIG_XXX_dscopy(d,t); + BIG_XXX_dshl(d,MBITS_YYY/2); + + BIG_XXX_split(t,b,d,MBITS_YYY); + + BIG_XXX_add(r,r,t); + BIG_XXX_add(r,r,b); + BIG_XXX_norm(r); + BIG_XXX_shl(t,MBITS_YYY/2); + + BIG_XXX_add(r,r,t); + + carry=r[NLEN_XXX-1]>>TBITS_YYY; + + r[NLEN_XXX-1]&=TMASK_YYY; + r[0]+=carry; + + r[224/BASEBITS_XXX]+=carry<<(224%BASEBITS_XXX); /* need to check that this falls mid-word */ + BIG_XXX_norm(r); +} + +#endif + +#if MODTYPE_YYY == MONTGOMERY_FRIENDLY + +/* convert to Montgomery n-residue form */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + DBIG_XXX d; + BIG_XXX r; + BIG_XXX_rcopy(r,R2modp_YYY); + BIG_XXX_mul(d,x,r); + FP_YYY_mod(y->g,d); + y->XES=2; +} + +/* convert back to regular form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + DBIG_XXX d; + BIG_XXX_dzero(d); + BIG_XXX_dscopy(d,y->g); + FP_YYY_mod(x,d); +} + +/* fast modular reduction from DBIG to BIG exploiting special form of the modulus */ +void FP_YYY_mod(BIG_XXX a,DBIG_XXX d) +{ + int i; + + for (i=0; i<NLEN_XXX; i++) + d[NLEN_XXX+i]+=muladd_XXX(d[i],MConst_YYY-1,d[i],&d[NLEN_XXX+i-1]); + + BIG_XXX_sducopy(a,d); + BIG_XXX_norm(a); +} + +#endif + +#if MODTYPE_YYY == NOT_SPECIAL + +/* convert to Montgomery n-residue form */ +void FP_YYY_nres(FP_YYY *y,BIG_XXX x) +{ + DBIG_XXX d; + BIG_XXX r; + BIG_XXX_rcopy(r,R2modp_YYY); + BIG_XXX_mul(d,x,r); + FP_YYY_mod(y->g,d); + y->XES=2; +} + +/* convert back to regular form */ +void FP_YYY_redc(BIG_XXX x,FP_YYY *y) +{ + DBIG_XXX d; + BIG_XXX_dzero(d); + BIG_XXX_dscopy(d,y->g); + FP_YYY_mod(x,d); +} + + +/* reduce a DBIG to a BIG using Montgomery's no trial division method */ +/* d is expected to be dnormed before entry */ +/* SU= 112 */ +void FP_YYY_mod(BIG_XXX a,DBIG_XXX d) +{ + BIG_XXX mdls; + BIG_XXX_rcopy(mdls,Modulus_YYY); + BIG_XXX_monty(a,mdls,MConst_YYY,d); +} + +#endif + +/* test x==0 ? 
*/ +/* SU= 48 */ +int FP_YYY_iszilch(FP_YYY *x) +{ + BIG_XXX m; + BIG_XXX_rcopy(m,Modulus_YYY); + BIG_XXX_mod(x->g,m); + return BIG_XXX_iszilch(x->g); +} + +void FP_YYY_copy(FP_YYY *y,FP_YYY *x) +{ + BIG_XXX_copy(y->g,x->g); + y->XES=x->XES; +} + +void FP_YYY_rcopy(FP_YYY *y, const BIG_XXX c) +{ + BIG_XXX b; + BIG_XXX_rcopy(b,c); + FP_YYY_nres(y,b); +} + +/* Swap a and b if d=1 */ +void FP_YYY_cswap(FP_YYY *a,FP_YYY *b,int d) +{ + sign32 t,c=d; + BIG_XXX_cswap(a->g,b->g,d); + + c=~(c-1); + t=c&((a->XES)^(b->XES)); + a->XES^=t; + b->XES^=t; + +} + +/* Move b to a if d=1 */ +void FP_YYY_cmove(FP_YYY *a,FP_YYY *b,int d) +{ + sign32 c=-d; + + BIG_XXX_cmove(a->g,b->g,d); + a->XES^=(a->XES^b->XES)&c; +} + +void FP_YYY_zero(FP_YYY *x) +{ + BIG_XXX_zero(x->g); + x->XES=1; +} + +int FP_YYY_equals(FP_YYY *x,FP_YYY *y) +{ + FP_YYY_reduce(x); + FP_YYY_reduce(y); + if (BIG_XXX_comp(x->g,y->g)==0) return 1; + return 0; +} + +/* output FP */ +/* SU= 48 */ +void FP_YYY_output(FP_YYY *r) +{ + BIG_XXX c; + FP_YYY_redc(c,r); + BIG_XXX_output(c); +} + +void FP_YYY_rawoutput(FP_YYY *r) +{ + BIG_XXX_rawoutput(r->g); +} + +#ifdef GET_STATS +int tsqr=0,rsqr=0,tmul=0,rmul=0; +int tadd=0,radd=0,tneg=0,rneg=0; +int tdadd=0,rdadd=0,tdneg=0,rdneg=0; +#endif + +#ifdef FUSED_MODMUL + +/* Insert fastest code here */ + +#endif + +/* r=a*b mod Modulus */ +/* product must be less that p.R - and we need to know this in advance! 
*/ +/* SU= 88 */ +void FP_YYY_mul(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + DBIG_XXX d; + + if ((sign64)a->XES*b->XES>(sign64)FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it\n"); +#endif + FP_YYY_reduce(a); /* it is sufficient to fully reduce just one of them < p */ + } + +#ifdef FUSED_MODMUL + FP_YYY_modmul(r->g,a->g,b->g); +#else + BIG_XXX_mul(d,a->g,b->g); + FP_YYY_mod(r->g,d); +#endif + r->XES=2; +} + + +/* multiplication by an integer, r=a*c */ +/* SU= 136 */ +void FP_YYY_imul(FP_YYY *r,FP_YYY *a,int c) +{ + int s=0; + + if (c<0) + { + c=-c; + s=1; + } + +#if MODTYPE_YYY==PSEUDO_MERSENNE || MODTYPE_YYY==GENERALISED_MERSENNE + DBIG_XXX d; + BIG_XXX_pxmul(d,a->g,c); + FP_YYY_mod(r->g,d); + r->XES=2; + +#else + //Montgomery + BIG_XXX k; + FP_YYY f; + if (a->XES*c<=FEXCESS_YYY) + { + BIG_XXX_pmul(r->g,a->g,c); + r->XES=a->XES*c; // careful here - XES jumps! + } + else + { + // don't want to do this - only a problem for Montgomery modulus and larger constants + BIG_XXX_zero(k); + BIG_XXX_inc(k,c); + BIG_XXX_norm(k); + FP_YYY_nres(&f,k); + FP_YYY_mul(r,a,&f); + } +#endif + if (s) + { + FP_YYY_neg(r,r); + FP_YYY_norm(r); + } +} + +/* Set r=a^2 mod m */ +/* SU= 88 */ +void FP_YYY_sqr(FP_YYY *r,FP_YYY *a) +{ + DBIG_XXX d; + + if ((sign64)a->XES*a->XES>(sign64)FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Product too large - reducing it\n"); +#endif + FP_YYY_reduce(a); + } + + BIG_XXX_sqr(d,a->g); + FP_YYY_mod(r->g,d); + r->XES=2; +} + +/* SU= 16 */ +/* Set r=a+b */ +void FP_YYY_add(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + BIG_XXX_add(r->g,a->g,b->g); + r->XES=a->XES+b->XES; + if (r->XES>FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Sum too large - reducing it \n"); +#endif + FP_YYY_reduce(r); + } +} + +/* Set r=a-b mod m */ +/* SU= 56 */ +void FP_YYY_sub(FP_YYY *r,FP_YYY *a,FP_YYY *b) +{ + FP_YYY n; + FP_YYY_neg(&n,b); + FP_YYY_add(r,a,&n); +} + +/* SU= 48 */ +/* Fully reduce a mod Modulus */ +void FP_YYY_reduce(FP_YYY *a) +{ + BIG_XXX m; + 
BIG_XXX_rcopy(m,Modulus_YYY); + BIG_XXX_mod(a->g,m); + a->XES=1; +} + +void FP_YYY_norm(FP_YYY *x) +{ + BIG_XXX_norm(x->g); +} + +// https://graphics.stanford.edu/~seander/bithacks.html +// constant time log to base 2 (or number of bits in) + +static int logb2(unsign32 v) +{ + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + + v = v - ((v >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + r = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + return r; +} + +/* Set r=-a mod Modulus */ +/* SU= 64 */ +void FP_YYY_neg(FP_YYY *r,FP_YYY *a) +{ + int sb; + BIG_XXX m; + + BIG_XXX_rcopy(m,Modulus_YYY); + + sb=logb2(a->XES-1); + BIG_XXX_fshl(m,sb); + BIG_XXX_sub(r->g,m,a->g); + r->XES=((sign32)1<<sb); + + if (r->XES>FEXCESS_YYY) + { +#ifdef DEBUG_REDUCE + printf("Negation too large - reducing it \n"); +#endif + FP_YYY_reduce(r); + } + +} + +/* Set r=a/2. */ +/* SU= 56 */ +void FP_YYY_div2(FP_YYY *r,FP_YYY *a) +{ + BIG_XXX m; + BIG_XXX_rcopy(m,Modulus_YYY); + FP_YYY_copy(r,a); + if (BIG_XXX_parity(a->g)==0) + { + + BIG_XXX_fshr(r->g,1); + } + else + { + BIG_XXX_add(r->g,r->g,m); + BIG_XXX_norm(r->g); + BIG_XXX_fshr(r->g,1); + } +} + +/* set w=1/x */ +void FP_YYY_inv(FP_YYY *w,FP_YYY *x) +{ + BIG_XXX m2; + BIG_XXX_rcopy(m2,Modulus_YYY); + BIG_XXX_dec(m2,2); + BIG_XXX_norm(m2); + FP_YYY_pow(w,x,m2); +} + +/* SU=8 */ +/* set n=1 */ +void FP_YYY_one(FP_YYY *n) +{ + BIG_XXX b; + BIG_XXX_one(b); + FP_YYY_nres(n,b); +} + +/* Set r=a^b mod Modulus */ +/* SU= 136 */ +void FP_YYY_pow(FP_YYY *r,FP_YYY *a,BIG_XXX b) +{ + sign8 w[1+(NLEN_XXX*BASEBITS_XXX+3)/4]; + FP_YYY tb[16]; + BIG_XXX t; + int i,nb; + + FP_YYY_norm(a); + BIG_XXX_norm(b); + BIG_XXX_copy(t,b); + nb=1+(BIG_XXX_nbits(t)+3)/4; + /* convert exponent to 4-bit window */ + for (i=0; i<nb; i++) + { + w[i]=BIG_XXX_lastbits(t,4); + BIG_XXX_dec(t,w[i]); + BIG_XXX_norm(t); + BIG_XXX_fshr(t,4); + } + + FP_YYY_one(&tb[0]); + FP_YYY_copy(&tb[1],a); + for (i=2; i<16; i++) + 
FP_YYY_mul(&tb[i],&tb[i-1],a); + + FP_YYY_copy(r,&tb[w[nb-1]]); + for (i=nb-2; i>=0; i--) + { + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_sqr(r,r); + FP_YYY_mul(r,r,&tb[w[i]]); + } + FP_YYY_reduce(r); +} + +/* is r a QR? */ +int FP_YYY_qr(FP_YYY *r) +{ + int j; + BIG_XXX m; + BIG_XXX b; + BIG_XXX_rcopy(m,Modulus_YYY); + FP_YYY_redc(b,r); + j=BIG_XXX_jacobi(b,m); + FP_YYY_nres(r,b); + if (j==1) return 1; + return 0; + +} + +/* Set a=sqrt(b) mod Modulus */ +/* SU= 160 */ +void FP_YYY_sqrt(FP_YYY *r,FP_YYY *a) +{ + FP_YYY v,i; + BIG_XXX b; + BIG_XXX m; + BIG_XXX_rcopy(m,Modulus_YYY); + BIG_XXX_mod(a->g,m); + BIG_XXX_copy(b,m); + if (MOD8_YYY==5) + { + BIG_XXX_dec(b,5); + BIG_XXX_norm(b); + BIG_XXX_fshr(b,3); /* (p-5)/8 */ + FP_YYY_copy(&i,a); + BIG_XXX_fshl(i.g,1); + FP_YYY_pow(&v,&i,b); + FP_YYY_mul(&i,&i,&v); + FP_YYY_mul(&i,&i,&v); + BIG_XXX_dec(i.g,1); + FP_YYY_mul(r,a,&v); + FP_YYY_mul(r,r,&i); + FP_YYY_reduce(r); + } + if (MOD8_YYY==3 || MOD8_YYY==7) + { + BIG_XXX_inc(b,1); + BIG_XXX_norm(b); + BIG_XXX_fshr(b,2); /* (p+1)/4 */ + FP_YYY_pow(r,a,b); + } +} + +/* +int main() +{ + + BIG_XXX r; + + FP_YYY_one(r); + FP_YYY_sqr(r,r); + + BIG_XXX_output(r); + + int i,carry; + DBIG_XXX c={0,0,0,0,0,0,0,0}; + BIG_XXX a={1,2,3,4}; + BIG_XXX b={3,4,5,6}; + BIG_XXX r={11,12,13,14}; + BIG_XXX s={23,24,25,15}; + BIG_XXX w; + +// printf("NEXCESS_XXX= %d\n",NEXCESS_XXX); +// printf("MConst_YYY= %d\n",MConst_YYY); + + BIG_XXX_copy(b,Modulus_YYY); + BIG_XXX_dec(b,1); + BIG_XXX_norm(b); + + BIG_XXX_randomnum(r); BIG_XXX_norm(r); BIG_XXX_mod(r,Modulus_YYY); +// BIG_XXX_randomnum(s); norm(s); BIG_XXX_mod(s,Modulus_YYY); + +// BIG_XXX_output(r); +// BIG_XXX_output(s); + + BIG_XXX_output(r); + FP_YYY_nres(r); + BIG_XXX_output(r); + BIG_XXX_copy(a,r); + FP_YYY_redc(r); + BIG_XXX_output(r); + BIG_XXX_dscopy(c,a); + FP_YYY_mod(r,c); + BIG_XXX_output(r); + + +// exit(0); + +// copy(r,a); + printf("r= "); BIG_XXX_output(r); + BIG_XXX_modsqr(r,r,Modulus_YYY); + 
printf("r^2= "); BIG_XXX_output(r); + + FP_YYY_nres(r); + FP_YYY_sqrt(r,r); + FP_YYY_redc(r); + printf("r= "); BIG_XXX_output(r); + BIG_XXX_modsqr(r,r,Modulus_YYY); + printf("r^2= "); BIG_XXX_output(r); + + +// for (i=0;i<100000;i++) FP_YYY_sqr(r,r); +// for (i=0;i<100000;i++) + FP_YYY_sqrt(r,r); + + BIG_XXX_output(r); +} +*/ http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp12.c.in ---------------------------------------------------------------------- diff --git a/src/fp12.c.in b/src/fp12.c.in new file mode 100644 index 0000000..67c6242 --- /dev/null +++ b/src/fp12.c.in @@ -0,0 +1,870 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. 
+*/ + +/* AMCL Fp^12 functions */ +/* SU=m, m is Stack Usage (no lazy )*/ +/* FP12 elements are of the form a+i.b+i^2.c */ + +#include "fp12_YYY.h" + +/* return 1 if b==c, no branching */ +static int teq(sign32 b,sign32 c) +{ + sign32 x=b^c; + x-=1; // if x=0, x now -1 + return (int)((x>>31)&1); +} + + +/* Constant time select from pre-computed table */ +static void FP12_YYY_select(FP12_YYY *f,FP12_YYY g[],sign32 b) +{ + FP12_YYY invf; + sign32 m=b>>31; + sign32 babs=(b^m)-m; + + babs=(babs-1)/2; + + FP12_YYY_cmove(f,&g[0],teq(babs,0)); // conditional move + FP12_YYY_cmove(f,&g[1],teq(babs,1)); + FP12_YYY_cmove(f,&g[2],teq(babs,2)); + FP12_YYY_cmove(f,&g[3],teq(babs,3)); + FP12_YYY_cmove(f,&g[4],teq(babs,4)); + FP12_YYY_cmove(f,&g[5],teq(babs,5)); + FP12_YYY_cmove(f,&g[6],teq(babs,6)); + FP12_YYY_cmove(f,&g[7],teq(babs,7)); + + FP12_YYY_copy(&invf,f); + FP12_YYY_conj(&invf,&invf); // 1/f + FP12_YYY_cmove(f,&invf,(int)(m&1)); +} + + + +/* test x==0 ? */ +/* SU= 8 */ +int FP12_YYY_iszilch(FP12_YYY *x) +{ + if (FP4_YYY_iszilch(&(x->a)) && FP4_YYY_iszilch(&(x->b)) && FP4_YYY_iszilch(&(x->c))) return 1; + return 0; +} + +/* test x==1 ? 
*/ +/* SU= 8 */ +int FP12_YYY_isunity(FP12_YYY *x) +{ + if (FP4_YYY_isunity(&(x->a)) && FP4_YYY_iszilch(&(x->b)) && FP4_YYY_iszilch(&(x->c))) return 1; + return 0; +} + +/* FP12 copy w=x */ +/* SU= 16 */ +void FP12_YYY_copy(FP12_YYY *w,FP12_YYY *x) +{ + if (x==w) return; + FP4_YYY_copy(&(w->a),&(x->a)); + FP4_YYY_copy(&(w->b),&(x->b)); + FP4_YYY_copy(&(w->c),&(x->c)); +} + +/* FP12 w=1 */ +/* SU= 8 */ +void FP12_YYY_one(FP12_YYY *w) +{ + FP4_YYY_one(&(w->a)); + FP4_YYY_zero(&(w->b)); + FP4_YYY_zero(&(w->c)); +} + +/* return 1 if x==y, else 0 */ +/* SU= 16 */ +int FP12_YYY_equals(FP12_YYY *x,FP12_YYY *y) +{ + if (FP4_YYY_equals(&(x->a),&(y->a)) && FP4_YYY_equals(&(x->b),&(y->b)) && FP4_YYY_equals(&(x->b),&(y->b))) + return 1; + return 0; +} + +/* Set w=conj(x) */ +/* SU= 8 */ +void FP12_YYY_conj(FP12_YYY *w,FP12_YYY *x) +{ + FP12_YYY_copy(w,x); + FP4_YYY_conj(&(w->a),&(w->a)); + FP4_YYY_nconj(&(w->b),&(w->b)); + FP4_YYY_conj(&(w->c),&(w->c)); +} + +/* Create FP12 from FP4 */ +/* SU= 8 */ +void FP12_YYY_from_FP4(FP12_YYY *w,FP4_YYY *a) +{ + FP4_YYY_copy(&(w->a),a); + FP4_YYY_zero(&(w->b)); + FP4_YYY_zero(&(w->c)); +} + +/* Create FP12 from 3 FP4's */ +/* SU= 16 */ +void FP12_YYY_from_FP4s(FP12_YYY *w,FP4_YYY *a,FP4_YYY *b,FP4_YYY *c) +{ + FP4_YYY_copy(&(w->a),a); + FP4_YYY_copy(&(w->b),b); + FP4_YYY_copy(&(w->c),c); +} + +/* Granger-Scott Unitary Squaring. 
This does not benefit from lazy reduction */ +/* SU= 600 */ +void FP12_YYY_usqr(FP12_YYY *w,FP12_YYY *x) +{ + FP4_YYY A,B,C,D; + + FP4_YYY_copy(&A,&(x->a)); + + FP4_YYY_sqr(&(w->a),&(x->a)); + FP4_YYY_add(&D,&(w->a),&(w->a)); + FP4_YYY_add(&(w->a),&D,&(w->a)); + + FP4_YYY_norm(&(w->a)); + FP4_YYY_nconj(&A,&A); + + FP4_YYY_add(&A,&A,&A); + FP4_YYY_add(&(w->a),&(w->a),&A); + FP4_YYY_sqr(&B,&(x->c)); + FP4_YYY_times_i(&B); + + FP4_YYY_add(&D,&B,&B); + FP4_YYY_add(&B,&B,&D); + FP4_YYY_norm(&B); + + FP4_YYY_sqr(&C,&(x->b)); + + FP4_YYY_add(&D,&C,&C); + FP4_YYY_add(&C,&C,&D); + + FP4_YYY_norm(&C); + FP4_YYY_conj(&(w->b),&(x->b)); + FP4_YYY_add(&(w->b),&(w->b),&(w->b)); + FP4_YYY_nconj(&(w->c),&(x->c)); + + FP4_YYY_add(&(w->c),&(w->c),&(w->c)); + FP4_YYY_add(&(w->b),&B,&(w->b)); + FP4_YYY_add(&(w->c),&C,&(w->c)); + + FP12_YYY_reduce(w); /* reduce here as in pow function repeated squarings would trigger multiple reductions */ +} + +/* FP12 squaring w=x^2 */ +/* SU= 600 */ +void FP12_YYY_sqr(FP12_YYY *w,FP12_YYY *x) +{ + /* Use Chung-Hasan SQR2 method from http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */ + + FP4_YYY A,B,C,D; + + FP4_YYY_sqr(&A,&(x->a)); + FP4_YYY_mul(&B,&(x->b),&(x->c)); + FP4_YYY_add(&B,&B,&B); + FP4_YYY_norm(&B); + FP4_YYY_sqr(&C,&(x->c)); + + FP4_YYY_mul(&D,&(x->a),&(x->b)); + FP4_YYY_add(&D,&D,&D); + FP4_YYY_add(&(w->c),&(x->a),&(x->c)); + FP4_YYY_add(&(w->c),&(x->b),&(w->c)); + FP4_YYY_norm(&(w->c)); + + FP4_YYY_sqr(&(w->c),&(w->c)); + + FP4_YYY_copy(&(w->a),&A); + FP4_YYY_add(&A,&A,&B); + + FP4_YYY_norm(&A); + + FP4_YYY_add(&A,&A,&C); + FP4_YYY_add(&A,&A,&D); + + FP4_YYY_norm(&A); + FP4_YYY_neg(&A,&A); + FP4_YYY_times_i(&B); + FP4_YYY_times_i(&C); + + FP4_YYY_add(&(w->a),&(w->a),&B); + FP4_YYY_add(&(w->b),&C,&D); + FP4_YYY_add(&(w->c),&(w->c),&A); + + FP12_YYY_norm(w); +} + +/* FP12 full multiplication w=w*y */ +/* SU= 896 */ +void FP12_YYY_mul(FP12_YYY *w,FP12_YYY *y) +{ + FP4_YYY z0,z1,z2,z3,t0,t1; + + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + 
FP4_YYY_mul(&z2,&(w->b),&(y->b)); + + FP4_YYY_add(&t0,&(w->a),&(w->b)); + FP4_YYY_add(&t1,&(y->a),&(y->b)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&z1,&t0,&t1); + FP4_YYY_add(&t0,&(w->b),&(w->c)); + FP4_YYY_add(&t1,&(y->b),&(y->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&z3,&t0,&t1); + + FP4_YYY_neg(&t0,&z0); + FP4_YYY_neg(&t1,&z2); + + FP4_YYY_add(&z1,&z1,&t0); // z1=z1-z0 + FP4_YYY_add(&(w->b),&z1,&t1); // z1=z1-z2 + FP4_YYY_add(&z3,&z3,&t1); // z3=z3-z2 + FP4_YYY_add(&z2,&z2,&t0); // z2=z2-z0 + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + FP4_YYY_add(&t1,&(y->a),&(y->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&t0,&t1,&t0); + FP4_YYY_add(&z2,&z2,&t0); + + FP4_YYY_mul(&t0,&(w->c),&(y->c)); + FP4_YYY_neg(&t1,&t0); + + FP4_YYY_add(&(w->c),&z2,&t1); + FP4_YYY_add(&z3,&z3,&t1); + FP4_YYY_times_i(&t0); + FP4_YYY_add(&(w->b),&(w->b),&t0); + FP4_YYY_norm(&z3); + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + + FP12_YYY_norm(w); +} + +/* FP12 multiplication w=w*y */ +/* SU= 744 */ +/* catering for special case that arises from special form of ATE pairing line function */ +void FP12_YYY_smul(FP12_YYY *w,FP12_YYY *y,int type) +{ + FP4_YYY z0,z1,z2,z3,t0,t1; + + if (type==D_TYPE) + { + // y->c is 0 + + FP4_YYY_copy(&z3,&(w->b)); + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + + FP4_YYY_pmul(&z2,&(w->b),&(y->b).a); + FP4_YYY_add(&(w->b),&(w->a),&(w->b)); + FP4_YYY_copy(&t1,&(y->a)); + FP2_YYY_add(&t1.a,&t1.a,&(y->b).a); + + FP4_YYY_norm(&t1); + FP4_YYY_norm(&(w->b)); + + FP4_YYY_mul(&(w->b),&(w->b),&t1); + FP4_YYY_add(&z3,&z3,&(w->c)); + FP4_YYY_norm(&z3); + FP4_YYY_pmul(&z3,&z3,&(y->b).a); + FP4_YYY_neg(&t0,&z0); + FP4_YYY_neg(&t1,&z2); + + FP4_YYY_add(&(w->b),&(w->b),&t0); // z1=z1-z0 + FP4_YYY_add(&(w->b),&(w->b),&t1); // z1=z1-z2 + + FP4_YYY_add(&z3,&z3,&t1); // z3=z3-z2 + FP4_YYY_add(&z2,&z2,&t0); // z2=z2-z0 + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&z3); + + 
FP4_YYY_mul(&t0,&(y->a),&t0); + FP4_YYY_add(&(w->c),&z2,&t0); + + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + } + + if (type==M_TYPE) + { + // y->b is zero + FP4_YYY_mul(&z0,&(w->a),&(y->a)); + FP4_YYY_add(&t0,&(w->a),&(w->b)); + FP4_YYY_norm(&t0); + + FP4_YYY_mul(&z1,&t0,&(y->a)); + FP4_YYY_add(&t0,&(w->b),&(w->c)); + FP4_YYY_norm(&t0); + + FP4_YYY_pmul(&z3,&t0,&(y->c).b); + FP4_YYY_times_i(&z3); + + FP4_YYY_neg(&t0,&z0); + FP4_YYY_add(&z1,&z1,&t0); // z1=z1-z0 + + FP4_YYY_copy(&(w->b),&z1); + + FP4_YYY_copy(&z2,&t0); + + FP4_YYY_add(&t0,&(w->a),&(w->c)); + FP4_YYY_add(&t1,&(y->a),&(y->c)); + + FP4_YYY_norm(&t0); + FP4_YYY_norm(&t1); + + FP4_YYY_mul(&t0,&t1,&t0); + FP4_YYY_add(&z2,&z2,&t0); + + FP4_YYY_pmul(&t0,&(w->c),&(y->c).b); + FP4_YYY_times_i(&t0); + FP4_YYY_neg(&t1,&t0); + FP4_YYY_times_i(&t0); + + FP4_YYY_add(&(w->c),&z2,&t1); + FP4_YYY_add(&z3,&z3,&t1); + + FP4_YYY_add(&(w->b),&(w->b),&t0); + FP4_YYY_norm(&z3); + FP4_YYY_times_i(&z3); + FP4_YYY_add(&(w->a),&z0,&z3); + } + FP12_YYY_norm(w); +} + +/* Set w=1/x */ +/* SU= 600 */ +void FP12_YYY_inv(FP12_YYY *w,FP12_YYY *x) +{ + FP4_YYY f0,f1,f2,f3; + + FP4_YYY_sqr(&f0,&(x->a)); + FP4_YYY_mul(&f1,&(x->b),&(x->c)); + FP4_YYY_times_i(&f1); + FP4_YYY_sub(&f0,&f0,&f1); /* y.a */ + FP4_YYY_norm(&f0); + + FP4_YYY_sqr(&f1,&(x->c)); + FP4_YYY_times_i(&f1); + FP4_YYY_mul(&f2,&(x->a),&(x->b)); + FP4_YYY_sub(&f1,&f1,&f2); /* y.b */ + FP4_YYY_norm(&f1); + + FP4_YYY_sqr(&f2,&(x->b)); + FP4_YYY_mul(&f3,&(x->a),&(x->c)); + FP4_YYY_sub(&f2,&f2,&f3); /* y.c */ + FP4_YYY_norm(&f2); + + FP4_YYY_mul(&f3,&(x->b),&f2); + FP4_YYY_times_i(&f3); + FP4_YYY_mul(&(w->a),&f0,&(x->a)); + FP4_YYY_add(&f3,&(w->a),&f3); + FP4_YYY_mul(&(w->c),&f1,&(x->c)); + FP4_YYY_times_i(&(w->c)); + + FP4_YYY_add(&f3,&(w->c),&f3); + FP4_YYY_norm(&f3); + + FP4_YYY_inv(&f3,&f3); + + FP4_YYY_mul(&(w->a),&f0,&f3); + FP4_YYY_mul(&(w->b),&f1,&f3); + FP4_YYY_mul(&(w->c),&f2,&f3); + +} + +/* constant time powering by small integer of max length bts */ + 
+void FP12_YYY_pinpow(FP12_YYY *r,int e,int bts) +{ + int i,b; + FP12_YYY R[2]; + + FP12_YYY_one(&R[0]); + FP12_YYY_copy(&R[1],r); + + for (i=bts-1; i>=0; i--) + { + b=(e>>i)&1; + FP12_YYY_mul(&R[1-b],&R[b]); + FP12_YYY_usqr(&R[b],&R[b]); + } + FP12_YYY_copy(r,&R[0]); +} + +/* Compressed powering of unitary elements y=x^(e mod r) */ + +void FP12_YYY_compow(FP4_YYY *c,FP12_YYY *x,BIG_XXX e,BIG_XXX r) +{ + FP12_YYY g1,g2; + FP4_YYY cp,cpm1,cpm2; + FP2_YYY f; + BIG_XXX q,a,b,m; + + BIG_XXX_rcopy(a,Fra_YYY); + BIG_XXX_rcopy(b,Frb_YYY); + FP2_YYY_from_BIGs(&f,a,b); + + BIG_XXX_rcopy(q,Modulus_YYY); + + FP12_YYY_copy(&g1,x); + FP12_YYY_copy(&g2,x); + + BIG_XXX_copy(m,q); + BIG_XXX_mod(m,r); + + BIG_XXX_copy(a,e); + BIG_XXX_mod(a,m); + + BIG_XXX_copy(b,e); + BIG_XXX_sdiv(b,m); + + FP12_YYY_trace(c,&g1); + + if (BIG_XXX_iszilch(b)) + { + FP4_YYY_xtr_pow(c,c,e); + return; + } + + + FP12_YYY_frob(&g2,&f); + FP12_YYY_trace(&cp,&g2); + + FP12_YYY_conj(&g1,&g1); + FP12_YYY_mul(&g2,&g1); + FP12_YYY_trace(&cpm1,&g2); + FP12_YYY_mul(&g2,&g1); + FP12_YYY_trace(&cpm2,&g2); + + FP4_YYY_xtr_pow2(c,&cp,c,&cpm1,&cpm2,a,b); + +} + + +/* SU= 528 */ +/* set r=a^b */ +/* Note this is simple square and multiply, so not side-channel safe */ + +void FP12_YYY_pow(FP12_YYY *r,FP12_YYY *a,BIG_XXX b) +{ + FP12_YYY w; + BIG_XXX b3; + int i,nb,bt; + BIG_XXX_norm(b); + BIG_XXX_pmul(b3,b,3); + BIG_XXX_norm(b3); + + FP12_YYY_copy(&w,a); + + nb=BIG_XXX_nbits(b3); + for (i=nb-2; i>=1; i--) + { + FP12_YYY_usqr(&w,&w); + bt=BIG_XXX_bit(b3,i)-BIG_XXX_bit(b,i); + if (bt==1) + FP12_YYY_mul(&w,a); + if (bt==-1) + { + FP12_YYY_conj(a,a); + FP12_YYY_mul(&w,a); + FP12_YYY_conj(a,a); + } + } + + FP12_YYY_copy(r,&w); + FP12_YYY_reduce(r); +} + +/* p=q0^u0.q1^u1.q2^u2.q3^u3 */ +/* Side channel attack secure */ +// Bos & Costello https://eprint.iacr.org/2013/458.pdf +// Faz-Hernandez & Longa & Sanchez https://eprint.iacr.org/2013/158.pdf + +void FP12_YYY_pow4(FP12_YYY *p,FP12_YYY *q,BIG_XXX u[4]) +{ + int 
i,j,k,nb,pb,bt; + FP12_YYY g[8],r; + BIG_XXX t[4],mt; + sign8 w[NLEN_XXX*BASEBITS_XXX+1]; + sign8 s[NLEN_XXX*BASEBITS_XXX+1]; + + for (i=0; i<4; i++) + BIG_XXX_copy(t[i],u[i]); + + + // Precomputed table + FP12_YYY_copy(&g[0],&q[0]); // q[0] + FP12_YYY_copy(&g[1],&g[0]); + FP12_YYY_mul(&g[1],&q[1]); // q[0].q[1] + FP12_YYY_copy(&g[2],&g[0]); + FP12_YYY_mul(&g[2],&q[2]); // q[0].q[2] + FP12_YYY_copy(&g[3],&g[1]); + FP12_YYY_mul(&g[3],&q[2]); // q[0].q[1].q[2] + FP12_YYY_copy(&g[4],&g[0]); + FP12_YYY_mul(&g[4],&q[3]); // q[0].q[3] + FP12_YYY_copy(&g[5],&g[1]); + FP12_YYY_mul(&g[5],&q[3]); // q[0].q[1].q[3] + FP12_YYY_copy(&g[6],&g[2]); + FP12_YYY_mul(&g[6],&q[3]); // q[0].q[2].q[3] + FP12_YYY_copy(&g[7],&g[3]); + FP12_YYY_mul(&g[7],&q[3]); // q[0].q[1].q[2].q[3] + + // Make it odd + pb=1-BIG_XXX_parity(t[0]); + BIG_XXX_inc(t[0],pb); + BIG_XXX_norm(t[0]); + + // Number of bits + BIG_XXX_zero(mt); + for (i=0; i<4; i++) + { + BIG_XXX_or(mt,mt,t[i]); + } + nb=1+BIG_XXX_nbits(mt); + + // Sign pivot + s[nb-1]=1; + for (i=0; i<nb-1; i++) + { + BIG_XXX_fshr(t[0],1); + s[i]=2*BIG_XXX_parity(t[0])-1; + } + + // Recoded exponent + for (i=0; i<nb; i++) + { + w[i]=0; + k=1; + for (j=1; j<4; j++) + { + bt=s[i]*BIG_XXX_parity(t[j]); + BIG_XXX_fshr(t[j],1); + + BIG_XXX_dec(t[j],(bt>>1)); + BIG_XXX_norm(t[j]); + w[i]+=bt*k; + k*=2; + } + } + + // Main loop + FP12_YYY_select(p,g,2*w[nb-1]+1); + for (i=nb-2; i>=0; i--) + { + FP12_YYY_select(&r,g,2*w[i]+s[i]); + FP12_YYY_usqr(p,p); + FP12_YYY_mul(p,&r); + } + // apply correction + FP12_YYY_conj(&r,&q[0]); + FP12_YYY_mul(&r,p); + FP12_YYY_cmove(p,&r,pb); + + FP12_YYY_reduce(p); +} + +/* Set w=w^p using Frobenius */ +/* SU= 160 */ +void FP12_YYY_frob(FP12_YYY *w,FP2_YYY *f) +{ + FP2_YYY f2,f3; + FP2_YYY_sqr(&f2,f); /* f2=f^2 */ + FP2_YYY_mul(&f3,&f2,f); /* f3=f^3 */ + + FP4_YYY_frob(&(w->a),&f3); + FP4_YYY_frob(&(w->b),&f3); + FP4_YYY_frob(&(w->c),&f3); + + FP4_YYY_pmul(&(w->b),&(w->b),f); + FP4_YYY_pmul(&(w->c),&(w->c),&f2); +} + +/* SU= 
8 */ +/* normalise all components of w */ +void FP12_YYY_norm(FP12_YYY *w) +{ + FP4_YYY_norm(&(w->a)); + FP4_YYY_norm(&(w->b)); + FP4_YYY_norm(&(w->c)); +} + +/* SU= 8 */ +/* reduce all components of w */ +void FP12_YYY_reduce(FP12_YYY *w) +{ + FP4_YYY_reduce(&(w->a)); + FP4_YYY_reduce(&(w->b)); + FP4_YYY_reduce(&(w->c)); +} + +/* trace function w=trace(x) */ +/* SU= 8 */ +void FP12_YYY_trace(FP4_YYY *w,FP12_YYY *x) +{ + FP4_YYY_imul(w,&(x->a),3); + FP4_YYY_reduce(w); +} + +/* SU= 8 */ +/* Output w in hex */ +void FP12_YYY_output(FP12_YYY *w) +{ + printf("["); + FP4_YYY_output(&(w->a)); + printf(","); + FP4_YYY_output(&(w->b)); + printf(","); + FP4_YYY_output(&(w->c)); + printf("]"); +} + +/* SU= 64 */ +/* Convert g to octet string w */ +void FP12_YYY_toOctet(octet *W,FP12_YYY *g) +{ + BIG_XXX a; + W->len=12*MODBYTES_XXX; + + FP_YYY_redc(a,&(g->a.a.a)); + BIG_XXX_toBytes(&(W->val[0]),a); + FP_YYY_redc(a,&(g->a.a.b)); + BIG_XXX_toBytes(&(W->val[MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->a.b.a)); + BIG_XXX_toBytes(&(W->val[2*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->a.b.b)); + BIG_XXX_toBytes(&(W->val[3*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.a.a)); + BIG_XXX_toBytes(&(W->val[4*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.a.b)); + BIG_XXX_toBytes(&(W->val[5*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.b.a)); + BIG_XXX_toBytes(&(W->val[6*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->b.b.b)); + BIG_XXX_toBytes(&(W->val[7*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.a.a)); + BIG_XXX_toBytes(&(W->val[8*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.a.b)); + BIG_XXX_toBytes(&(W->val[9*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.b.a)); + BIG_XXX_toBytes(&(W->val[10*MODBYTES_XXX]),a); + FP_YYY_redc(a,&(g->c.b.b)); + BIG_XXX_toBytes(&(W->val[11*MODBYTES_XXX]),a); +} + +/* SU= 24 */ +/* Restore g from octet string w */ +void FP12_YYY_fromOctet(FP12_YYY *g,octet *W) +{ + BIG_XXX b; + BIG_XXX_fromBytes(b,&W->val[0]); + FP_YYY_nres(&(g->a.a.a),b); + BIG_XXX_fromBytes(b,&W->val[MODBYTES_XXX]); + 
FP_YYY_nres(&(g->a.a.b),b); + BIG_XXX_fromBytes(b,&W->val[2*MODBYTES_XXX]); + FP_YYY_nres(&(g->a.b.a),b); + BIG_XXX_fromBytes(b,&W->val[3*MODBYTES_XXX]); + FP_YYY_nres(&(g->a.b.b),b); + BIG_XXX_fromBytes(b,&W->val[4*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.a.a),b); + BIG_XXX_fromBytes(b,&W->val[5*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.a.b),b); + BIG_XXX_fromBytes(b,&W->val[6*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.b.a),b); + BIG_XXX_fromBytes(b,&W->val[7*MODBYTES_XXX]); + FP_YYY_nres(&(g->b.b.b),b); + BIG_XXX_fromBytes(b,&W->val[8*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.a.a),b); + BIG_XXX_fromBytes(b,&W->val[9*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.a.b),b); + BIG_XXX_fromBytes(b,&W->val[10*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.b.a),b); + BIG_XXX_fromBytes(b,&W->val[11*MODBYTES_XXX]); + FP_YYY_nres(&(g->c.b.b),b); +} + +/* Move b to a if d=1 */ +void FP12_YYY_cmove(FP12_YYY *f,FP12_YYY *g,int d) +{ + FP4_YYY_cmove(&(f->a),&(g->a),d); + FP4_YYY_cmove(&(f->b),&(g->b),d); + FP4_YYY_cmove(&(f->c),&(g->c),d); +} + +/* +int main(){ + FP2_YYY f,w0,w1; + FP4_YYY t0,t1,t2; + FP12_YYY w,t,lv; + BIG_XXX a,b; + BIG_XXX p; + + //Test w^(P^4) = w mod p^2 +// BIG_XXX_randomnum(a); +// BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); BIG_XXX_mod(b,Modulus); + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,1); BIG_XXX_inc(b,2); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w0,a,b); + +// BIG_XXX_randomnum(a); BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); BIG_XXX_mod(b,Modulus); + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,3); BIG_XXX_inc(b,4); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w1,a,b); + + FP4_YYY_from_FP2s(&t0,&w0,&w1); + FP4_YYY_reduce(&t0); + +// BIG_XXX_randomnum(a); +// BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); BIG_XXX_mod(b,Modulus); + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,5); BIG_XXX_inc(b,6); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w0,a,b); + +// BIG_XXX_randomnum(a); BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); 
BIG_XXX_mod(b,Modulus); + + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,7); BIG_XXX_inc(b,8); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w1,a,b); + + FP4_YYY_from_FP2s(&t1,&w0,&w1); + FP4_YYY_reduce(&t1); + +// BIG_XXX_randomnum(a); +// BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); BIG_XXX_mod(b,Modulus); + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,9); BIG_XXX_inc(b,10); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w0,a,b); + +// BIG_XXX_randomnum(a); BIG_XXX_randomnum(b); +// BIG_XXX_mod(a,Modulus); BIG_XXX_mod(b,Modulus); + BIG_XXX_zero(a); BIG_XXX_zero(b); BIG_XXX_inc(a,11); BIG_XXX_inc(b,12); FP_YYY_nres(a); FP_YYY_nres(b); + FP2_YYY_from_zps(&w1,a,b); + + FP4_YYY_from_FP2s(&t2,&w0,&w1); + FP4_YYY_reduce(&t2); + + FP12_YYY_from_FP4s(&w,&t0,&t1,&t2); + + FP12_YYY_copy(&t,&w); + + printf("w= "); + FP12_YYY_output(&w); + printf("\n"); + + BIG_XXX_rcopy(p,Modulus); + //BIG_XXX_zero(p); BIG_XXX_inc(p,7); + + FP12_YYY_pow(&w,&w,p); + + printf("w^p= "); + FP12_YYY_output(&w); + printf("\n"); + + FP2_YYY_gfc(&f,12); + FP12_YYY_frob(&t,&f); + printf("w^p= "); + FP12_YYY_output(&t); + printf("\n"); + +//exit(0); + + FP12_YYY_pow(&w,&w,p); + //printf("w^p^2= "); + //FP12_YYY_output(&w); + //printf("\n"); + FP12_YYY_pow(&w,&w,p); + //printf("w^p^3= "); + //FP12_YYY_output(&w); + //printf("\n"); + FP12_YYY_pow(&w,&w,p); + FP12_YYY_pow(&w,&w,p); + FP12_YYY_pow(&w,&w,p); + printf("w^p^6= "); + FP12_YYY_output(&w); + printf("\n"); + FP12_YYY_pow(&w,&w,p); + FP12_YYY_pow(&w,&w,p); + printf("w^p^8= "); + FP12_YYY_output(&w); + printf("\n"); + FP12_YYY_pow(&w,&w,p); + FP12_YYY_pow(&w,&w,p); + FP12_YYY_pow(&w,&w,p); + printf("w^p^11= "); + FP12_YYY_output(&w); + printf("\n"); + + // BIG_XXX_zero(p); BIG_XXX_inc(p,7); BIG_XXX_norm(p); + FP12_YYY_pow(&w,&w,p); + + printf("w^p12= "); + FP12_YYY_output(&w); + printf("\n"); +//exit(0); + + FP12_YYY_inv(&t,&w); + printf("1/w mod p^4 = "); + FP12_YYY_output(&t); + printf("\n"); + + FP12_YYY_inv(&w,&t); + 
printf("1/(1/w) mod p^4 = "); + FP12_YYY_output(&w); + printf("\n"); + + + + FP12_YYY_inv(&lv,&w); +//printf("w= "); FP12_YYY_output(&w); printf("\n"); + FP12_YYY_conj(&w,&w); +//printf("w= "); FP12_YYY_output(&w); printf("\n"); +//exit(0); + FP12_YYY_mul(&w,&w,&lv); +//printf("w= "); FP12_YYY_output(&w); printf("\n"); + FP12_YYY_copy(&lv,&w); + FP12_YYY_frob(&w,&f); + FP12_YYY_frob(&w,&f); + FP12_YYY_mul(&w,&w,&lv); + +//printf("w= "); FP12_YYY_output(&w); printf("\n"); +//exit(0); + +w.unitary=0; +FP12_YYY_conj(&lv,&w); + printf("rx= "); FP12_YYY_output(&lv); printf("\n"); +FP12_YYY_inv(&lv,&w); + printf("ry= "); FP12_YYY_output(&lv); printf("\n"); + + + return 0; +} + +*/ http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto-c/blob/8d28d2c3/src/fp16.c.in ---------------------------------------------------------------------- diff --git a/src/fp16.c.in b/src/fp16.c.in new file mode 100644 index 0000000..a548526 --- /dev/null +++ b/src/fp16.c.in @@ -0,0 +1,693 @@ +/* +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +*/ + +/* AMCL Fp^8 functions */ + +/* FP16 elements are of the form a+ib, where i is sqrt(sqrt(-1+sqrt(-1))) */ + +#include "fp16_YYY.h" + + +/* test x==0 ? 
*/ +int FP16_YYY_iszilch(FP16_YYY *x) +{ + if (FP8_YYY_iszilch(&(x->a)) && FP8_YYY_iszilch(&(x->b))) return 1; + return 0; +} + +/* test x==1 ? */ +int FP16_YYY_isunity(FP16_YYY *x) +{ + if (FP8_YYY_isunity(&(x->a)) && FP8_YYY_iszilch(&(x->b))) return 1; + return 0; +} + +/* test is w real? That is in a+ib test b is zero */ +int FP16_YYY_isreal(FP16_YYY *w) +{ + return FP8_YYY_iszilch(&(w->b)); +} + +/* return 1 if x==y, else 0 */ +int FP16_YYY_equals(FP16_YYY *x,FP16_YYY *y) +{ + if (FP8_YYY_equals(&(x->a),&(y->a)) && FP8_YYY_equals(&(x->b),&(y->b))) + return 1; + return 0; +} + +/* set FP16 from two FP8s */ +void FP16_YYY_from_FP8s(FP16_YYY *w,FP8_YYY * x,FP8_YYY* y) +{ + FP8_YYY_copy(&(w->a), x); + FP8_YYY_copy(&(w->b), y); +} + +/* set FP16 from FP8 */ +void FP16_YYY_from_FP8(FP16_YYY *w,FP8_YYY *x) +{ + FP8_YYY_copy(&(w->a), x); + FP8_YYY_zero(&(w->b)); +} + +/* set high part of FP16 from FP8 */ +void FP16_YYY_from_FP8H(FP16_YYY *w,FP8_YYY *x) +{ + FP8_YYY_copy(&(w->b), x); + FP8_YYY_zero(&(w->a)); +} + +/* FP16 copy w=x */ +void FP16_YYY_copy(FP16_YYY *w,FP16_YYY *x) +{ + if (w==x) return; + FP8_YYY_copy(&(w->a), &(x->a)); + FP8_YYY_copy(&(w->b), &(x->b)); +} + +/* FP16 w=0 */ +void FP16_YYY_zero(FP16_YYY *w) +{ + FP8_YYY_zero(&(w->a)); + FP8_YYY_zero(&(w->b)); +} + +/* FP16 w=1 */ +void FP16_YYY_one(FP16_YYY *w) +{ + FP8_YYY_one(&(w->a)); + FP8_YYY_zero(&(w->b)); +} + +/* Set w=-x */ +void FP16_YYY_neg(FP16_YYY *w,FP16_YYY *x) +{ + /* Just one field neg */ + FP8_YYY m,t; + FP16_YYY_norm(x); + FP8_YYY_add(&m,&(x->a),&(x->b)); + FP8_YYY_norm(&m); + FP8_YYY_neg(&m,&m); + FP8_YYY_add(&t,&m,&(x->b)); + FP8_YYY_add(&(w->b),&m,&(x->a)); + FP8_YYY_copy(&(w->a),&t); + FP16_YYY_norm(w); +} + +/* Set w=conj(x) */ +void FP16_YYY_conj(FP16_YYY *w,FP16_YYY *x) +{ + FP8_YYY_copy(&(w->a), &(x->a)); + FP8_YYY_neg(&(w->b), &(x->b)); + FP16_YYY_norm(w); +} + +/* Set w=-conj(x) */ +void FP16_YYY_nconj(FP16_YYY *w,FP16_YYY *x) +{ + FP8_YYY_copy(&(w->b),&(x->b)); + 
FP8_YYY_neg(&(w->a), &(x->a)); + FP16_YYY_norm(w); +} + +/* Set w=x+y */ +void FP16_YYY_add(FP16_YYY *w,FP16_YYY *x,FP16_YYY *y) +{ + FP8_YYY_add(&(w->a), &(x->a), &(y->a)); + FP8_YYY_add(&(w->b), &(x->b), &(y->b)); +} + +/* Set w=x-y */ +/* Input y MUST be normed */ +void FP16_YYY_sub(FP16_YYY *w,FP16_YYY *x,FP16_YYY *y) +{ + FP16_YYY my; + + FP16_YYY_neg(&my, y); + FP16_YYY_add(w, x, &my); + +} + +/* reduce all components of w mod Modulus */ +void FP16_YYY_reduce(FP16_YYY *w) +{ + FP8_YYY_reduce(&(w->a)); + FP8_YYY_reduce(&(w->b)); +} + +/* normalise all elements of w */ +void FP16_YYY_norm(FP16_YYY *w) +{ + FP8_YYY_norm(&(w->a)); + FP8_YYY_norm(&(w->b)); +} + +/* Set w=s*x, where s is FP8 */ +void FP16_YYY_pmul(FP16_YYY *w,FP16_YYY *x,FP8_YYY *s) +{ + FP8_YYY_mul(&(w->a),&(x->a),s); + FP8_YYY_mul(&(w->b),&(x->b),s); +} + +/* Set w=s*x, where s is FP2 */ +void FP16_YYY_qmul(FP16_YYY *w,FP16_YYY *x,FP2_YYY *s) +{ + FP8_YYY_qmul(&(w->a),&(x->a),s); + FP8_YYY_qmul(&(w->b),&(x->b),s); +} + +/* Set w=s*x, where s is int */ +void FP16_YYY_imul(FP16_YYY *w,FP16_YYY *x,int s) +{ + FP8_YYY_imul(&(w->a),&(x->a),s); + FP8_YYY_imul(&(w->b),&(x->b),s); +} + +/* Set w=x^2 */ +/* Input MUST be normed */ +void FP16_YYY_sqr(FP16_YYY *w,FP16_YYY *x) +{ + FP8_YYY t1,t2,t3; + + FP8_YYY_mul(&t3,&(x->a),&(x->b)); /* norms x */ + FP8_YYY_copy(&t2,&(x->b)); + FP8_YYY_add(&t1,&(x->a),&(x->b)); + FP8_YYY_times_i(&t2); + + FP8_YYY_add(&t2,&(x->a),&t2); + + FP8_YYY_norm(&t1); // 2 + FP8_YYY_norm(&t2); // 2 + + FP8_YYY_mul(&(w->a),&t1,&t2); + + FP8_YYY_copy(&t2,&t3); + FP8_YYY_times_i(&t2); + + FP8_YYY_add(&t2,&t2,&t3); + + FP8_YYY_norm(&t2); // 2 + FP8_YYY_neg(&t2,&t2); + FP8_YYY_add(&(w->a),&(w->a),&t2); /* a=(a+b)(a+i^2.b)-i^2.ab-ab = a*a+ib*ib */ + FP8_YYY_add(&(w->b),&t3,&t3); /* b=2ab */ + + FP16_YYY_norm(w); +} + +/* Set w=x*y */ +/* Inputs MUST be normed */ +void FP16_YYY_mul(FP16_YYY *w,FP16_YYY *x,FP16_YYY *y) +{ + + FP8_YYY t1,t2,t3,t4; + FP8_YYY_mul(&t1,&(x->a),&(y->a)); + 
FP8_YYY_mul(&t2,&(x->b),&(y->b)); + + FP8_YYY_add(&t3,&(y->b),&(y->a)); + FP8_YYY_add(&t4,&(x->b),&(x->a)); + + FP8_YYY_norm(&t4); // 2 + FP8_YYY_norm(&t3); // 2 + + FP8_YYY_mul(&t4,&t4,&t3); /* (xa+xb)(ya+yb) */ + + FP8_YYY_neg(&t3,&t1); // 1 + FP8_YYY_add(&t4,&t4,&t3); //t4E=3 + FP8_YYY_norm(&t4); + + FP8_YYY_neg(&t3,&t2); // 1 + FP8_YYY_add(&(w->b),&t4,&t3); //wbE=3 + + FP8_YYY_times_i(&t2); + FP8_YYY_add(&(w->a),&t2,&t1); + + FP16_YYY_norm(w); +} + +/* output FP16 in format [a,b] */ +void FP16_YYY_output(FP16_YYY *w) +{ + printf("["); + FP8_YYY_output(&(w->a)); + printf(","); + FP8_YYY_output(&(w->b)); + printf("]"); +} + +void FP16_YYY_rawoutput(FP16_YYY *w) +{ + printf("["); + FP8_YYY_rawoutput(&(w->a)); + printf(","); + FP8_YYY_rawoutput(&(w->b)); + printf("]"); +} + +/* Set w=1/x */ +void FP16_YYY_inv(FP16_YYY *w,FP16_YYY *x) +{ + FP8_YYY t1,t2; + FP8_YYY_sqr(&t1,&(x->a)); + FP8_YYY_sqr(&t2,&(x->b)); + FP8_YYY_times_i(&t2); + FP8_YYY_norm(&t2); + + FP8_YYY_sub(&t1,&t1,&t2); + FP8_YYY_norm(&t1); + + FP8_YYY_inv(&t1,&t1); + + FP8_YYY_mul(&(w->a),&t1,&(x->a)); + FP8_YYY_neg(&t1,&t1); + FP8_YYY_norm(&t1); + FP8_YYY_mul(&(w->b),&t1,&(x->b)); +} + +/* w*=i where i = sqrt(sqrt(-1+sqrt(-1))) */ +void FP16_YYY_times_i(FP16_YYY *w) +{ + FP8_YYY s,t; + FP8_YYY_copy(&s,&(w->b)); + FP8_YYY_copy(&t,&(w->a)); + FP8_YYY_times_i(&s); + FP8_YYY_copy(&(w->a),&s); + FP8_YYY_copy(&(w->b),&t); + FP16_YYY_norm(w); +} + +void FP16_YYY_times_i2(FP16_YYY *w) +{ + FP8_YYY_times_i(&(w->a)); + FP8_YYY_times_i(&(w->b)); +} + +void FP16_YYY_times_i4(FP16_YYY *w) +{ + FP8_YYY_times_i2(&(w->a)); + FP8_YYY_times_i2(&(w->b)); +} + +/* Set w=w^p using Frobenius */ +void FP16_YYY_frob(FP16_YYY *w,FP2_YYY *f) +{ + // f=(i+1)^(p-3)/8 + FP2_YYY ff; + + FP2_YYY_sqr(&ff,f); // (i+1)^(p-3)/4 + FP2_YYY_norm(&ff); + + FP8_YYY_frob(&(w->a),&ff); + FP8_YYY_frob(&(w->b),&ff); + + FP8_YYY_qmul(&(w->b),&(w->b),f); // times (1+i)^(p-3)/8 + FP8_YYY_times_i(&(w->b)); // (i+1)^(p-1)/8 +} + +/* Set r=a^b mod m 
*/
/* r = a^b, by simple right-to-left binary exponentiation (square-and-multiply).
   NOTE(review): BIG_XXX_norm(b) normalises the exponent in place; BIG is
   presumably an array type in AMCL, so this touches the caller's copy of b —
   confirm that is intended/acceptable at all call sites. */
void FP16_YYY_pow(FP16_YYY *r,FP16_YYY *a,BIG_XXX b)
{
    FP16_YYY w;        /* running square a^(2^k) */
    BIG_XXX z,zilch;   /* z = remaining exponent bits; zilch = 0 for the termination compare */
    int bt;

    BIG_XXX_zero(zilch);
    BIG_XXX_norm(b);
    BIG_XXX_copy(z,b);
    FP16_YYY_copy(&w,a);
    FP16_YYY_one(r);   /* accumulator starts at 1 */

    while(1)
    {
        bt=BIG_XXX_parity(z);              /* current low bit of the exponent */
        BIG_XXX_shr(z,1);
        if (bt) FP16_YYY_mul(r,r,&w);      /* multiply in this power if bit set */
        if (BIG_XXX_comp(z,zilch)==0) break; /* no bits left: done (skips one useless squaring) */
        FP16_YYY_sqr(&w,&w);
    }
    FP16_YYY_reduce(r);
}

/* Conditionally move g into f when d=1 (f unchanged when d=0), componentwise
   via FP8 cmove. Presumably intended to be constant-time for side-channel
   resistance — confirm FP8_YYY_cmove is branch-free. */
void FP16_YYY_cmove(FP16_YYY *f,FP16_YYY *g,int d)
{
    FP8_YYY_cmove(&(f->a),&(g->a),d);
    FP8_YYY_cmove(&(f->b),&(g->b),d);
}

#if CURVE_SECURITY_ZZZ == 256

/* XTR xtr_a function (Stam-Lenstra "A" trace combination).
   Structurally: r = (w-y)*x.a + i*((w+y)*x.b) + z, where x.a/x.b are the two
   FP8 halves of x (r is first set to a copy of x, then each half is pmul'd). */
void FP16_YYY_xtr_A(FP16_YYY *r,FP16_YYY *w,FP16_YYY *x,FP16_YYY *y,FP16_YYY *z)
{
    FP16_YYY t1,t2;

    FP16_YYY_copy(r,x);
    FP16_YYY_sub(&t1,w,y);
    FP16_YYY_norm(&t1);
    FP16_YYY_pmul(&t1,&t1,&(r->a));   /* (w-y) scaled by FP8 half x.a */
    FP16_YYY_add(&t2,w,y);
    FP16_YYY_norm(&t2);
    FP16_YYY_pmul(&t2,&t2,&(r->b));   /* (w+y) scaled by FP8 half x.b */
    FP16_YYY_times_i(&t2);            /* multiply by the field generator i */

    FP16_YYY_add(r,&t1,&t2);
    FP16_YYY_add(r,r,z);

    FP16_YYY_reduce(r);
}

/* XTR xtr_d function: r = x^2 - 2*conj(x) (trace doubling). */
void FP16_YYY_xtr_D(FP16_YYY *r,FP16_YYY *x)
{
    FP16_YYY w;
    FP16_YYY_copy(r,x);
    FP16_YYY_conj(&w,r);
    FP16_YYY_add(&w,&w,&w);   /* w = 2*conj(x) */
    FP16_YYY_sqr(r,r);
    FP16_YYY_norm(&w);
    FP16_YYY_sub(r,r,&w);
    FP16_YYY_reduce(r); /* reduce here as multiple calls trigger automatic reductions */
}

/* r=x^n using XTR single exponentiation on traces (Stam-Lenstra ladder).
   NOTE(review): upstream comment says "traces of FP12s", but in this
   256-bit-security file these are FP16 traces — comment looks stale from the
   FP4/FP12 variant; confirm.
   The ladder state (a,b,c) tracks traces of consecutive powers; a starts as
   the trace of the generator, here the constant 3 lifted FP2->FP4->FP8->FP16.
   NOTE(review): x is conjugated in place and then un-conjugated inside the
   0-bit branch — x is temporarily modified, so this is not re-entrant on x. */
void FP16_YYY_xtr_pow(FP16_YYY *r,FP16_YYY *x,BIG_XXX n)
{
    int i,par,nb;
    BIG_XXX v;
    FP2_YYY w2;
    FP4_YYY w4;
    FP8_YYY w8;
    FP16_YYY t,a,b,c;

    /* a = 3 embedded up the tower FP2 -> FP4 -> FP8 -> FP16 */
    BIG_XXX_zero(v);
    BIG_XXX_inc(v,3);
    BIG_XXX_norm(v);
    FP2_YYY_from_BIG(&w2,v);
    FP4_YYY_from_FP2(&w4,&w2);
    FP8_YYY_from_FP4(&w8,&w4);
    FP16_YYY_from_FP8(&a,&w8);

    FP16_YYY_copy(&b,x);
    FP16_YYY_xtr_D(&c,x);

    /* v = floor(n/2), decremented once more when n is even; parity selects
       which ladder register holds the answer at the end */
    BIG_XXX_norm(n);
    par=BIG_XXX_parity(n);
    BIG_XXX_copy(v,n);
    BIG_XXX_shr(v,1);
    if (par==0)
    {
        BIG_XXX_dec(v,1);
        BIG_XXX_norm(v);
    }

    nb=BIG_XXX_nbits(v);
    for (i=nb-1; i>=0; i--)   /* process exponent bits high-to-low */
    {
        if (!BIG_XXX_bit(v,i))
        {
            /* bit = 0 step: b <- A(a, b, conj(x), conj(c)); c <- D(old b); a <- D(a) */
            FP16_YYY_copy(&t,&b);
            FP16_YYY_conj(x,x);      /* temporary in-place conjugation of x */
            FP16_YYY_conj(&c,&c);
            FP16_YYY_xtr_A(&b,&a,&b,x,&c);
            FP16_YYY_conj(x,x);      /* restore x */
            FP16_YYY_xtr_D(&c,&t);
            FP16_YYY_xtr_D(&a,&a);
        }
        else
        {
            /* bit = 1 step: a <- D(b); b <- A(c, b, x, conj(old a)); c <- D(c) */
            FP16_YYY_conj(&t,&a);
            FP16_YYY_xtr_D(&a,&b);
            FP16_YYY_xtr_A(&b,&c,&b,x,&t);
            FP16_YYY_xtr_D(&c,&c);
        }
    }

    /* even n ended one step "ahead": take c; odd n: take b */
    if (par==0) FP16_YYY_copy(r,&c);
    else FP16_YYY_copy(r,&b);
    FP16_YYY_reduce(r);
}

/* r=ck^a.cl^n using XTR double exponentiation method on traces. See Stam thesis.
   NOTE(review): same stale "FP12" wording as xtr_pow applies in upstream.
   Inputs: ck, cl and the related traces ckml = trace of k-l, ckm2l = trace of
   k-2l. The two loops below run a binary-GCD-like reduction on the exponent
   pair (d,e) = (b,a), updating the trace quadruple (cu,cv,cumv,cum2v) in step,
   until d == e; common factors of 2 are pulled out first (f2) and re-applied
   as trailing doublings. r is also used as scratch in two branches before it
   receives the final result. */
void FP16_YYY_xtr_pow2(FP16_YYY *r,FP16_YYY *ck,FP16_YYY *cl,FP16_YYY *ckml,FP16_YYY *ckm2l,BIG_XXX a,BIG_XXX b)
{
    int i,f2;
    BIG_XXX d,e,w;
    FP16_YYY t,cu,cv,cumv,cum2v;

    BIG_XXX_norm(a);
    BIG_XXX_norm(b);
    BIG_XXX_copy(e,a);
    BIG_XXX_copy(d,b);
    FP16_YYY_copy(&cu,ck);
    FP16_YYY_copy(&cv,cl);
    FP16_YYY_copy(&cumv,ckml);
    FP16_YYY_copy(&cum2v,ckm2l);

    /* strip common powers of 2 from (d,e); re-applied as f2 doublings at the end */
    f2=0;
    while (BIG_XXX_parity(d)==0 && BIG_XXX_parity(e)==0)
    {
        BIG_XXX_shr(d,1);
        BIG_XXX_shr(e,1);
        f2++;
    }
    while (BIG_XXX_comp(d,e)!=0)
    {
        if (BIG_XXX_comp(d,e)>0)   /* d > e: reduce d */
        {
            BIG_XXX_imul(w,e,4);
            BIG_XXX_norm(w);
            if (BIG_XXX_comp(d,w)<=0)          /* e < d <= 4e */
            {
                BIG_XXX_copy(w,d);
                BIG_XXX_copy(d,e);
                BIG_XXX_sub(e,w,e);            /* (d,e) <- (e, d-e) */
                BIG_XXX_norm(e);
                FP16_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
                FP16_YYY_conj(&cum2v,&cumv);
                FP16_YYY_copy(&cumv,&cv);
                FP16_YYY_copy(&cv,&cu);
                FP16_YYY_copy(&cu,&t);
            }
            else if (BIG_XXX_parity(d)==0)     /* d even: halve d */
            {
                BIG_XXX_shr(d,1);
                FP16_YYY_conj(r,&cum2v);       /* r used as scratch here */
                FP16_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
                FP16_YYY_xtr_D(&cum2v,&cumv);
                FP16_YYY_copy(&cumv,&t);
                FP16_YYY_xtr_D(&cu,&cu);
            }
            else if (BIG_XXX_parity(e)==1)     /* d,e both odd: (d-e)/2 */
            {
                BIG_XXX_sub(d,d,e);
                BIG_XXX_norm(d);
                BIG_XXX_shr(d,1);
                FP16_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
                FP16_YYY_xtr_D(&cu,&cu);
                FP16_YYY_xtr_D(&cum2v,&cv);
                FP16_YYY_conj(&cum2v,&cum2v);
                FP16_YYY_copy(&cv,&t);
            }
            else                                /* d odd, e even: swap then halve */
            {
                BIG_XXX_copy(w,d);
                BIG_XXX_copy(d,e);
                BIG_XXX_shr(d,1);
                BIG_XXX_copy(e,w);
                FP16_YYY_xtr_D(&t,&cumv);
                FP16_YYY_conj(&cumv,&cum2v);
                FP16_YYY_conj(&cum2v,&t);
                FP16_YYY_xtr_D(&t,&cv);
                FP16_YYY_copy(&cv,&cu);
                FP16_YYY_copy(&cu,&t);
            }
        }
        if (BIG_XXX_comp(d,e)<0)   /* e > d: mirror-image reductions on e */
        {
            BIG_XXX_imul(w,d,4);
            BIG_XXX_norm(w);
            if (BIG_XXX_comp(e,w)<=0)          /* d < e <= 4d: e <- e-d */
            {
                BIG_XXX_sub(e,e,d);
                BIG_XXX_norm(e);
                FP16_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
                FP16_YYY_copy(&cum2v,&cumv);
                FP16_YYY_copy(&cumv,&cu);
                FP16_YYY_copy(&cu,&t);
            }
            else if (BIG_XXX_parity(e)==0)     /* e even: swap then halve */
            {
                BIG_XXX_copy(w,d);
                BIG_XXX_copy(d,e);
                BIG_XXX_shr(d,1);
                BIG_XXX_copy(e,w);
                FP16_YYY_xtr_D(&t,&cumv);
                FP16_YYY_conj(&cumv,&cum2v);
                FP16_YYY_conj(&cum2v,&t);
                FP16_YYY_xtr_D(&t,&cv);
                FP16_YYY_copy(&cv,&cu);
                FP16_YYY_copy(&cu,&t);
            }
            else if (BIG_XXX_parity(d)==1)     /* d,e both odd: (e-d)/2, roles swapped */
            {
                BIG_XXX_copy(w,e);
                BIG_XXX_copy(e,d);
                BIG_XXX_sub(w,w,d);
                BIG_XXX_norm(w);
                BIG_XXX_copy(d,w);
                BIG_XXX_shr(d,1);
                FP16_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
                FP16_YYY_conj(&cumv,&cumv);
                FP16_YYY_xtr_D(&cum2v,&cu);
                FP16_YYY_conj(&cum2v,&cum2v);
                FP16_YYY_xtr_D(&cu,&cv);
                FP16_YYY_copy(&cv,&t);
            }
            else                                /* d odd, e... : halve d */
            {
                BIG_XXX_shr(d,1);
                FP16_YYY_conj(r,&cum2v);       /* r used as scratch here */
                FP16_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
                FP16_YYY_xtr_D(&cum2v,&cumv);
                FP16_YYY_copy(&cumv,&t);
                FP16_YYY_xtr_D(&cu,&cu);
            }
        }
    }
    /* combine, undo the f2 halvings, then finish with a single exponentiation by d (== e) */
    FP16_YYY_xtr_A(r,&cu,&cv,&cumv,&cum2v);
    for (i=0; i<f2; i++) FP16_YYY_xtr_D(r,r);
    FP16_YYY_xtr_pow(r,r,d);
}

#endif

#ifdef HAS_MAIN
/* Stand-alone smoke test harness.
   NOTE(review): this block references FP2/FP8/BIG and FP8_xtr_pow* WITHOUT the
   _XXX/_YYY template suffixes used everywhere else in this file, and talks
   about "mod p^4" — it looks copied verbatim from the FP8 variant and would
   not compile after template substitution. Confirm HAS_MAIN is never defined
   for this file, or update the names. */
int main()
{
    FP2 w0,w1,f;
    FP8 w,t;
    FP8 c1,c2,c3,c4,cr;
    BIG a,b;
    BIG e,e1,e2;
    BIG p,md;

    BIG_rcopy(md,Modulus);
    //Test w^(P^4) = w mod p^2
    BIG_zero(a);
    BIG_inc(a,27);
    BIG_zero(b);
    BIG_inc(b,45);
    FP2_from_BIGs(&w0,a,b);

    BIG_zero(a);
    BIG_inc(a,33);
    BIG_zero(b);
    BIG_inc(b,54);
    FP2_from_BIGs(&w1,a,b);

    FP8_from_FP2s(&w,&w0,&w1);
    FP8_reduce(&w);

    printf("w= ");
    FP8_output(&w);
    printf("\n");

    FP8_copy(&t,&w);

    BIG_copy(p,md);
    FP8_pow(&w,&w,p);

    printf("w^p= ");
    FP8_output(&w);
    printf("\n");

    /* compare pow-by-p against the Frobenius shortcut */
    BIG_rcopy(a,CURVE_Fra);
    BIG_rcopy(b,CURVE_Frb);
    FP2_from_BIGs(&f,a,b);

    FP8_frob(&t,&f);
    printf("w^p= ");
    FP8_output(&t);
    printf("\n");

    FP8_pow(&w,&w,p);
    FP8_pow(&w,&w,p);
    FP8_pow(&w,&w,p);
    printf("w^p4= ");
    FP8_output(&w);
    printf("\n");

    // Test 1/(1/x) = x mod p^4
    FP8_from_FP2s(&w,&w0,&w1);
    printf("Test Inversion \nw= ");
    FP8_output(&w);
    printf("\n");

    FP8_inv(&w,&w);
    printf("1/w mod p^4 = ");
    FP8_output(&w);
    printf("\n");

    FP8_inv(&w,&w);
    printf("1/(1/w) mod p^4 = ");
    FP8_output(&w);
    printf("\n");

    BIG_zero(e);
    BIG_inc(e,12);

    //  FP8_xtr_A(&w,&t,&w,&t,&t);
    FP8_xtr_pow(&w,&w,e);

    printf("w^e= ");
    FP8_output(&w);
    printf("\n");

    BIG_zero(a);
    BIG_inc(a,37);
    BIG_zero(b);
    BIG_inc(b,17);
    FP2_from_BIGs(&w0,a,b);

    BIG_zero(a);
    BIG_inc(a,49);
    BIG_zero(b);
    BIG_inc(b,31);
    FP2_from_BIGs(&w1,a,b);

    FP8_from_FP2s(&c1,&w0,&w1);
    FP8_from_FP2s(&c2,&w0,&w1);
    FP8_from_FP2s(&c3,&w0,&w1);
    FP8_from_FP2s(&c4,&w0,&w1);

    BIG_zero(e1);
    BIG_inc(e1,3331);
    BIG_zero(e2);
    BIG_inc(e2,3372);

    FP8_xtr_pow2(&w,&c1,&w,&c2,&c3,e1,e2);

    printf("c^e= ");
    FP8_output(&w);
    printf("\n");

    return 0;
}
#endif
