[47/51] [partial] incubator-milagro-crypto git commit: update code

brianspector Tue, 15 Jan 2019 08:16:11 -0800

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ecp.c
----------------------------------------------------------------------
diff --git a/version22/c/ecp.c b/version22/c/ecp.c
new file mode 100644
index 0000000..a6dcfad
--- /dev/null
+++ b/version22/c/ecp.c
@@ -0,0 +1,1176 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Elliptic Curve Functions */
+/* SU=m, SU is Stack Usage (Weierstrass Curves) */
+
+//#define HAS_MAIN
+
+#include "amcl.h"
+
+/* test for P=O point-at-infinity */
+int ECP_isinf(ECP *P)
+{
+#if CURVETYPE==EDWARDS
+    FP_reduce(P->x);
+    FP_reduce(P->y);
+    FP_reduce(P->z);
+    return (BIG_iszilch(P->x) && BIG_comp(P->y,P->z)==0);
+#else
+    return P->inf;
+#endif
+}
+
+/* Conditional swap of P and Q dependant on d */
+static void ECP_cswap(ECP *P,ECP *Q,int d)
+{
+    BIG_cswap(P->x,Q->x,d);
+#if CURVETYPE!=MONTGOMERY
+    BIG_cswap(P->y,Q->y,d);
+#endif
+    BIG_cswap(P->z,Q->z,d);
+#if CURVETYPE!=EDWARDS
+    d=~(d-1);
+    d=d&(P->inf^Q->inf);
+    P->inf^=d;
+    Q->inf^=d;
+#endif
+}
+
+#if CURVETYPE!=MONTGOMERY
+/* Conditional move Q to P dependant on d */
+static void ECP_cmove(ECP *P,ECP *Q,int d)
+{
+    BIG_cmove(P->x,Q->x,d);
+#if CURVETYPE!=MONTGOMERY
+    BIG_cmove(P->y,Q->y,d);
+#endif
+    BIG_cmove(P->z,Q->z,d);
+#if CURVETYPE!=EDWARDS
+    d=~(d-1);
+    P->inf^=(P->inf^Q->inf)&d;
+#endif
+}
+
+/* return 1 if b==c, no branching */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 x=b^c;
+    x-=1;  // if x=0, x now -1
+    return (int)((x>>31)&1);
+}
+#endif // CURVETYPE!=MONTGOMERY
+
+#if CURVETYPE!=MONTGOMERY
+/* Constant time select from pre-computed table */
+static void ECP_select(ECP *P,ECP W[],sign32 b)
+{
+    ECP MP;
+    sign32 m=b>>31;
+    sign32 babs=(b^m)-m;
+
+    babs=(babs-1)/2;
+
+    ECP_cmove(P,&W[0],teq(babs,0));  // conditional move
+    ECP_cmove(P,&W[1],teq(babs,1));
+    ECP_cmove(P,&W[2],teq(babs,2));
+    ECP_cmove(P,&W[3],teq(babs,3));
+    ECP_cmove(P,&W[4],teq(babs,4));
+    ECP_cmove(P,&W[5],teq(babs,5));
+    ECP_cmove(P,&W[6],teq(babs,6));
+    ECP_cmove(P,&W[7],teq(babs,7));
+
+    ECP_copy(&MP,P);
+    ECP_neg(&MP);  // minus P
+    ECP_cmove(P,&MP,(int)(m&1));
+}
+#endif
+
+/* Test P == Q */
+/* SU=168 */
+int ECP_equals(ECP *P,ECP *Q)
+{
+#if CURVETYPE==WEIERSTRASS
+    BIG pz2,qz2,a,b;
+    if (ECP_isinf(P) && ECP_isinf(Q)) return 1;
+    if (ECP_isinf(P) || ECP_isinf(Q)) return 0;
+
+    FP_sqr(pz2,P->z);
+    FP_sqr(qz2,Q->z);
+
+    FP_mul(a,P->x,qz2);
+    FP_mul(b,Q->x,pz2);
+    FP_reduce(a);
+    FP_reduce(b);
+    if (BIG_comp(a,b)!=0) return 0;
+
+    FP_mul(a,P->y,qz2);
+    FP_mul(a,a,Q->z);
+    FP_mul(b,Q->y,pz2);
+    FP_mul(b,b,P->z);
+    FP_reduce(a);
+    FP_reduce(b);
+    if (BIG_comp(a,b)!=0) return 0;
+    return 1;
+#else
+    BIG a,b;
+    if (ECP_isinf(P) && ECP_isinf(Q)) return 1;
+    if (ECP_isinf(P) || ECP_isinf(Q)) return 0;
+
+    FP_mul(a,P->x,Q->z);
+    FP_mul(b,Q->x,P->z);
+    FP_reduce(a);
+    FP_reduce(b);
+    if (BIG_comp(a,b)!=0) return 0;
+
+#if CURVETYPE==EDWARDS
+    FP_mul(a,P->y,Q->z);
+    FP_mul(b,Q->y,P->z);
+    FP_reduce(a);
+    FP_reduce(b);
+    if (BIG_comp(a,b)!=0) return 0;
+#endif
+
+    return 1;
+#endif
+}
+
+/* Set P=Q */
+/* SU=16 */
+void ECP_copy(ECP *P,ECP *Q)
+{
+#if CURVETYPE!=EDWARDS
+    P->inf=Q->inf;
+#endif
+    BIG_copy(P->x,Q->x);
+#if CURVETYPE!=MONTGOMERY
+    BIG_copy(P->y,Q->y);
+#endif
+    BIG_copy(P->z,Q->z);
+}
+
+/* Set P=-Q */
+#if CURVETYPE!=MONTGOMERY
+/* SU=8 */
+void ECP_neg(ECP *P)
+{
+    if (ECP_isinf(P)) return;
+#if CURVETYPE==WEIERSTRASS
+    FP_neg(P->y,P->y);
+    BIG_norm(P->y);
+#else
+    FP_neg(P->x,P->x);
+    BIG_norm(P->x);
+#endif
+
+}
+#endif
+
+/* Set P=O */
+void ECP_inf(ECP *P)
+{
+#if CURVETYPE==EDWARDS
+    BIG_zero(P->x);
+    FP_one(P->y);
+    FP_one(P->z);
+#else
+    P->inf=1;
+#endif
+}
+
+/* Calculate right Hand Side of curve equation y^2=RHS */
+/* SU=56 */
+void ECP_rhs(BIG v,BIG x)
+{
+#if CURVETYPE==WEIERSTRASS
+    /* x^3+Ax+B */
+    BIG t;
+    FP_sqr(t,x);
+    FP_mul(t,t,x);
+
+    if (CURVE_A==-3)
+    {
+        FP_neg(v,x);
+        BIG_norm(v);
+        BIG_imul(v,v,-CURVE_A);
+        BIG_norm(v);
+        FP_add(v,t,v);
+    }
+    else BIG_copy(v,t);
+
+    BIG_rcopy(t,CURVE_B);
+    FP_nres(t);
+    FP_add(v,t,v);
+    FP_reduce(v);
+#endif
+
+#if CURVETYPE==EDWARDS
+    /* (Ax^2-1)/(Bx^2-1) */
+    BIG t,m,one;
+    BIG_rcopy(m,Modulus);
+    FP_sqr(v,x);
+    FP_one(one);
+    BIG_rcopy(t,CURVE_B);
+    FP_nres(t);
+    FP_mul(t,v,t);
+    FP_sub(t,t,one);
+    if (CURVE_A==1) FP_sub(v,v,one);
+
+    if (CURVE_A==-1)
+    {
+        FP_add(v,v,one);
+        FP_neg(v,v);
+    }
+    FP_redc(v);
+    FP_redc(t);
+    BIG_moddiv(v,v,t,m);
+    FP_nres(v);
+#endif
+
+#if CURVETYPE==MONTGOMERY
+    /* x^3+Ax^2+x */
+    BIG x2,x3;
+    FP_sqr(x2,x);
+    FP_mul(x3,x2,x);
+    BIG_copy(v,x);
+    FP_imul(x2,x2,CURVE_A);
+    FP_add(v,v,x2);
+    FP_add(v,v,x3);
+    FP_reduce(v);
+#endif
+}
+
+/* Set P=(x,y) */
+
+#if CURVETYPE==MONTGOMERY
+
+/* Set P=(x,{y}) */
+
+int ECP_set(ECP *P,BIG x)
+{
+    BIG m,rhs;
+    BIG_rcopy(m,Modulus);
+    BIG_copy(rhs,x);
+    FP_nres(rhs);
+    ECP_rhs(rhs,rhs);
+    FP_redc(rhs);
+
+    if (BIG_jacobi(rhs,m)!=1)
+    {
+        ECP_inf(P);
+        return 0;
+    }
+    P->inf=0;
+    BIG_copy(P->x,x);
+    FP_nres(P->x);
+    FP_one(P->z);
+    return 1;
+}
+
+/* Extract x coordinate as BIG */
+int ECP_get(BIG x,ECP *P)
+{
+    if (ECP_isinf(P)) return -1;
+    ECP_affine(P);
+    BIG_copy(x,P->x);
+    FP_redc(x);
+    return 0;
+}
+
+
+#else
+/* Extract (x,y) and return sign of y. If x and y are the same return only x */
+/* SU=16 */
+int ECP_get(BIG x,BIG y,ECP *P)
+{
+    int s;
+#if CURVETYPE!=EDWARDS
+    if (ECP_isinf(P)) return -1;
+#endif
+    ECP_affine(P);
+
+    BIG_copy(y,P->y);
+    FP_redc(y);
+
+    s=BIG_parity(y);
+
+    BIG_copy(x,P->x);
+    FP_redc(x);
+
+    return s;
+}
+
+/* Set P=(x,{y}) */
+/* SU=96 */
+int ECP_set(ECP *P,BIG x,BIG y)
+{
+    BIG rhs,y2;
+    BIG_copy(y2,y);
+
+    FP_nres(y2);
+    FP_sqr(y2,y2);
+    FP_reduce(y2);
+
+
+
+    BIG_copy(rhs,x);
+    FP_nres(rhs);
+
+    ECP_rhs(rhs,rhs);
+
+    if (BIG_comp(y2,rhs)!=0)
+    {
+        ECP_inf(P);
+        return 0;
+    }
+#if CURVETYPE==WEIERSTRASS
+    P->inf=0;
+#endif
+    BIG_copy(P->x,x);
+    FP_nres(P->x);
+    BIG_copy(P->y,y);
+    FP_nres(P->y);
+    FP_one(P->z);
+    return 1;
+}
+
+/* Set P=(x,y), where y is calculated from x with sign s */
+/* SU=136 */
+int ECP_setx(ECP *P,BIG x,int s)
+{
+    BIG t,rhs,m;
+    BIG_rcopy(m,Modulus);
+
+    BIG_copy(rhs,x);
+    FP_nres(rhs);
+    ECP_rhs(rhs,rhs);
+    BIG_copy(t,rhs);
+    FP_redc(t);
+    if (BIG_jacobi(t,m)!=1)
+    {
+        ECP_inf(P);
+        return 0;
+    }
+#if CURVETYPE==WEIERSTRASS
+    P->inf=0;
+#endif
+    BIG_copy(P->x,x);
+    FP_nres(P->x);
+
+    FP_sqrt(P->y,rhs);
+    BIG_copy(rhs,P->y);
+    FP_redc(rhs);
+    if (BIG_parity(rhs)!=s)
+        FP_neg(P->y,P->y);
+    FP_reduce(P->y);
+    FP_one(P->z);
+    return 1;
+}
+
+#endif
+
+/* Convert P to Affine, from (x,y,z) to (x,y) */
+/* SU=160 */
+void ECP_affine(ECP *P)
+{
+    BIG one,iz,m;
+#if CURVETYPE==WEIERSTRASS
+    BIG izn;
+    if (ECP_isinf(P)) return;
+    FP_one(one);
+    if (BIG_comp(P->z,one)==0) return;
+    BIG_rcopy(m,Modulus);
+
+    FP_redc(P->z);
+    BIG_invmodp(iz,P->z,m);
+    FP_nres(iz);
+
+    FP_sqr(izn,iz);
+    FP_mul(P->x,P->x,izn);
+    FP_mul(izn,izn,iz);
+    FP_mul(P->y,P->y,izn);
+    FP_reduce(P->y);
+
+#endif
+#if CURVETYPE==EDWARDS
+    FP_one(one);
+    if (BIG_comp(P->z,one)==0) return;
+    BIG_rcopy(m,Modulus);
+
+    FP_redc(P->z);
+    BIG_invmodp(iz,P->z,m);
+    FP_nres(iz);
+
+    FP_mul(P->x,P->x,iz);
+    FP_mul(P->y,P->y,iz);
+    FP_reduce(P->y);
+
+#endif
+#if CURVETYPE==MONTGOMERY
+    if (ECP_isinf(P)) return;
+    FP_one(one);
+    if (BIG_comp(P->z,one)==0) return;
+
+    BIG_rcopy(m,Modulus);
+
+    FP_redc(P->z);
+    BIG_invmodp(iz,P->z,m);
+    FP_nres(iz);
+
+    FP_mul(P->x,P->x,iz);
+
+#endif
+    FP_reduce(P->x);
+    BIG_copy(P->z,one);
+}
+
+/* SU=120 */
+void ECP_outputxyz(ECP *P)
+{
+    BIG x,z;
+    if (ECP_isinf(P))
+    {
+        printf("Infinity\n");
+        return;
+    }
+    BIG_copy(x,P->x);
+    FP_reduce(x);
+    FP_redc(x);
+    BIG_copy(z,P->z);
+    FP_reduce(z);
+    FP_redc(z);
+
+#if CURVETYPE!=MONTGOMERY
+    BIG y;
+    BIG_copy(y,P->y);
+    FP_reduce(y);
+    FP_redc(y);
+    printf("(");
+    BIG_output(x);
+    printf(",");
+    BIG_output(y);
+    printf(",");
+    BIG_output(z);
+    printf(")\n");
+
+#else
+    printf("(");
+    BIG_output(x);
+    printf(",");
+    BIG_output(z);
+    printf(")\n");
+#endif
+}
+
+/* SU=16 */
+/* Output point P */
+void ECP_output(ECP *P)
+{
+    if (ECP_isinf(P))
+    {
+        printf("Infinity\n");
+        return;
+    }
+    ECP_affine(P);
+#if CURVETYPE!=MONTGOMERY
+    FP_redc(P->x);
+    FP_redc(P->y);
+    printf("(");
+    BIG_output(P->x);
+    printf(",");
+    BIG_output(P->y);
+    printf(")\n");
+    FP_nres(P->x);
+    FP_nres(P->y);
+#else
+    FP_redc(P->x);
+    printf("(");
+    BIG_output(P->x);
+    printf(")\n");
+    FP_nres(P->x);
+#endif
+}
+
+
+/* SU=88 */
+/* Convert P to octet string */
+void ECP_toOctet(octet *W,ECP *P)
+{
+#if CURVETYPE==MONTGOMERY
+    BIG x;
+    ECP_get(x,P);
+    W->len=MODBYTES+1;
+    W->val[0]=6;
+    BIG_toBytes(&(W->val[1]),x);
+#else
+    BIG x,y;
+    ECP_get(x,y,P);
+    W->len=2*MODBYTES+1;
+    W->val[0]=4;
+    BIG_toBytes(&(W->val[1]),x);
+    BIG_toBytes(&(W->val[MODBYTES+1]),y);
+#endif
+}
+
+/* SU=88 */
+/* Restore P from octet string */
+int ECP_fromOctet(ECP *P,octet *W)
+{
+#if CURVETYPE==MONTGOMERY
+    BIG x;
+    BIG_fromBytes(x,&(W->val[1]));
+    if (ECP_set(P,x)) return 1;
+    return 0;
+#else
+    BIG x,y;
+    BIG_fromBytes(x,&(W->val[1]));
+    BIG_fromBytes(y,&(W->val[MODBYTES+1]));
+    if (ECP_set(P,x,y)) return 1;
+    return 0;
+#endif
+}
+
+
+/* Set P=2P */
+/* SU=272 */
+void ECP_dbl(ECP *P)
+{
+#if CURVETYPE==WEIERSTRASS
+    BIG one;
+    BIG w1,w7,w8,w2,w3,w6;
+    if (ECP_isinf(P)) return;
+
+    if (BIG_iszilch(P->y))
+    {
+        P->inf=1;
+        return;
+    }
+    FP_one(one);
+    BIG_zero(w6);
+
+    if (CURVE_A==-3)
+    {
+        if (BIG_comp(P->z,one)==0) BIG_copy(w6,one);
+        else FP_sqr(w6,P->z);
+        FP_neg(w1,w6);
+        FP_add(w3,P->x,w1);
+        FP_add(w8,P->x,w6);
+        FP_mul(w3,w3,w8);
+        BIG_imul(w8,w3,3);
+    }
+    else
+    {
+        /* assuming A=0 */
+        FP_sqr(w1,P->x);
+        BIG_imul(w8,w1,3);
+    }
+
+    FP_sqr(w2,P->y);
+    FP_mul(w3,P->x,w2);
+
+    BIG_imul(w3,w3,4);
+    FP_neg(w1,w3);
+
+    BIG_norm(w1);
+
+    FP_sqr(P->x,w8);
+    FP_add(P->x,P->x,w1);
+    FP_add(P->x,P->x,w1);
+
+    BIG_norm(P->x);
+
+    if (BIG_comp(P->z,one)==0) BIG_copy(P->z,P->y);
+    else FP_mul(P->z,P->z,P->y);
+    FP_add(P->z,P->z,P->z);
+
+
+    FP_add(w7,w2,w2);
+    FP_sqr(w2,w7);
+
+    FP_add(w2,w2,w2);
+    FP_sub(w3,w3,P->x);
+    FP_mul(P->y,w8,w3);
+
+    FP_sub(P->y,P->y,w2);
+
+    BIG_norm(P->y);
+    BIG_norm(P->z);
+
+#endif
+
+#if CURVETYPE==EDWARDS
+    /* Not using square for multiplication swap, as (1) it needs more adds, 
and (2) it triggers more reductions */
+    BIG B,C,D,E,F,H,J;
+
+    FP_mul(B,P->x,P->y);
+    FP_add(B,B,B);
+    FP_sqr(C,P->x);
+    FP_sqr(D,P->y);
+    if (CURVE_A==1) BIG_copy(E,C);
+    if (CURVE_A==-1) FP_neg(E,C);
+    FP_add(F,E,D);
+
+    BIG_norm(F);
+
+    FP_sqr(H,P->z);
+    FP_add(H,H,H);
+    FP_sub(J,F,H);
+    FP_mul(P->x,B,J);
+    FP_sub(E,E,D);
+    FP_mul(P->y,F,E);
+    FP_mul(P->z,F,J);
+
+    BIG_norm(P->x);
+    BIG_norm(P->y);
+    BIG_norm(P->z);
+
+#endif
+
+#if CURVETYPE==MONTGOMERY
+    BIG A,B,AA,BB,C;
+    if (ECP_isinf(P)) return;
+
+    FP_add(A,P->x,P->z);
+    FP_sqr(AA,A);
+    FP_sub(B,P->x,P->z);
+    FP_sqr(BB,B);
+    FP_sub(C,AA,BB);
+
+    FP_mul(P->x,AA,BB);
+    FP_imul(A,C,(CURVE_A+2)/4);
+    FP_add(BB,BB,A);
+    FP_mul(P->z,BB,C);
+
+    BIG_norm(P->x);
+    BIG_norm(P->z);
+#endif
+}
+
+#if CURVETYPE==MONTGOMERY
+
+/* Set P+=Q. W is difference between P and Q and is affine */
+void ECP_add(ECP *P,ECP *Q,ECP *W)
+{
+    BIG A,B,C,D,DA,CB;
+
+    FP_add(A,P->x,P->z);
+    FP_sub(B,P->x,P->z);
+
+    FP_add(C,Q->x,Q->z);
+    FP_sub(D,Q->x,Q->z);
+
+    FP_mul(DA,D,A);
+    FP_mul(CB,C,B);
+
+    FP_add(A,DA,CB);
+    FP_sqr(A,A);
+    FP_sub(B,DA,CB);
+    FP_sqr(B,B);
+
+    BIG_copy(P->x,A);
+    FP_mul(P->z,W->x,B);
+
+    FP_reduce(P->z);
+    if (BIG_iszilch(P->z)) P->inf=1;
+    else P->inf=0;
+
+    BIG_norm(P->x);
+}
+
+
+#else
+
+/* Set P+=Q */
+/* SU=248 */
+void ECP_add(ECP *P,ECP *Q)
+{
+#if CURVETYPE==WEIERSTRASS
+    int aff;
+    BIG one,B,D,E,C,A;
+    if (ECP_isinf(Q)) return;
+    if (ECP_isinf(P))
+    {
+        ECP_copy(P,Q);
+        return;
+    }
+
+    FP_one(one);
+    aff=1;
+    if (BIG_comp(Q->z,one)!=0) aff=0;
+
+    if (!aff)
+    {
+        FP_sqr(A,Q->z);
+        FP_mul(C,A,Q->z);
+
+        FP_sqr(B,P->z);
+        FP_mul(D,B,P->z);
+
+        FP_mul(A,P->x,A);
+        FP_mul(C,P->y,C);
+    }
+    else
+    {
+        BIG_copy(A,P->x);
+        BIG_copy(C,P->y);
+
+        FP_sqr(B,P->z);
+        FP_mul(D,B,P->z);
+    }
+
+    FP_mul(B,Q->x,B);
+    FP_sub(B,B,A); /* B=Qx.z^2-x.Qz^2 */
+    FP_mul(D,Q->y,D);
+    FP_sub(D,D,C); /* D=Qy.z^3-y.Qz^3 */
+
+    FP_reduce(B);
+    if (BIG_iszilch(B))
+    {
+        FP_reduce(D);
+        if (BIG_iszilch(D))
+        {
+            ECP_dbl(P);
+            return;
+        }
+        else
+        {
+            ECP_inf(P);
+            return;
+        }
+    }
+    if (!aff) FP_mul(P->z,P->z,Q->z);
+    FP_mul(P->z,P->z,B);
+
+    FP_sqr(E,B);
+    FP_mul(B,B,E);
+    FP_mul(A,A,E);
+
+    FP_add(E,A,A);
+    FP_add(E,E,B);
+
+    FP_sqr(P->x,D);
+    FP_sub(P->x,P->x,E);
+
+    FP_sub(A,A,P->x);
+    FP_mul(P->y,A,D);
+    FP_mul(C,C,B);
+    FP_sub(P->y,P->y,C);
+
+    BIG_norm(P->x);
+    BIG_norm(P->y);
+    BIG_norm(P->z);
+
+#else
+    BIG b,A,B,C,D,E,F,G;
+
+    BIG_rcopy(b,CURVE_B);
+    FP_nres(b);
+    FP_mul(A,P->z,Q->z);
+
+    FP_sqr(B,A);
+    FP_mul(C,P->x,Q->x);
+    FP_mul(D,P->y,Q->y);
+    FP_mul(E,C,D);
+    FP_mul(E,E,b);
+
+    FP_sub(F,B,E);
+    FP_add(G,B,E);
+
+    if (CURVE_A==1) FP_sub(E,D,C);
+    FP_add(C,C,D);
+
+    FP_add(B,P->x,P->y);
+    FP_add(D,Q->x,Q->y);
+    FP_mul(B,B,D);
+    FP_sub(B,B,C);
+    FP_mul(B,B,F);
+    FP_mul(P->x,A,B);
+
+
+    if (CURVE_A==1) FP_mul(C,E,G);
+    if (CURVE_A==-1)FP_mul(C,C,G);
+
+    FP_mul(P->y,A,C);
+    FP_mul(P->z,F,G);
+
+    BIG_norm(P->x);
+    BIG_norm(P->y);
+    BIG_norm(P->z);
+
+#endif
+}
+
+/* Set P-=Q */
+/* SU=16 */
+void  ECP_sub(ECP *P,ECP *Q)
+{
+    ECP_neg(Q);
+    ECP_add(P,Q);
+    ECP_neg(Q);
+}
+
+#endif
+
+
+#if CURVETYPE==WEIERSTRASS
+/* normalises array of points. Assumes P[0] is normalised already */
+
+static void ECP_multiaffine(int m,ECP P[],BIG work[])
+{
+    int i;
+    BIG t1,t2;
+
+    FP_one(work[0]);
+    BIG_copy(work[1],P[0].z);
+    for (i=2; i<m; i++)
+        FP_mul(work[i],work[i-1],P[i-1].z);
+
+    FP_mul(t1,work[m-1],P[m-1].z);
+    FP_inv(t1,t1);
+
+    BIG_copy(t2,P[m-1].z);
+    FP_mul(work[m-1],work[m-1],t1);
+
+    for (i=m-2;; i--)
+    {
+        if (i==0)
+        {
+            FP_mul(work[0],t1,t2);
+            break;
+        }
+        FP_mul(work[i],work[i],t2);
+        FP_mul(work[i],work[i],t1);
+        FP_mul(t2,P[i].z,t2);
+    }
+    /* now work[] contains inverses of all Z coordinates */
+
+    for (i=0; i<m; i++)
+    {
+        FP_one(P[i].z);
+        FP_sqr(t1,work[i]);
+        FP_mul(P[i].x,P[i].x,t1);
+        FP_mul(t1,work[i],t1);
+        FP_mul(P[i].y,P[i].y,t1);
+    }
+}
+
+#endif
+
+#if CURVETYPE!=MONTGOMERY
+/* constant time multiply by small integer of length bts - use ladder */
+void ECP_pinmul(ECP *P,int e,int bts)
+{
+    int i,b;
+    ECP R0,R1;
+
+    ECP_affine(P);
+    ECP_inf(&R0);
+    ECP_copy(&R1,P);
+
+    for (i=bts-1; i>=0; i--)
+    {
+        b=(e>>i)&1;
+        ECP_copy(P,&R1);
+        ECP_add(P,&R0);
+        ECP_cswap(&R0,&R1,b);
+        ECP_copy(&R1,P);
+        ECP_dbl(&R0);
+        ECP_cswap(&R0,&R1,b);
+    }
+    ECP_copy(P,&R0);
+    ECP_affine(P);
+}
+#endif
+
+/* Set P=r*P */
+/* SU=424 */
+void ECP_mul(ECP *P,BIG e)
+{
+#if CURVETYPE==MONTGOMERY
+    /* Montgomery ladder */
+    int nb,i,b;
+    ECP R0,R1,D;
+    if (ECP_isinf(P)) return;
+    if (BIG_iszilch(e))
+    {
+        ECP_inf(P);
+        return;
+    }
+    ECP_affine(P);
+
+    ECP_copy(&R0,P);
+    ECP_copy(&R1,P);
+    ECP_dbl(&R1);
+    ECP_copy(&D,P);
+
+    nb=BIG_nbits(e);
+    for (i=nb-2; i>=0; i--)
+    {
+        b=BIG_bit(e,i);
+        ECP_copy(P,&R1);
+        ECP_add(P,&R0,&D);
+        ECP_cswap(&R0,&R1,b);
+        ECP_copy(&R1,P);
+        ECP_dbl(&R0);
+        ECP_cswap(&R0,&R1,b);
+    }
+    ECP_copy(P,&R0);
+
+#else
+    /* fixed size windows */
+    int i,nb,s,ns;
+    BIG mt,t;
+    ECP Q,W[8],C;
+    sign8 w[1+(NLEN*BASEBITS+3)/4];
+#if CURVETYPE==WEIERSTRASS
+    BIG work[8];
+#endif
+    if (ECP_isinf(P)) return;
+    if (BIG_iszilch(e))
+    {
+        ECP_inf(P);
+        return;
+    }
+
+    ECP_affine(P);
+
+    /* precompute table */
+
+    ECP_copy(&Q,P);
+    ECP_dbl(&Q);
+
+//printf("Q= ");ECP_output(&Q); printf("\n");
+
+    ECP_copy(&W[0],P);
+
+    for (i=1; i<8; i++)
+    {
+        ECP_copy(&W[i],&W[i-1]);
+        ECP_add(&W[i],&Q);
+    }
+
+//printf("W[1]= ");ECP_output(&W[1]); printf("\n");
+
+    /* convert the table to affine */
+#if CURVETYPE==WEIERSTRASS
+    ECP_multiaffine(8,W,work);
+#endif
+
+    /* make exponent odd - add 2P if even, P if odd */
+    BIG_copy(t,e);
+    s=BIG_parity(t);
+    BIG_inc(t,1);
+    BIG_norm(t);
+    ns=BIG_parity(t);
+    BIG_copy(mt,t);
+    BIG_inc(mt,1);
+    BIG_norm(mt);
+    BIG_cmove(t,mt,s);
+    ECP_cmove(&Q,P,ns);
+    ECP_copy(&C,&Q);
+
+    nb=1+(BIG_nbits(t)+3)/4;
+
+    /* convert exponent to signed 4-bit window */
+    for (i=0; i<nb; i++)
+    {
+        w[i]=BIG_lastbits(t,5)-16;
+        BIG_dec(t,w[i]);
+        BIG_norm(t);
+        BIG_fshr(t,4);
+    }
+    w[nb]=BIG_lastbits(t,5);
+
+    ECP_copy(P,&W[(w[nb]-1)/2]);
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP_select(&Q,W,w[i]);
+        ECP_dbl(P);
+        ECP_dbl(P);
+        ECP_dbl(P);
+        ECP_dbl(P);
+        ECP_add(P,&Q);
+    }
+    ECP_sub(P,&C); /* apply correction */
+#endif
+    ECP_affine(P);
+}
+
+#if CURVETYPE!=MONTGOMERY
+/* Set P=eP+fQ double multiplication */
+/* constant time - as useful for GLV method in pairings */
+/* SU=456 */
+
+void ECP_mul2(ECP *P,ECP *Q,BIG e,BIG f)
+{
+    BIG te,tf,mt;
+    ECP S,T,W[8],C;
+    sign8 w[1+(NLEN*BASEBITS+1)/2];
+    int i,a,b,s,ns,nb;
+#if CURVETYPE==WEIERSTRASS
+    BIG work[8];
+#endif
+
+    ECP_affine(P);
+    ECP_affine(Q);
+
+    BIG_copy(te,e);
+    BIG_copy(tf,f);
+
+    /* precompute table */
+    ECP_copy(&W[1],P);
+    ECP_sub(&W[1],Q);  /* P+Q */
+    ECP_copy(&W[2],P);
+    ECP_add(&W[2],Q);  /* P-Q */
+    ECP_copy(&S,Q);
+    ECP_dbl(&S);  /* S=2Q */
+    ECP_copy(&W[0],&W[1]);
+    ECP_sub(&W[0],&S);
+    ECP_copy(&W[3],&W[2]);
+    ECP_add(&W[3],&S);
+    ECP_copy(&T,P);
+    ECP_dbl(&T); /* T=2P */
+    ECP_copy(&W[5],&W[1]);
+    ECP_add(&W[5],&T);
+    ECP_copy(&W[6],&W[2]);
+    ECP_add(&W[6],&T);
+    ECP_copy(&W[4],&W[5]);
+    ECP_sub(&W[4],&S);
+    ECP_copy(&W[7],&W[6]);
+    ECP_add(&W[7],&S);
+
+#if CURVETYPE==WEIERSTRASS
+    ECP_multiaffine(8,W,work);
+#endif
+
+    /* if multiplier is odd, add 2, else add 1 to multiplier, and add 2P or P 
to correction */
+
+    s=BIG_parity(te);
+    BIG_inc(te,1);
+    BIG_norm(te);
+    ns=BIG_parity(te);
+    BIG_copy(mt,te);
+    BIG_inc(mt,1);
+    BIG_norm(mt);
+    BIG_cmove(te,mt,s);
+    ECP_cmove(&T,P,ns);
+    ECP_copy(&C,&T);
+
+    s=BIG_parity(tf);
+    BIG_inc(tf,1);
+    BIG_norm(tf);
+    ns=BIG_parity(tf);
+    BIG_copy(mt,tf);
+    BIG_inc(mt,1);
+    BIG_norm(mt);
+    BIG_cmove(tf,mt,s);
+    ECP_cmove(&S,Q,ns);
+    ECP_add(&C,&S);
+
+    BIG_add(mt,te,tf);
+    BIG_norm(mt);
+    nb=1+(BIG_nbits(mt)+1)/2;
+
+    /* convert exponent to signed 2-bit window */
+    for (i=0; i<nb; i++)
+    {
+        a=BIG_lastbits(te,3)-4;
+        BIG_dec(te,a);
+        BIG_norm(te);
+        BIG_fshr(te,2);
+        b=BIG_lastbits(tf,3)-4;
+        BIG_dec(tf,b);
+        BIG_norm(tf);
+        BIG_fshr(tf,2);
+        w[i]=4*a+b;
+    }
+    w[nb]=(4*BIG_lastbits(te,3)+BIG_lastbits(tf,3));
+
+    ECP_copy(P,&W[(w[nb]-1)/2]);
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP_select(&T,W,w[i]);
+        ECP_dbl(P);
+        ECP_dbl(P);
+        ECP_add(P,&T);
+    }
+    ECP_sub(P,&C); /* apply correction */
+    ECP_affine(P);
+}
+
+#endif
+
+
+#ifdef HAS_MAIN
+
+int main()
+{
+    int i;
+    ECP G,P;
+    csprng RNG;
+    BIG r,s,x,y,b,m,w,q;
+    BIG_rcopy(x,CURVE_Gx);
+#if CURVETYPE!=MONTGOMERY
+    BIG_rcopy(y,CURVE_Gy);
+#endif
+    BIG_rcopy(m,Modulus);
+
+    printf("x= ");
+    BIG_output(x);
+    printf("\n");
+#if CURVETYPE!=MONTGOMERY
+    printf("y= ");
+    BIG_output(y);
+    printf("\n");
+#endif
+    RNG_seed(&RNG,3,"abc");
+
+#if CURVETYPE!=MONTGOMERY
+    ECP_set(&G,x,y);
+#else
+    ECP_set(&G,x);
+#endif
+    if (ECP_isinf(&G)) printf("Failed to set - point not on curve\n");
+    else printf("set success\n");
+
+    ECP_output(&G);
+
+    BIG_rcopy(r,CURVE_Order); //BIG_dec(r,7);
+    printf("r= ");
+    BIG_output(r);
+    printf("\n");
+
+    ECP_copy(&P,&G);
+
+    ECP_mul(&P,r);
+
+    ECP_output(&P);
+//exit(0);
+    BIG_randomnum(w,&RNG);
+    BIG_mod(w,r);
+
+    ECP_copy(&P,&G);
+    ECP_mul(&P,w);
+
+    ECP_output(&P);
+
+    return 0;
+}
+
+#endif


http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ecp2.c
----------------------------------------------------------------------
diff --git a/version22/c/ecp2.c b/version22/c/ecp2.c
new file mode 100644
index 0000000..4808569
--- /dev/null
+++ b/version22/c/ecp2.c
@@ -0,0 +1,696 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Weierstrass elliptic curve functions over FP2 */
+/* SU=m, m is Stack Usage */
+
+#include "amcl.h"
+
+int ECP2_isinf(ECP2 *P)
+{
+    return P->inf;
+}
+
+/* Set P=Q */
+/* SU= 16 */
+void ECP2_copy(ECP2 *P,ECP2 *Q)
+{
+    P->inf=Q->inf;
+    FP2_copy(&(P->x),&(Q->x));
+    FP2_copy(&(P->y),&(Q->y));
+    FP2_copy(&(P->z),&(Q->z));
+}
+
+/* set P to Infinity */
+/* SU= 8 */
+void ECP2_inf(ECP2 *P)
+{
+    P->inf=1;
+    FP2_zero(&(P->x));
+    FP2_zero(&(P->y));
+    FP2_zero(&(P->z));
+}
+
+/* Conditional move Q to P dependant on d */
+static void ECP2_cmove(ECP2 *P,ECP2 *Q,int d)
+{
+    FP2_cmove(&(P->x),&(Q->x),d);
+    FP2_cmove(&(P->y),&(Q->y),d);
+    FP2_cmove(&(P->z),&(Q->z),d);
+    d=~(d-1);
+    P->inf^=(P->inf^Q->inf)&d;
+}
+
+/* return 1 if b==c, no branching */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 x=b^c;
+    x-=1;  // if x=0, x now -1
+    return (int)((x>>31)&1);
+}
+
+/* Constant time select from pre-computed table */
+static void ECP2_select(ECP2 *P,ECP2 W[],sign32 b)
+{
+    ECP2 MP;
+    sign32 m=b>>31;
+    sign32 babs=(b^m)-m;
+
+    babs=(babs-1)/2;
+
+    ECP2_cmove(P,&W[0],teq(babs,0));  // conditional move
+    ECP2_cmove(P,&W[1],teq(babs,1));
+    ECP2_cmove(P,&W[2],teq(babs,2));
+    ECP2_cmove(P,&W[3],teq(babs,3));
+    ECP2_cmove(P,&W[4],teq(babs,4));
+    ECP2_cmove(P,&W[5],teq(babs,5));
+    ECP2_cmove(P,&W[6],teq(babs,6));
+    ECP2_cmove(P,&W[7],teq(babs,7));
+
+    ECP2_copy(&MP,P);
+    ECP2_neg(&MP);  // minus P
+    ECP2_cmove(P,&MP,(int)(m&1));
+}
+
+/* return 1 if P==Q, else 0 */
+/* SU= 312 */
+int ECP2_equals(ECP2 *P,ECP2 *Q)
+{
+    FP2 pz2,qz2,a,b;
+    if (P->inf && Q->inf) return 1;
+    if (P->inf || Q->inf) return 0;
+
+    FP2_sqr(&pz2,&(P->z));
+    FP2_sqr(&qz2,&(Q->z));
+
+    FP2_mul(&a,&(P->x),&qz2);
+    FP2_mul(&b,&(Q->x),&pz2);
+    if (!FP2_equals(&a,&b)) return 0;
+
+    FP2_mul(&a,&(P->y),&qz2);
+    FP2_mul(&a,&a,&(Q->z));
+    FP2_mul(&b,&(Q->y),&pz2);
+    FP2_mul(&b,&b,&(P->z));
+    if (!FP2_equals(&a,&b)) return 0;
+    return 1;
+}
+
+/* Make P affine (so z=1) */
+/* SU= 232 */
+void ECP2_affine(ECP2 *P)
+{
+    FP2 one,iz,izn;
+    if (P->inf) return;
+
+    FP2_one(&one);
+    if (FP2_isunity(&(P->z)))
+    {
+        FP2_reduce(&(P->x));
+        FP2_reduce(&(P->y));
+        return;
+    }
+
+    FP2_inv(&iz,&(P->z));
+    FP2_sqr(&izn,&iz);
+    FP2_mul(&(P->x),&(P->x),&izn);
+    FP2_mul(&izn,&izn,&iz);
+    FP2_mul(&(P->y),&(P->y),&izn);
+
+    FP2_reduce(&(P->x));
+    FP2_reduce(&(P->y));
+    FP2_copy(&(P->z),&one);
+}
+
+/* extract x, y from point P */
+/* SU= 16 */
+int ECP2_get(FP2 *x,FP2 *y,ECP2 *P)
+{
+    if (P->inf) return -1;
+    ECP2_affine(P);
+    FP2_copy(y,&(P->y));
+    FP2_copy(x,&(P->x));
+    return 0;
+}
+
+/* SU= 152 */
+/* Output point P */
+void ECP2_output(ECP2 *P)
+{
+    FP2 x,y;
+    if (P->inf)
+    {
+        printf("Infinity\n");
+        return;
+    }
+    ECP2_get(&x,&y,P);
+    printf("(");
+    FP2_output(&x);
+    printf(",");
+    FP2_output(&y);
+    printf(")\n");
+}
+
+/* SU= 232 */
+void ECP2_outputxyz(ECP2 *P)
+{
+    ECP2 Q;
+    if (P->inf)
+    {
+        printf("Infinity\n");
+        return;
+    }
+    ECP2_copy(&Q,P);
+    printf("(");
+    FP2_output(&(Q.x));
+    printf(",");
+    FP2_output(&(Q.y));
+    printf(",");
+    FP2_output(&(Q.z));
+    printf(")\n");
+}
+
+/* SU= 168 */
+/* Convert Q to octet string */
+void ECP2_toOctet(octet *W,ECP2 *Q)
+{
+    FP2 qx,qy;
+    ECP2_get(&qx,&qy,Q);
+    FP_redc(qx.a);
+    FP_redc(qx.b);
+    FP_redc(qy.a);
+    FP_redc(qy.b);
+    W->len=4*MODBYTES;
+
+    BIG_toBytes(&(W->val[0]),qx.a);
+    BIG_toBytes(&(W->val[MODBYTES]),qx.b);
+    BIG_toBytes(&(W->val[2*MODBYTES]),qy.a);
+    BIG_toBytes(&(W->val[3*MODBYTES]),qy.b);
+}
+
+/* SU= 176 */
+/* restore Q from octet string */
+int ECP2_fromOctet(ECP2 *Q,octet *W)
+{
+    FP2 qx,qy;
+    BIG_fromBytes(qx.a,&(W->val[0]));
+    BIG_fromBytes(qx.b,&(W->val[MODBYTES]));
+    BIG_fromBytes(qy.a,&(W->val[2*MODBYTES]));
+    BIG_fromBytes(qy.b,&(W->val[3*MODBYTES]));
+    FP_nres(qx.a);
+    FP_nres(qx.b);
+    FP_nres(qy.a);
+    FP_nres(qy.b);
+
+    if (ECP2_set(Q,&qx,&qy)) return 1;
+    return 0;
+}
+
+/* SU= 128 */
+/* Calculate RHS of twisted curve equation x^3+B/i */
+void ECP2_rhs(FP2 *rhs,FP2 *x)
+{
+    /* calculate RHS of elliptic curve equation */
+    FP2 t;
+    BIG b;
+    FP2_sqr(&t,x);
+
+    FP2_mul(rhs,&t,x);
+
+    /* Assuming CURVE_A=0 */
+
+    BIG_rcopy(b,CURVE_B);
+
+    FP2_from_BIG(&t,b);
+
+    FP2_div_ip(&t);   /* IMPORTANT - here we use the SEXTIC twist of the curve 
*/
+
+    FP2_add(rhs,&t,rhs);
+    FP2_reduce(rhs);
+}
+
+
+/* Set P=(x,y). Return 1 if (x,y) is on the curve, else return 0*/
+/* SU= 232 */
+int ECP2_set(ECP2 *P,FP2 *x,FP2 *y)
+{
+    FP2 one,rhs,y2;
+    FP2_copy(&y2,y);
+
+    FP2_sqr(&y2,&y2);
+    ECP2_rhs(&rhs,x);
+
+    if (!FP2_equals(&y2,&rhs))
+    {
+
+        P->inf=1;
+        return 0;
+    }
+
+    P->inf=0;
+    FP2_copy(&(P->x),x);
+    FP2_copy(&(P->y),y);
+
+    FP2_one(&one);
+    FP2_copy(&(P->z),&one);
+    return 1;
+}
+
+/* Set P=(x,y). Return 1 if (x,.) is on the curve, else return 0 */
+/* SU= 232 */
+int ECP2_setx(ECP2 *P,FP2 *x)
+{
+    FP2 y;
+    ECP2_rhs(&y,x);
+
+    if (!FP2_sqrt(&y,&y))
+    {
+        P->inf=1;
+        return 0;
+    }
+
+    P->inf=0;
+    FP2_copy(&(P->x),x);
+    FP2_copy(&(P->y),&y);
+    FP2_one(&(P->z));
+    return 1;
+}
+
+/* Set P=-P */
+/* SU= 8 */
+void ECP2_neg(ECP2 *P)
+{
+    FP2_neg(&(P->y),&(P->y));
+    FP2_norm(&(P->y));
+}
+
+/* R+=R */
+/* return -1 for Infinity, 0 for addition, 1 for doubling */
+/* SU= 448 */
+int ECP2_dbl(ECP2 *P)
+{
+    FP2 w1,w7,w8,w2,w3;
+    if (P->inf) return -1;
+
+    if (FP2_iszilch(&(P->y)))
+    {
+        P->inf=1;
+        return -1;
+    }
+
+    /* Assuming A=0 */
+    FP2_sqr(&w1,&(P->x));
+    FP2_imul(&w8,&w1,3);
+
+    FP2_sqr(&w2,&(P->y));
+    FP2_mul(&w3,&(P->x),&w2);
+    FP2_imul(&w3,&w3,4);
+
+    FP2_neg(&w1,&w3);
+
+    FP2_norm(&w1);
+
+    FP2_sqr(&(P->x),&w8);
+    FP2_add(&(P->x),&(P->x),&w1);
+    FP2_add(&(P->x),&(P->x),&w1);
+
+    FP2_norm(&(P->x));
+
+    if (FP2_isunity(&(P->z))) FP2_copy(&(P->z),&(P->y));
+    else FP2_mul(&(P->z),&(P->z),&(P->y));
+    FP2_add(&(P->z),&(P->z),&(P->z));
+
+    FP2_add(&w7,&w2,&w2);
+    FP2_sqr(&w2,&w7);
+
+    FP2_add(&w2,&w2,&w2);
+    FP2_sub(&w3,&w3,&(P->x));
+    FP2_mul(&(P->y),&w8,&w3);
+    FP2_sub(&(P->y),&(P->y),&w2);
+
+
+    FP2_norm(&(P->y));
+    FP2_norm(&(P->z));
+
+    return 1;
+}
+
+/* Set P+=Q */
+/* SU= 400 */
+int ECP2_add(ECP2 *P,ECP2 *Q)
+{
+    int aff;
+    FP2 B,D,E,C,A;
+    if (Q->inf) return 0;
+    if (P->inf)
+    {
+        ECP2_copy(P,Q);
+        return 0;
+    }
+
+    aff=1;
+    if (!FP2_isunity(&(Q->z))) aff=0;
+
+    if (!aff)
+    {
+        FP2_sqr(&A,&(Q->z));
+        FP2_mul(&C,&A,&(Q->z));
+
+        FP2_sqr(&B,&(P->z));
+        FP2_mul(&D,&B,&(P->z));
+
+        FP2_mul(&A,&(P->x),&A);
+        FP2_mul(&C,&(P->y),&C);
+    }
+    else
+    {
+        FP2_copy(&A,&(P->x));
+        FP2_copy(&C,&(P->y));
+
+        FP2_sqr(&B,&(P->z));
+        FP2_mul(&D,&B,&(P->z));
+    }
+
+    FP2_mul(&B,&(Q->x),&B);
+    FP2_sub(&B,&B,&A); /* B=Qx.z^2-x.Qz^2 */
+    FP2_mul(&D,&(Q->y),&D);
+    FP2_sub(&D,&D,&C); /* D=Qy.z^3-y.Qz^3 */
+
+    if (FP2_iszilch(&B))
+    {
+        if (FP2_iszilch(&D))
+        {
+            ECP2_dbl(P);
+            return 1;
+        }
+        else
+        {
+            ECP2_inf(P);
+            return -1;
+        }
+    }
+    if (!aff) FP2_mul(&(P->z),&(P->z),&(Q->z));
+    FP2_mul(&(P->z),&(P->z),&B);
+
+    FP2_sqr(&E,&B);
+    FP2_mul(&B,&B,&E);
+    FP2_mul(&A,&A,&E);
+
+    FP2_add(&E,&A,&A);
+    FP2_add(&E,&E,&B);
+
+    FP2_sqr(&(P->x),&D);
+    FP2_sub(&(P->x),&(P->x),&E);
+
+    FP2_sub(&A,&A,&(P->x));
+    FP2_mul(&(P->y),&A,&D);
+    FP2_mul(&C,&C,&B);
+    FP2_sub(&(P->y),&(P->y),&C);
+
+    FP2_norm(&(P->x));
+    FP2_norm(&(P->y));
+    FP2_norm(&(P->z));
+
+    return 0;
+}
+
+/* Set P-=Q */
+/* SU= 16 */
+void ECP2_sub(ECP2 *P,ECP2 *Q)
+{
+    ECP2_neg(Q);
+    ECP2_add(P,Q);
+    ECP2_neg(Q);
+}
+
+/* normalises m-array of ECP2 points. Requires work vector of m FP2s */
+/* SU= 200 */
+static void ECP2_multiaffine(int m,ECP2 *P,FP2 *work)
+{
+    int i;
+    FP2 t1,t2;
+
+    FP2_one(&work[0]);
+    FP2_copy(&work[1],&(P[0].z));
+    for (i=2; i<m; i++)
+        FP2_mul(&work[i],&work[i-1],&(P[i-1].z));
+    FP2_mul(&t1,&work[m-1],&(P[m-1].z));
+
+    FP2_inv(&t1,&t1);
+
+    FP2_copy(&t2,&(P[m-1].z));
+    FP2_mul(&work[m-1],&work[m-1],&t1);
+
+    for (i=m-2;; i--)
+    {
+        if (i==0)
+        {
+            FP2_mul(&work[0],&t1,&t2);
+            break;
+        }
+        FP2_mul(&work[i],&work[i],&t2);
+        FP2_mul(&work[i],&work[i],&t1);
+        FP2_mul(&t2,&(P[i].z),&t2);
+    }
+    /* now work[] contains inverses of all Z coordinates */
+
+    for (i=0; i<m; i++)
+    {
+        FP2_one(&(P[i].z));
+        FP2_sqr(&t1,&work[i]);
+        FP2_mul(&(P[i].x),&(P[i].x),&t1);
+        FP2_mul(&t1,&work[i],&t1);
+        FP2_mul(&(P[i].y),&(P[i].y),&t1);
+    }
+}
+
+/* P*=e */
+/* SU= 280 */
+void ECP2_mul(ECP2 *P,BIG e)
+{
+    /* fixed size windows */
+    int i,nb,s,ns;
+    BIG mt,t;
+    ECP2 Q,W[8],C;
+    sign8 w[1+(NLEN*BASEBITS+3)/4];
+    FP2 work[8];
+
+    if (ECP2_isinf(P)) return;
+    ECP2_affine(P);
+
+
+    /* precompute table */
+
+    ECP2_copy(&Q,P);
+    ECP2_dbl(&Q);
+    ECP2_copy(&W[0],P);
+
+    for (i=1; i<8; i++)
+    {
+        ECP2_copy(&W[i],&W[i-1]);
+        ECP2_add(&W[i],&Q);
+    }
+
+    /* convert the table to affine */
+
+    ECP2_multiaffine(8,W,work);
+
+    /* make exponent odd - add 2P if even, P if odd */
+    BIG_copy(t,e);
+    s=BIG_parity(t);
+    BIG_inc(t,1);
+    BIG_norm(t);
+    ns=BIG_parity(t);
+    BIG_copy(mt,t);
+    BIG_inc(mt,1);
+    BIG_norm(mt);
+    BIG_cmove(t,mt,s);
+    ECP2_cmove(&Q,P,ns);
+    ECP2_copy(&C,&Q);
+
+    nb=1+(BIG_nbits(t)+3)/4;
+
+    /* convert exponent to signed 4-bit window */
+    for (i=0; i<nb; i++)
+    {
+        w[i]=BIG_lastbits(t,5)-16;
+        BIG_dec(t,w[i]);
+        BIG_norm(t);
+        BIG_fshr(t,4);
+    }
+    w[nb]=BIG_lastbits(t,5);
+
+    ECP2_copy(P,&W[(w[nb]-1)/2]);
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP2_select(&Q,W,w[i]);
+        ECP2_dbl(P);
+        ECP2_dbl(P);
+        ECP2_dbl(P);
+        ECP2_dbl(P);
+        ECP2_add(P,&Q);
+    }
+    ECP2_sub(P,&C); /* apply correction */
+    ECP2_affine(P);
+}
+
+/* Calculates q.P using Frobenius constant X */
+/* SU= 96 */
+void ECP2_frob(ECP2 *P,FP2 *X)
+{
+    FP2 X2;
+    if (P->inf) return;
+    FP2_sqr(&X2,X);
+    FP2_conj(&(P->x),&(P->x));
+    FP2_conj(&(P->y),&(P->y));
+    FP2_conj(&(P->z),&(P->z));
+    FP2_reduce(&(P->z));
+
+    FP2_mul(&(P->x),&X2,&(P->x));
+    FP2_mul(&(P->y),&X2,&(P->y));
+    FP2_mul(&(P->y),X,&(P->y));
+}
+
+void ECP2_mul4(ECP2 *P,ECP2 Q[4],BIG u[4])
+{
+    int i,j,a[4],nb;
+    ECP2 W[8],T,C;
+    BIG mt,t[4];
+    FP2 work[8];
+    sign8 w[NLEN*BASEBITS+1];
+
+    for (i=0; i<4; i++)
+    {
+        BIG_copy(t[i],u[i]);
+        ECP2_affine(&Q[i]);
+    }
+
+    /* precompute table */
+
+    ECP2_copy(&W[0],&Q[0]);
+    ECP2_sub(&W[0],&Q[1]);  /* P-Q */
+    ECP2_copy(&W[1],&W[0]);
+    ECP2_copy(&W[2],&W[0]);
+    ECP2_copy(&W[3],&W[0]);
+    ECP2_copy(&W[4],&Q[0]);
+    ECP2_add(&W[4],&Q[1]);  /* P+Q */
+    ECP2_copy(&W[5],&W[4]);
+    ECP2_copy(&W[6],&W[4]);
+    ECP2_copy(&W[7],&W[4]);
+
+    ECP2_copy(&T,&Q[2]);
+    ECP2_sub(&T,&Q[3]);       /* R-S */
+    ECP2_sub(&W[1],&T);
+    ECP2_add(&W[2],&T);
+    ECP2_sub(&W[5],&T);
+    ECP2_add(&W[6],&T);
+    ECP2_copy(&T,&Q[2]);
+    ECP2_add(&T,&Q[3]);      /* R+S */
+    ECP2_sub(&W[0],&T);
+    ECP2_add(&W[3],&T);
+    ECP2_sub(&W[4],&T);
+    ECP2_add(&W[7],&T);
+
+    ECP2_multiaffine(8,W,work);
+
+    /* if multiplier is even add 1 to multiplier, and add P to correction */
+    ECP2_inf(&C);
+
+    BIG_zero(mt);
+    for (i=0; i<4; i++)
+    {
+        if (BIG_parity(t[i])==0)
+        {
+            BIG_inc(t[i],1);
+            BIG_norm(t[i]);
+            ECP2_add(&C,&Q[i]);
+        }
+        BIG_add(mt,mt,t[i]);
+        BIG_norm(mt);
+    }
+
+    nb=1+BIG_nbits(mt);
+
+    /* convert exponent to signed 1-bit window */
+    for (j=0; j<nb; j++)
+    {
+        for (i=0; i<4; i++)
+        {
+            a[i]=BIG_lastbits(t[i],2)-2;
+            BIG_dec(t[i],a[i]);
+            BIG_norm(t[i]);
+            BIG_fshr(t[i],1);
+        }
+        w[j]=8*a[0]+4*a[1]+2*a[2]+a[3];
+    }
+    
w[nb]=8*BIG_lastbits(t[0],2)+4*BIG_lastbits(t[1],2)+2*BIG_lastbits(t[2],2)+BIG_lastbits(t[3],2);
+
+    ECP2_copy(P,&W[(w[nb]-1)/2]);
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP2_select(&T,W,w[i]);
+        ECP2_dbl(P);
+        ECP2_add(P,&T);
+    }
+    ECP2_sub(P,&C); /* apply correction */
+
+    ECP2_affine(P);
+}
+
+/*
+
+int main()
+{
+       int i;
+       ECP2 G,P;
+       ECP2 *W;
+       FP2 x,y,w,z,f;
+       BIG r,xa,xb,ya,yb;
+
+       BIG_rcopy(xa,CURVE_Pxa);
+       BIG_rcopy(xb,CURVE_Pxb);
+       BIG_rcopy(ya,CURVE_Pya);
+       BIG_rcopy(yb,CURVE_Pyb);
+
+       FP2_from_BIGs(&x,xa,xb);
+       FP2_from_BIGs(&y,ya,yb);
+       ECP2_set(&G,&x,&y);
+       if (G.inf) printf("Failed to set - point not on curve\n");
+       else printf("set success\n");
+
+       ECP2_output(&G);
+
+//     BIG_copy(r,CURVE_Order);
+       BIG_rcopy(r,Modulus);
+
+       ECP2_copy(&P,&G);
+
+       ECP2_mul(&P,r);
+
+       ECP2_output(&P);
+
+       FP2_gfc(&f,12);
+
+       ECP2_frob(&G,&f);
+
+       ECP2_output(&G);
+
+       return 0;
+}
+
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/faster.c
----------------------------------------------------------------------
diff --git a/version22/c/faster.c b/version22/c/faster.c
new file mode 100644
index 0000000..7786880
--- /dev/null
+++ b/version22/c/faster.c
@@ -0,0 +1,98 @@
+
+#include <stdio.h>
+#include "amcl.h"
+
+#ifdef COMBA
+
+int main()
+{
+       int i,j,k,N;
+
+       N=NLEN;
+
+
+       printf("Insert this code in BIG_mul() in file big.c between #define 
UNWOUND and #else \n\n");
+
+       for (i=0;i<N;i++)
+               printf("\td[%d]=(dchunk)a[%d]*b[%d];\n",i,i,i);
+
+       printf("\n\ts=d[0];\n\tt = s; c[0]=(chunk)t&BMASK; co=t>>BASEBITS;\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\ts+=d[%d]; t=co+s ",k);
+               for (i=k;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n",k);
+       }
+       printf("\n");
+       for (k=N;k<2*N-1;k++)
+       {
+               printf("\ts-=d[%d]; t=co+s ",k-N);
+               for (i=N-1;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n",k);
+       }
+       printf("\tc[%d]=(chunk)co;\n",2*N-1);
+
+
+
+       printf("\nInsert this code in BIG_sqr() in file big.c between #define 
UNWOUND and #else \n\n");
+
+       printf("\n\tt=(dchunk)a[0]*a[0]; c[0]=(chunk)t&BMASK; 
co=t>>BASEBITS;\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\tt= ",k);
+               for (i=k;i>=1+k/2;i--)
+                       printf("+(dchunk)a[%d]*a[%d]",i,k-i);
+               printf("; t+=t; t+=co;");
+               if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2);
+               printf(" c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n", k);
+       }
+       printf("\n");
+
+       for (k=N;k<2*N-2;k++)
+       {
+               printf("\tt= ",k-N);
+               for (i=N-1;i>=1+k/2;i--)
+                       printf("+(dchunk)a[%d]*a[%d]",i,k-i);
+               printf("; t+=t; t+=co;");
+               if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2);
+               printf(" c[%d]=(chunk)t&BMASK; co=t>>BASEBITS; \n", k);
+       }
+       printf("\tt=co; t+=(dchunk)a[%d]*a[%d]; c[%d]=(chunk)t&BMASK; 
co=t>>BASEBITS; \n ",N-1,N-1,2*N-2);
+
+       printf("\tc[%d]=(chunk)co;\n",2*N-1);
+
+
+#if MODTYPE == NOT_SPECIAL
+
+       printf("\nInsert this code in BIG_monty() in file big.c between #define 
UNWOUND and #else \n\n");
+
+       printf("\tt=d[0]; v[0]=((chunk)t*MC)&BMASK; t+=(dchunk)v[0]*md[0];  
s=0; c=(t>>BASEBITS);\n\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\tt=d[%d]+c+s+(dchunk)v[0]*md[%d]",k,k);
+               for (i=k-1;i>k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; v[%d]=((chunk)t*MC)&BMASK; t+=(dchunk)v[%d]*md[0]; 
",k,k);
+               printf(" dd[%d]=(dchunk)v[%d]*md[%d]; s+=dd[%d]; 
c=(t>>BASEBITS); \n",k,k,k,k);
+       }
+       printf("\n");
+       for (k=N;k<2*N-1;k++)
+       {
+               printf("\tt=d[%d]+c+s",k);
+               for (i=N-1;i>=1+k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; a[%d]=(chunk)t&BMASK;  s-=dd[%d]; c=(t>>BASEBITS); 
\n",k-N,k-N+1);
+       }
+       printf("\ta[%d]=d[%d]+(chunk)c&BMASK;\n",N-1,2*N-1);    
+
+
+#endif
+
+}
+
+#endif
+
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/faster.txt
----------------------------------------------------------------------
diff --git a/version22/c/faster.txt b/version22/c/faster.txt
new file mode 100644
index 0000000..6995eab
--- /dev/null
+++ b/version22/c/faster.txt
@@ -0,0 +1,25 @@
+We assume than optimizing compilers will unwind loops at every opportunity. 
+
+But sometimes they don't. So time-critical code will run faster if we step
+in and unwind complex loops for the compiler.
+
+Once the architecture and ECC/RSA support is decided upon (that is amcl.h 
+and arch.h are settled), then compile and execute the program faster.c like 
+this (using MinGW port of GCC as an example), in the same directory as 
+arch.h and amcl.h
+
+gcc -O2 -std=c99 faster.c -o faster.exe
+faster > t.txt
+
+Now extract the code fragments from t.txt and insert them where indicated
+into big.c
+
+Finally make sure that
+
+#define UNWOUND
+
+appears somewhere in amcl.h
+
+Finally build the library as normal, and maybe get a 50% speed-up!
+If there is no significant improvement, don't use this method!
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/ff.c
----------------------------------------------------------------------
diff --git a/version22/c/ff.c b/version22/c/ff.c
new file mode 100644
index 0000000..3ae7029
--- /dev/null
+++ b/version22/c/ff.c
@@ -0,0 +1,1150 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL basic functions for Large Finite Field support */
+
+#include "amcl.h"
+
+/* Arazi and Qi inversion mod 256 */
+static int invmod256(int a)
+{
+    int U,t1,t2,b,c;
+    t1=0;
+    c=(a>>1)&1;
+    t1+=c;
+    t1&=1;
+    t1=2-t1;
+    t1<<=1;
+    U=t1+1;
+
+// i=2
+    b=a&3;
+    t1=U*b;
+    t1>>=2;
+    c=(a>>2)&3;
+    t2=(U*c)&3;
+    t1+=t2;
+    t1*=U;
+    t1&=3;
+    t1=4-t1;
+    t1<<=2;
+    U+=t1;
+
+// i=4
+    b=a&15;
+    t1=U*b;
+    t1>>=4;
+    c=(a>>4)&15;
+    t2=(U*c)&15;
+    t1+=t2;
+    t1*=U;
+    t1&=15;
+    t1=16-t1;
+    t1<<=4;
+    U+=t1;
+
+    return U;
+}
+
+/* a=1/a mod 2^BIGBITS. This is very fast! */
+void BIG_invmod2m(BIG a)
+{
+    int i;
+    BIG U,t1,b,c;
+    BIG_zero(U);
+    BIG_inc(U,invmod256(BIG_lastbits(a,8)));
+    for (i=8; i<BIGBITS; i<<=1)
+    {
+        BIG_copy(b,a);
+        BIG_mod2m(b,i);   // bottom i bits of a
+
+        BIG_smul(t1,U,b);
+        BIG_shr(t1,i); // top i bits of U*b
+
+        BIG_copy(c,a);
+        BIG_shr(c,i);
+        BIG_mod2m(c,i); // top i bits of a
+
+        BIG_smul(b,U,c);
+        BIG_mod2m(b,i);  // bottom i bits of U*c
+
+        BIG_add(t1,t1,b);
+        BIG_smul(b,t1,U);
+        BIG_copy(t1,b);  // (t1+b)*U
+        BIG_mod2m(t1,i);                               // bottom i bits of 
(t1+b)*U
+
+        BIG_one(b);
+        BIG_shl(b,i);
+        BIG_sub(t1,b,t1);
+        BIG_norm(t1);
+
+        BIG_shl(t1,i);
+
+        BIG_add(U,U,t1);
+    }
+    BIG_copy(a,U);
+    BIG_norm(a);
+    BIG_mod2m(a,BIGBITS);
+}
+
+/*
+void FF_rcopy(BIG x[],const BIG y[],int n)
+{
+       int i;
+       for (i=0;i<n;i++)
+               BIG_rcopy(x[i],y[i]);
+}
+*/
+
+/* x=y */
+void FF_copy(BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_copy(x[i],y[i]);
+}
+
+/* x=y<<n */
+static void FF_dsucopy(BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_copy(x[n+i],y[i]);
+        BIG_zero(x[i]);
+    }
+}
+
+/* x=y */
+static void FF_dscopy(BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_copy(x[i],y[i]);
+        BIG_zero(x[n+i]);
+    }
+}
+
+/* x=y>>n */
+static void FF_sducopy(BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_copy(x[i],y[n+i]);
+}
+
+/* set to zero */
+void FF_zero(BIG x[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_zero(x[i]);
+}
+
+/* test equals 0 */
+int FF_iszilch(BIG x[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        if (!BIG_iszilch(x[i])) return 0;
+    return 1;
+}
+
+/* shift right by BIGBITS-bit words */
+static void FF_shrw(BIG a[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_copy(a[i],a[i+n]);
+        BIG_zero(a[i+n]);
+    }
+}
+
+/* shift left by BIGBITS-bit words */
+static void FF_shlw(BIG a[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_copy(a[i+n],a[i]);
+        BIG_zero(a[i]);
+    }
+}
+
+/* extract last bit */
+int FF_parity(BIG x[])
+{
+    return BIG_parity(x[0]);
+}
+
+/* extract last m bits */
+int FF_lastbits(BIG x[],int m)
+{
+    return BIG_lastbits(x[0],m);
+}
+
+/* x=1 */
+void FF_one(BIG x[],int n)
+{
+    int i;
+    BIG_one(x[0]);
+    for (i=1; i<n; i++)
+        BIG_zero(x[i]);
+}
+
+/* x=m, where m is 32-bit int */
+void FF_init(BIG x[],sign32 m,int n)
+{
+    int i;
+    BIG_zero(x[0]);
+#if CHUNK<64
+    x[0][0]=(chunk)(m&BMASK);
+    x[0][1]=(chunk)(m>>BASEBITS);
+#else
+    x[0][0]=(chunk)m;
+#endif
+    for (i=1; i<n; i++)
+        BIG_zero(x[i]);
+}
+
+/* compare x and y - must be normalised */
+int FF_comp(BIG x[],BIG y[],int n)
+{
+    int i,j;
+    for (i=n-1; i>=0; i--)
+    {
+        j=BIG_comp(x[i],y[i]);
+        if (j!=0) return j;
+    }
+    return 0;
+}
+
+/* recursive add */
+static void FF_radd(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_add(z[zp+i],x[xp+i],y[yp+i]);
+}
+
+/* recursive inc */
+static void FF_rinc(BIG z[],int zp,BIG y[],int yp,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_add(z[zp+i],z[zp+i],y[yp+i]);
+}
+
+/* recursive sub */
+/*
+static void FF_rsub(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,int n)
+{
+       int i;
+       for (i=0;i<n;i++)
+               BIG_sub(z[zp+i],x[xp+i],y[yp+i]);
+}
+*/
+
+/* recursive dec */
+static void FF_rdec(BIG z[],int zp,BIG y[],int yp,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_sub(z[zp+i],z[zp+i],y[yp+i]);
+}
+
+/* simple add */
+void FF_add(BIG z[],BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_add(z[i],x[i],y[i]);
+}
+
+/* simple sub */
+void FF_sub(BIG z[],BIG x[],BIG y[],int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_sub(z[i],x[i],y[i]);
+}
+
+/* increment/decrement by a small integer */
+void FF_inc(BIG x[],int m,int n)
+{
+    BIG_inc(x[0],m);
+    FF_norm(x,n);
+}
+
+void FF_dec(BIG x[],int m,int n)
+{
+    BIG_dec(x[0],m);
+    FF_norm(x,n);
+}
+
+/* normalise - but hold any overflow in top part unless n<0 */
+static void FF_rnorm(BIG z[],int zp,int n)
+{
+    int i,trunc=0;
+    chunk carry;
+    if (n<0)
+    {
+        /* -v n signals to do truncation */
+        n=-n;
+        trunc=1;
+    }
+    for (i=0; i<n-1; i++)
+    {
+        carry=BIG_norm(z[zp+i]);
+
+        z[zp+i][NLEN-1]^=carry<<P_TBITS; /* remove it */
+        z[zp+i+1][0]+=carry;
+    }
+    carry=BIG_norm(z[zp+n-1]);
+    if (trunc) z[zp+n-1][NLEN-1]^=carry<<P_TBITS;
+}
+
+void FF_norm(BIG z[],int n)
+{
+    FF_rnorm(z,0,n);
+}
+
+/* shift left by one bit */
+void FF_shl(BIG x[],int n)
+{
+    int i;
+    int carry,delay_carry=0;
+    for (i=0; i<n-1; i++)
+    {
+        carry=BIG_fshl(x[i],1);
+        x[i][0]|=delay_carry;
+        x[i][NLEN-1]^=(chunk)carry<<P_TBITS;
+        delay_carry=carry;
+    }
+    BIG_fshl(x[n-1],1);
+    x[n-1][0]|=delay_carry;
+}
+
+/* shift right by one bit */
+void FF_shr(BIG x[],int n)
+{
+    int i;
+    int carry;
+    for (i=n-1; i>0; i--)
+    {
+        carry=BIG_fshr(x[i],1);
+        x[i-1][NLEN-1]|=(chunk)carry<<P_TBITS;
+    }
+    BIG_fshr(x[0],1);
+}
+
+void FF_output(BIG x[],int n)
+{
+    int i;
+    FF_norm(x,n);
+    for (i=n-1; i>=0; i--)
+    {
+        BIG_output(x[i]);
+        printf(" ");
+    }
+}
+
+void FF_rawoutput(BIG x[],int n)
+{
+    int i;
+    for (i=n-1; i>=0; i--)
+    {
+        BIG_rawoutput(x[i]);
+        printf(" ");
+    }
+}
+
+/* Convert FFs to/from octet strings */
+void FF_toOctet(octet *w,BIG x[],int n)
+{
+    int i;
+    w->len=n*MODBYTES;
+    for (i=0; i<n; i++)
+    {
+        BIG_toBytes(&(w->val[(n-i-1)*MODBYTES]),x[i]);
+    }
+}
+
+void FF_fromOctet(BIG x[],octet *w,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_fromBytes(x[i],&(w->val[(n-i-1)*MODBYTES]));
+    }
+}
+
+/* in-place swapping using xor - side channel resistant */
+static void FF_cswap(BIG a[],BIG b[],int d,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+        BIG_cswap(a[i],b[i],d);
+    return;
+}
+
+/* z=x*y, t is workspace */
+static void FF_karmul(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,BIG t[],int 
tp,int n)
+{
+    int nd2;
+    if (n==1)
+    {
+        BIG_mul(t[tp],x[xp],y[yp]);
+        BIG_split(z[zp+1],z[zp],t[tp],BIGBITS);
+        return;
+    }
+
+    nd2=n/2;
+    FF_radd(z,zp,x,xp,x,xp+nd2,nd2);
+    FF_rnorm(z,zp,nd2);  /* needs this if recursion level too deep */
+
+    FF_radd(z,zp+nd2,y,yp,y,yp+nd2,nd2);
+    FF_rnorm(z,zp+nd2,nd2);
+    FF_karmul(t,tp,z,zp,z,zp+nd2,t,tp+n,nd2);
+    FF_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2);
+    FF_karmul(z,zp+n,x,xp+nd2,y,yp+nd2,t,tp+n,nd2);
+    FF_rdec(t,tp,z,zp,n);
+    FF_rdec(t,tp,z,zp+n,n);
+    FF_rinc(z,zp+nd2,t,tp,n);
+    FF_rnorm(z,zp,2*n);
+}
+
+static void FF_karsqr(BIG z[],int zp,BIG x[],int xp,BIG t[],int tp,int n)
+{
+    int nd2;
+    if (n==1)
+    {
+        BIG_sqr(t[tp],x[xp]);
+        BIG_split(z[zp+1],z[zp],t[tp],BIGBITS);
+        return;
+    }
+    nd2=n/2;
+    FF_karsqr(z,zp,x,xp,t,tp+n,nd2);
+    FF_karsqr(z,zp+n,x,xp+nd2,t,tp+n,nd2);
+    FF_karmul(t,tp,x,xp,x,xp+nd2,t,tp+n,nd2);
+    FF_rinc(z,zp+nd2,t,tp,n);
+    FF_rinc(z,zp+nd2,t,tp,n);
+
+    FF_rnorm(z,zp+nd2,n);  /* was FF_rnorm(z,zp,2*n)  */
+}
+
+static void FF_karmul_lower(BIG z[],int zp,BIG x[],int xp,BIG y[],int yp,BIG 
t[],int tp,int n)
+{
+    /* Calculates Least Significant bottom half of x*y */
+    int nd2;
+    if (n==1)
+    {
+        /* only calculate bottom half of product */
+        BIG_smul(z[zp],x[xp],y[yp]);
+        return;
+    }
+    nd2=n/2;
+    FF_karmul(z,zp,x,xp,y,yp,t,tp+n,nd2);
+    FF_karmul_lower(t,tp,x,xp+nd2,y,yp,t,tp+n,nd2);
+    FF_rinc(z,zp+nd2,t,tp,nd2);
+    FF_karmul_lower(t,tp,x,xp,y,yp+nd2,t,tp+n,nd2);
+    FF_rinc(z,zp+nd2,t,tp,nd2);
+    FF_rnorm(z,zp+nd2,-nd2);  /* truncate it */
+}
+
+static void FF_karmul_upper(BIG z[],BIG x[],BIG y[],BIG t[],int n)
+{
+    /* Calculates Most Significant upper half of x*y, given lower part */
+    int nd2;
+
+    nd2=n/2;
+    FF_radd(z,n,x,0,x,nd2,nd2);
+    FF_radd(z,n+nd2,y,0,y,nd2,nd2);
+    FF_rnorm(z,n,nd2);
+    FF_rnorm(z,n+nd2,nd2);
+
+    FF_karmul(t,0,z,n+nd2,z,n,t,n,nd2);  /* t = (a0+a1)(b0+b1) */
+    FF_karmul(z,n,x,nd2,y,nd2,t,n,nd2); /* z[n]= a1*b1 */
+    /* z[0-nd2]=l(a0b0) z[nd2-n]= h(a0b0)+l(t)-l(a0b0)-l(a1b1) */
+    FF_rdec(t,0,z,n,n);              /* t=t-a1b1  */
+    FF_rinc(z,nd2,z,0,nd2);   /* z[nd2-n]+=l(a0b0) = h(a0b0)+l(t)-l(a1b1)  */
+    FF_rdec(z,nd2,t,0,nd2);   /* 
z[nd2-n]=h(a0b0)+l(t)-l(a1b1)-l(t-a1b1)=h(a0b0) */
+    FF_rnorm(z,0,-n);                                  /* a0b0 now in z - 
truncate it */
+    FF_rdec(t,0,z,0,n);         /* (a0+a1)(b0+b1) - a0b0 */
+    FF_rinc(z,nd2,t,0,n);
+
+    FF_rnorm(z,nd2,n);
+}
+
+/* z=x*y */
+void FF_mul(BIG z[],BIG x[],BIG y[],int n)
+{
+#ifndef C99
+    BIG t[2*FFLEN];
+#else
+    BIG t[2*n];
+#endif
+//     FF_norm(x,n); /* change here */
+//     FF_norm(y,n); /* change here */
+    FF_karmul(z,0,x,0,y,0,t,0,n);
+}
+
+/* return low part of product */
+static void FF_lmul(BIG z[],BIG x[],BIG y[],int n)
+{
+#ifndef C99
+    BIG t[2*FFLEN];
+#else
+    BIG t[2*n];
+#endif
+//     FF_norm(x,n); /* change here */
+//     FF_norm(y,n); /* change here */
+    FF_karmul_lower(z,0,x,0,y,0,t,0,n);
+}
+
+/* Set b=b mod c */
+void FF_mod(BIG b[],BIG c[],int n)
+{
+    int k=0;
+
+    FF_norm(b,n);
+    if (FF_comp(b,c,n)<0)
+        return;
+    do
+    {
+        FF_shl(c,n);
+        k++;
+    }
+    while (FF_comp(b,c,n)>=0);
+
+    while (k>0)
+    {
+        FF_shr(c,n);
+        if (FF_comp(b,c,n)>=0)
+        {
+            FF_sub(b,b,c,n);
+            FF_norm(b,n);
+        }
+        k--;
+    }
+}
+
+/* z=x^2 */
+void FF_sqr(BIG z[],BIG x[],int n)
+{
+#ifndef C99
+    BIG t[2*FFLEN];
+#else
+    BIG t[2*n];
+#endif
+//     FF_norm(x,n); /* change here */
+    FF_karsqr(z,0,x,0,t,0,n);
+}
+
+/* r=t mod modulus, N is modulus, ND is Montgomery Constant */
+static void FF_reduce(BIG r[],BIG T[],BIG N[],BIG ND[],int n)
+{
+    /* fast karatsuba Montgomery reduction */
+#ifndef C99
+    BIG t[2*FFLEN];
+    BIG m[FFLEN];
+#else
+    BIG t[2*n];
+    BIG m[n];
+#endif
+    FF_sducopy(r,T,n);  /* keep top half of T */
+    //FF_norm(T,n); /* change here */
+    FF_karmul_lower(m,0,T,0,ND,0,t,0,n);  /* m=T.(1/N) mod R */
+
+    //FF_norm(N,n);  /* change here */
+    FF_karmul_upper(T,N,m,t,n);  /* T=mN */
+    FF_sducopy(m,T,n);
+
+    FF_add(r,r,N,n);
+    FF_sub(r,r,m,n);
+    FF_norm(r,n);
+}
+
+
+/* Set r=a mod b */
+/* a is of length - 2*n */
+/* r,b is of length - n */
+void FF_dmod(BIG r[],BIG a[],BIG b[],int n)
+{
+    int k;
+#ifndef C99
+    BIG m[2*FFLEN];
+    BIG x[2*FFLEN];
+#else
+    BIG m[2*n];
+    BIG x[2*n];
+#endif
+    FF_copy(x,a,2*n);
+    FF_norm(x,2*n);
+    FF_dsucopy(m,b,n);
+    k=BIGBITS*n;
+
+    while (FF_comp(x,m,2*n)>=0)
+    {
+        FF_sub(x,x,m,2*n);
+        FF_norm(x,2*n);
+    }
+
+    while (k>0)
+    {
+        FF_shr(m,2*n);
+
+        if (FF_comp(x,m,2*n)>=0)
+        {
+            FF_sub(x,x,m,2*n);
+            FF_norm(x,2*n);
+        }
+
+        k--;
+    }
+    FF_copy(r,x,n);
+    FF_mod(r,b,n);
+}
+
+/* Set r=1/a mod p. Binary method - a<p on entry */
+
+void FF_invmodp(BIG r[],BIG a[],BIG p[],int n)
+{
+#ifndef C99
+    BIG u[FFLEN],v[FFLEN],x1[FFLEN],x2[FFLEN],t[FFLEN],one[FFLEN];
+#else
+    BIG u[n],v[n],x1[n],x2[n],t[n],one[n];
+#endif
+    FF_copy(u,a,n);
+    FF_copy(v,p,n);
+    FF_one(one,n);
+    FF_copy(x1,one,n);
+    FF_zero(x2,n);
+
+// reduce n in here as well!
+    while (FF_comp(u,one,n)!=0 && FF_comp(v,one,n)!=0)
+    {
+        while (FF_parity(u)==0)
+        {
+            FF_shr(u,n);
+            if (FF_parity(x1)!=0)
+            {
+                FF_add(x1,p,x1,n);
+                FF_norm(x1,n);
+            }
+            FF_shr(x1,n);
+        }
+        while (FF_parity(v)==0)
+        {
+            FF_shr(v,n);
+            if (FF_parity(x2)!=0)
+            {
+                FF_add(x2,p,x2,n);
+                FF_norm(x2,n);
+            }
+            FF_shr(x2,n);
+        }
+        if (FF_comp(u,v,n)>=0)
+        {
+
+            FF_sub(u,u,v,n);
+            FF_norm(u,n);
+            if (FF_comp(x1,x2,n)>=0) FF_sub(x1,x1,x2,n);
+            else
+            {
+                FF_sub(t,p,x2,n);
+                FF_add(x1,x1,t,n);
+            }
+            FF_norm(x1,n);
+        }
+        else
+        {
+            FF_sub(v,v,u,n);
+            FF_norm(v,n);
+            if (FF_comp(x2,x1,n)>=0) FF_sub(x2,x2,x1,n);
+            else
+            {
+                FF_sub(t,p,x1,n);
+                FF_add(x2,x2,t,n);
+            }
+            FF_norm(x2,n);
+        }
+    }
+    if (FF_comp(u,one,n)==0)
+        FF_copy(r,x1,n);
+    else
+        FF_copy(r,x2,n);
+}
+
+/* nesidue mod m */
+static void FF_nres(BIG a[],BIG m[],int n)
+{
+#ifndef C99
+    BIG d[2*FFLEN];
+#else
+    BIG d[2*n];
+#endif
+
+       if (n==1)
+       {
+               BIG_dscopy(d[0],a[0]);
+               BIG_dshl(d[0],NLEN*BASEBITS);
+               BIG_dmod(a[0],d[0],m[0]);
+       }
+       else
+       { 
+               FF_dsucopy(d,a,n);
+               FF_dmod(a,d,m,n);
+       }
+}
+
+static void FF_redc(BIG a[],BIG m[],BIG ND[],int n)
+{
+#ifndef C99
+    BIG d[2*FFLEN];
+#else
+    BIG d[2*n];
+#endif
+       if (n==1)
+       {
+               BIG_dzero(d[0]);
+               BIG_dscopy(d[0],a[0]);
+               BIG_monty(a[0],m[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]);        
        
+       }
+       else
+       {
+               FF_mod(a,m,n);
+               FF_dscopy(d,a,n);
+               FF_reduce(a,d,m,ND,n);
+               FF_mod(a,m,n);
+       }
+}
+
+/* U=1/a mod 2^m - Arazi & Qi */
+static void FF_invmod2m(BIG U[],BIG a[],int n)
+{
+    int i;
+#ifndef C99
+    BIG t1[FFLEN],b[FFLEN],c[FFLEN];
+#else
+    BIG t1[2*n],b[n],c[n];
+#endif
+
+    FF_zero(U,n);
+    FF_zero(b,n);
+    FF_zero(c,n);
+    FF_zero(t1,2*n);
+
+    BIG_copy(U[0],a[0]);
+    BIG_invmod2m(U[0]);
+    for (i=1; i<n; i<<=1)
+    {
+        FF_copy(b,a,i);
+        FF_mul(t1,U,b,i);
+        FF_shrw(t1,i); // top half to bottom half, top half=0
+
+        FF_copy(c,a,2*i);
+        FF_shrw(c,i); // top half of c
+        FF_lmul(b,U,c,i); // should set top half of b=0
+        FF_add(t1,t1,b,i);
+        FF_norm(t1,2*i);
+        FF_lmul(b,t1,U,i);
+        FF_copy(t1,b,i);
+        FF_one(b,i);
+        FF_shlw(b,i);
+        FF_sub(t1,b,t1,2*i);
+        FF_norm(t1,2*i);
+        FF_shlw(t1,i);
+        FF_add(U,U,t1,2*i);
+    }
+
+    FF_norm(U,n);
+}
+
+void FF_random(BIG x[],csprng *rng,int n)
+{
+    int i;
+    for (i=0; i<n; i++)
+    {
+        BIG_random(x[i],rng);
+    }
+    /* make sure top bit is 1 */
+    while (BIG_nbits(x[n-1])<MODBYTES*8) BIG_random(x[n-1],rng);
+}
+
+/* generate random x mod p */
+void FF_randomnum(BIG x[],BIG p[],csprng *rng,int n)
+{
+    int i;
+#ifndef C99
+    BIG d[2*FFLEN];
+#else
+    BIG d[2*n];
+#endif
+    for (i=0; i<2*n; i++)
+    {
+        BIG_random(d[i],rng);
+    }
+    FF_dmod(x,d,p,n);
+}
+
+static void FF_modmul(BIG z[],BIG x[],BIG y[],BIG p[],BIG ND[],int n)
+{
+#ifndef C99
+    BIG d[2*FFLEN];
+#else
+    BIG d[2*n];
+#endif
+    chunk ex=P_EXCESS(x[n-1]);
+    chunk ey=P_EXCESS(y[n-1]);
+#ifdef dchunk
+    if ((dchunk)(ex+1)*(ey+1)>(dchunk)P_FEXCESS)
+#else
+    if ((ex+1)>P_FEXCESS/(ey+1))
+#endif
+    {
+#ifdef DEBUG_REDUCE
+        printf("Product too large - reducing it %d %d\n",ex,ey);
+#endif
+        FF_mod(x,p,n);
+    }
+
+       if (n==1)
+       {
+               BIG_mul(d[0],x[0],y[0]);
+               BIG_monty(z[0],p[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]);
+       }
+       else
+       {
+               FF_mul(d,x,y,n);
+               FF_reduce(z,d,p,ND,n);
+       }
+}
+
+static void FF_modsqr(BIG z[],BIG x[],BIG p[],BIG ND[],int n)
+{
+#ifndef C99
+    BIG d[2*FFLEN];
+#else
+    BIG d[2*n];
+#endif
+    chunk ex=P_EXCESS(x[n-1]);
+#ifdef dchunk
+    if ((dchunk)(ex+1)*(ex+1)>(dchunk)P_FEXCESS)
+#else
+    if ((ex+1)>P_FEXCESS/(ex+1))
+#endif
+    {
+#ifdef DEBUG_REDUCE
+        printf("Product too large - reducing it %d\n",ex);
+#endif
+        FF_mod(x,p,n);
+    }
+
+       if (n==1)
+       {
+               BIG_sqr(d[0],x[0]);
+               BIG_monty(z[0],p[0],((chunk)1<<BASEBITS)-ND[0][0],d[0]);
+       }
+       else
+       {
+               FF_sqr(d,x,n);
+               FF_reduce(z,d,p,ND,n);
+       }
+}
+
+/* r=x^e mod p using side-channel resistant Montgomery Ladder, for large e */
+void FF_skpow(BIG r[],BIG x[],BIG e[],BIG p[],int n)
+{
+    int i,b;
+#ifndef C99
+    BIG R0[FFLEN],R1[FFLEN],ND[FFLEN];
+#else
+    BIG R0[n],R1[n],ND[n];
+#endif
+    FF_invmod2m(ND,p,n);
+
+    FF_one(R0,n);
+    FF_copy(R1,x,n);
+    FF_nres(R0,p,n);
+    FF_nres(R1,p,n);
+
+    for (i=8*MODBYTES*n-1; i>=0; i--)
+    {
+        b=BIG_bit(e[i/BIGBITS],i%BIGBITS);
+        FF_modmul(r,R0,R1,p,ND,n);
+
+        FF_cswap(R0,R1,b,n);
+        FF_modsqr(R0,R0,p,ND,n);
+
+        FF_copy(R1,r,n);
+        FF_cswap(R0,R1,b,n);
+    }
+    FF_copy(r,R0,n);
+    FF_redc(r,p,ND,n);
+}
+
+/* r=x^e mod p using side-channel resistant Montgomery Ladder, for short e */
+void FF_skspow(BIG r[],BIG x[],BIG e,BIG p[],int n)
+{
+    int i,b;
+#ifndef C99
+    BIG R0[FFLEN],R1[FFLEN],ND[FFLEN];
+#else
+    BIG R0[n],R1[n],ND[n];
+#endif
+    FF_invmod2m(ND,p,n);
+    FF_one(R0,n);
+    FF_copy(R1,x,n);
+    FF_nres(R0,p,n);
+    FF_nres(R1,p,n);
+    for (i=8*MODBYTES-1; i>=0; i--)
+    {
+        b=BIG_bit(e,i);
+        FF_modmul(r,R0,R1,p,ND,n);
+        FF_cswap(R0,R1,b,n);
+        FF_modsqr(R0,R0,p,ND,n);
+        FF_copy(R1,r,n);
+        FF_cswap(R0,R1,b,n);
+    }
+    FF_copy(r,R0,n);
+    FF_redc(r,p,ND,n);
+}
+
+/* raise to an integer power - right-to-left method */
+void FF_power(BIG r[],BIG x[],int e,BIG p[],int n)
+{
+    int f=1;
+#ifndef C99
+    BIG w[FFLEN],ND[FFLEN];
+#else
+    BIG w[n],ND[n];
+#endif
+    FF_invmod2m(ND,p,n);
+
+    FF_copy(w,x,n);
+    FF_nres(w,p,n);
+
+    if (e==2)
+    {
+        FF_modsqr(r,w,p,ND,n);
+    }
+    else for (;;)
+        {
+            if (e%2==1)
+            {
+                if (f) FF_copy(r,w,n);
+                else FF_modmul(r,r,w,p,ND,n);
+                f=0;
+            }
+            e>>=1;
+            if (e==0) break;
+            FF_modsqr(w,w,p,ND,n);
+        }
+
+    FF_redc(r,p,ND,n);
+}
+
+/* r=x^e mod p, faster but not side channel resistant */
+void FF_pow(BIG r[],BIG x[],BIG e[],BIG p[],int n)
+{
+    int i,b;
+#ifndef C99
+    BIG w[FFLEN],ND[FFLEN];
+#else
+    BIG w[n],ND[n];
+#endif
+    FF_invmod2m(ND,p,n);
+
+    FF_copy(w,x,n);
+    FF_one(r,n);
+    FF_nres(r,p,n);
+    FF_nres(w,p,n);
+
+    for (i=8*MODBYTES*n-1; i>=0; i--)
+    {
+        FF_modsqr(r,r,p,ND,n);
+        b=BIG_bit(e[i/BIGBITS],i%BIGBITS);
+        if (b==1) FF_modmul(r,r,w,p,ND,n);
+    }
+    FF_redc(r,p,ND,n);
+}
+
+/* double exponentiation r=x^e.y^f mod p */
+void FF_pow2(BIG r[],BIG x[],BIG e,BIG y[],BIG f,BIG p[],int n)
+{
+    int i,eb,fb;
+#ifndef C99
+    BIG xn[FFLEN],yn[FFLEN],xy[FFLEN],ND[FFLEN];
+#else
+    BIG xn[n],yn[n],xy[n],ND[n];
+#endif
+
+    FF_invmod2m(ND,p,n);
+
+    FF_copy(xn,x,n);
+    FF_copy(yn,y,n);
+    FF_nres(xn,p,n);
+    FF_nres(yn,p,n);
+    FF_modmul(xy,xn,yn,p,ND,n);
+    FF_one(r,n);
+    FF_nres(r,p,n);
+
+    for (i=8*MODBYTES-1; i>=0; i--)
+    {
+        eb=BIG_bit(e,i);
+        fb=BIG_bit(f,i);
+        FF_modsqr(r,r,p,ND,n);
+        if (eb==1)
+        {
+            if (fb==1) FF_modmul(r,r,xy,p,ND,n);
+            else FF_modmul(r,r,xn,p,ND,n);
+        }
+        else
+        {
+            if (fb==1) FF_modmul(r,r,yn,p,ND,n);
+        }
+    }
+    FF_redc(r,p,ND,n);
+}
+
+static sign32 igcd(sign32 x,sign32 y)
+{
+    /* integer GCD, returns GCD of x and y */
+    sign32 r;
+    if (y==0) return x;
+    while ((r=x%y)!=0)
+        x=y,y=r;
+    return y;
+}
+
+/* quick and dirty check for common factor with s */
+int FF_cfactor(BIG w[],sign32 s,int n)
+{
+    int r;
+    sign32 g;
+#ifndef C99
+    BIG x[FFLEN],y[FFLEN];
+#else
+    BIG x[n],y[n];
+#endif
+    FF_init(y,s,n);
+    FF_copy(x,w,n);
+    FF_norm(x,n);
+
+//     if (FF_parity(x)==0) return 1;
+    do
+    {
+        FF_sub(x,x,y,n);
+        FF_norm(x,n);
+        while (!FF_iszilch(x,n) && FF_parity(x)==0) FF_shr(x,n);
+    }
+    while (FF_comp(x,y,n)>0);
+#if CHUNK<32
+    g=x[0][0]+((sign32)(x[0][1])<<BASEBITS);
+#else
+    g=(sign32)x[0][0];
+#endif
+    r=igcd(s,g);
+    if (r>1) return 1;
+    return 0;
+}
+
+/* Miller-Rabin test for primality. Slow. */
+int FF_prime(BIG p[],csprng *rng,int n)
+{
+    int i,j,loop,s=0;
+#ifndef C99
+    BIG d[FFLEN],x[FFLEN],unity[FFLEN],nm1[FFLEN];
+#else
+    BIG d[n],x[n],unity[n],nm1[n];
+#endif
+    sign32 sf=4849845;/* 3*5*.. *19 */
+
+    FF_norm(p,n);
+
+    if (FF_cfactor(p,sf,n)) return 0;
+
+    FF_one(unity,n);
+    FF_sub(nm1,p,unity,n);
+    FF_norm(nm1,n);
+    FF_copy(d,nm1,n);
+    while (FF_parity(d)==0)
+    {
+        FF_shr(d,n);
+        s++;
+    }
+    if (s==0) return 0;
+
+    for (i=0; i<10; i++)
+    {
+        FF_randomnum(x,p,rng,n);
+        FF_pow(x,x,d,p,n);
+        if (FF_comp(x,unity,n)==0 || FF_comp(x,nm1,n)==0) continue;
+        loop=0;
+        for (j=1; j<s; j++)
+        {
+            FF_power(x,x,2,p,n);
+            if (FF_comp(x,unity,n)==0) return 0;
+            if (FF_comp(x,nm1,n)==0 )
+            {
+                loop=1;
+                break;
+            }
+        }
+        if (loop) continue;
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+BIG P[4]= 
{{0x1670957,0x1568CD3C,0x2595E5,0xEED4F38,0x1FC9A971,0x14EF7E62,0xA503883,0x9E1E05E,0xBF59E3},{0x1844C908,0x1B44A798,0x3A0B1E7,0xD1B5B4E,0x1836046F,0x87E94F9,0x1D34C537,0xF7183B0,0x46D07},{0x17813331,0x19E28A90,0x1473A4D6,0x1CACD01F,0x1EEA8838,0xAF2AE29,0x1F85292A,0x1632585E,0xD945E5},{0x919F5EF,0x1567B39F,0x19F6AD11,0x16CE47CF,0x9B36EB1,0x35B7D3,0x483B28C,0xCBEFA27,0xB5FC21}};
+
+int main()
+{
+       int i;
+       BIG p[4],e[4],x[4],r[4];
+       csprng rng;
+       char raw[100];
+       for (i=0;i<100;i++) raw[i]=i;
+    RAND_seed(&rng,100,raw);
+
+
+       FF_init(x,3,4);
+
+       FF_copy(p,P,4);
+       FF_copy(e,p,4);
+       FF_dec(e,1,4);
+       FF_norm(e,4);
+
+
+
+       printf("p= ");FF_output(p,4); printf("\n");
+       if (FF_prime(p,&rng,4)) printf("p is a prime\n");
+       printf("e= ");FF_output(e,4); printf("\n");
+
+       FF_skpow(r,x,e,p,4);
+       printf("r= ");FF_output(r,4); printf("\n");
+}
+
+*/

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version22/c/fp.c
----------------------------------------------------------------------
diff --git a/version22/c/fp.c b/version22/c/fp.c
new file mode 100644
index 0000000..5d48f1c
--- /dev/null
+++ b/version22/c/fp.c
@@ -0,0 +1,608 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL mod p functions */
+/* Small Finite Field arithmetic */
+/* SU=m, SU is Stack Usage (NOT_SPECIAL Modulus) */
+
+#include "amcl.h"
+
+/* Fast Modular Reduction Methods */
+
+/* r=d mod m */
+/* d MUST be normalised */
+/* Products must be less than pR in all cases !!! */
+/* So when multiplying two numbers, their product *must* be less than 
MODBITS+BASEBITS*NLEN */
+/* Results *may* be one bit bigger than MODBITS */
+
+#if MODTYPE == PSEUDO_MERSENNE
+/* r=d mod m */
+
+/* Converts from BIG integer to n-residue form mod Modulus */
+void FP_nres(BIG a)
+{
+    BIG tmp;
+    BIG_rcopy(tmp,a);
+}
+
+/* Converts from n-residue form back to BIG integer form */
+void FP_redc(BIG a)
+{
+    BIG tmp;
+    BIG_rcopy(tmp,a);
+}
+
+/* reduce a DBIG to a BIG exploiting the special form of the modulus */
+void FP_mod(BIG r,DBIG d)
+{
+    BIG t,b;
+    chunk v,tw;
+    BIG_split(t,b,d,MODBITS);
+
+    /* Note that all of the excess gets pushed into t. So if squaring a value 
with a 4-bit excess, this results in
+       t getting all 8 bits of the excess product! So products must be less 
than pR which is Montgomery compatible */
+
+    if (MConst < NEXCESS)
+    {
+        BIG_imul(t,t,MConst);
+
+        BIG_norm(t);
+        tw=t[NLEN-1];
+        t[NLEN-1]&=TMASK;
+        t[0]+=MConst*((tw>>TBITS));
+    }
+    else
+    {
+        v=BIG_pmul(t,t,MConst);
+        tw=t[NLEN-1];
+        t[NLEN-1]&=TMASK;
+#if CHUNK == 16
+        t[1]+=muladd(MConst,((tw>>TBITS)+(v<<(BASEBITS-TBITS))),0,&t[0]);
+#else
+        t[0]+=MConst*((tw>>TBITS)+(v<<(BASEBITS-TBITS)));
+#endif
+    }
+    BIG_add(r,t,b);
+    BIG_norm(r);
+}
+#endif
+
+/* This only applies to Curve C448, so specialised (for now) */
+#if MODTYPE == GENERALISED_MERSENNE
+
+/* Converts from BIG integer to n-residue form mod Modulus */
+void FP_nres(BIG a)
+{
+    BIG tmp;
+    BIG_rcopy(tmp,a);
+}
+
+/* Converts from n-residue form back to BIG integer form */
+void FP_redc(BIG a)
+{
+    BIG tmp;
+    BIG_rcopy(tmp,a);
+}
+
+/* reduce a DBIG to a BIG exploiting the special form of the modulus */
+void FP_mod(BIG r,DBIG d)
+{
+    BIG t,b;
+    chunk carry;
+    BIG_split(t,b,d,MBITS);
+
+    BIG_add(r,t,b);
+
+    BIG_dscopy(d,t);
+    BIG_dshl(d,MBITS/2);
+
+    BIG_split(t,b,d,MBITS);
+
+    BIG_add(r,r,t);
+    BIG_add(r,r,b);
+    BIG_norm(r);
+    BIG_shl(t,MBITS/2);
+
+    BIG_add(r,r,t);
+
+    carry=r[NLEN-1]>>TBITS;
+
+    r[NLEN-1]&=TMASK;
+    r[0]+=carry;
+
+    r[224/BASEBITS]+=carry<<(224%BASEBITS); /* need to check that this falls 
mid-word */
+    BIG_norm(r);
+
+}
+
+#endif
+
+#if MODTYPE == MONTGOMERY_FRIENDLY
+
+/* convert to Montgomery n-residue form */
+void FP_nres(BIG a)
+{
+    DBIG d;
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_dscopy(d,a);
+    BIG_dshl(d,NLEN*BASEBITS);
+    BIG_dmod(a,d,m);
+}
+
+/* convert back to regular form */
+void FP_redc(BIG a)
+{
+    DBIG d;
+    BIG_dzero(d);
+    BIG_dscopy(d,a);
+    FP_mod(a,d);
+}
+
+/* fast modular reduction from DBIG to BIG exploiting special form of the 
modulus */
+void FP_mod(BIG a,DBIG d)
+{
+    int i;
+
+    for (i=0; i<NLEN; i++)
+        d[NLEN+i]+=muladd(d[i],MConst-1,d[i],&d[NLEN+i-1]);
+
+    BIG_sducopy(a,d);
+    BIG_norm(a);
+}
+
+#endif
+
+#if MODTYPE == NOT_SPECIAL
+
+/* convert BIG a to Montgomery n-residue form */
+/* SU= 120 */
+void FP_nres(BIG a)
+{
+    DBIG d;
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_dscopy(d,a);
+    BIG_dshl(d,NLEN*BASEBITS);
+    BIG_dmod(a,d,m);
+}
+
+/* SU= 80 */
+/* convert back to regular form */
+void FP_redc(BIG a)
+{
+    DBIG d;
+    BIG_dzero(d);
+    BIG_dscopy(d,a);
+    FP_mod(a,d);
+}
+
+/* reduce a DBIG to a BIG using Montgomery's no trial division method */
+/* d is expected to be dnormed before entry */
+/* SU= 112 */
+void FP_mod(BIG a,DBIG d)
+{
+       BIG mdls;
+    BIG_rcopy(mdls,Modulus);
+       BIG_monty(a,mdls,MConst,d);
+}
+
+#endif
+
+/* test x==0 ? */
+/* SU= 48 */
+int FP_iszilch(BIG x)
+{
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_mod(x,m);
+    return BIG_iszilch(x);
+}
+
+/* output FP */
+/* SU= 48 */
+void FP_output(BIG r)
+{
+    BIG c;
+    BIG_copy(c,r);
+    FP_redc(c);
+    BIG_output(c);
+}
+
+void FP_rawoutput(BIG r)
+{
+    BIG_rawoutput(r);
+}
+
+#ifdef GET_STATS
+int tsqr=0,rsqr=0,tmul=0,rmul=0;
+int tadd=0,radd=0,tneg=0,rneg=0;
+int tdadd=0,rdadd=0,tdneg=0,rdneg=0;
+#endif
+
+/* r=a*b mod Modulus */
+/* product must be less that p.R - and we need to know this in advance! */
+/* SU= 88 */
+void FP_mul(BIG r,BIG a,BIG b)
+{
+    DBIG d;
+    chunk ea,eb;
+    BIG_norm(a);
+    BIG_norm(b);
+    ea=EXCESS(a);
+    eb=EXCESS(b);
+
+#ifdef dchunk
+    if ((dchunk)(ea+1)*(eb+1)>(dchunk)FEXCESS)
+#else
+    if ((ea+1)>FEXCESS/(eb+1))
+#endif
+    {
+#ifdef DEBUG_REDUCE
+        printf("Product too large - reducing it %d %d %d\n",ea,eb,FEXCESS);
+#endif
+        FP_reduce(a);  /* it is sufficient to fully reduce just one of them < 
p */
+#ifdef GET_STATS
+        rmul++;
+    }
+
+    tmul++;
+#else
+    }
+#endif
+
+    BIG_mul(d,a,b);
+    FP_mod(r,d);
+}
+
+/* multiplication by an integer, r=a*c */
+/* SU= 136 */
+void FP_imul(BIG r,BIG a,int c)
+{
+    DBIG d;
+    BIG m;
+    int s=0;
+    chunk afx;
+    BIG_norm(a);
+    if (c<0)
+    {
+        c=-c;
+        s=1;
+    }
+    afx=(EXCESS(a)+1)*(c+1)+1;
+    if (c<NEXCESS && afx<FEXCESS)
+        BIG_imul(r,a,c);
+    else
+    {
+        if (afx<FEXCESS)
+        {
+            BIG_pmul(r,a,c);
+        }
+        else
+        {
+            BIG_rcopy(m,Modulus);
+            BIG_pxmul(d,a,c);
+            BIG_dmod(r,d,m);
+        }
+    }
+    if (s) FP_neg(r,r);
+    BIG_norm(r);
+}
+
+/* Set r=a^2 mod m */
+/* SU= 88 */
+void FP_sqr(BIG r,BIG a)
+{
+    DBIG d;
+    chunk ea;
+    BIG_norm(a);
+    ea=EXCESS(a);
+#ifdef dchunk
+    if ((dchunk)(ea+1)*(ea+1)>(dchunk)FEXCESS)
+#else
+    if ((ea+1)>FEXCESS/(ea+1))
+#endif
+    {
+#ifdef DEBUG_REDUCE
+        printf("Product too large - reducing it %d\n",ea);
+#endif
+        FP_reduce(a);
+#ifdef GET_STATS
+        rsqr++;
+    }
+    tsqr++;
+#else
+    }
+#endif
+
+    BIG_sqr(d,a);
+    FP_mod(r,d);
+}
+
+/* SU= 16 */
+/* Set r=a+b */
+void FP_add(BIG r,BIG a,BIG b)
+{
+    BIG_add(r,a,b);
+    if (EXCESS(r)+2>=FEXCESS)  /* +2 because a and b not normalised */
+    {
+#ifdef DEBUG_REDUCE
+        printf("Sum too large - reducing it %d\n",EXCESS(r));
+#endif
+        FP_reduce(r);
+#ifdef GET_STATS
+        radd++;
+    }
+    tadd++;
+#else
+    }
+#endif
+}
+
+/* Set r=a-b mod m */
+/* SU= 56 */
+void FP_sub(BIG r,BIG a,BIG b)
+{
+    BIG n;
+    FP_neg(n,b);
+    FP_add(r,a,n);
+}
+
+/* SU= 48 */
+/* Fully reduce a mod Modulus */
+void FP_reduce(BIG a)
+{
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_mod(a,m);
+}
+
+// https://graphics.stanford.edu/~seander/bithacks.html
+// constant time log to base 2 (or number of bits in)
+
+static int logb2(unsign32 v)
+{
+    int r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+
+    v = v - ((v >> 1) & 0x55555555);
+    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
+    r = (((v + (v >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
+    return r+1;
+}
+
+/* Set r=-a mod Modulus */
+/* SU= 64 */
+void FP_neg(BIG r,BIG a)
+{
+    int sb;
+//    chunk ov;
+    BIG m;
+
+    BIG_rcopy(m,Modulus);
+    BIG_norm(a);
+
+    sb=logb2((unsign32)EXCESS(a));
+    /*
+        ov=EXCESS(a);
+        sb=1;
+        while(ov!=0)
+        {
+            sb++;    // only unpredictable branch
+            ov>>=1;
+        }
+    */
+    BIG_fshl(m,sb);
+    BIG_sub(r,m,a);
+
+    if (EXCESS(r)>=FEXCESS)
+    {
+#ifdef DEBUG_REDUCE
+        printf("Negation too large -  reducing it %d\n",EXCESS(r));
+#endif
+        FP_reduce(r);
+#ifdef GET_STATS
+        rneg++;
+    }
+    tneg++;
+#else
+    }
+#endif
+
+}
+
+/* Set r=a/2. */
+/* SU= 56 */
+void FP_div2(BIG r,BIG a)
+{
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_norm(a);
+    if (BIG_parity(a)==0)
+    {
+        BIG_copy(r,a);
+        BIG_fshr(r,1);
+    }
+    else
+    {
+        BIG_add(r,a,m);
+        BIG_norm(r);
+        BIG_fshr(r,1);
+    }
+}
+
+/* set w=1/x */
+void FP_inv(BIG w,BIG x)
+{
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_copy(w,x);
+    FP_redc(w);
+
+    BIG_invmodp(w,w,m);
+    FP_nres(w);
+}
+
+/* SU=8 */
+/* set n=1 */
+void FP_one(BIG n)
+{
+    BIG_one(n);
+    FP_nres(n);
+}
+
+/* Set r=a^b mod Modulus */
+/* SU= 136 */
+void FP_pow(BIG r,BIG a,BIG b)
+{
+    BIG w,z,zilch;
+    int bt;
+    BIG_zero(zilch);
+
+    BIG_norm(b);
+    BIG_copy(z,b);
+    BIG_copy(w,a);
+    FP_one(r);
+    while(1)
+    {
+        bt=BIG_parity(z);
+        BIG_fshr(z,1);
+        if (bt) FP_mul(r,r,w);
+        if (BIG_comp(z,zilch)==0) break;
+        FP_sqr(w,w);
+    }
+    FP_reduce(r);
+}
+
+/* is r a QR? */
+int FP_qr(BIG r)
+{
+    int j;
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    FP_redc(r);
+    j=BIG_jacobi(r,m);
+    FP_nres(r);
+    if (j==1) return 1;
+    return 0;
+
+}
+
+/* Set a=sqrt(b) mod Modulus */
+/* SU= 160 */
+void FP_sqrt(BIG r,BIG a)
+{
+    BIG v,i,b;
+    BIG m;
+    BIG_rcopy(m,Modulus);
+    BIG_mod(a,m);
+    BIG_copy(b,m);
+    if (MOD8==5)
+    {
+        BIG_dec(b,5);
+        BIG_norm(b);
+        BIG_fshr(b,3); /* (p-5)/8 */
+        BIG_copy(i,a);
+        BIG_fshl(i,1);
+        FP_pow(v,i,b);
+        FP_mul(i,i,v);
+        FP_mul(i,i,v);
+        BIG_dec(i,1);
+        FP_mul(r,a,v);
+        FP_mul(r,r,i);
+        BIG_mod(r,m);
+    }
+    if (MOD8==3 || MOD8==7)
+    {
+        BIG_inc(b,1);
+        BIG_norm(b);
+        BIG_fshr(b,2); /* (p+1)/4 */
+        FP_pow(r,a,b);
+    }
+}
+
+/*
+int main()
+{
+
+       BIG r;
+
+       FP_one(r);
+       FP_sqr(r,r);
+
+       BIG_output(r);
+
+       int i,carry;
+       DBIG c={0,0,0,0,0,0,0,0};
+       BIG a={1,2,3,4};
+       BIG b={3,4,5,6};
+       BIG r={11,12,13,14};
+       BIG s={23,24,25,15};
+       BIG w;
+
+//     printf("NEXCESS= %d\n",NEXCESS);
+//     printf("MConst= %d\n",MConst);
+
+       BIG_copy(b,Modulus);
+       BIG_dec(b,1);
+       BIG_norm(b);
+
+       BIG_randomnum(r); BIG_norm(r); BIG_mod(r,Modulus);
+//     BIG_randomnum(s); norm(s); BIG_mod(s,Modulus);
+
+//     BIG_output(r);
+//     BIG_output(s);
+
+       BIG_output(r);
+       FP_nres(r);
+       BIG_output(r);
+       BIG_copy(a,r);
+       FP_redc(r);
+       BIG_output(r);
+       BIG_dscopy(c,a);
+       FP_mod(r,c);
+       BIG_output(r);
+
+
+//     exit(0);
+
+//     copy(r,a);
+       printf("r=   "); BIG_output(r);
+       BIG_modsqr(r,r,Modulus);
+       printf("r^2= "); BIG_output(r);
+
+       FP_nres(r);
+       FP_sqrt(r,r);
+       FP_redc(r);
+       printf("r=   "); BIG_output(r);
+       BIG_modsqr(r,r,Modulus);
+       printf("r^2= "); BIG_output(r);
+
+
+//     for (i=0;i<100000;i++) FP_sqr(r,r);
+//     for (i=0;i<100000;i++)
+               FP_sqrt(r,r);
+
+       BIG_output(r);
+}
+*/

[47/51] [partial] incubator-milagro-crypto git commit: update code

Reply via email to