http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ecp4.c
----------------------------------------------------------------------
diff --git a/version3/c/ecp4.c b/version3/c/ecp4.c
new file mode 100644
index 0000000..e8148c4
--- /dev/null
+++ b/version3/c/ecp4.c
@@ -0,0 +1,824 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Weierstrass elliptic curve functions over FP4 */
+
+#include "ecp4_ZZZ.h"
+
+/* Return non-zero when P is the point at infinity, i.e. when both its
+   x and z coordinates test as zero */
+int ECP4_ZZZ_isinf(ECP4_ZZZ *P)
+{
+    int x_zero=FP4_YYY_iszilch(&(P->x));
+    int z_zero=FP4_YYY_iszilch(&(P->z));
+
+    return (x_zero & z_zero);
+}
+
+/* Set P=Q: copy each of the three coordinates of Q into P */
+void ECP4_ZZZ_copy(ECP4_ZZZ *P,ECP4_ZZZ *Q)
+{
+    FP4_YYY *dst_x=&(P->x), *dst_y=&(P->y), *dst_z=&(P->z);
+
+    FP4_YYY_copy(dst_x,&(Q->x));
+    FP4_YYY_copy(dst_y,&(Q->y));
+    FP4_YYY_copy(dst_z,&(Q->z));
+}
+
+/* Set P to the point at infinity, stored as (x,y,z)=(0,1,0) */
+void ECP4_ZZZ_inf(ECP4_ZZZ *P)
+{
+    FP4_YYY_zero(&(P->z));
+    FP4_YYY_zero(&(P->x));
+    FP4_YYY_one(&(P->y));
+}
+
+/* Conditional move of Q into P, dependent on d.
+   The same selector d is forwarded to FP4_YYY_cmove for each coordinate. */
+static void ECP4_ZZZ_cmove(ECP4_ZZZ *P,ECP4_ZZZ *Q,int d)
+{
+    FP4_YYY *to[3];
+    FP4_YYY *from[3];
+    int k;
+
+    to[0]=&(P->x);
+    from[0]=&(Q->x);
+    to[1]=&(P->y);
+    from[1]=&(Q->y);
+    to[2]=&(P->z);
+    from[2]=&(Q->z);
+
+    for (k=0; k<3; k++)
+    {
+        FP4_YYY_cmove(to[k],from[k],d);
+    }
+}
+
+/* Return 1 if b==c, else 0, with no branching.
+   The XOR difference is folded in unsigned arithmetic: for any non-zero
+   32-bit value v, v|(-v) has bit 31 set, so the inverse of that bit is the
+   equality flag. Compared with decrementing a signed difference and testing
+   its sign, this avoids signed overflow, avoids right-shifting a negative
+   value, and does not report equality for operands of opposite sign.
+   Assumes sign32 is a 32-bit type, as the previous shift by 31 also did. */
+static int teq(sign32 b,sign32 c)
+{
+    unsigned long v=((unsigned long)b^(unsigned long)c)&0xFFFFFFFFUL;
+
+    return (int)(1UL^(((v|(0UL-v))>>31)&1UL));
+}
+
+/* Constant time select from pre-computed table.
+   On exit P holds W[(|b|-1)/2], negated when b is negative. All eight table
+   entries go through a conditional move whichever one matches. */
+static void ECP4_ZZZ_select(ECP4_ZZZ *P,ECP4_ZZZ W[],sign32 b)
+{
+    int k;
+    ECP4_ZZZ minusP;
+    sign32 sgn=b>>31;           /* used as the sign mask of b */
+    sign32 idx=(b^sgn)-sgn;     /* |b| */
+
+    idx=(idx-1)/2;              /* position in the table */
+
+    for (k=0; k<8; k++)
+    {
+        ECP4_ZZZ_cmove(P,&W[k],teq(idx,k));  // conditional move
+    }
+
+    /* -P is always built; it replaces P only when b was negative */
+    ECP4_ZZZ_copy(&minusP,P);
+    ECP4_ZZZ_neg(&minusP);
+    ECP4_ZZZ_cmove(P,&minusP,(int)(sgn&1));
+}
+
+/* Make P affine (so z=1).
+   Infinity is left untouched. A point whose z is already unity only has
+   x and y reduced; otherwise x and y are multiplied by 1/z first. */
+void ECP4_ZZZ_affine(ECP4_ZZZ *P)
+{
+    FP4_YYY zinv;
+
+    if (ECP4_ZZZ_isinf(P)) return;
+
+    if (!FP4_YYY_isunity(&(P->z)))
+    {
+        FP4_YYY_inv(&zinv,&(P->z));
+        FP4_YYY_mul(&(P->x),&(P->x),&zinv);
+        FP4_YYY_mul(&(P->y),&(P->y),&zinv);
+        FP4_YYY_one(&(P->z));
+    }
+
+    FP4_YYY_reduce(&(P->x));
+    FP4_YYY_reduce(&(P->y));
+}
+
+/* Return 1 if P==Q, else 0.
+   The projective coordinates are compared by cross-multiplication
+   (P.x*Q.z against Q.x*P.z, then the same for y), so neither point has
+   to be made affine. */
+/* SU= 312 */
+int ECP4_ZZZ_equals(ECP4_ZZZ *P,ECP4_ZZZ *Q)
+{
+    FP4_YYY lhs,rhs;
+
+    FP4_YYY_mul(&lhs,&(P->x),&(Q->z));
+    FP4_YYY_mul(&rhs,&(Q->x),&(P->z));
+    if (FP4_YYY_equals(&lhs,&rhs))
+    {
+        FP4_YYY_mul(&lhs,&(P->y),&(Q->z));
+        FP4_YYY_mul(&rhs,&(Q->y),&(P->z));
+        if (FP4_YYY_equals(&lhs,&rhs)) return 1;
+    }
+    return 0;
+}
+
+/* Extract the affine x and y of point P, working on a copy so P is unchanged.
+   Returns -1, leaving x and y unwritten, when P is the point at infinity;
+   otherwise returns 0. */
+int ECP4_ZZZ_get(FP4_YYY *x,FP4_YYY *y,ECP4_ZZZ *P)
+{
+    ECP4_ZZZ A;
+
+    ECP4_ZZZ_copy(&A,P);
+    ECP4_ZZZ_affine(&A);
+    if (!ECP4_ZZZ_isinf(&A))
+    {
+        FP4_YYY_copy(y,&(A.y));
+        FP4_YYY_copy(x,&(A.x));
+        return 0;
+    }
+    return -1;
+}
+
+/* Print point P as "(x,y)" in affine form, or "Infinity" */
+void ECP4_ZZZ_output(ECP4_ZZZ *P)
+{
+    FP4_YYY px,py;
+
+    if (!ECP4_ZZZ_isinf(P))
+    {
+        ECP4_ZZZ_get(&px,&py,P);
+        printf("(");
+        FP4_YYY_output(&px);
+        printf(",");
+        FP4_YYY_output(&py);
+        printf(")\n");
+        return;
+    }
+    printf("Infinity\n");
+}
+
+/* Convert Q to octet string.
+   The eight FP components of the affine coordinates are written at
+   consecutive MODBYTES_XXX-sized offsets of W->val, in the order
+   x.a.a, x.a.b, x.b.a, x.b.b, y.a.a, y.a.b, y.b.a, y.b.b, and W->len is
+   set to 8*MODBYTES_XXX.
+   ECP4_ZZZ_get() writes nothing for the point at infinity, so x and y are
+   zeroed beforehand: infinity is then serialised from zeroed coordinates
+   rather than from uninitialised stack contents.
+   NOTE(review): the capacity of W is not checked here - confirm callers
+   supply at least 8*MODBYTES_XXX bytes. */
+void ECP4_ZZZ_toOctet(octet *W,ECP4_ZZZ *Q)
+{
+    BIG_XXX b;
+    FP4_YYY qx,qy;
+    FP4_YYY *coord[2];
+    int i;
+
+    FP4_YYY_zero(&qx);
+    FP4_YYY_zero(&qy);
+    ECP4_ZZZ_get(&qx,&qy,Q);
+
+    coord[0]=&qx;
+    coord[1]=&qy;
+
+    for (i=0; i<2; i++)
+    {
+        FP_YYY_redc(b,&(coord[i]->a.a));
+        BIG_XXX_toBytes(&(W->val[(4*i)*MODBYTES_XXX]),b);
+        FP_YYY_redc(b,&(coord[i]->a.b));
+        BIG_XXX_toBytes(&(W->val[(4*i+1)*MODBYTES_XXX]),b);
+        FP_YYY_redc(b,&(coord[i]->b.a));
+        BIG_XXX_toBytes(&(W->val[(4*i+2)*MODBYTES_XXX]),b);
+        FP_YYY_redc(b,&(coord[i]->b.b));
+        BIG_XXX_toBytes(&(W->val[(4*i+3)*MODBYTES_XXX]),b);
+    }
+
+    W->len=8*MODBYTES_XXX;
+}
+
+/* Restore Q from octet string.
+   W must hold eight field elements at consecutive MODBYTES_XXX-sized
+   offsets, in the order x.a.a, x.a.b, x.b.a, x.b.b, y.a.a, y.a.b, y.b.a,
+   y.b.b (the layout written by ECP4_ZZZ_toOctet).
+   Returns 1 if (x,y) is on the curve. Returns 0 with Q set to infinity if
+   the point is not on the curve, or if W->len is too short to contain a
+   full encoding (checked up front so no bytes beyond the octet are read). */
+int ECP4_ZZZ_fromOctet(ECP4_ZZZ *Q,octet *W)
+{
+    BIG_XXX b;
+    FP4_YYY qx,qy;
+
+    if (W->len<8*MODBYTES_XXX)
+    {
+        ECP4_ZZZ_inf(Q);
+        return 0;
+    }
+
+    /* x coordinate */
+    BIG_XXX_fromBytes(b,&(W->val[0]));
+    FP_YYY_nres(&(qx.a.a),b);
+    BIG_XXX_fromBytes(b,&(W->val[MODBYTES_XXX]));
+    FP_YYY_nres(&(qx.a.b),b);
+    BIG_XXX_fromBytes(b,&(W->val[2*MODBYTES_XXX]));
+    FP_YYY_nres(&(qx.b.a),b);
+    BIG_XXX_fromBytes(b,&(W->val[3*MODBYTES_XXX]));
+    FP_YYY_nres(&(qx.b.b),b);
+
+    /* y coordinate */
+    BIG_XXX_fromBytes(b,&(W->val[4*MODBYTES_XXX]));
+    FP_YYY_nres(&(qy.a.a),b);
+    BIG_XXX_fromBytes(b,&(W->val[5*MODBYTES_XXX]));
+    FP_YYY_nres(&(qy.a.b),b);
+    BIG_XXX_fromBytes(b,&(W->val[6*MODBYTES_XXX]));
+    FP_YYY_nres(&(qy.b.a),b);
+    BIG_XXX_fromBytes(b,&(W->val[7*MODBYTES_XXX]));
+    FP_YYY_nres(&(qy.b.b),b);
+
+    /* ECP4_ZZZ_set validates the curve equation and sets infinity on failure */
+    if (ECP4_ZZZ_set(Q,&qx,&qy)) return 1;
+    return 0;
+}
+
+/* Calculate RHS of twisted curve equation x^3+B/i or x^3+Bi.
+   CURVE_A is assumed to be zero, so there is no term linear in x. */
+void ECP4_ZZZ_rhs(FP4_YYY *rhs,FP4_YYY *x)
+{
+    BIG_XXX b;
+    FP2_YYY b2;
+    FP4_YYY x2,b4;
+
+    /* rhs = x^3 */
+    FP4_YYY_sqr(&x2,x);
+    FP4_YYY_mul(rhs,&x2,x);
+
+    /* lift the curve constant B from BIG through FP2 into FP4 */
+    BIG_XXX_rcopy(b,CURVE_B_ZZZ);
+    FP2_YYY_from_BIG(&b2,b);
+    FP4_YYY_from_FP2(&b4,&b2);
+
+    /* IMPORTANT - here we use the correct SEXTIC twist of the curve */
+#if SEXTIC_TWIST_ZZZ == D_TYPE 
+    FP4_YYY_div_i(&b4);
+#endif
+
+#if SEXTIC_TWIST_ZZZ == M_TYPE 
+    FP4_YYY_times_i(&b4);
+#endif
+
+    FP4_YYY_add(rhs,&b4,rhs);
+    FP4_YYY_reduce(rhs);
+}
+
+/* Set P=(x,y) with z=1. Return 1 if (x,y) is on the curve; otherwise set P
+   to infinity and return 0 */
+/* SU= 232 */
+int ECP4_ZZZ_set(ECP4_ZZZ *P,FP4_YYY *x,FP4_YYY *y)
+{
+    FP4_YYY ysq,fx;
+
+    FP4_YYY_sqr(&ysq,y);
+    ECP4_ZZZ_rhs(&fx,x);
+
+    /* on the curve exactly when y^2 equals the right hand side at x */
+    if (FP4_YYY_equals(&ysq,&fx))
+    {
+        FP4_YYY_copy(&(P->x),x);
+        FP4_YYY_copy(&(P->y),y);
+        FP4_YYY_one(&(P->z));
+        return 1;
+    }
+
+    ECP4_ZZZ_inf(P);
+    return 0;
+}
+
+/* Set P=(x,y) with z=1, where y is a square root of the curve's right hand
+   side at x. Return 1 if such a root exists; otherwise set P to infinity
+   and return 0 */
+/* SU= 232 */
+int ECP4_ZZZ_setx(ECP4_ZZZ *P,FP4_YYY *x)
+{
+    FP4_YYY root;
+
+    ECP4_ZZZ_rhs(&root,x);
+
+    if (FP4_YYY_sqrt(&root,&root))
+    {
+        FP4_YYY_copy(&(P->x),x);
+        FP4_YYY_copy(&(P->y),&root);
+        FP4_YYY_one(&(P->z));
+        return 1;
+    }
+
+    ECP4_ZZZ_inf(P);
+    return 0;
+}
+
+/* Set P=-P by negating the y coordinate, normalising it before and after */
+/* SU= 8 */
+void ECP4_ZZZ_neg(ECP4_ZZZ *P)
+{
+    FP4_YYY *py=&(P->y);
+
+    FP4_YYY_norm(py);
+    FP4_YYY_neg(py,py);
+    FP4_YYY_norm(py);
+}
+
+
+/* P+=P, computed in place on the projective coordinates.
+   The trailing comments keep the corresponding reference-code operation.
+   This implementation always returns 1. */
+int ECP4_ZZZ_dbl(ECP4_ZZZ *P)
+{
+    FP4_YYY t0,t1,t2,iy,x3,y3;
+
+    FP4_YYY_copy(&iy,&(P->y));              //FP4_YYY iy=new FP4_YYY(y);
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP4_YYY_times_i(&iy);                   //iy.mul_ip();
+#endif
+
+    FP4_YYY_sqr(&t0,&(P->y));               //t0.sqr();
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP4_YYY_times_i(&t0);                   //t0.mul_ip();
+#endif
+
+    FP4_YYY_mul(&t1,&iy,&(P->z));           //t1.mul(z);
+    FP4_YYY_sqr(&t2,&(P->z));               //t2.sqr();
+
+    /* z = 8*t0, built by three successive doublings */
+    FP4_YYY_add(&(P->z),&t0,&t0);           //z.add(t0);
+    FP4_YYY_norm(&(P->z));                  //z.norm();
+    FP4_YYY_add(&(P->z),&(P->z),&(P->z));   //z.add(z);
+    FP4_YYY_add(&(P->z),&(P->z),&(P->z));   //z.add(z);
+    FP4_YYY_norm(&(P->z));                  //z.norm();
+
+    FP4_YYY_imul(&t2,&t2,3*CURVE_B_I_ZZZ);  //t2.imul(3*ROM.CURVE_B_I);
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP4_YYY_times_i(&t2);
+#endif
+
+    FP4_YYY_mul(&x3,&t2,&(P->z));           //x3.mul(z);
+
+    FP4_YYY_add(&y3,&t0,&t2);               //y3.add(t2);
+    FP4_YYY_norm(&y3);                      //y3.norm();
+    FP4_YYY_mul(&(P->z),&(P->z),&t1);       //z.mul(t1);
+
+    /* t2 = 3*t2 */
+    FP4_YYY_add(&t1,&t2,&t2);               //t1.add(t2);
+    FP4_YYY_add(&t2,&t2,&t1);               //t2.add(t1);
+    FP4_YYY_norm(&t2);                      //t2.norm();
+    FP4_YYY_sub(&t0,&t0,&t2);               //t0.sub(t2);
+    FP4_YYY_norm(&t0);                      //t0.norm();     //y^2-9bz^2
+    FP4_YYY_mul(&y3,&y3,&t0);               //y3.mul(t0);
+    FP4_YYY_add(&(P->y),&y3,&x3);           //y3.add(x3);    //(y^2+3z*2)(y^2-9z^2)+3b.z^2.8y^2
+
+    FP4_YYY_mul(&t1,&(P->x),&iy);           //t1.mul(iy);
+
+    FP4_YYY_norm(&t0);                      //x.norm();
+    FP4_YYY_mul(&(P->x),&t0,&t1);           //x.mul(t1);
+    FP4_YYY_add(&(P->x),&(P->x),&(P->x));   //x.add(x);      //(y^2-9bz^2)xy2
+
+    FP4_YYY_norm(&(P->x));                  //x.norm();
+
+    FP4_YYY_norm(&(P->y));                  //y.norm();
+
+    return 1;
+}
+
+/* Set P+=Q, computed on projective coordinates.
+   P's coordinates are only written after every read of P and Q is done.
+   The trailing comments keep the corresponding reference-code operation
+   and the value held at that point. This implementation always returns 0. */
+int ECP4_ZZZ_add(ECP4_ZZZ *P,ECP4_ZZZ *Q)
+{
+    FP4_YYY t0,t1,t2,t3,t4,x3,y3,z3;
+    int b3=3*CURVE_B_I_ZZZ;
+
+    FP4_YYY_mul(&t0,&(P->x),&(Q->x));       //t0.mul(Q.x);   // x.Q.x
+    FP4_YYY_mul(&t1,&(P->y),&(Q->y));       //t1.mul(Q.y);   // y.Q.y
+
+    FP4_YYY_mul(&t2,&(P->z),&(Q->z));       //t2.mul(Q.z);
+    FP4_YYY_add(&t3,&(P->x),&(P->y));       //t3.add(y);
+    FP4_YYY_norm(&t3);                      //t3.norm();     //t3=X1+Y1
+    FP4_YYY_add(&t4,&(Q->x),&(Q->y));       //t4.add(Q.y);
+    FP4_YYY_norm(&t4);                      //t4.norm();     //t4=X2+Y2
+    FP4_YYY_mul(&t3,&t3,&t4);               //t3.mul(t4);    //t3=(X1+Y1)(X2+Y2)
+    FP4_YYY_add(&t4,&t0,&t1);               //t4.add(t1);    //t4=X1.X2+Y1.Y2
+
+    FP4_YYY_sub(&t3,&t3,&t4);               //t3.sub(t4);
+    FP4_YYY_norm(&t3);                      //t3.norm();
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP4_YYY_times_i(&t3);                   //t3.mul_ip();   //t3=(X1+Y1)(X2+Y2)-(X1.X2+Y1.Y2) = X1.Y2+X2.Y1
+#endif
+
+    FP4_YYY_add(&t4,&(P->y),&(P->z));       //t4.add(z);
+    FP4_YYY_norm(&t4);                      //t4.norm();     //t4=Y1+Z1
+
+    FP4_YYY_add(&x3,&(Q->y),&(Q->z));       //x3.add(Q.z);
+    FP4_YYY_norm(&x3);                      //x3.norm();     //x3=Y2+Z2
+
+    FP4_YYY_mul(&t4,&t4,&x3);               //t4.mul(x3);    //t4=(Y1+Z1)(Y2+Z2)
+
+    FP4_YYY_add(&x3,&t1,&t2);               //x3.add(t2);    //X3=Y1.Y2+Z1.Z2
+
+    FP4_YYY_sub(&t4,&t4,&x3);               //t4.sub(x3);
+    FP4_YYY_norm(&t4);                      //t4.norm();
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP4_YYY_times_i(&t4);                   //t4.mul_ip();   //t4=(Y1+Z1)(Y2+Z2) - (Y1.Y2+Z1.Z2) = Y1.Z2+Y2.Z1
+#endif
+
+    FP4_YYY_add(&x3,&(P->x),&(P->z));       //x3.add(z);
+    FP4_YYY_norm(&x3);                      //x3.norm();     // x3=X1+Z1
+
+    FP4_YYY_add(&y3,&(Q->x),&(Q->z));       //y3.add(Q.z);
+    FP4_YYY_norm(&y3);                      //y3.norm();     // y3=X2+Z2
+    FP4_YYY_mul(&x3,&x3,&y3);               //x3.mul(y3);    // x3=(X1+Z1)(X2+Z2)
+
+    FP4_YYY_add(&y3,&t0,&t2);               //y3.add(t2);    // y3=X1.X2+Z1.Z2
+    FP4_YYY_sub(&y3,&x3,&y3);               //y3.rsub(x3);
+    FP4_YYY_norm(&y3);                      //y3.norm();     // y3=(X1+Z1)(X2+Z2) - (X1.X2+Z1.Z2) = X1.Z2+X2.Z1
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP4_YYY_times_i(&t0);                   //t0.mul_ip();
+    FP4_YYY_times_i(&t1);                   //t1.mul_ip();
+#endif
+
+    /* t0 = 3*t0 */
+    FP4_YYY_add(&x3,&t0,&t0);               //x3.add(t0);
+    FP4_YYY_add(&t0,&t0,&x3);               //t0.add(x3);
+    FP4_YYY_norm(&t0);                      //t0.norm();
+    FP4_YYY_imul(&t2,&t2,b3);               //t2.imul(b);
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP4_YYY_times_i(&t2);
+#endif
+
+    FP4_YYY_add(&z3,&t1,&t2);               //z3.add(t2);
+    FP4_YYY_norm(&z3);                      //z3.norm();
+    FP4_YYY_sub(&t1,&t1,&t2);               //t1.sub(t2);
+    FP4_YYY_norm(&t1);                      //t1.norm();
+    FP4_YYY_imul(&y3,&y3,b3);               //y3.imul(b);
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP4_YYY_times_i(&y3);
+#endif
+
+    FP4_YYY_mul(&x3,&y3,&t4);               //x3.mul(t4);
+
+    FP4_YYY_mul(&t2,&t3,&t1);               //t2.mul(t1);
+    FP4_YYY_sub(&(P->x),&t2,&x3);           //x3.rsub(t2);
+    FP4_YYY_mul(&y3,&y3,&t0);               //y3.mul(t0);
+    FP4_YYY_mul(&t1,&t1,&z3);               //t1.mul(z3);
+    FP4_YYY_add(&(P->y),&y3,&t1);           //y3.add(t1);
+    FP4_YYY_mul(&t0,&t0,&t3);               //t0.mul(t3);
+    FP4_YYY_mul(&z3,&z3,&t4);               //z3.mul(t4);
+    FP4_YYY_add(&(P->z),&z3,&t0);           //z3.add(t0);
+
+
+    FP4_YYY_norm(&(P->x));                  //x.norm();
+    FP4_YYY_norm(&(P->y));                  //y.norm();
+    FP4_YYY_norm(&(P->z));                  //z.norm();
+
+    return 0;
+}
+
+/* Set P-=Q by adding a negated copy of Q, so Q itself is not modified */
+/* SU= 16 */
+void ECP4_ZZZ_sub(ECP4_ZZZ *P,ECP4_ZZZ *Q)
+{
+    ECP4_ZZZ minusQ;
+
+    ECP4_ZZZ_copy(&minusQ,Q);
+    ECP4_ZZZ_neg(&minusQ);
+
+    ECP4_ZZZ_add(P,&minusQ);
+}
+
+
+/* Apply FP4_YYY_reduce to each of the three coordinates of P */
+void ECP4_ZZZ_reduce(ECP4_ZZZ *P)
+{
+    FP4_YYY *c[3];
+    int k;
+
+    c[0]=&(P->x);
+    c[1]=&(P->y);
+    c[2]=&(P->z);
+
+    for (k=0; k<3; k++)
+    {
+        FP4_YYY_reduce(c[k]);
+    }
+}
+
+/* P*=e */
+/* Uses a table of the odd multiples P,3P,...,15P and signed 4-bit window
+   digits. The multiplier is first bumped to an odd value; the amount added
+   is subtracted again, as a point, at the end. P is returned in affine
+   form. If P is infinity on entry it is returned unchanged. */
+/* SU= 280 */
+void ECP4_ZZZ_mul(ECP4_ZZZ *P,BIG_XXX e)
+{
+    /* fixed size windows */
+    int i,nb,s,ns;
+    BIG_XXX mt,t;
+    ECP4_ZZZ Q,W[8],C;
+    sign8 w[1+(NLEN_XXX*BASEBITS_XXX+3)/4];
+
+    if (ECP4_ZZZ_isinf(P)) return;
+
+    /* precompute table: Q=2P, W[0]=P, W[i]=W[i-1]+2P */
+
+    ECP4_ZZZ_copy(&Q,P);
+    ECP4_ZZZ_dbl(&Q);
+    ECP4_ZZZ_copy(&W[0],P);
+
+    for (i=1; i<8; i++)
+    {
+        ECP4_ZZZ_copy(&W[i],&W[i-1]);
+        ECP4_ZZZ_add(&W[i],&Q);
+    }
+
+    /* make exponent odd: t=e+1, replaced by mt=e+2 when s (parity of e) is set.
+       Q, which holds 2P, is replaced by P when ns (parity of e+1) is set, so
+       C ends up as the point matching the amount added to e.
+       NOTE(review): presumably BIG_XXX_parity returns the low bit - confirm */
+    BIG_XXX_copy(t,e);
+    s=BIG_XXX_parity(t);
+    BIG_XXX_inc(t,1);
+    BIG_XXX_norm(t);
+    ns=BIG_XXX_parity(t);
+    BIG_XXX_copy(mt,t);
+    BIG_XXX_inc(mt,1);
+    BIG_XXX_norm(mt);
+    BIG_XXX_cmove(t,mt,s);
+    ECP4_ZZZ_cmove(&Q,P,ns);
+    ECP4_ZZZ_copy(&C,&Q);
+
+    nb=1+(BIG_XXX_nbits(t)+3)/4;
+
+    /* convert exponent to signed 4-bit window */
+    for (i=0; i<nb; i++)
+    {
+        w[i]=BIG_XXX_lastbits(t,5)-16;
+        BIG_XXX_dec(t,w[i]);
+        BIG_XXX_norm(t);
+        BIG_XXX_fshr(t,4);
+    }
+    w[nb]=BIG_XXX_lastbits(t,5);
+
+    /* start from the table entry for the top digit, then for each lower
+       digit: four doublings followed by one constant-time table addition */
+    ECP4_ZZZ_copy(P,&W[(w[nb]-1)/2]);
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP4_ZZZ_select(&Q,W,w[i]);
+        ECP4_ZZZ_dbl(P);
+        ECP4_ZZZ_dbl(P);
+        ECP4_ZZZ_dbl(P);
+        ECP4_ZZZ_dbl(P);
+        ECP4_ZZZ_add(P,&Q);
+    }
+    ECP4_ZZZ_sub(P,&C); /* apply correction */
+       ECP4_ZZZ_affine(P);
+}
+
+// calculate frobenius constants
+// Fills F[0..2] from the ROM values Fra_YYY/Frb_YYY. The exponent comments
+// below are carried over from the original source; where a line has two,
+// the second describes the value after the M_TYPE branch has run.
+void ECP4_ZZZ_frob_constants(FP2_YYY F[3])
+{
+    FP_YYY fx,fy;
+    FP2_YYY X;
+
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+
+    FP2_YYY_sqr(&F[0],&X);              // FF=F^2=(1+i)^(p-7)/6
+    FP2_YYY_copy(&F[2],&F[0]);
+    FP2_YYY_mul_ip(&F[2]);              // W=(1+i)^6/6.(1+i)^(p-7)/6 = (1+i)^(p-1)/6
+    FP2_YYY_norm(&F[2]);
+    FP2_YYY_sqr(&F[1],&F[2]);
+    FP2_YYY_mul(&F[2],&F[2],&F[1]);     // W=(1+i)^(p-1)/2
+
+    FP2_YYY_copy(&F[1],&X);
+
+#if SEXTIC_TWIST_ZZZ == M_TYPE 
+    FP2_YYY_mul_ip(&F[1]);              // (1+i)^12/12.(1+i)^(p-7)/12 = (1+i)^(p+5)/12
+    FP2_YYY_inv(&F[1],&F[1]);           // (1+i)^-(p+5)/12
+    FP2_YYY_sqr(&F[0],&F[1]);           // (1+i)^-(p+5)/6
+#endif
+
+    // FF=(1+i)^(p-7)/6.(1+i) = (1+i)^(p-1)/6
+    // M_TYPE: (1+i)^6/6.(1+i)^-(p+5)/6 = (1+i)^-(p-1)/6
+    FP2_YYY_mul_ip(&F[0]);
+    FP2_YYY_norm(&F[0]);
+    // FFF = (1+i)^(p-7)/12 . (1+i)^(p-1)/6 = (1+i)^(p-3)/4
+    // M_TYPE: (1+i)^-(p+5)/12 . (1+i)^-(p-1)/6 = (1+i)^-(p+1)/4
+    FP2_YYY_mul(&F[1],&F[1],&F[0]);
+
+}
+
+/* Calculates q^n.P using Frobenius constants.
+   F is the array filled by ECP4_ZZZ_frob_constants; n is the number of
+   times the map is applied (n<=0 leaves P unchanged). The exponent comments
+   are carried over from the original source; where two are given, the
+   second is the M_TYPE variant. */
+void ECP4_ZZZ_frob(ECP4_ZZZ *P,FP2_YYY F[3],int n)
+{
+    int i;
+    FP4_YYY X,Y,Z;
+
+    FP4_YYY_copy(&X,&(P->x));
+    FP4_YYY_copy(&Y,&(P->y));
+    FP4_YYY_copy(&Z,&(P->z));
+
+    for (i=0; i<n; i++)
+    {
+        FP4_YYY_frob(&X,&F[2]);         // X^p
+        // X^p.(1+i)^(p-1)/6
+        // M_TYPE: X^p.(1+i)^-(p-1)/6
+        FP4_YYY_pmul(&X,&X,&F[0]);
+
+        FP4_YYY_frob(&Y,&F[2]);         // Y^p
+        FP4_YYY_pmul(&Y,&Y,&F[1]);
+        // Y^p.(1+i)^(p-3)/4.(1+i)^(2/4) = Y^p.(1+i)^(p-1)/4
+        // M_TYPE: (1+i)^-(p+1)/4 .(1+i)^2/4 = Y^p.(1+i)^-(p-1)/4
+        FP4_YYY_times_i(&Y);
+
+        FP4_YYY_frob(&Z,&F[2]);
+    }
+
+    FP4_YYY_copy(&(P->x),&X);
+    FP4_YYY_copy(&(P->y),&Y);
+    FP4_YYY_copy(&(P->z),&Z);
+}
+
+/* Side channel attack secure */
+// Bos & Costello https://eprint.iacr.org/2013/458.pdf
+// Faz-Hernandez & Longa & Sanchez  https://eprint.iacr.org/2013/158.pdf
+
+/* Eight-scalar multiplication: combines multipliers u[0..7] with points
+   derived from Q, leaving the affine result in P. u is copied, not modified.
+   Two 8-entry tables are used: T1 holds Q[0] plus every subset sum of
+   Q[1..3]; T2 is T1 with ECP4_ZZZ_frob(...,4) applied to each entry.
+   u[0..3] drive T1 and u[4..7] drive T2.
+   NOTE(review): Q[5..7] are never read, and Q[4] only in the final
+   correction - presumably callers pass Q[4+i] equal to the 4-fold Frobenius
+   image of Q[i]; confirm against callers. */
+void ECP4_ZZZ_mul8(ECP4_ZZZ *P,ECP4_ZZZ Q[8],BIG_XXX u[8])
+{
+    int i,j,k,nb,pb1,pb2,bt;
+       ECP4_ZZZ T1[8],T2[8],W;
+    BIG_XXX mt,t[8];
+    sign8 w1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s2[NLEN_XXX*BASEBITS_XXX+1]; 
+       FP2_YYY X[3];
+
+       ECP4_ZZZ_frob_constants(X);
+
+    /* work on private copies of the multipliers */
+    for (i=0; i<8; i++)
+       {
+        BIG_XXX_copy(t[i],u[i]);
+       }
+
+// Precomputed table
+    ECP4_ZZZ_copy(&T1[0],&Q[0]); // Q[0]
+    ECP4_ZZZ_copy(&T1[1],&T1[0]);
+       ECP4_ZZZ_add(&T1[1],&Q[1]);     // Q[0]+Q[1]
+    ECP4_ZZZ_copy(&T1[2],&T1[0]);
+       ECP4_ZZZ_add(&T1[2],&Q[2]);     // Q[0]+Q[2]
+       ECP4_ZZZ_copy(&T1[3],&T1[1]);
+       ECP4_ZZZ_add(&T1[3],&Q[2]);     // Q[0]+Q[1]+Q[2]
+       ECP4_ZZZ_copy(&T1[4],&T1[0]);
+       ECP4_ZZZ_add(&T1[4],&Q[3]);  // Q[0]+Q[3]
+       ECP4_ZZZ_copy(&T1[5],&T1[1]);
+       ECP4_ZZZ_add(&T1[5],&Q[3]);     // Q[0]+Q[1]+Q[3]
+       ECP4_ZZZ_copy(&T1[6],&T1[2]);
+       ECP4_ZZZ_add(&T1[6],&Q[3]);     // Q[0]+Q[2]+Q[3]
+       ECP4_ZZZ_copy(&T1[7],&T1[3]);
+       ECP4_ZZZ_add(&T1[7],&Q[3]);     // Q[0]+Q[1]+Q[2]+Q[3]
+
+//  Use Frobenius 
+
+       for (i=0;i<8;i++)
+       {
+               ECP4_ZZZ_copy(&T2[i],&T1[i]);
+               ECP4_ZZZ_frob(&T2[i],X,4);
+       }
+
+// Make them odd
+// pb1/pb2 record whether 1 was added to t[0]/t[4]; undone in the corrections below
+       pb1=1-BIG_XXX_parity(t[0]);
+       BIG_XXX_inc(t[0],pb1);
+       BIG_XXX_norm(t[0]);
+
+       pb2=1-BIG_XXX_parity(t[4]);
+       BIG_XXX_inc(t[4],pb2);
+       BIG_XXX_norm(t[4]);
+
+// Number of bits
+// taken from the OR of all eight multipliers, plus one
+    BIG_XXX_zero(mt);
+    for (i=0; i<8; i++)
+    {
+        BIG_XXX_or(mt,mt,t[i]);
+    }
+    nb=1+BIG_XXX_nbits(mt);
+
+// Sign pivot 
+// s1/s2 get +1 or -1 per position from successive bits of t[0]/t[4]; the top entry is fixed at +1
+       s1[nb-1]=1;
+       s2[nb-1]=1;
+       for (i=0;i<nb-1;i++)
+       {
+        BIG_XXX_fshr(t[0],1);
+               s1[i]=2*BIG_XXX_parity(t[0])-1;
+        BIG_XXX_fshr(t[4],1);
+               s2[i]=2*BIG_XXX_parity(t[4])-1;
+       }
+
+
+// Recoded exponents
+// w1[i]/w2[i] accumulate the signed low bits of t[1..3]/t[5..7], weighted 1,2,4
+    for (i=0; i<nb; i++)
+    {
+               w1[i]=0;
+               k=1;
+               for (j=1; j<4; j++)
+               {
+                       bt=s1[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w1[i]+=bt*k;
+                       k*=2;
+        }
+
+               w2[i]=0;
+               k=1;
+               for (j=5; j<8; j++)
+               {
+                       bt=s2[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w2[i]+=bt*k;
+                       k*=2;
+        }
+    }  
+
+// Main loop
+// top position first, then one doubling and two table additions per remaining position
+       ECP4_ZZZ_select(P,T1,2*w1[nb-1]+1);
+       ECP4_ZZZ_select(&W,T2,2*w2[nb-1]+1);
+       ECP4_ZZZ_add(P,&W);
+    for (i=nb-2; i>=0; i--)
+    {
+        ECP4_ZZZ_dbl(P);
+        ECP4_ZZZ_select(&W,T1,2*w1[i]+s1[i]);
+        ECP4_ZZZ_add(P,&W);
+        ECP4_ZZZ_select(&W,T2,2*w2[i]+s2[i]);
+        ECP4_ZZZ_add(P,&W);
+    }
+
+// apply corrections
+// both subtractions are always computed; each is kept only when its pb flag is set
+       ECP4_ZZZ_copy(&W,P);   
+       ECP4_ZZZ_sub(&W,&Q[0]);
+       ECP4_ZZZ_cmove(P,&W,pb1);
+       ECP4_ZZZ_copy(&W,P);   
+       ECP4_ZZZ_sub(&W,&Q[4]);
+       ECP4_ZZZ_cmove(P,&W,pb2);
+
+       ECP4_ZZZ_affine(P);
+}
+
+/* Map a hash value, given as a BIG in octet W, to a point on G2.
+   The value is reduced mod the field modulus and used, with real part 1,
+   as a candidate FP2 (lifted to FP4) x-coordinate; it is incremented until
+   ECP4_ZZZ_setx accepts it. The resulting point is then combined with its
+   multiples by the curve parameter x and their Frobenius images according
+   to the formula quoted below, and returned in affine form.
+   NOTE(review): W->len is not checked before BIG_XXX_fromBytes reads
+   W->val - confirm callers always supply a full-size BIG. */
+
+void ECP4_ZZZ_mapit(ECP4_ZZZ *Q,octet *W)
+{
+    BIG_XXX q,one,x,hv;
+    FP2_YYY X[3],T;
+    FP4_YYY X4;
+
+    ECP4_ZZZ xQ, x2Q, x3Q, x4Q;
+
+    BIG_XXX_fromBytes(hv,W->val);
+    BIG_XXX_rcopy(q,Modulus_YYY);
+    BIG_XXX_one(one);
+    BIG_XXX_mod(hv,q);
+
+    /* try successive x-coordinates until one lies on the curve */
+    for (;;)
+    {
+        FP2_YYY_from_BIGs(&T,one,hv);  /*******/
+        FP4_YYY_from_FP2(&X4,&T);
+        if (ECP4_ZZZ_setx(Q,&X4)) break;
+        BIG_XXX_inc(hv,1);
+    }
+
+    ECP4_ZZZ_frob_constants(X);
+
+    BIG_XXX_rcopy(x,CURVE_Bnx_ZZZ);
+
+    // Efficient hash maps to G2 on BLS24 curves - Budroni, Pintore
+    // Q -> x4Q -x3Q -Q + F(x3Q-x2Q) + F(F(x2Q-xQ)) + F(F(F(xQ-Q))) + F(F(F(F(2Q))))
+
+    ECP4_ZZZ_copy(&xQ,Q);
+    ECP4_ZZZ_mul(&xQ,x);
+    ECP4_ZZZ_copy(&x2Q,&xQ);
+    ECP4_ZZZ_mul(&x2Q,x);
+    ECP4_ZZZ_copy(&x3Q,&x2Q);
+    ECP4_ZZZ_mul(&x3Q,x);
+    ECP4_ZZZ_copy(&x4Q,&x3Q);
+    ECP4_ZZZ_mul(&x4Q,x);
+
+    /* CURVE_Bnx holds a magnitude; odd powers change sign when x is negative */
+#if SIGN_OF_X_ZZZ==NEGATIVEX
+    ECP4_ZZZ_neg(&xQ);
+    ECP4_ZZZ_neg(&x3Q);
+#endif
+
+    ECP4_ZZZ_sub(&x4Q,&x3Q);
+    ECP4_ZZZ_sub(&x4Q,Q);           // x4Q - x3Q - Q
+
+    ECP4_ZZZ_sub(&x3Q,&x2Q);
+    ECP4_ZZZ_frob(&x3Q,X,1);        // F(x3Q-x2Q)
+
+    ECP4_ZZZ_sub(&x2Q,&xQ);
+    ECP4_ZZZ_frob(&x2Q,X,2);        // F(F(x2Q-xQ))
+
+    ECP4_ZZZ_sub(&xQ,Q);
+    ECP4_ZZZ_frob(&xQ,X,3);         // F(F(F(xQ-Q)))
+
+    ECP4_ZZZ_dbl(Q);
+    ECP4_ZZZ_frob(Q,X,4);           // F(F(F(F(2Q))))
+
+    ECP4_ZZZ_add(Q,&x4Q);
+    ECP4_ZZZ_add(Q,&x3Q);
+    ECP4_ZZZ_add(Q,&x2Q);
+    ECP4_ZZZ_add(Q,&xQ);
+
+    ECP4_ZZZ_affine(Q);
+
+}
+
+// ECP$ Get Group Generator
+
+void ECP4_ZZZ_generator(ECP4_ZZZ *G)
+{
+       BIG_XXX a,b;
+       FP2_YYY Aa,Bb;
+       FP4_YYY X,Y;
+
+       BIG_XXX_rcopy(a,CURVE_Pxaa_ZZZ);
+       BIG_XXX_rcopy(b,CURVE_Pxab_ZZZ);
+       FP2_YYY_from_BIGs(&Aa,a,b);
+
+       BIG_XXX_rcopy(a,CURVE_Pxba_ZZZ);
+       BIG_XXX_rcopy(b,CURVE_Pxbb_ZZZ);
+       FP2_YYY_from_BIGs(&Bb,a,b);
+
+       FP4_YYY_from_FP2s(&X,&Aa,&Bb);
+
+       BIG_XXX_rcopy(a,CURVE_Pyaa_ZZZ);
+       BIG_XXX_rcopy(b,CURVE_Pyab_ZZZ);
+       FP2_YYY_from_BIGs(&Aa,a,b);
+
+       BIG_XXX_rcopy(a,CURVE_Pyba_ZZZ);
+       BIG_XXX_rcopy(b,CURVE_Pybb_ZZZ);
+       FP2_YYY_from_BIGs(&Bb,a,b);
+
+       FP4_YYY_from_FP2s(&Y,&Aa,&Bb);
+
+       ECP4_ZZZ_set(G,&X,&Y);
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ecp4.h
----------------------------------------------------------------------
diff --git a/version3/c/ecp4.h b/version3/c/ecp4.h
new file mode 100644
index 0000000..8e022c9
--- /dev/null
+++ b/version3/c/ecp4.h
@@ -0,0 +1,232 @@
+#ifndef ECP4_ZZZ_H
+#define ECP4_ZZZ_H
+
+#include "fp4_YYY.h"
+#include "config_curve_ZZZ.h"
+
+
+/**
+       @brief ECP4 Structure - Elliptic Curve Point with FP4 coordinates, held in projective (x,y,z) form
+*/
+
+typedef struct
+{
+    FP4_YYY x;   /**< x-coordinate of point */
+    FP4_YYY y;   /**< y-coordinate of point */
+       FP4_YYY z;   /**< z-coordinate of point */
+} ECP4_ZZZ;
+
+
+/* Curve Params - see rom.c */
+extern const int CURVE_A_ZZZ;          /**< Elliptic curve A parameter */
+extern const int CURVE_B_I_ZZZ;                /**< Elliptic curve B parameter as a small integer */
+extern const BIG_XXX CURVE_B_ZZZ;     /**< Elliptic curve B parameter */
+extern const BIG_XXX CURVE_Order_ZZZ; /**< Elliptic curve group order */
+extern const BIG_XXX CURVE_Cof_ZZZ;   /**< Elliptic curve cofactor */
+extern const BIG_XXX CURVE_Bnx_ZZZ;   /**< Elliptic curve parameter */
+
+extern const BIG_XXX Fra_YYY; /**< real part of curve Frobenius Constant */
+extern const BIG_XXX Frb_YYY; /**< imaginary part of curve Frobenius Constant */
+
+/* Generator point on G1 */
+extern const BIG_XXX CURVE_Gx_ZZZ; /**< x-coordinate of generator point in group G1  */
+extern const BIG_XXX CURVE_Gy_ZZZ; /**< y-coordinate of generator point in group G1  */
+
+/* For Pairings only */
+
+/* Generator point on G2. Each coordinate is an FP4 assembled from two FP2 halves;
+   the third letter of the name (a/b) selects the half, the fourth its real/imaginary part */
+extern const BIG_XXX CURVE_Pxaa_ZZZ; /**< real part of first FP2 half of x-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pxab_ZZZ; /**< imaginary part of first FP2 half of x-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pxba_ZZZ; /**< real part of second FP2 half of x-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pxbb_ZZZ; /**< imaginary part of second FP2 half of x-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pyaa_ZZZ; /**< real part of first FP2 half of y-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pyab_ZZZ; /**< imaginary part of first FP2 half of y-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pyba_ZZZ; /**< real part of second FP2 half of y-coordinate of generator point in group G2 */
+extern const BIG_XXX CURVE_Pybb_ZZZ; /**< imaginary part of second FP2 half of y-coordinate of generator point in group G2 */
+
+/* ECP4 E(FP4) prototypes */
+/**    @brief Tests for ECP4 point equal to infinity
+ *
+       @param P ECP4 point to be tested
+       @return 1 if infinity, else returns 0
+ */
+extern int ECP4_ZZZ_isinf(ECP4_ZZZ *P);
+/**    @brief Copy ECP4 point to another ECP4 point
+ *
+       @param P ECP4 instance, on exit = Q
+       @param Q ECP4 instance to be copied
+ */
+extern void ECP4_ZZZ_copy(ECP4_ZZZ *P,ECP4_ZZZ *Q);
+/**    @brief Set ECP4 to point-at-infinity
+ *
+       @param P ECP4 instance to be set to infinity
+ */
+extern void ECP4_ZZZ_inf(ECP4_ZZZ *P);
+/**    @brief Tests for equality of two ECP4s
+ *
+       The points are compared in projective form; neither is modified.
+       @param P ECP4 instance to be compared
+       @param Q ECP4 instance to be compared
+       @return 1 if P=Q, else returns 0
+ */
+extern int ECP4_ZZZ_equals(ECP4_ZZZ *P,ECP4_ZZZ *Q);
+
+/**    @brief Converts an ECP4 point from Projective (x,y,z) coordinates to affine (x,y) coordinates
+ *
+       @param P ECP4 instance to be converted to affine form
+ */
+extern void ECP4_ZZZ_affine(ECP4_ZZZ *P);
+
+/**    @brief Extract x and y coordinates of an ECP4 point P
+ *
+       P itself is not modified. x and y are not written if P is the point-at-infinity.
+       @param x FP4 on exit = x coordinate of point
+       @param y FP4 on exit = y coordinate of point
+       @param P ECP4 instance (x,y)
+       @return -1 if P is point-at-infinity, else 0
+ */
+extern int ECP4_ZZZ_get(FP4_YYY *x,FP4_YYY *y,ECP4_ZZZ *P);
+/**    @brief Formats and outputs an ECP4 point to the console, converted to affine coordinates
+ *
+       @param P ECP4 instance to be printed
+ */
+extern void ECP4_ZZZ_output(ECP4_ZZZ *P);
+
+/**    @brief Formats and outputs an ECP4 point to an octet string
+ *
+       The octet string is created in the form x|y.
+       Convert the real and imaginary parts of the x and y coordinates to big-endian base 256 form.
+       @param S output octet string
+       @param P ECP4 instance to be converted to an octet string
+ */
+extern void ECP4_ZZZ_toOctet(octet *S,ECP4_ZZZ *P);
+/**    @brief Creates an ECP4 point from an octet string
+ *
+       The octet string is in the form x|y
+       The real and imaginary parts of the x and y coordinates are in big-endian base 256 form.
+       @param P ECP4 instance to be created from the octet string
+       @param S input octet string
+       @return 1 if octet string corresponds to a point on the curve, else 0
+ */
+extern int ECP4_ZZZ_fromOctet(ECP4_ZZZ *P,octet *S);
+/**    @brief Calculate Right Hand Side of curve equation y^2=f(x)
+ *
+       Function f(x)=x^3+B', where B' is the curve constant B divided by i (D-type twist) or multiplied by i (M-type twist); A is assumed to be 0.
+       Used internally.
+       @param r FP4 value of f(x)
+       @param x FP4 instance
+ */
+extern void ECP4_ZZZ_rhs(FP4_YYY *r,FP4_YYY *x);
+/**    @brief Set ECP4 to point(x,y) given x and y
+ *
+       Point P set to infinity if no such point on the curve.
+       @param P ECP4 instance to be set (x,y)
+       @param x FP4 x coordinate of point
+       @param y FP4 y coordinate of point
+       @return 1 if point exists, else 0
+ */
+extern int ECP4_ZZZ_set(ECP4_ZZZ *P,FP4_YYY *x,FP4_YYY *y);
+/**    @brief Set ECP4 to point(x,[y]) given x
+ *
+       Point P set to infinity if no such point on the curve. Otherwise y coordinate is calculated from x.
+       @param P ECP4 instance to be set (x,[y])
+       @param x FP4 x coordinate of point
+       @return 1 if point exists, else 0
+ */
+extern int ECP4_ZZZ_setx(ECP4_ZZZ *P,FP4_YYY *x);
+/**    @brief Negation of an ECP4 point
+ *
+       @param P ECP4 instance, on exit = -P
+ */
+extern void ECP4_ZZZ_neg(ECP4_ZZZ *P);
+
+/**    @brief Reduction of an ECP4 point
+ *
+       @param P ECP4 instance, on exit (x,y,z) are reduced wrt the modulus
+ */
+extern void ECP4_ZZZ_reduce(ECP4_ZZZ *P);
+
+
+/**    @brief Doubles an ECP4 instance P
+ *
+       @param P ECP4 instance, on exit =2*P
+       @return always 1
+ */
+extern int ECP4_ZZZ_dbl(ECP4_ZZZ *P);
+/**    @brief Adds ECP4 instance Q to ECP4 instance P
+ *
+       @param P ECP4 instance, on exit =P+Q
+       @param Q ECP4 instance to be added to P
+       @return always 0
+ */
+extern int ECP4_ZZZ_add(ECP4_ZZZ *P,ECP4_ZZZ *Q);
+/**    @brief Subtracts ECP4 instance Q from ECP4 instance P
+ *
+       @param P ECP4 instance, on exit =P-Q
+       @param Q ECP4 instance to be subtracted from P
+ */
+extern void ECP4_ZZZ_sub(ECP4_ZZZ *P,ECP4_ZZZ *Q);
+/**    @brief Multiplies an ECP4 instance P by a BIG, side-channel resistant
+ *
+       Uses fixed sized windows. The result is left in affine form.
+       @param P ECP4 instance, on exit =b*P
+       @param b BIG number multiplier
+
+ */
+extern void ECP4_ZZZ_mul(ECP4_ZZZ *P,BIG_XXX b);
+
+/**    @brief Calculates required Frobenius constants
+ *
+       Calculate Frobenius constants
+       @param F array of 3 FP2 constants, filled in on exit
+
+ */
+extern void ECP4_ZZZ_frob_constants(FP2_YYY F[3]);
+
+/**    @brief Multiplies an ECP4 instance P by the internal modulus p^n, using precalculated Frobenius constants
+ *
+       Fast point multiplication using Frobenius
+       @param P ECP4 instance, on exit = p^n*P
+       @param F array of FP2 precalculated Frobenius constant
+       @param n power of prime, i.e. the number of times the Frobenius map is applied
+
+ */
+extern void ECP4_ZZZ_frob(ECP4_ZZZ *P,FP2_YYY F[3],int n);
+
+/**    @brief Calculates P=Sigma b[i]*Q[i] for i=0 to 7
+ *
+       @param P ECP4 instance, on exit = Sigma b[i]*Q[i] for i=0 to 7
+       @param Q ECP4 array of 8 points
+       @param b BIG array of 8 multipliers
+ */
+extern void ECP4_ZZZ_mul8(ECP4_ZZZ *P,ECP4_ZZZ *Q,BIG_XXX *b);
+
+
+/**    @brief Maps random BIG to curve point of correct order
+ *
+       @param P ECP4 instance of correct order
+       @param w OCTET byte array to be mapped
+ */
+extern void ECP4_ZZZ_mapit(ECP4_ZZZ *P,octet *w);
+
+/**    @brief Get Group Generator from ROM
+ *
+       @param G ECP4 instance
+ */
+extern void ECP4_ZZZ_generator(ECP4_ZZZ *G);
+
+
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ecp8.c
----------------------------------------------------------------------
diff --git a/version3/c/ecp8.c b/version3/c/ecp8.c
new file mode 100644
index 0000000..dff3594
--- /dev/null
+++ b/version3/c/ecp8.c
@@ -0,0 +1,1025 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Weierstrass elliptic curve functions over FP8 */
+
+//#include <iostream>
+#include "ecp8_ZZZ.h"
+
+
+/* Test P for the point at infinity: returns the bitwise AND of the zero tests on x and z */
+int ECP8_ZZZ_isinf(ECP8_ZZZ *P)
+{
+    int xzero=FP8_YYY_iszilch(&P->x);
+    int zzero=FP8_YYY_iszilch(&P->z);
+
+    return (xzero & zzero);
+}
+
+/* Copy point Q into P, one coordinate at a time */
+void ECP8_ZZZ_copy(ECP8_ZZZ *P,ECP8_ZZZ *Q)
+{
+    FP8_YYY_copy(&P->x,&Q->x);
+    FP8_YYY_copy(&P->y,&Q->y);
+    FP8_YYY_copy(&P->z,&Q->z);
+}
+
+/* Set P to the point at infinity, stored as (x,y,z)=(0,1,0) */
+void ECP8_ZZZ_inf(ECP8_ZZZ *P)
+{
+    FP8_YYY_zero(&P->x);
+    FP8_YYY_zero(&P->z);
+    FP8_YYY_one(&P->y);
+}
+
+/* Conditionally move Q into P: each coordinate is handed to FP8_YYY_cmove with the same flag d */
+static void ECP8_ZZZ_cmove(ECP8_ZZZ *P,ECP8_ZZZ *Q,int d)
+{
+    FP8_YYY_cmove(&P->x,&Q->x,d);
+    FP8_YYY_cmove(&P->y,&Q->y,d);
+    FP8_YYY_cmove(&P->z,&Q->z,d);
+}
+
+/* Branch-free equality test: returns 1 if b==c.
+   b^c is 0 exactly when the inputs match; subtracting 1 then yields -1, whose sign bit is returned. */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 diff=(b^c)-1;
+
+    return (int)((diff>>31)&1);
+}
+
+/* Constant time select from pre-computed table.
+   Loads P with W[(|b|-1)/2] and negates it when the sign bit of b is set.
+   All 8 table entries are passed through the conditional move whatever b is. */
+static void ECP8_ZZZ_select(ECP8_ZZZ *P,ECP8_ZZZ W[],sign32 b)
+{
+    int i;
+    ECP8_ZZZ negP;
+    sign32 sgn=b>>31;                 /* sign mask of b */
+    sign32 idx=((b^sgn)-sgn-1)/2;     /* (|b|-1)/2 */
+
+    for (i=0; i<8; i++)
+    {
+        ECP8_ZZZ_cmove(P,&W[i],teq(idx,i));
+    }
+
+    /* build -P and move it in only when b was negative */
+    ECP8_ZZZ_copy(&negP,P);
+    ECP8_ZZZ_neg(&negP);
+    ECP8_ZZZ_cmove(P,&negP,(int)(sgn&1));
+}
+
+/* Convert P to affine form (z=1).
+   The point at infinity is left untouched. If z is already 1 only x and y are reduced. */
+void ECP8_ZZZ_affine(ECP8_ZZZ *P)
+{
+    FP8_YYY unit,zinv;
+
+    if (ECP8_ZZZ_isinf(P)) return;
+
+    FP8_YYY_one(&unit);
+    if (!FP8_YYY_isunity(&(P->z)))
+    {
+        /* divide x and y by z, then set z=1 */
+        FP8_YYY_inv(&zinv,&(P->z));
+        FP8_YYY_mul(&(P->x),&(P->x),&zinv);
+        FP8_YYY_mul(&(P->y),&(P->y),&zinv);
+        FP8_YYY_copy(&(P->z),&unit);
+    }
+
+    FP8_YYY_reduce(&(P->x));
+    FP8_YYY_reduce(&(P->y));
+}
+
+/* Compare two projective points by cross-multiplying with the other point's z.
+   Returns 1 if P==Q, else 0 */
+/* SU= 312 */
+int ECP8_ZZZ_equals(ECP8_ZZZ *P,ECP8_ZZZ *Q)
+{
+    FP8_YYY lhs,rhs;
+
+    /* x1.z2 against x2.z1 */
+    FP8_YYY_mul(&lhs,&(P->x),&(Q->z));
+    FP8_YYY_mul(&rhs,&(Q->x),&(P->z));
+    if (!FP8_YYY_equals(&lhs,&rhs)) return 0;
+
+    /* y1.z2 against y2.z1 */
+    FP8_YYY_mul(&lhs,&(P->y),&(Q->z));
+    FP8_YYY_mul(&rhs,&(Q->y),&(P->z));
+    return FP8_YYY_equals(&lhs,&rhs) ? 1 : 0;
+}
+
+/* Extract affine x, y from point P without modifying P.
+   Returns -1 (x and y untouched) if P is the point at infinity, else 0 */
+int ECP8_ZZZ_get(FP8_YYY *x,FP8_YYY *y,ECP8_ZZZ *P)
+{
+    ECP8_ZZZ aff;
+
+    ECP8_ZZZ_copy(&aff,P);
+    ECP8_ZZZ_affine(&aff);
+    if (ECP8_ZZZ_isinf(&aff))
+    {
+        return -1;
+    }
+
+    FP8_YYY_copy(y,&(aff.y));
+    FP8_YYY_copy(x,&(aff.x));
+    return 0;
+}
+
+/* Print point P to stdout as "(x,y)" in affine form, or "Infinity" */
+void ECP8_ZZZ_output(ECP8_ZZZ *P)
+{
+    FP8_YYY px,py;
+
+    if (!ECP8_ZZZ_isinf(P))
+    {
+        ECP8_ZZZ_get(&px,&py,P);
+        printf("(");
+        FP8_YYY_output(&px);
+        printf(",");
+        FP8_YYY_output(&py);
+        printf(")\n");
+    }
+    else
+    {
+        printf("Infinity\n");
+    }
+}
+
+/* Convert Q to octet string.
+   Writes the 16 base-field components of the affine x then y coordinate, MODBYTES_XXX bytes each,
+   and sets W->len to 16*MODBYTES_XXX. The return value of ECP8_ZZZ_get is not checked. */
+void ECP8_ZZZ_toOctet(octet *W,ECP8_ZZZ *Q)
+{
+    int i;
+    BIG_XXX b;
+    FP8_YYY qx,qy;
+    FP2_YYY *part[8];
+
+    ECP8_ZZZ_get(&qx,&qy,Q);
+
+    /* the eight FP2 parts in output order: x first, then y */
+    part[0]=&(qx.a.a);
+    part[1]=&(qx.a.b);
+    part[2]=&(qx.b.a);
+    part[3]=&(qx.b.b);
+    part[4]=&(qy.a.a);
+    part[5]=&(qy.a.b);
+    part[6]=&(qy.b.a);
+    part[7]=&(qy.b.b);
+
+    for (i=0; i<8; i++)
+    {
+        FP_YYY_redc(b,&(part[i]->a));
+        BIG_XXX_toBytes(&(W->val[(2*i)*MODBYTES_XXX]),b);
+        FP_YYY_redc(b,&(part[i]->b));
+        BIG_XXX_toBytes(&(W->val[(2*i+1)*MODBYTES_XXX]),b);
+    }
+
+    W->len=16*MODBYTES_XXX;
+}
+
+/* Restore Q from octet string.
+   Reads 16 base-field components of MODBYTES_XXX bytes each (x first, then y) and passes
+   the rebuilt coordinates to ECP8_ZZZ_set. Returns 1 if that call accepts the point, else 0.
+   W->len is not examined. */
+int ECP8_ZZZ_fromOctet(ECP8_ZZZ *Q,octet *W)
+{
+    int i;
+    BIG_XXX b;
+    FP8_YYY qx,qy;
+    FP2_YYY *part[8];
+
+    /* the eight FP2 parts in input order: x first, then y */
+    part[0]=&(qx.a.a);
+    part[1]=&(qx.a.b);
+    part[2]=&(qx.b.a);
+    part[3]=&(qx.b.b);
+    part[4]=&(qy.a.a);
+    part[5]=&(qy.a.b);
+    part[6]=&(qy.b.a);
+    part[7]=&(qy.b.b);
+
+    for (i=0; i<8; i++)
+    {
+        BIG_XXX_fromBytes(b,&(W->val[(2*i)*MODBYTES_XXX]));
+        FP_YYY_nres(&(part[i]->a),b);
+        BIG_XXX_fromBytes(b,&(W->val[(2*i+1)*MODBYTES_XXX]));
+        FP_YYY_nres(&(part[i]->b),b);
+    }
+
+    return ECP8_ZZZ_set(Q,&qx,&qy) ? 1 : 0;
+}
+
+/* Calculate RHS of twisted curve equation: x^3+B/i (D_TYPE twist) or x^3+Bi (M_TYPE twist).
+   The result is written to rhs and reduced. CURVE_A is assumed to be 0. */
+void ECP8_ZZZ_rhs(FP8_YYY *rhs,FP8_YYY *x)
+{
+    BIG_XXX bval;
+    FP2_YYY b2;
+    FP4_YYY b4;
+    FP8_YYY acc;
+
+    /* rhs = x^3 */
+    FP8_YYY_sqr(&acc,x);
+    FP8_YYY_mul(rhs,&acc,x);
+
+    /* lift the constant B from BIG through FP2 and FP4 into FP8 */
+    BIG_XXX_rcopy(bval,CURVE_B_ZZZ);
+    FP2_YYY_from_BIG(&b2,bval);
+    FP4_YYY_from_FP2(&b4,&b2);
+    FP8_YYY_from_FP4(&acc,&b4);
+
+    /* IMPORTANT - here we use the correct SEXTIC twist of the curve */
+#if SEXTIC_TWIST_ZZZ == D_TYPE
+    FP8_YYY_div_i(&acc);
+#endif
+
+#if SEXTIC_TWIST_ZZZ == M_TYPE
+    FP8_YYY_times_i(&acc);
+#endif
+
+    FP8_YYY_add(rhs,&acc,rhs);
+    FP8_YYY_reduce(rhs);
+}
+
+/* Set P=(x,y) with z=1 when y^2 equals the curve RHS at x, and return 1.
+   Otherwise P is set to infinity and 0 is returned. */
+/* SU= 232 */
+int ECP8_ZZZ_set(ECP8_ZZZ *P,FP8_YYY *x,FP8_YYY *y)
+{
+    FP8_YYY ysq,cubic;
+
+    FP8_YYY_sqr(&ysq,y);
+    ECP8_ZZZ_rhs(&cubic,x);
+
+    if (FP8_YYY_equals(&ysq,&cubic))
+    {
+        FP8_YYY_copy(&(P->x),x);
+        FP8_YYY_copy(&(P->y),y);
+        FP8_YYY_one(&(P->z));
+        return 1;
+    }
+
+    ECP8_ZZZ_inf(P);
+    return 0;
+}
+
+/* Set P=(x,y) with z=1, where y is the square root FP8_YYY_sqrt finds for the curve RHS at x.
+   Returns 1 on success; if no root is found P is set to infinity and 0 is returned. */
+/* SU= 232 */
+int ECP8_ZZZ_setx(ECP8_ZZZ *P,FP8_YYY *x)
+{
+    FP8_YYY root;
+
+    ECP8_ZZZ_rhs(&root,x);
+
+    if (FP8_YYY_sqrt(&root,&root))
+    {
+        FP8_YYY_copy(&(P->x),x);
+        FP8_YYY_copy(&(P->y),&root);
+        FP8_YYY_one(&(P->z));
+        return 1;
+    }
+
+    ECP8_ZZZ_inf(P);
+    return 0;
+}
+
+/* Set P=-P by negating the y coordinate (normalised before and after) */
+/* SU= 8 */
+void ECP8_ZZZ_neg(ECP8_ZZZ *P)
+{
+    FP8_YYY *py=&(P->y);
+
+    FP8_YYY_norm(py);
+    FP8_YYY_neg(py,py);
+    FP8_YYY_norm(py);
+}
+
+
+
+/* Set P=2P, working in place on the projective coordinates. */
+/* Always returns 1; no infinity or error code is produced by this routine. */
+int ECP8_ZZZ_dbl(ECP8_ZZZ *P)
+{
+    FP8_YYY t0,t1,t2,iy,x3,y3;
+
+    /* iy = y, times i on a D-type twist */
+    FP8_YYY_copy(&iy,&(P->y));
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP8_YYY_times_i(&iy);
+#endif
+
+    /* t0 = y^2, times i on a D-type twist */
+    FP8_YYY_sqr(&t0,&(P->y));
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP8_YYY_times_i(&t0);
+#endif
+
+    FP8_YYY_mul(&t1,&iy,&(P->z));           /* t1 = iy.z */
+    FP8_YYY_sqr(&t2,&(P->z));               /* t2 = z^2 */
+
+    /* z = 8.t0 */
+    FP8_YYY_add(&(P->z),&t0,&t0);
+    FP8_YYY_norm(&(P->z));
+    FP8_YYY_add(&(P->z),&(P->z),&(P->z));
+    FP8_YYY_add(&(P->z),&(P->z),&(P->z));
+    FP8_YYY_norm(&(P->z));
+
+    /* t2 = 3b.z^2, times i on an M-type twist */
+    FP8_YYY_imul(&t2,&t2,3*CURVE_B_I_ZZZ);
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP8_YYY_times_i(&t2);
+#endif
+
+    FP8_YYY_mul(&x3,&t2,&(P->z));           /* x3 = t2.z */
+
+    FP8_YYY_add(&y3,&t0,&t2);               /* y3 = t0+t2 */
+    FP8_YYY_norm(&y3);
+    FP8_YYY_mul(&(P->z),&(P->z),&t1);       /* z = z.t1 */
+
+    /* t2 = 3.t2, then t0 = t0-t2 */
+    FP8_YYY_add(&t1,&t2,&t2);
+    FP8_YYY_add(&t2,&t2,&t1);
+    FP8_YYY_norm(&t2);
+    FP8_YYY_sub(&t0,&t0,&t2);
+    FP8_YYY_norm(&t0);                      /* y^2-9bz^2 */
+    FP8_YYY_mul(&y3,&y3,&t0);
+    FP8_YYY_add(&(P->y),&y3,&x3);           /* (y^2+3z*2)(y^2-9z^2)+3b.z^2.8y^2 */
+
+    FP8_YYY_mul(&t1,&(P->x),&iy);           /* t1 = x.iy */
+
+    FP8_YYY_norm(&t0);
+    FP8_YYY_mul(&(P->x),&t0,&t1);
+    FP8_YYY_add(&(P->x),&(P->x),&(P->x));   /* (y^2-9bz^2)xy2 */
+
+    FP8_YYY_norm(&(P->x));
+
+    FP8_YYY_norm(&(P->y));
+
+    return 1;
+}
+
+/* Set P+=Q, working on projective coordinates; Q is not modified. */
+/* Always returns 0. */
+int ECP8_ZZZ_add(ECP8_ZZZ *P,ECP8_ZZZ *Q)
+{
+    FP8_YYY t0,t1,t2,t3,t4,x3,y3,z3;
+    int b3=3*CURVE_B_I_ZZZ;
+
+    FP8_YYY_mul(&t0,&(P->x),&(Q->x));       /* t0=X1.X2 */
+    FP8_YYY_mul(&t1,&(P->y),&(Q->y));       /* t1=Y1.Y2 */
+
+    FP8_YYY_mul(&t2,&(P->z),&(Q->z));       /* t2=Z1.Z2 */
+    FP8_YYY_add(&t3,&(P->x),&(P->y));
+    FP8_YYY_norm(&t3);                      /* t3=X1+Y1 */
+    FP8_YYY_add(&t4,&(Q->x),&(Q->y));
+    FP8_YYY_norm(&t4);                      /* t4=X2+Y2 */
+    FP8_YYY_mul(&t3,&t3,&t4);               /* t3=(X1+Y1)(X2+Y2) */
+    FP8_YYY_add(&t4,&t0,&t1);               /* t4=X1.X2+Y1.Y2 */
+
+    /* t3=(X1+Y1)(X2+Y2)-(X1.X2+Y1.Y2) = X1.Y2+X2.Y1 */
+    FP8_YYY_sub(&t3,&t3,&t4);
+    FP8_YYY_norm(&t3);
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP8_YYY_times_i(&t3);
+#endif
+
+    FP8_YYY_add(&t4,&(P->y),&(P->z));
+    FP8_YYY_norm(&t4);                      /* t4=Y1+Z1 */
+
+    FP8_YYY_add(&x3,&(Q->y),&(Q->z));
+    FP8_YYY_norm(&x3);                      /* x3=Y2+Z2 */
+
+    FP8_YYY_mul(&t4,&t4,&x3);               /* t4=(Y1+Z1)(Y2+Z2) */
+
+    FP8_YYY_add(&x3,&t1,&t2);               /* x3=Y1.Y2+Z1.Z2 */
+
+    /* t4=(Y1+Z1)(Y2+Z2) - (Y1.Y2+Z1.Z2) = Y1.Z2+Y2.Z1 */
+    FP8_YYY_sub(&t4,&t4,&x3);
+    FP8_YYY_norm(&t4);
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP8_YYY_times_i(&t4);
+#endif
+
+    FP8_YYY_add(&x3,&(P->x),&(P->z));
+    FP8_YYY_norm(&x3);                      /* x3=X1+Z1 */
+
+    FP8_YYY_add(&y3,&(Q->x),&(Q->z));
+    FP8_YYY_norm(&y3);                      /* y3=X2+Z2 */
+    FP8_YYY_mul(&x3,&x3,&y3);               /* x3=(X1+Z1)(X2+Z2) */
+
+    FP8_YYY_add(&y3,&t0,&t2);               /* y3=X1.X2+Z1.Z2 */
+    FP8_YYY_sub(&y3,&x3,&y3);
+    FP8_YYY_norm(&y3);                      /* y3=(X1+Z1)(X2+Z2) - (X1.X2+Z1.Z2) = X1.Z2+X2.Z1 */
+#if SEXTIC_TWIST_ZZZ==D_TYPE
+    FP8_YYY_times_i(&t0);
+    FP8_YYY_times_i(&t1);
+#endif
+
+    /* t0=3.t0 */
+    FP8_YYY_add(&x3,&t0,&t0);
+    FP8_YYY_add(&t0,&t0,&x3);
+    FP8_YYY_norm(&t0);
+    FP8_YYY_imul(&t2,&t2,b3);               /* t2=b3.t2 */
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP8_YYY_times_i(&t2);
+#endif
+
+    FP8_YYY_add(&z3,&t1,&t2);               /* z3=t1+t2 */
+    FP8_YYY_norm(&z3);
+    FP8_YYY_sub(&t1,&t1,&t2);               /* t1=t1-t2 */
+    FP8_YYY_norm(&t1);
+    FP8_YYY_imul(&y3,&y3,b3);               /* y3=b3.y3 */
+#if SEXTIC_TWIST_ZZZ==M_TYPE
+    FP8_YYY_times_i(&y3);
+#endif
+
+    FP8_YYY_mul(&x3,&y3,&t4);               /* x3=y3.t4 */
+
+    FP8_YYY_mul(&t2,&t3,&t1);               /* t2=t3.t1 */
+    FP8_YYY_sub(&(P->x),&t2,&x3);           /* new x = t2-x3 */
+    FP8_YYY_mul(&y3,&y3,&t0);               /* y3=y3.t0 */
+    FP8_YYY_mul(&t1,&t1,&z3);               /* t1=t1.z3 */
+    FP8_YYY_add(&(P->y),&y3,&t1);           /* new y = y3+t1 */
+    FP8_YYY_mul(&t0,&t0,&t3);               /* t0=t0.t3 */
+    FP8_YYY_mul(&z3,&z3,&t4);               /* z3=z3.t4 */
+    FP8_YYY_add(&(P->z),&z3,&t0);           /* new z = z3+t0 */
+
+
+    FP8_YYY_norm(&(P->x));
+    FP8_YYY_norm(&(P->y));
+    FP8_YYY_norm(&(P->z));
+
+    return 0;
+}
+
+/* Set P-=Q by adding a negated copy of Q; Q itself is left unchanged */
+/* SU= 16 */
+void ECP8_ZZZ_sub(ECP8_ZZZ *P,ECP8_ZZZ *Q)
+{
+    ECP8_ZZZ minusQ;
+
+    ECP8_ZZZ_copy(&minusQ,Q);
+    ECP8_ZZZ_neg(&minusQ);
+    ECP8_ZZZ_add(P,&minusQ);
+}
+
+
+/* Apply FP8_YYY_reduce to each of the three coordinates of P */
+void ECP8_ZZZ_reduce(ECP8_ZZZ *P)
+{
+    FP8_YYY_reduce(&P->x);
+    FP8_YYY_reduce(&P->y);
+    FP8_YYY_reduce(&P->z);
+}
+
+/* P*=e
+   Scalar multiplication with fixed 4-bit signed windows over a table of odd multiples of P.
+   If P is the point at infinity it is returned unchanged; otherwise the result is
+   converted to affine form before returning. e itself is not modified (a copy is recoded). */
+/* SU= 280 */
+void ECP8_ZZZ_mul(ECP8_ZZZ *P,BIG_XXX e)
+{
+    /* fixed size windows */
+    int i,nb,s,ns;
+    BIG_XXX mt,t;
+    ECP8_ZZZ Q,W[8],C;
+    sign8 w[1+(NLEN_XXX*BASEBITS_XXX+3)/4];
+
+    if (ECP8_ZZZ_isinf(P)) return;
+    
+    /* precompute table: W[0]=P and W[i]=W[i-1]+2P, i.e. the odd multiples P,3P,...,15P */
+
+    ECP8_ZZZ_copy(&Q,P);
+    ECP8_ZZZ_dbl(&Q);
+    ECP8_ZZZ_copy(&W[0],P);
+
+    for (i=1; i<8; i++)
+    {
+        ECP8_ZZZ_copy(&W[i],&W[i-1]);
+        ECP8_ZZZ_add(&W[i],&Q);
+    }
+
+    /* make exponent odd - add P (t=e+1) if e is even, 2P (t=e+2) if e is odd.
+       C remembers which of P or 2P was added so that it can be subtracted at the end. */
+    BIG_XXX_copy(t,e);
+    s=BIG_XXX_parity(t);        /* parity of e */
+    BIG_XXX_inc(t,1);
+    BIG_XXX_norm(t);
+    ns=BIG_XXX_parity(t);       /* parity of e+1 */
+    BIG_XXX_copy(mt,t);
+    BIG_XXX_inc(mt,1);
+    BIG_XXX_norm(mt);
+    BIG_XXX_cmove(t,mt,s);      /* t=e+2 when e is odd */
+    ECP8_ZZZ_cmove(&Q,P,ns);    /* Q held 2P; becomes P when e is even */
+    ECP8_ZZZ_copy(&C,&Q);
+
+    nb=1+(BIG_XXX_nbits(t)+3)/4;
+
+    /* convert exponent to signed 4-bit window:
+       each digit is lastbits(t,5)-16 and is subtracted from t before t is shifted down by 4 bits */
+    for (i=0; i<nb; i++)
+    {
+        w[i]=BIG_XXX_lastbits(t,5)-16;
+        BIG_XXX_dec(t,w[i]);
+        BIG_XXX_norm(t);
+        BIG_XXX_fshr(t,4);
+    }
+    w[nb]=BIG_XXX_lastbits(t,5);    /* what is left of t becomes the top digit */
+
+    ECP8_ZZZ_copy(P,&W[(w[nb]-1)/2]);   /* start from the table entry for the top digit (direct index, not a constant-time select) */
+    for (i=nb-1; i>=0; i--)
+    {
+        ECP8_ZZZ_select(&Q,W,w[i]);
+        ECP8_ZZZ_dbl(P);
+        ECP8_ZZZ_dbl(P);
+        ECP8_ZZZ_dbl(P);
+        ECP8_ZZZ_dbl(P);
+        ECP8_ZZZ_add(P,&Q);
+    }
+    ECP8_ZZZ_sub(P,&C); /* apply correction */
+       ECP8_ZZZ_affine(P);
+}
+
+/* Calculate the three Frobenius constants F[0], F[1], F[2] from the ROM values Fra_YYY and Frb_YYY.
+   All three entries of F are overwritten. The exponent notes below are the original author's;
+   where two are given the second (negative exponents) belongs to the M_TYPE twist branch. */
+void ECP8_ZZZ_frob_constants(FP2_YYY F[3])
+{
+    FP_YYY fx,fy;
+    FP2_YYY X;
+
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+
+
+    FP2_YYY_sqr(&F[0],&X);              /* FF=F^2=(1+i)^(p-19)/12 */
+    FP2_YYY_copy(&F[2],&F[0]);
+    FP2_YYY_mul_ip(&F[2]);              /* W=(1+i)^12/12.(1+i)^(p-19)/12 = (1+i)^(p-7)/12 */
+    FP2_YYY_norm(&F[2]);
+    FP2_YYY_sqr(&F[1],&F[2]);
+    FP2_YYY_mul(&F[2],&F[2],&F[1]);     /* W=(1+i)^(p-7)/4 */
+
+    FP2_YYY_mul_ip(&F[2]);              /* W=(1+i)^4/4.W=(1+i)^(p-7)/4 = (1+i)^(p-3)/4 */
+    FP2_YYY_norm(&F[2]);
+
+    FP2_YYY_copy(&F[1],&X);
+
+#if SEXTIC_TWIST_ZZZ == M_TYPE
+    FP2_YYY_mul_ip(&F[1]);              /* (1+i)^24/24.(1+i)^(p-19)/24 = (1+i)^(p+5)/24 */
+    FP2_YYY_inv(&F[1],&F[1]);           /* (1+i)^-(p+5)/24 */
+    FP2_YYY_sqr(&F[0],&F[1]);           /* (1+i)^-(p+5)/12 */
+#endif
+
+
+    /* FF=(1+i)^(p-19)/12.(1+i)^12/12 = (1+i)^(p-7)/12 */
+    /* M_TYPE: FF=(1+i)^12/12.(1+i)^-(p+5)/12 = (1+i)^-(p-7)/12 */
+    FP2_YYY_mul_ip(&F[0]);
+    FP2_YYY_norm(&F[0]);
+
+    /* (1+i)^(p-7)/12 . (1+i)^(p-19)/24 = (1+i)^(p-11)/8 */
+    /* M_TYPE: (1+i)^-(p-7)/12 . (1+i)^-(p+5)/24 = (1+i)^-(p-3)/8 */
+    FP2_YYY_mul(&F[1],&F[1],&F[0]);
+
+}
+
+/* Calculates q^n.P using the Frobenius constants F (as produced by ECP8_ZZZ_frob_constants).
+   The per-step transformation is applied n times to copies of the coordinates;
+   P is written back once at the end. For n<=0 the loop does not run. */
+void ECP8_ZZZ_frob(ECP8_ZZZ *P,FP2_YYY F[3],int n)
+{
+    int i;
+    FP8_YYY X,Y,Z;
+
+    FP8_YYY_copy(&X,&(P->x));
+    FP8_YYY_copy(&Y,&(P->y));
+    FP8_YYY_copy(&Z,&(P->z));
+
+    for (i=0; i<n; i++)
+    {
+        FP8_YYY_frob(&X,&F[2]);         /* X^p */
+        FP8_YYY_qmul(&X,&X,&F[0]);
+#if SEXTIC_TWIST_ZZZ == M_TYPE
+        FP8_YYY_div_i2(&X);             /* X^p.(1+i)^-(p-1)/12 */
+#endif
+#if SEXTIC_TWIST_ZZZ == D_TYPE
+        FP8_YYY_times_i2(&X);           /* X^p.(1+i)^(p-1)/12 */
+#endif
+
+        FP8_YYY_frob(&Y,&F[2]);         /* Y^p */
+        FP8_YYY_qmul(&Y,&Y,&F[1]);
+#if SEXTIC_TWIST_ZZZ == M_TYPE
+        FP8_YYY_div_i(&Y);              /* Y^p.(1+i)^-(p-1)/8 */
+#endif
+#if SEXTIC_TWIST_ZZZ == D_TYPE
+        /* Y^p.(1+i)^(p-1)/8 */
+        FP8_YYY_times_i2(&Y);
+        FP8_YYY_times_i2(&Y);
+        FP8_YYY_times_i(&Y);
+#endif
+        FP8_YYY_frob(&Z,&F[2]);
+    }
+
+    FP8_YYY_copy(&(P->x),&X);
+    FP8_YYY_copy(&(P->y),&Y);
+    FP8_YYY_copy(&(P->z),&Z);
+}
+
+/* Side channel attack secure.
+   Multi-scalar multiplication: the result is written to P in affine form; the multipliers
+   in u are copied into t before recoding, so u is not modified.
+   NOTE(review): presumably computes P = Sigma u[i]*Q[i] for i=0..15. Only Q[0..3] enter the
+   lookup tables (the other three tables are Frobenius images of the first) and Q[4], Q[8],
+   Q[12] are only used for the final corrections, so callers appear to be expected to pass
+   Q[4j+k] as the image of Q[k] under ECP8_ZZZ_frob(...,4) applied j times - confirm. */
+// Bos & Costello https://eprint.iacr.org/2013/458.pdf
+// Faz-Hernandez & Longa & Sanchez  https://eprint.iacr.org/2013/158.pdf
+
+void ECP8_ZZZ_mul16(ECP8_ZZZ *P,ECP8_ZZZ Q[16],BIG_XXX u[16])
+{
+    int i,j,k,nb,pb1,pb2,pb3,pb4,bt;
+       ECP8_ZZZ T1[8],T2[8],T3[8],T4[8],W;
+    BIG_XXX mt,t[16];
+    sign8 w1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s2[NLEN_XXX*BASEBITS_XXX+1]; 
+    sign8 w3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w4[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s4[NLEN_XXX*BASEBITS_XXX+1]; 
+
+       FP2_YYY X[3];
+       ECP8_ZZZ_frob_constants(X);
+
+    for (i=0; i<16; i++)
+       {
+        BIG_XXX_copy(t[i],u[i]);
+       }
+// Precomputed table: T1[k] = Q[0] plus the subset of Q[1],Q[2],Q[3] picked by bits 0,1,2 of k
+    ECP8_ZZZ_copy(&T1[0],&Q[0]); // Q[0]
+    ECP8_ZZZ_copy(&T1[1],&T1[0]);
+       ECP8_ZZZ_add(&T1[1],&Q[1]);     // Q[0]+Q[1]
+    ECP8_ZZZ_copy(&T1[2],&T1[0]);
+       ECP8_ZZZ_add(&T1[2],&Q[2]);     // Q[0]+Q[2]
+       ECP8_ZZZ_copy(&T1[3],&T1[1]);
+       ECP8_ZZZ_add(&T1[3],&Q[2]);     // Q[0]+Q[1]+Q[2]
+       ECP8_ZZZ_copy(&T1[4],&T1[0]);
+       ECP8_ZZZ_add(&T1[4],&Q[3]);  // Q[0]+Q[3]
+       ECP8_ZZZ_copy(&T1[5],&T1[1]);
+       ECP8_ZZZ_add(&T1[5],&Q[3]);     // Q[0]+Q[1]+Q[3]
+       ECP8_ZZZ_copy(&T1[6],&T1[2]);
+       ECP8_ZZZ_add(&T1[6],&Q[3]);     // Q[0]+Q[2]+Q[3]
+       ECP8_ZZZ_copy(&T1[7],&T1[3]);
+       ECP8_ZZZ_add(&T1[7],&Q[3]);     // Q[0]+Q[1]+Q[2]+Q[3]
+
+//  Use Frobenius: T2, T3 and T4 are T1 with ECP8_ZZZ_frob(...,4) applied once, twice and three times
+
+       for (i=0;i<8;i++)
+       {
+               ECP8_ZZZ_copy(&T2[i],&T1[i]);
+               ECP8_ZZZ_frob(&T2[i],X,4);
+
+               ECP8_ZZZ_copy(&T3[i],&T2[i]);
+               ECP8_ZZZ_frob(&T3[i],X,4);
+
+               ECP8_ZZZ_copy(&T4[i],&T3[i]);
+               ECP8_ZZZ_frob(&T4[i],X,4);
+       }
+
+// Make them odd: t[0], t[4], t[8], t[12] get 1 added when even; pb1..pb4 record whether that happened
+       pb1=1-BIG_XXX_parity(t[0]);
+       BIG_XXX_inc(t[0],pb1);
+       BIG_XXX_norm(t[0]);
+
+       pb2=1-BIG_XXX_parity(t[4]);
+       BIG_XXX_inc(t[4],pb2);
+       BIG_XXX_norm(t[4]);
+
+       pb3=1-BIG_XXX_parity(t[8]);
+       BIG_XXX_inc(t[8],pb3);
+       BIG_XXX_norm(t[8]);
+
+       pb4=1-BIG_XXX_parity(t[12]);
+       BIG_XXX_inc(t[12],pb4);
+       BIG_XXX_norm(t[12]);
+
+// Number of bits: one more than the bit length of the OR of all sixteen multipliers
+    BIG_XXX_zero(mt);
+    for (i=0; i<16; i++)
+    {
+        BIG_XXX_or(mt,mt,t[i]);
+    }
+    nb=1+BIG_XXX_nbits(mt);
+
+// Sign pivot: s1..s4 hold +1/-1 digits taken from the bits of t[0], t[4], t[8], t[12]; the top digit is fixed at +1
+       s1[nb-1]=1;
+       s2[nb-1]=1;
+       s3[nb-1]=1;
+       s4[nb-1]=1;
+       for (i=0;i<nb-1;i++)
+       {
+        BIG_XXX_fshr(t[0],1);
+               s1[i]=2*BIG_XXX_parity(t[0])-1;
+        BIG_XXX_fshr(t[4],1);
+               s2[i]=2*BIG_XXX_parity(t[4])-1;
+        BIG_XXX_fshr(t[8],1);
+               s3[i]=2*BIG_XXX_parity(t[8])-1;
+        BIG_XXX_fshr(t[12],1);
+               s4[i]=2*BIG_XXX_parity(t[12])-1;
+       }
+
+
+// Recoded exponents: w1..w4 pack, per position i, the signed bits of the other three multipliers of each group with weights 1,2,4
+    for (i=0; i<nb; i++)
+    {
+               w1[i]=0;
+               k=1;
+               for (j=1; j<4; j++)
+               {
+                       bt=s1[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w1[i]+=bt*k;
+                       k*=2;
+        }
+
+               w2[i]=0;
+               k=1;
+               for (j=5; j<8; j++)
+               {
+                       bt=s2[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w2[i]+=bt*k;
+                       k*=2;
+        }
+
+               w3[i]=0;
+               k=1;
+               for (j=9; j<12; j++)
+               {
+                       bt=s3[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w3[i]+=bt*k;
+                       k*=2;
+        }
+
+               w4[i]=0;
+               k=1;
+               for (j=13; j<16; j++)
+               {
+                       bt=s4[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w4[i]+=bt*k;
+                       k*=2;
+        }
+    }  
+
+// Main loop: start from the top digits, then one doubling and four table additions per remaining position
+       ECP8_ZZZ_select(P,T1,2*w1[nb-1]+1);
+       ECP8_ZZZ_select(&W,T2,2*w2[nb-1]+1);
+       ECP8_ZZZ_add(P,&W);
+       ECP8_ZZZ_select(&W,T3,2*w3[nb-1]+1);
+       ECP8_ZZZ_add(P,&W);
+       ECP8_ZZZ_select(&W,T4,2*w4[nb-1]+1);
+       ECP8_ZZZ_add(P,&W);
+
+    for (i=nb-2; i>=0; i--)
+    {
+        ECP8_ZZZ_dbl(P);
+        ECP8_ZZZ_select(&W,T1,2*w1[i]+s1[i]);
+        ECP8_ZZZ_add(P,&W);
+        ECP8_ZZZ_select(&W,T2,2*w2[i]+s2[i]);
+        ECP8_ZZZ_add(P,&W);
+        ECP8_ZZZ_select(&W,T3,2*w3[i]+s3[i]);
+        ECP8_ZZZ_add(P,&W);
+        ECP8_ZZZ_select(&W,T4,2*w4[i]+s4[i]);
+        ECP8_ZZZ_add(P,&W);
+    }
+
+// apply corrections: subtract Q[0], Q[4], Q[8], Q[12] again wherever 1 was added to make the multiplier odd
+       ECP8_ZZZ_copy(&W,P);   
+       ECP8_ZZZ_sub(&W,&Q[0]);
+       ECP8_ZZZ_cmove(P,&W,pb1);
+       ECP8_ZZZ_copy(&W,P);   
+       ECP8_ZZZ_sub(&W,&Q[4]);
+       ECP8_ZZZ_cmove(P,&W,pb2);
+
+       ECP8_ZZZ_copy(&W,P);   
+       ECP8_ZZZ_sub(&W,&Q[8]);
+       ECP8_ZZZ_cmove(P,&W,pb3);
+       ECP8_ZZZ_copy(&W,P);   
+       ECP8_ZZZ_sub(&W,&Q[12]);
+       ECP8_ZZZ_cmove(P,&W,pb4);
+       ECP8_ZZZ_affine(P);
+}
+
+/* Map a hash value to a point on G2.
+   The first BIG read from W->val is reduced mod the field modulus and used as a trial
+   x-coordinate (embedded into FP8 as 1 + hv.i at the FP2 level); it is incremented until
+   ECP8_ZZZ_setx accepts it. The resulting point is then moved into the correct-order
+   subgroup with the Budroni-Pintore formula below and returned in affine form in Q. */
+
+void ECP8_ZZZ_mapit(ECP8_ZZZ *Q,octet *W)
+{
+    BIG_XXX q,one,x,hv;
+    FP2_YYY T,X[3];
+    FP4_YYY X4;
+    FP8_YYY X8;
+
+    ECP8_ZZZ xQ, x2Q, x3Q, x4Q , x5Q, x6Q, x7Q, x8Q;
+
+    BIG_XXX_fromBytes(hv,W->val);
+    BIG_XXX_rcopy(q,Modulus_YYY);
+    BIG_XXX_one(one);
+    BIG_XXX_mod(hv,q);
+
+    /* try successive x values until one is accepted by ECP8_ZZZ_setx */
+    for (;;)
+    {
+        FP2_YYY_from_BIGs(&T,one,hv);  /*******/
+        FP4_YYY_from_FP2(&X4,&T);
+        FP8_YYY_from_FP4(&X8,&X4);
+        if (ECP8_ZZZ_setx(Q,&X8)) break;
+        BIG_XXX_inc(hv,1);
+    }
+
+    ECP8_ZZZ_frob_constants(X);
+
+    BIG_XXX_rcopy(x,CURVE_Bnx_ZZZ);
+
+    /* Efficient hash maps to G2 on BLS48 curves - Budroni, Pintore
+     * Q -> x8Q -x7Q -Q +  F(x7Q-x6Q) + F(F(x6Q-x5Q)) +F(F(F(x5Q-x4Q)))
+     *      +F(F(F(F(x4Q-x3Q)))) + F(F(F(F(F(x3Q-x2Q))))) + F(F(F(F(F(F(x2Q-xQ))))))
+     *      + F(F(F(F(F(F(F(xQ-Q))))))) +F(F(F(F(F(F(F(F(2Q))))))))
+     */
+
+    /* xkQ = x^k.Q for k=1..8, each obtained from the previous one */
+    ECP8_ZZZ_copy(&xQ,Q);
+    ECP8_ZZZ_mul(&xQ,x);
+    ECP8_ZZZ_copy(&x2Q,&xQ);
+    ECP8_ZZZ_mul(&x2Q,x);
+    ECP8_ZZZ_copy(&x3Q,&x2Q);
+    ECP8_ZZZ_mul(&x3Q,x);
+    ECP8_ZZZ_copy(&x4Q,&x3Q);
+
+    ECP8_ZZZ_mul(&x4Q,x);
+    ECP8_ZZZ_copy(&x5Q,&x4Q);
+    ECP8_ZZZ_mul(&x5Q,x);
+    ECP8_ZZZ_copy(&x6Q,&x5Q);
+    ECP8_ZZZ_mul(&x6Q,x);
+    ECP8_ZZZ_copy(&x7Q,&x6Q);
+    ECP8_ZZZ_mul(&x7Q,x);
+    ECP8_ZZZ_copy(&x8Q,&x7Q);
+    ECP8_ZZZ_mul(&x8Q,x);
+
+    /* for a negative curve parameter the odd powers change sign */
+#if SIGN_OF_X_ZZZ==NEGATIVEX
+    ECP8_ZZZ_neg(&xQ);
+    ECP8_ZZZ_neg(&x3Q);
+    ECP8_ZZZ_neg(&x5Q);
+    ECP8_ZZZ_neg(&x7Q);
+#endif
+
+    /* form the differences from the highest power down, so each xkQ is still
+       unmodified when it is subtracted from the next higher one */
+    ECP8_ZZZ_sub(&x8Q,&x7Q);
+    ECP8_ZZZ_sub(&x8Q,Q);
+
+    ECP8_ZZZ_sub(&x7Q,&x6Q);
+    ECP8_ZZZ_frob(&x7Q,X,1);
+
+    ECP8_ZZZ_sub(&x6Q,&x5Q);
+    ECP8_ZZZ_frob(&x6Q,X,2);
+
+    ECP8_ZZZ_sub(&x5Q,&x4Q);
+    ECP8_ZZZ_frob(&x5Q,X,3);
+
+    ECP8_ZZZ_sub(&x4Q,&x3Q);
+    ECP8_ZZZ_frob(&x4Q,X,4);
+
+    ECP8_ZZZ_sub(&x3Q,&x2Q);
+    ECP8_ZZZ_frob(&x3Q,X,5);
+
+    ECP8_ZZZ_sub(&x2Q,&xQ);
+    ECP8_ZZZ_frob(&x2Q,X,6);
+
+    ECP8_ZZZ_sub(&xQ,Q);
+    ECP8_ZZZ_frob(&xQ,X,7);
+
+    ECP8_ZZZ_dbl(Q);
+    ECP8_ZZZ_frob(Q,X,8);
+
+
+    /* sum all the terms into Q */
+    ECP8_ZZZ_add(Q,&x8Q);
+    ECP8_ZZZ_add(Q,&x7Q);
+    ECP8_ZZZ_add(Q,&x6Q);
+    ECP8_ZZZ_add(Q,&x5Q);
+
+    ECP8_ZZZ_add(Q,&x4Q);
+    ECP8_ZZZ_add(Q,&x3Q);
+    ECP8_ZZZ_add(Q,&x2Q);
+    ECP8_ZZZ_add(Q,&xQ);
+
+    ECP8_ZZZ_affine(Q);
+
+}
+
+// ECP8 Get Group Generator
+// Assembles the x and y coordinates from the sixteen CURVE_P* ROM constants
+// (BIG pairs -> FP2, FP2 pairs -> FP4, FP4 pairs -> FP8) and passes them to ECP8_ZZZ_set.
+// The return value of ECP8_ZZZ_set is not checked.
+
+void ECP8_ZZZ_generator(ECP8_ZZZ *G)
+{
+    BIG_XXX re,im;
+    FP2_YYY lo2,hi2;
+    FP4_YYY lo4,hi4;
+    FP8_YYY gx,gy;
+
+    /* x coordinate, first FP4 half */
+    BIG_XXX_rcopy(re,CURVE_Pxaaa_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pxaab_ZZZ);
+    FP2_YYY_from_BIGs(&lo2,re,im);
+
+    BIG_XXX_rcopy(re,CURVE_Pxaba_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pxabb_ZZZ);
+    FP2_YYY_from_BIGs(&hi2,re,im);
+
+    FP4_YYY_from_FP2s(&lo4,&lo2,&hi2);
+
+    /* x coordinate, second FP4 half */
+    BIG_XXX_rcopy(re,CURVE_Pxbaa_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pxbab_ZZZ);
+    FP2_YYY_from_BIGs(&lo2,re,im);
+
+    BIG_XXX_rcopy(re,CURVE_Pxbba_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pxbbb_ZZZ);
+    FP2_YYY_from_BIGs(&hi2,re,im);
+
+    FP4_YYY_from_FP2s(&hi4,&lo2,&hi2);
+
+    FP8_YYY_from_FP4s(&gx,&lo4,&hi4);
+
+    /* y coordinate, first FP4 half */
+    BIG_XXX_rcopy(re,CURVE_Pyaaa_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pyaab_ZZZ);
+    FP2_YYY_from_BIGs(&lo2,re,im);
+
+    BIG_XXX_rcopy(re,CURVE_Pyaba_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pyabb_ZZZ);
+    FP2_YYY_from_BIGs(&hi2,re,im);
+
+    FP4_YYY_from_FP2s(&lo4,&lo2,&hi2);
+
+    /* y coordinate, second FP4 half */
+    BIG_XXX_rcopy(re,CURVE_Pybaa_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pybab_ZZZ);
+    FP2_YYY_from_BIGs(&lo2,re,im);
+
+    BIG_XXX_rcopy(re,CURVE_Pybba_ZZZ);
+    BIG_XXX_rcopy(im,CURVE_Pybbb_ZZZ);
+    FP2_YYY_from_BIGs(&hi2,re,im);
+
+    FP4_YYY_from_FP2s(&hi4,&lo2,&hi2);
+
+    FP8_YYY_from_FP4s(&gy,&lo4,&hi4);
+
+    ECP8_ZZZ_set(G,&gx,&gy);
+}

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/ecp8.h
----------------------------------------------------------------------
diff --git a/version3/c/ecp8.h b/version3/c/ecp8.h
new file mode 100644
index 0000000..13aff45
--- /dev/null
+++ b/version3/c/ecp8.h
@@ -0,0 +1,246 @@
+#ifndef ECP8_ZZZ_H
+#define ECP8_ZZZ_H
+
+#include "fp8_YYY.h"
+#include "config_curve_ZZZ.h"
+
+
+extern const BIG_XXX Fra_YYY; /**< real part of BN curve Frobenius Constant */
+extern const BIG_XXX Frb_YYY; /**< imaginary part of BN curve Frobenius 
Constant */
+
+
+/**
+       @brief ECP8 Structure - Elliptic Curve Point, held as (x,y,z) coordinates over the FP8 extension field
+*/
+
+typedef struct
+{
+//    int inf; /**< Infinity Flag */
+    FP8_YYY x;   /**< x-coordinate of point */
+    FP8_YYY y;   /**< y-coordinate of point */
+       FP8_YYY z;      /**< z-coordinate of point */
+} ECP8_ZZZ;
+
+
+/* Curve Params - see rom.c */
+extern const int CURVE_A_ZZZ;          /**< Elliptic curve A parameter */
+extern const int CURVE_B_I_ZZZ;                /**< Elliptic curve B parameter 
*/
+extern const BIG_XXX CURVE_B_ZZZ;     /**< Elliptic curve B parameter */
+extern const BIG_XXX CURVE_Order_ZZZ; /**< Elliptic curve group order */
+extern const BIG_XXX CURVE_Cof_ZZZ;   /**< Elliptic curve cofactor */
+extern const BIG_XXX CURVE_Bnx_ZZZ;   /**< Elliptic curve parameter */
+
+
+/* Generator point on G1 */
+extern const BIG_XXX CURVE_Gx; /**< x-coordinate of generator point in group 
G1  */
+extern const BIG_XXX CURVE_Gy; /**< y-coordinate of generator point in group 
G1  */
+
+/* For Pairings only */
+
+/* Generator point on G2 */
+extern const BIG_XXX CURVE_Pxaaa_ZZZ; /**< real part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxaab_ZZZ; /**< imaginary part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxaba_ZZZ; /**< real part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxabb_ZZZ; /**< imaginary part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxbaa_ZZZ; /**< real part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxbab_ZZZ; /**< imaginary part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxbba_ZZZ; /**< real part of x-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pxbbb_ZZZ; /**< imaginary part of x-coordinate of 
generator point in group G2 */
+
+extern const BIG_XXX CURVE_Pyaaa_ZZZ; /**< real part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pyaab_ZZZ; /**< imaginary part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pyaba_ZZZ; /**< real part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pyabb_ZZZ; /**< imaginary part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pybaa_ZZZ; /**< real part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pybab_ZZZ; /**< imaginary part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pybba_ZZZ; /**< real part of y-coordinate of 
generator point in group G2 */
+extern const BIG_XXX CURVE_Pybbb_ZZZ; /**< imaginary part of y-coordinate of 
generator point in group G2 */
+
+
+/* ECP8 E(FP8) prototypes */
+/**    @brief Tests for ECP8 point equal to infinity
+ *
+       @param P ECP8 point to be tested
+       @return 1 if infinity, else returns 0
+ */
+extern int ECP8_ZZZ_isinf(ECP8_ZZZ *P);
+/**    @brief Copy ECP8 point to another ECP8 point
+ *
+       @param P ECP8 instance, on exit = Q
+       @param Q ECP8 instance to be copied
+ */
+extern void ECP8_ZZZ_copy(ECP8_ZZZ *P,ECP8_ZZZ *Q);
+/**    @brief Set ECP8 to point-at-infinity
+ *
+       @param P ECP8 instance to be set to infinity
+ */
+extern void ECP8_ZZZ_inf(ECP8_ZZZ *P);
+/**    @brief Tests for equality of two ECP8s
+ *
+       @param P ECP8 instance to be compared
+       @param Q ECP8 instance to be compared
+       @return 1 if P=Q, else returns 0
+ */
+extern int ECP8_ZZZ_equals(ECP8_ZZZ *P,ECP8_ZZZ *Q);
+
+
+/**    @brief Converts an ECP8 point from Projective (x,y,z) coordinates to 
affine (x,y) coordinates
+ *
+       @param P ECP8 instance to be converted to affine form
+ */
+extern void ECP8_ZZZ_affine(ECP8_ZZZ *P);
+
+
+/**    @brief Extract x and y coordinates of an ECP8 point P
+ *
+       If x=y, returns only x
+       @param x FP8 on exit = x coordinate of point
+       @param y FP8 on exit = y coordinate of point (unless x=y)
+       @param P ECP8 instance (x,y)
+       @return -1 if P is point-at-infinity, else 0
+ */
+extern int ECP8_ZZZ_get(FP8_YYY *x,FP8_YYY *y,ECP8_ZZZ *P);
+/**    @brief Formats and outputs an ECP8 point to the console, converted to 
affine coordinates
+ *
+       @param P ECP8 instance to be printed
+ */
+extern void ECP8_ZZZ_output(ECP8_ZZZ *P);
+
+/**    @brief Formats and outputs an ECP8 point to an octet string
+ *
+       The octet string is created in the form x|y.
+       Convert the real and imaginary parts of the x and y coordinates to 
big-endian base 256 form.
+       @param S output octet string
+       @param P ECP8 instance to be converted to an octet string
+ */
+extern void ECP8_ZZZ_toOctet(octet *S,ECP8_ZZZ *P);
+/**    @brief Creates an ECP8 point from an octet string
+ *
+       The octet string is in the form x|y
+       The real and imaginary parts of the x and y coordinates are in 
big-endian base 256 form.
+       @param P ECP8 instance to be created from the octet string
+       @param S input octet string
+       @return 1 if octet string corresponds to a point on the curve, else 0
+ */
+extern int ECP8_ZZZ_fromOctet(ECP8_ZZZ *P,octet *S);
+/**    @brief Calculate Right Hand Side of curve equation y^2=f(x)
+ *
+       Function f(x)=x^3+Ax+B
+       Used internally.
+       @param r FP8 value of f(x)
+       @param x FP8 instance
+ */
+extern void ECP8_ZZZ_rhs(FP8_YYY *r,FP8_YYY *x);
+/**    @brief Set ECP8 to point(x,y) given x and y
+ *
+       Point P set to infinity if no such point on the curve.
+       @param P ECP8 instance to be set (x,y)
+       @param x FP8 x coordinate of point
+       @param y FP8 y coordinate of point
+       @return 1 if point exists, else 0
+ */
+extern int ECP8_ZZZ_set(ECP8_ZZZ *P,FP8_YYY *x,FP8_YYY *y);
+/**    @brief Set ECP8 to point(x,[y]) given x
+ *
+       Point P set to infinity if no such point on the curve. Otherwise y 
coordinate is calculated from x.
+       @param P ECP8 instance to be set (x,[y])
+       @param x FP8 x coordinate of point
+       @return 1 if point exists, else 0
+ */
+extern int ECP8_ZZZ_setx(ECP8_ZZZ *P,FP8_YYY *x);
+/**    @brief Negation of an ECP8 point
+ *
+       @param P ECP8 instance, on exit = -P
+ */
+extern void ECP8_ZZZ_neg(ECP8_ZZZ *P);
+
+/**    @brief Reduction of an ECP8 point
+ *
+       @param P ECP8 instance, on exit (x,y) are reduced wrt the modulus
+ */
+extern void ECP8_ZZZ_reduce(ECP8_ZZZ *P);
+
+
+/**    @brief Doubles an ECP8 instance P and returns slope
+ *
+       @param P ECP8 instance, on exit =2*P
+       @param lam FP8 instance, slope of line
+ */
+//extern int ECP8_ZZZ_sdbl(ECP8_ZZZ *P,FP8_YYY *lam);
+/**    @brief Adds ECP8 instance Q to ECP8 instance P and returns slope
+ *
+       @param P ECP8 instance, on exit =P+Q
+       @param Q ECP8 instance to be added to P
+       @param lam FP8 instance, slope of line
+ */
+//extern int ECP8_ZZZ_sadd(ECP8_ZZZ *P,ECP8_ZZZ *Q,FP8_YYY *lam);
+
+
+/**    @brief Doubles an ECP8 instance P
+ *
+       @param P ECP8 instance, on exit =2*P
+ */
+extern int ECP8_ZZZ_dbl(ECP8_ZZZ *P);
+/**    @brief Adds ECP8 instance Q to ECP8 instance P
+ *
+       @param P ECP8 instance, on exit =P+Q
+       @param Q ECP8 instance to be added to P
+ */
+extern int ECP8_ZZZ_add(ECP8_ZZZ *P,ECP8_ZZZ *Q);
+/**    @brief Subtracts ECP8 instance Q from ECP8 instance P
+ *
+       @param P ECP8 instance, on exit =P-Q
+       @param Q ECP8 instance to be subtracted from P
+ */
+extern void ECP8_ZZZ_sub(ECP8_ZZZ *P,ECP8_ZZZ *Q);
+/**    @brief Multiplies an ECP8 instance P by a BIG, side-channel resistant
+ *
+       Uses fixed sized windows.
+       @param P ECP8 instance, on exit =b*P
+       @param b BIG number multiplier
+
+ */
+extern void ECP8_ZZZ_mul(ECP8_ZZZ *P,BIG_XXX b);
+
+/**    @brief Calculates required Frobenius constants
+ *
+       Calculate Frobenius constants
+       @param F array of FP2 precalculated constants
+
+ */
+extern void ECP8_ZZZ_frob_constants(FP2_YYY F[3]);
+
+/**    @brief Multiplies an ECP8 instance P by the internal modulus p^n, using 
precalculated Frobenius constants
+ *
+       Fast point multiplication using Frobenius
+       @param P ECP8 instance, on exit = p^n*P
+       @param F array of FP2 precalculated Frobenius constant
+       @param n power of prime
+
+ */
+extern void ECP8_ZZZ_frob(ECP8_ZZZ *P,FP2_YYY F[3],int n);
+
+/**    @brief Calculates P=Sigma b[i]*Q[i] for i=0 to 15
+ *
+       NOTE(review): the counts below follow the mul16 name; confirm against the definition in ecp8.c
+       @param P ECP8 instance, on exit = Sigma b[i]*Q[i] for i=0 to 15
+       @param Q ECP8 array of 16 points
+       @param b BIG array of 16 multipliers
+ */
+extern void ECP8_ZZZ_mul16(ECP8_ZZZ *P,ECP8_ZZZ *Q,BIG_XXX *b);
+
+
+/**    @brief Maps an octet string to a curve point of correct order
+ *
+       @param P ECP8 instance of correct order
+       @param w OCTET byte array to be mapped
+ */
+extern void ECP8_ZZZ_mapit(ECP8_ZZZ *P,octet *w);
+
+/**    @brief Get Group Generator from ROM
+ *
+       @param G ECP8 instance
+ */
+extern void ECP8_ZZZ_generator(ECP8_ZZZ *G);
+
+
+#endif
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/faster.c
----------------------------------------------------------------------
diff --git a/version3/c/faster.c b/version3/c/faster.c
new file mode 100644
index 0000000..6d8bbf5
--- /dev/null
+++ b/version3/c/faster.c
@@ -0,0 +1,97 @@
+
+#include <stdio.h>
+#include "big_XXX.h"
+
+#ifdef COMBA
+/* Code generator: prints to stdout the unrolled statements to paste into BIG_XXX_mul(), BIG_XXX_sqr() and BIG_XXX_monty() in big_XXX.c, sized by N=NLEN_XXX */
+int main()
+{
+       int i,j,k,N; /* NOTE(review): j is declared but never used in this function */
+
+       N=NLEN_XXX;
+
+       /* ---- fragment for BIG_XXX_mul() ---- */
+       printf("Insert this code in BIG_XXX_mul() in file big_XXX.c between 
#define UNWOUND and #else \n\n");
+
+       for (i=0;i<N;i++)
+               printf("\td[%d]=(dchunk)a[%d]*b[%d];\n",i,i,i);
+
+       printf("\n\ts=d[0];\n\tt = s; c[0]=(chunk)t&BMASK_XXX; 
co=t>>BASEBITS_XXX;\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\ts+=d[%d]; t=co+s ",k);
+               for (i=k;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; c[%d]=(chunk)t&BMASK_XXX; co=t>>BASEBITS_XXX; \n",k);
+       }
+       printf("\n");
+       for (k=N;k<2*N-1;k++)
+       {
+               printf("\ts-=d[%d]; t=co+s ",k-N);
+               for (i=N-1;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; c[%d]=(chunk)t&BMASK_XXX; co=t>>BASEBITS_XXX; \n",k);
+       }
+       printf("\tc[%d]=(chunk)co;\n",2*N-1);
+
+       /* ---- fragment for BIG_XXX_sqr() ---- */
+
+       printf("\nInsert this code in BIG_XXX_sqr() in file big_XXX.c between 
#define UNWOUND and #else \n\n");
+
+       printf("\n\tt=(dchunk)a[0]*a[0]; c[0]=(chunk)t&BMASK_XXX; 
co=t>>BASEBITS_XXX;\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\tt= ",k); /* NOTE(review): the format has no conversion, so the k argument is ignored */
+               for (i=k;i>=1+k/2;i--)
+                       printf("+(dchunk)a[%d]*a[%d]",i,k-i);
+               printf("; t+=t; t+=co;");
+               if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2);
+               printf(" c[%d]=(chunk)t&BMASK_XXX; co=t>>BASEBITS_XXX; \n", k);
+       }
+       printf("\n");
+
+       for (k=N;k<2*N-2;k++)
+       {
+               printf("\tt= ",k-N); /* NOTE(review): the format has no conversion, so the k-N argument is ignored */
+               for (i=N-1;i>=1+k/2;i--)
+                       printf("+(dchunk)a[%d]*a[%d]",i,k-i);
+               printf("; t+=t; t+=co;");
+               if (k%2==0) printf(" t+=(dchunk)a[%d]*a[%d];",k/2,k/2);
+               printf(" c[%d]=(chunk)t&BMASK_XXX; co=t>>BASEBITS_XXX; \n", k);
+       }
+       printf("\tt=co; t+=(dchunk)a[%d]*a[%d]; c[%d]=(chunk)t&BMASK_XXX; 
co=t>>BASEBITS_XXX; \n ",N-1,N-1,2*N-2);
+
+       printf("\tc[%d]=(chunk)co;\n",2*N-1);
+
+       /* ---- fragment for BIG_XXX_monty() ---- */
+
+
+       printf("\nInsert this code in BIG_XXX_monty() in file big_XXX.c between 
#define UNWOUND and #else \n\n");
+
+       printf("\tt=d[0]; v[0]=((chunk)t*MC)&BMASK_XXX; t+=(dchunk)v[0]*md[0];  
s=0; c=(t>>BASEBITS_XXX);\n\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\tt=d[%d]+c+s+(dchunk)v[0]*md[%d]",k,k);
+               for (i=k-1;i>k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; v[%d]=((chunk)t*MC)&BMASK_XXX; 
t+=(dchunk)v[%d]*md[0]; ",k,k);
+               printf(" dd[%d]=(dchunk)v[%d]*md[%d]; s+=dd[%d]; 
c=(t>>BASEBITS_XXX); \n",k,k,k,k);
+       }
+       printf("\n");
+       for (k=N;k<2*N-1;k++)
+       {
+               printf("\tt=d[%d]+c+s",k);
+               for (i=N-1;i>=1+k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; a[%d]=(chunk)t&BMASK_XXX;  s-=dd[%d]; 
c=(t>>BASEBITS_XXX); \n",k-N,k-N+1);
+       }
+       printf("\ta[%d]=d[%d]+(chunk)c&BMASK_XXX;\n",N-1,2*N-1);        /* NOTE(review): in the emitted statement + binds tighter than &, so the mask applies to the whole sum; confirm intended */
+
+
+
+}
+
+#endif
+
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/faster.txt
----------------------------------------------------------------------
diff --git a/version3/c/faster.txt b/version3/c/faster.txt
new file mode 100644
index 0000000..c6e948c
--- /dev/null
+++ b/version3/c/faster.txt
@@ -0,0 +1,29 @@
+We assume that optimizing compilers will unwind loops at every opportunity. 
+
+But sometimes they don't. So time-critical code will run faster if we step
+in and unwind complex loops for the compiler.
+
+Once the architecture and ECC/RSA support are decided upon, choose 
+which BIG numbers need to be optimized. For example, for a 32-bit build
+using 256-bit BIGs and a base of 2^29, replace XXX with 256_29 inside 
+faster.c
+
+Then compile and execute the program faster.c like this (using MinGW 
+port of GCC as an example), in the same directory as arch.h and big_256_29.h
+
+gcc -O2 -std=c99 faster.c -o faster.exe
+faster > t.txt
+
+Now extract the code fragments from t.txt and insert them where indicated
+into big_256_29.c (look for UNWOUND)
+
+Then make sure that
+
+#define UNWOUND
+
+appears somewhere in big_256_29.h
+
+Finally compile and replace the big_256_29 module in the library, and maybe 
+get a 30% speed-up! If there is no significant improvement, don't use this 
+method!
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fastest.c
----------------------------------------------------------------------
diff --git a/version3/c/fastest.c b/version3/c/fastest.c
new file mode 100644
index 0000000..6a6f7c7
--- /dev/null
+++ b/version3/c/fastest.c
@@ -0,0 +1,59 @@
+
+#include <stdio.h>
+#include "fp_YYY.h"
+
+#ifdef COMBA
+/* Code generator: prints to stdout a complete definition of FP_YYY_modmul(r,a,b) to paste into fp_YYY.c, sized by N=NLEN_XXX */
+int main()
+{
+       int i,j,k,N; /* NOTE(review): j is declared but never used in this function */
+
+       N=NLEN_XXX;
+
+       printf("Insert this code in file fp_YYY.c\n\n");
+       /* emitted function header and local declarations */
+       printf("void FP_YYY_modmul(BIG_XXX r,BIG_XXX a,BIG_XXX b)\n");
+
+       printf("{\n");
+       printf("\tdchunk t,c,s;\n");
+       printf("\tdchunk d[%d],dd[%d];\n",N,N);
+       printf("\tchunk v[%d],md[%d];\n",N,N);  
+       printf("\tchunk MC=MConst_YYY;\n");
+
+       printf("\tBIG_XXX_rcopy(md,Modulus_YYY);\n");
+
+       for (i=0;i<N;i++)
+               printf("\td[%d]=(dchunk)a[%d]*b[%d];\n",i,i,i);
+
+       printf("\n\ts=d[0];  t=s; v[0]=((chunk)t*MC)&BMASK_XXX; 
t+=(dchunk)v[0]*md[0]; c=(t>>BASEBITS_XXX);\n\n");
+
+       for (k=1;k<N;k++)
+       {
+               printf("\ts+=d[%d]; t=s ",k);
+               for (i=k;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; t+=c+(dchunk)v[0]*md[%d]",k,k); /* NOTE(review): one conversion but two arguments; the second k is ignored */
+               for (i=k-1;i>k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; v[%d]=((chunk)t*MC)&BMASK_XXX; 
t+=(dchunk)v[%d]*md[0]; ",k,k);
+               printf(" dd[%d]=(dchunk)v[%d]*md[%d]; s+=dd[%d]; 
c=(t>>BASEBITS_XXX); \n",k,k,k,k);
+       }
+       printf("\n");
+       for (k=N;k<2*N-1;k++)
+       {
+               printf("\ts-=d[%d]; t=s ",k-N);
+               for (i=N-1;i>=1+k/2;i--)
+                       
printf("+(dchunk)(a[%d]-a[%d])*(b[%d]-b[%d])",i,k-i,k-i,i);
+               printf("; t+=c",k); /* NOTE(review): the format has no conversion, so the k argument is ignored */
+               for (i=N-1;i>=1+k/2;i--) 
printf("+(dchunk)(v[%d]-v[%d])*(md[%d]-md[%d])",k-i,i,i,k-i);
+               printf("; r[%d]=(chunk)t&BMASK_XXX;  s-=dd[%d]; 
c=(t>>BASEBITS_XXX); \n",k-N,k-N+1);
+       }
+       
+       printf("\tr[%d]=(chunk)c&BMASK_XXX;\n",N-1);    
+
+    printf("}\n");
+
+}
+
+#endif
+
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fastest.txt
----------------------------------------------------------------------
diff --git a/version3/c/fastest.txt b/version3/c/fastest.txt
new file mode 100644
index 0000000..cc7ced4
--- /dev/null
+++ b/version3/c/fastest.txt
@@ -0,0 +1,28 @@
+When using Montgomery reduction, some advantage comes from "fusing" the 
+multiplication with the modular reduction and unrolling the loops.
+
+For example, for a 32-bit build using 256-bit BIGs and a base of 2^28
+with the NIST256 curve, replace XXX with 256_28 and YYY with NIST256 in 
+fastest.c
+
+
+Then compile and execute the program fastest.c like this (using MinGW
+port of GCC as an example), in the same directory as arch.h and fp_NIST256.h
+
+gcc -O2 -std=c99 fastest.c -o fastest.exe
+fastest > t.txt
+
+Now extract the code fragment from t.txt and insert it where indicated
+into fp_NIST256.c (look for FUSED_MODMUL)
+
+Then make sure that
+
+#define FUSED_MODMUL
+
+appears somewhere in fp_NIST256.h
+
+Finally compile and replace the fp_NIST256 module in the library, and maybe 
+get a 30% speed-up! If there is no significant improvement, don't use this 
+method!
+
+NOTE: This method is experimental. It might affect numerical stability.

Reply via email to