http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp4.h
----------------------------------------------------------------------
diff --git a/version3/c/fp4.h b/version3/c/fp4.h
new file mode 100644
index 0000000..451ab4a
--- /dev/null
+++ b/version3/c/fp4.h
@@ -0,0 +1,305 @@
+/*
+       Licensed to the Apache Software Foundation (ASF) under one
+       or more contributor license agreements.  See the NOTICE file
+       distributed with this work for additional information
+       regarding copyright ownership.  The ASF licenses this file
+       to you under the Apache License, Version 2.0 (the
+       "License"); you may not use this file except in compliance
+       with the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+       Unless required by applicable law or agreed to in writing,
+       software distributed under the License is distributed on an
+       "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+       KIND, either express or implied.  See the License for the
+       specific language governing permissions and limitations
+       under the License.
+*/
+
+/**
+ * @file fp4.h
+ * @author Mike Scott
+ * @brief FP4 Header File
+ *
+ */
+
+#ifndef FP4_YYY_H
+#define FP4_YYY_H
+
+#include "fp2_YYY.h"
+#include "config_curve_ZZZ.h"
+
+/**
+       @brief FP4 Structure - towered over two FP2
+*/
+
+typedef struct
+{
+    FP2_YYY a; /**< real part of FP4 */
+    FP2_YYY b; /**< imaginary part of FP4 */
+} FP4_YYY;
+
+
+/* FP4 prototypes */
+/**    @brief Tests for FP4 equal to zero
+ *
+       @param x FP4 number to be tested
+       @return 1 if zero, else returns 0
+ */
+extern int FP4_YYY_iszilch(FP4_YYY *x);
+/**    @brief Tests for FP4 equal to unity
+ *
+       @param x FP4 number to be tested
+       @return 1 if unity, else returns 0
+ */
+extern int FP4_YYY_isunity(FP4_YYY *x);
+/**    @brief Tests for equality of two FP4s
+ *
+       @param x FP4 instance to be compared
+       @param y FP4 instance to be compared
+       @return 1 if x=y, else returns 0
+ */
+extern int FP4_YYY_equals(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Tests for FP4 having only a real part and no imaginary part
+ *
+       @param x FP4 number to be tested
+       @return 1 if real, else returns 0
+ */
+extern int FP4_YYY_isreal(FP4_YYY *x);
+/**    @brief Initialise FP4 from two FP2s
+ *
+       @param x FP4 instance to be initialised
+       @param a FP2 to form real part of FP4
+       @param b FP2 to form imaginary part of FP4
+ */
+extern void FP4_YYY_from_FP2s(FP4_YYY *x,FP2_YYY *a,FP2_YYY *b);
+/**    @brief Initialise FP4 from single FP2
+ *
+       Imaginary part is set to zero
+       @param x FP4 instance to be initialised
+       @param a FP2 to form real part of FP4
+ */
+extern void FP4_YYY_from_FP2(FP4_YYY *x,FP2_YYY *a);
+
+/**    @brief Initialise FP4 from single FP2
+ *
+       real part is set to zero
+       @param x FP4 instance to be initialised
+       @param a FP2 to form imaginary part of FP4
+ */
+extern void FP4_YYY_from_FP2H(FP4_YYY *x,FP2_YYY *a);
+
+
+/**    @brief Copy FP4 to another FP4
+ *
+       @param x FP4 instance, on exit = y
+       @param y FP4 instance to be copied
+ */
+extern void FP4_YYY_copy(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Set FP4 to zero
+ *
+       @param x FP4 instance to be set to zero
+ */
+extern void FP4_YYY_zero(FP4_YYY *x);
+/**    @brief Set FP4 to unity
+ *
+       @param x FP4 instance to be set to one
+ */
+extern void FP4_YYY_one(FP4_YYY *x);
+/**    @brief Negation of FP4
+ *
+       @param x FP4 instance, on exit = -y
+       @param y FP4 instance
+ */
+extern void FP4_YYY_neg(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Conjugation of FP4
+ *
+       If y=(a,b) on exit x=(a,-b)
+       @param x FP4 instance, on exit = conj(y)
+       @param y FP4 instance
+ */
+extern void FP4_YYY_conj(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Negative conjugation of FP4
+ *
+       If y=(a,b) on exit x=(-a,b)
+       @param x FP4 instance, on exit = -conj(y)
+       @param y FP4 instance
+ */
+extern void FP4_YYY_nconj(FP4_YYY *x,FP4_YYY *y);
+/**    @brief addition of two FP4s
+ *
+       @param x FP4 instance, on exit = y+z
+       @param y FP4 instance
+       @param z FP4 instance
+ */
+extern void FP4_YYY_add(FP4_YYY *x,FP4_YYY *y,FP4_YYY *z);
+/**    @brief subtraction of two FP4s
+ *
+       @param x FP4 instance, on exit = y-z
+       @param y FP4 instance
+       @param z FP4 instance
+ */
+extern void FP4_YYY_sub(FP4_YYY *x,FP4_YYY *y,FP4_YYY *z);
+/**    @brief Multiplication of an FP4 by an FP2
+ *
+       @param x FP4 instance, on exit = y*a
+       @param y FP4 instance
+       @param a FP2 multiplier
+ */
+extern void FP4_YYY_pmul(FP4_YYY *x,FP4_YYY *y,FP2_YYY *a);
+
+/**    @brief Multiplication of an FP4 by an FP
+ *
+       @param x FP4 instance, on exit = y*a
+       @param y FP4 instance
+       @param a FP multiplier
+ */
+extern void FP4_YYY_qmul(FP4_YYY *x,FP4_YYY *y,FP_YYY *a);
+
+/**    @brief Multiplication of an FP4 by a small integer
+ *
+       @param x FP4 instance, on exit = y*i
+       @param y FP4 instance
+       @param i an integer
+ */
+extern void FP4_YYY_imul(FP4_YYY *x,FP4_YYY *y,int i);
+/**    @brief Squaring an FP4
+ *
+       @param x FP4 instance, on exit = y^2
+       @param y FP4 instance
+ */
+extern void FP4_YYY_sqr(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Multiplication of two FP4s
+ *
+       @param x FP4 instance, on exit = y*z
+       @param y FP4 instance
+       @param z FP4 instance
+ */
+extern void FP4_YYY_mul(FP4_YYY *x,FP4_YYY *y,FP4_YYY *z);
+/**    @brief Inverting an FP4
+ *
+       @param x FP4 instance, on exit = 1/y
+       @param y FP4 instance
+ */
+extern void FP4_YYY_inv(FP4_YYY *x,FP4_YYY *y);
+/**    @brief Formats and outputs an FP4 to the console
+ *
+       @param x FP4 instance to be printed
+ */
+extern void FP4_YYY_output(FP4_YYY *x);
+/**    @brief Formats and outputs an FP4 to the console in raw form (for 
debugging)
+ *
+       @param x FP4 instance to be printed
+ */
+extern void FP4_YYY_rawoutput(FP4_YYY *x);
+/**    @brief multiplies an FP4 instance by irreducible polynomial 
sqrt(1+sqrt(-1))
+ *
+       @param x FP4 instance, on exit = sqrt(1+sqrt(-1)*x
+ */
+extern void FP4_YYY_times_i(FP4_YYY *x);
+/**    @brief Normalises the components of an FP4
+ *
+       @param x FP4 instance to be normalised
+ */
+extern void FP4_YYY_norm(FP4_YYY *x);
+/**    @brief Reduces all components of possibly unreduced FP4 mod Modulus
+ *
+       @param x FP4 instance, on exit reduced mod Modulus
+ */
+extern void FP4_YYY_reduce(FP4_YYY *x);
+/**    @brief Raises an FP4 to the power of a BIG
+ *
+       @param x FP4 instance, on exit = y^b
+       @param y FP4 instance
+       @param b BIG number
+ */
+extern void FP4_YYY_pow(FP4_YYY *x,FP4_YYY *y,BIG_XXX b);
+/**    @brief Raises an FP4 to the power of the internal modulus p, using the 
Frobenius
+ *
+       @param x FP4 instance, on exit = x^p
+       @param f FP2 precalculated Frobenius constant
+ */
+extern void FP4_YYY_frob(FP4_YYY *x,FP2_YYY *f);
+/**    @brief Calculates the XTR addition function r=w*x-conj(x)*y+z
+ *
+       @param r FP4 instance, on exit = w*x-conj(x)*y+z
+       @param w FP4 instance
+       @param x FP4 instance
+       @param y FP4 instance
+       @param z FP4 instance
+ */
+extern void FP4_YYY_xtr_A(FP4_YYY *r,FP4_YYY *w,FP4_YYY *x,FP4_YYY *y,FP4_YYY 
*z);
+/**    @brief Calculates the XTR doubling function r=x^2-2*conj(x)
+ *
+       @param r FP4 instance, on exit = x^2-2*conj(x)
+       @param x FP4 instance
+ */
+extern void FP4_YYY_xtr_D(FP4_YYY *r,FP4_YYY *x);
+/**    @brief Calculates FP4 trace of an FP12 raised to the power of a BIG 
number
+ *
+       XTR single exponentiation
+       @param r FP4 instance, on exit = trace(w^b)
+       @param x FP4 instance, trace of an FP12 w
+       @param b BIG number
+ */
+extern void FP4_YYY_xtr_pow(FP4_YYY *r,FP4_YYY *x,BIG_XXX b);
+/**    @brief Calculates FP4 trace of c^a.d^b, where c and d are derived from 
FP4 traces of FP12s
+ *
+       XTR double exponentiation
+       Assumes c=tr(x^m), d=tr(x^n), e=tr(x^(m-n)), f=tr(x^(m-2n))
+       @param r FP4 instance, on exit = trace(c^a.d^b)
+       @param c FP4 instance, trace of an FP12
+       @param d FP4 instance, trace of an FP12
+       @param e FP4 instance, trace of an FP12
+       @param f FP4 instance, trace of an FP12
+       @param a BIG number
+       @param b BIG number
+ */
+extern void FP4_YYY_xtr_pow2(FP4_YYY *r,FP4_YYY *c,FP4_YYY *d,FP4_YYY 
*e,FP4_YYY *f,BIG_XXX a,BIG_XXX b);
+
+/**    @brief Conditional copy of FP4 number
+ *
+       Conditionally copies second parameter to the first (without branching)
+       @param x FP4 instance, set to y if s!=0
+       @param y another FP4 instance
+       @param s copy only takes place if not equal to 0
+ */
+extern void FP4_YYY_cmove(FP4_YYY *x,FP4_YYY *y,int s);
+
+
+/**    @brief Calculate square root of an FP4
+ *
+       Square root
+       @param r FP4 instance, on exit = sqrt(x)
+       @param x FP4 instance
+       @return 1 x is a QR, otherwise 0
+ */
+extern int  FP4_YYY_sqrt(FP4_YYY *r,FP4_YYY *x);
+
+
+/**    @brief Divide FP4 number by QNR
+ *
+       Divide FP4 by the QNR
+       @param x FP4 instance
+ */
+extern void FP4_YYY_div_i(FP4_YYY *x);
+
+/**    @brief Divide an FP4 by QNR/2
+ *
+       Divide FP4 by the QNR/2
+       @param x FP4 instance
+ */
+extern void FP4_YYY_div_2i(FP4_YYY *x);
+
+
+
+/**    @brief Divide an FP4 by 2
+ *
+       @param x FP4 instance, on exit = y/2
+       @param y FP4 instance
+ */
+extern void FP4_YYY_div2(FP4_YYY *x,FP4_YYY *y);
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp48.c
----------------------------------------------------------------------
diff --git a/version3/c/fp48.c b/version3/c/fp48.c
new file mode 100644
index 0000000..68f4ddd
--- /dev/null
+++ b/version3/c/fp48.c
@@ -0,0 +1,1031 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^48 functions */
+/* SU=m, m is Stack Usage (no lazy )*/
+/* FP48 elements are of the form a+i.b+i^2.c */
+
+#include "fp48_YYY.h"
+
+/* return 1 if b==c, no branching */
+static int teq(sign32 b,sign32 c)
+{
+    sign32 x=b^c;
+    x-=1;  // if x=0, x now -1
+    return (int)((x>>31)&1);
+}
+
+
+/* Constant time select from pre-computed table */
+static void FP48_YYY_select(FP48_YYY *f,FP48_YYY g[],sign32 b)
+{
+    FP48_YYY invf;
+    sign32 m=b>>31;
+    sign32 babs=(b^m)-m;
+
+    babs=(babs-1)/2;
+
+    FP48_YYY_cmove(f,&g[0],teq(babs,0));  // conditional move
+    FP48_YYY_cmove(f,&g[1],teq(babs,1));
+    FP48_YYY_cmove(f,&g[2],teq(babs,2));
+    FP48_YYY_cmove(f,&g[3],teq(babs,3));
+    FP48_YYY_cmove(f,&g[4],teq(babs,4));
+    FP48_YYY_cmove(f,&g[5],teq(babs,5));
+    FP48_YYY_cmove(f,&g[6],teq(babs,6));
+    FP48_YYY_cmove(f,&g[7],teq(babs,7));
+
+    FP48_YYY_copy(&invf,f);
+    FP48_YYY_conj(&invf,&invf);  // 1/f
+    FP48_YYY_cmove(f,&invf,(int)(m&1));
+}
+
+
+/* test x==0 ? */
+/* SU= 8 */
+int FP48_YYY_iszilch(FP48_YYY *x)
+{
+    if (FP16_YYY_iszilch(&(x->a)) && FP16_YYY_iszilch(&(x->b)) && 
FP16_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* test x==1 ? */
+/* SU= 8 */
+int FP48_YYY_isunity(FP48_YYY *x)
+{
+    if (FP16_YYY_isunity(&(x->a)) && FP16_YYY_iszilch(&(x->b)) && 
FP16_YYY_iszilch(&(x->c))) return 1;
+    return 0;
+}
+
+/* FP48 copy w=x */
+/* SU= 16 */
+void FP48_YYY_copy(FP48_YYY *w,FP48_YYY *x)
+{
+    if (x==w) return;
+    FP16_YYY_copy(&(w->a),&(x->a));
+    FP16_YYY_copy(&(w->b),&(x->b));
+    FP16_YYY_copy(&(w->c),&(x->c));
+}
+
+/* FP48 w=1 */
+/* SU= 8 */
+void FP48_YYY_one(FP48_YYY *w)
+{
+    FP16_YYY_one(&(w->a));
+    FP16_YYY_zero(&(w->b));
+    FP16_YYY_zero(&(w->c));
+}
+
+/* return 1 if x==y, else 0 */
+/* SU= 16 */
+int FP48_YYY_equals(FP48_YYY *x,FP48_YYY *y)
+{
+    if (FP16_YYY_equals(&(x->a),&(y->a)) && FP16_YYY_equals(&(x->b),&(y->b)) 
&& FP16_YYY_equals(&(x->b),&(y->b)))
+        return 1;
+    return 0;
+}
+
+/* Set w=conj(x) */
+/* SU= 8 */
+void FP48_YYY_conj(FP48_YYY *w,FP48_YYY *x)
+{
+    FP48_YYY_copy(w,x);
+    FP16_YYY_conj(&(w->a),&(w->a));
+    FP16_YYY_nconj(&(w->b),&(w->b));
+    FP16_YYY_conj(&(w->c),&(w->c));
+}
+
+/* Create FP48 from FP16 */
+/* SU= 8 */
+void FP48_YYY_from_FP16(FP48_YYY *w,FP16_YYY *a)
+{
+    FP16_YYY_copy(&(w->a),a);
+    FP16_YYY_zero(&(w->b));
+    FP16_YYY_zero(&(w->c));
+}
+
+/* Create FP48 from 3 FP16's */
+/* SU= 16 */
+void FP48_YYY_from_FP16s(FP48_YYY *w,FP16_YYY *a,FP16_YYY *b,FP16_YYY *c)
+{
+    FP16_YYY_copy(&(w->a),a);
+    FP16_YYY_copy(&(w->b),b);
+    FP16_YYY_copy(&(w->c),c);
+}
+
+/* Granger-Scott Unitary Squaring. This does not benefit from lazy reduction */
+/* SU= 600 */
+void FP48_YYY_usqr(FP48_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY A,B,C,D;
+
+    FP16_YYY_copy(&A,&(x->a));
+
+    FP16_YYY_sqr(&(w->a),&(x->a));
+    FP16_YYY_add(&D,&(w->a),&(w->a));
+    FP16_YYY_add(&(w->a),&D,&(w->a));
+
+    FP16_YYY_norm(&(w->a));
+    FP16_YYY_nconj(&A,&A);
+
+    FP16_YYY_add(&A,&A,&A);
+    FP16_YYY_add(&(w->a),&(w->a),&A);
+    FP16_YYY_sqr(&B,&(x->c));
+    FP16_YYY_times_i(&B);
+
+    FP16_YYY_add(&D,&B,&B);
+    FP16_YYY_add(&B,&B,&D);
+    FP16_YYY_norm(&B);
+
+    FP16_YYY_sqr(&C,&(x->b));
+
+    FP16_YYY_add(&D,&C,&C);
+    FP16_YYY_add(&C,&C,&D);
+
+    FP16_YYY_norm(&C);
+    FP16_YYY_conj(&(w->b),&(x->b));
+    FP16_YYY_add(&(w->b),&(w->b),&(w->b));
+    FP16_YYY_nconj(&(w->c),&(x->c));
+
+    FP16_YYY_add(&(w->c),&(w->c),&(w->c));
+    FP16_YYY_add(&(w->b),&B,&(w->b));
+    FP16_YYY_add(&(w->c),&C,&(w->c));
+
+    FP48_YYY_reduce(w);            /* reduce here as in pow function repeated 
squarings would trigger multiple reductions */
+}
+
+/* FP48 squaring w=x^2 */
+/* SU= 600 */
+void FP48_YYY_sqr(FP48_YYY *w,FP48_YYY *x)
+{
+    /* Use Chung-Hasan SQR2 method from 
http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */
+
+    FP16_YYY A,B,C,D;
+
+    FP16_YYY_sqr(&A,&(x->a));
+    FP16_YYY_mul(&B,&(x->b),&(x->c));
+    FP16_YYY_add(&B,&B,&B);
+       FP16_YYY_norm(&B);
+    FP16_YYY_sqr(&C,&(x->c));
+
+    FP16_YYY_mul(&D,&(x->a),&(x->b));
+    FP16_YYY_add(&D,&D,&D);
+
+    FP16_YYY_add(&(w->c),&(x->a),&(x->c));
+    FP16_YYY_add(&(w->c),&(x->b),&(w->c));
+       FP16_YYY_norm(&(w->c)); 
+
+    FP16_YYY_sqr(&(w->c),&(w->c));
+
+    FP16_YYY_copy(&(w->a),&A);
+    FP16_YYY_add(&A,&A,&B);
+
+    FP16_YYY_norm(&A);
+
+    FP16_YYY_add(&A,&A,&C);
+    FP16_YYY_add(&A,&A,&D);
+
+    FP16_YYY_norm(&A);
+
+    FP16_YYY_neg(&A,&A);
+    FP16_YYY_times_i(&B);
+    FP16_YYY_times_i(&C);
+
+    FP16_YYY_add(&(w->a),&(w->a),&B);
+    FP16_YYY_add(&(w->b),&C,&D);
+    FP16_YYY_add(&(w->c),&(w->c),&A);
+
+    FP48_YYY_norm(w);
+}
+
+/* FP48 full multiplication w=w*y */
+
+
+/* SU= 896 */
+/* FP48 full multiplication w=w*y */
+void FP48_YYY_mul(FP48_YYY *w,FP48_YYY *y)
+{
+    FP16_YYY z0,z1,z2,z3,t0,t1;
+
+    FP16_YYY_mul(&z0,&(w->a),&(y->a));
+    FP16_YYY_mul(&z2,&(w->b),&(y->b));  //
+
+    FP16_YYY_add(&t0,&(w->a),&(w->b));
+    FP16_YYY_add(&t1,&(y->a),&(y->b));  //
+
+       FP16_YYY_norm(&t0);
+       FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&z1,&t0,&t1);
+    FP16_YYY_add(&t0,&(w->b),&(w->c));
+    FP16_YYY_add(&t1,&(y->b),&(y->c));  //
+
+       FP16_YYY_norm(&t0);
+       FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&z3,&t0,&t1);
+
+    FP16_YYY_neg(&t0,&z0);
+    FP16_YYY_neg(&t1,&z2);
+
+    FP16_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+    FP16_YYY_add(&(w->b),&z1,&t1); // z1=z1-z2
+    FP16_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+    FP16_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+    FP16_YYY_add(&t0,&(w->a),&(w->c));
+    FP16_YYY_add(&t1,&(y->a),&(y->c));
+
+       FP16_YYY_norm(&t0);
+       FP16_YYY_norm(&t1);
+
+    FP16_YYY_mul(&t0,&t1,&t0);
+    FP16_YYY_add(&z2,&z2,&t0);
+
+    FP16_YYY_mul(&t0,&(w->c),&(y->c));
+    FP16_YYY_neg(&t1,&t0);
+
+    FP16_YYY_add(&(w->c),&z2,&t1);
+    FP16_YYY_add(&z3,&z3,&t1);
+    FP16_YYY_times_i(&t0);
+    FP16_YYY_add(&(w->b),&(w->b),&t0);
+       FP16_YYY_norm(&z3);
+    FP16_YYY_times_i(&z3);
+    FP16_YYY_add(&(w->a),&z0,&z3);
+
+    FP48_YYY_norm(w);
+}
+
+/* FP48 multiplication w=w*y */
+/* SU= 744 */
+/* catering for special case that arises from special form of ATE pairing line 
function */
+void FP48_YYY_smul(FP48_YYY *w,FP48_YYY *y,int type)
+{
+    FP16_YYY z0,z1,z2,z3,t0,t1;
+
+       if (type==D_TYPE)
+       { // y->c is 0
+
+               FP16_YYY_copy(&z3,&(w->b));
+               FP16_YYY_mul(&z0,&(w->a),&(y->a));
+
+               FP16_YYY_pmul(&z2,&(w->b),&(y->b).a);
+               FP16_YYY_add(&(w->b),&(w->a),&(w->b));
+               FP16_YYY_copy(&t1,&(y->a));
+               FP8_YYY_add(&t1.a,&t1.a,&(y->b).a);
+
+               FP16_YYY_norm(&t1);
+               FP16_YYY_norm(&(w->b));
+
+               FP16_YYY_mul(&(w->b),&(w->b),&t1);
+               FP16_YYY_add(&z3,&z3,&(w->c));
+               FP16_YYY_norm(&z3);
+               FP16_YYY_pmul(&z3,&z3,&(y->b).a);
+               FP16_YYY_neg(&t0,&z0);
+               FP16_YYY_neg(&t1,&z2);
+
+               FP16_YYY_add(&(w->b),&(w->b),&t0);   // z1=z1-z0
+               FP16_YYY_add(&(w->b),&(w->b),&t1);   // z1=z1-z2
+
+               FP16_YYY_add(&z3,&z3,&t1);        // z3=z3-z2
+               FP16_YYY_add(&z2,&z2,&t0);        // z2=z2-z0
+
+               FP16_YYY_add(&t0,&(w->a),&(w->c));
+
+               FP16_YYY_norm(&t0);
+               FP16_YYY_norm(&z3);
+
+               FP16_YYY_mul(&t0,&(y->a),&t0);
+               FP16_YYY_add(&(w->c),&z2,&t0);
+
+               FP16_YYY_times_i(&z3);
+               FP16_YYY_add(&(w->a),&z0,&z3);
+       }
+
+       if (type==M_TYPE)
+       { // y->b is zero
+               FP16_YYY_mul(&z0,&(w->a),&(y->a));
+               FP16_YYY_add(&t0,&(w->a),&(w->b));
+               FP16_YYY_norm(&t0);
+
+               FP16_YYY_mul(&z1,&t0,&(y->a));
+               FP16_YYY_add(&t0,&(w->b),&(w->c));
+               FP16_YYY_norm(&t0);
+
+               FP16_YYY_pmul(&z3,&t0,&(y->c).b);
+               FP16_YYY_times_i(&z3);
+
+               FP16_YYY_neg(&t0,&z0);
+               FP16_YYY_add(&z1,&z1,&t0);   // z1=z1-z0
+
+               FP16_YYY_copy(&(w->b),&z1);
+
+               FP16_YYY_copy(&z2,&t0);
+
+               FP16_YYY_add(&t0,&(w->a),&(w->c));
+               FP16_YYY_add(&t1,&(y->a),&(y->c));
+
+               FP16_YYY_norm(&t0);
+               FP16_YYY_norm(&t1);
+
+               FP16_YYY_mul(&t0,&t1,&t0);
+               FP16_YYY_add(&z2,&z2,&t0);
+
+               FP16_YYY_pmul(&t0,&(w->c),&(y->c).b);
+               FP16_YYY_times_i(&t0);
+               FP16_YYY_neg(&t1,&t0);
+               FP16_YYY_times_i(&t0);
+
+               FP16_YYY_add(&(w->c),&z2,&t1);
+               FP16_YYY_add(&z3,&z3,&t1);
+
+               FP16_YYY_add(&(w->b),&(w->b),&t0);
+               FP16_YYY_norm(&z3);
+               FP16_YYY_times_i(&z3);
+               FP16_YYY_add(&(w->a),&z0,&z3);
+       }
+    FP48_YYY_norm(w);
+}
+
+/* Set w=1/x */
+/* SU= 600 */
+void FP48_YYY_inv(FP48_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY f0,f1,f2,f3;
+
+    FP16_YYY_sqr(&f0,&(x->a));
+    FP16_YYY_mul(&f1,&(x->b),&(x->c));
+    FP16_YYY_times_i(&f1);
+    FP16_YYY_sub(&f0,&f0,&f1);  /* y.a */
+       FP16_YYY_norm(&f0);             
+
+    FP16_YYY_sqr(&f1,&(x->c));
+    FP16_YYY_times_i(&f1);
+    FP16_YYY_mul(&f2,&(x->a),&(x->b));
+    FP16_YYY_sub(&f1,&f1,&f2);  /* y.b */
+       FP16_YYY_norm(&f1); 
+
+    FP16_YYY_sqr(&f2,&(x->b));
+    FP16_YYY_mul(&f3,&(x->a),&(x->c));
+    FP16_YYY_sub(&f2,&f2,&f3);  /* y.c */
+       FP16_YYY_norm(&f2); 
+
+    FP16_YYY_mul(&f3,&(x->b),&f2);
+    FP16_YYY_times_i(&f3);
+    FP16_YYY_mul(&(w->a),&f0,&(x->a));
+    FP16_YYY_add(&f3,&(w->a),&f3);
+    FP16_YYY_mul(&(w->c),&f1,&(x->c));
+    FP16_YYY_times_i(&(w->c));
+
+    FP16_YYY_add(&f3,&(w->c),&f3);
+       FP16_YYY_norm(&f3);
+       
+    FP16_YYY_inv(&f3,&f3);
+    FP16_YYY_mul(&(w->a),&f0,&f3);
+    FP16_YYY_mul(&(w->b),&f1,&f3);
+    FP16_YYY_mul(&(w->c),&f2,&f3);
+
+}
+
+/* constant time powering by small integer of max length bts */
+
+void FP48_YYY_pinpow(FP48_YYY *r,int e,int bts)
+{
+    int i,b;
+    FP48_YYY R[2];
+
+    FP48_YYY_one(&R[0]);
+    FP48_YYY_copy(&R[1],r);
+
+    for (i=bts-1; i>=0; i--)
+    {
+        b=(e>>i)&1;
+        FP48_YYY_mul(&R[1-b],&R[b]);
+        FP48_YYY_usqr(&R[b],&R[b]);
+    }
+    FP48_YYY_copy(r,&R[0]);
+}
+
+/* Compressed powering of unitary elements y=x^(e mod r) */
+
+void FP48_YYY_compow(FP16_YYY *c,FP48_YYY *x,BIG_XXX e,BIG_XXX r)
+{
+    FP48_YYY g1,g2;
+       FP16_YYY cp,cpm1,cpm2;
+    FP2_YYY  f;
+       BIG_XXX q,a,b,m;
+
+    BIG_XXX_rcopy(a,Fra_YYY);
+    BIG_XXX_rcopy(b,Frb_YYY);
+    FP2_YYY_from_BIGs(&f,a,b);
+
+    BIG_XXX_rcopy(q,Modulus_YYY);
+
+    FP48_YYY_copy(&g1,x);
+       FP48_YYY_copy(&g2,x);
+
+    BIG_XXX_copy(m,q);
+    BIG_XXX_mod(m,r);
+
+    BIG_XXX_copy(a,e);
+    BIG_XXX_mod(a,m);
+
+    BIG_XXX_copy(b,e);
+    BIG_XXX_sdiv(b,m);
+
+    FP48_YYY_trace(c,&g1);
+
+       if (BIG_XXX_iszilch(b))
+       {
+               FP16_YYY_xtr_pow(c,c,e);
+               return;
+       }
+
+    FP48_YYY_frob(&g2,&f,1);
+    FP48_YYY_trace(&cp,&g2);
+    FP48_YYY_conj(&g1,&g1);
+    FP48_YYY_mul(&g2,&g1);
+    FP48_YYY_trace(&cpm1,&g2);
+    FP48_YYY_mul(&g2,&g1);
+
+    FP48_YYY_trace(&cpm2,&g2);
+
+    FP16_YYY_xtr_pow2(c,&cp,c,&cpm1,&cpm2,a,b);
+
+}
+
+/* Note this is simple square and multiply, so not side-channel safe */
+
+void FP48_YYY_pow(FP48_YYY *r,FP48_YYY *a,BIG_XXX b)
+{
+    FP48_YYY w,sf;
+    BIG_XXX b1,b3;
+    int i,nb,bt;
+       BIG_XXX_copy(b1,b);
+    BIG_XXX_norm(b1);
+       BIG_XXX_pmul(b3,b1,3);
+       BIG_XXX_norm(b3);
+
+       FP48_YYY_copy(&sf,a);
+       FP48_YYY_norm(&sf);
+    FP48_YYY_copy(&w,&sf);
+
+       nb=BIG_XXX_nbits(b3);
+       for (i=nb-2;i>=1;i--)
+       {
+               FP48_YYY_usqr(&w,&w);
+               bt=BIG_XXX_bit(b3,i)-BIG_XXX_bit(b1,i);
+               if (bt==1)
+                       FP48_YYY_mul(&w,&sf);
+               if (bt==-1)
+               {
+                       FP48_YYY_conj(&sf,&sf);
+                       FP48_YYY_mul(&w,&sf);
+                       FP48_YYY_conj(&sf,&sf);
+               }
+       }
+
+       FP48_YYY_copy(r,&w);
+       FP48_YYY_reduce(r);
+}
+
+/* p=q0^u0.q1^u1.q2^u2.q3^u3... */
+/* Side channel attack secure */
+// Bos & Costello https://eprint.iacr.org/2013/458.pdf
+// Faz-Hernandez & Longa & Sanchez  https://eprint.iacr.org/2013/158.pdf
+
+void FP48_YYY_pow16(FP48_YYY *p,FP48_YYY *q,BIG_XXX u[16])
+{
+    int i,j,k,nb,pb1,pb2,pb3,pb4,bt;
+       FP48_YYY g1[8],g2[8],g3[8],g4[8],r;
+       BIG_XXX t[16],mt;
+    sign8 w1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s1[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s2[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s3[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 w4[NLEN_XXX*BASEBITS_XXX+1];
+    sign8 s4[NLEN_XXX*BASEBITS_XXX+1];
+    FP_YYY fx,fy;
+       FP2_YYY  X;
+
+    FP_YYY_rcopy(&fx,Fra_YYY);
+    FP_YYY_rcopy(&fy,Frb_YYY);
+    FP2_YYY_from_FPs(&X,&fx,&fy);
+
+    for (i=0; i<16; i++)
+        BIG_XXX_copy(t[i],u[i]);
+
+// Precomputed table
+    FP48_YYY_copy(&g1[0],&q[0]); // q[0]
+    FP48_YYY_copy(&g1[1],&g1[0]);
+       FP48_YYY_mul(&g1[1],&q[1]);     // q[0].q[1]
+    FP48_YYY_copy(&g1[2],&g1[0]);
+       FP48_YYY_mul(&g1[2],&q[2]);     // q[0].q[2]
+       FP48_YYY_copy(&g1[3],&g1[1]);
+       FP48_YYY_mul(&g1[3],&q[2]);     // q[0].q[1].q[2]
+       FP48_YYY_copy(&g1[4],&g1[0]);
+       FP48_YYY_mul(&g1[4],&q[3]);  // q[0].q[3]
+       FP48_YYY_copy(&g1[5],&g1[1]);
+       FP48_YYY_mul(&g1[5],&q[3]);     // q[0].q[1].q[3]
+       FP48_YYY_copy(&g1[6],&g1[2]);
+       FP48_YYY_mul(&g1[6],&q[3]);     // q[0].q[2].q[3]
+       FP48_YYY_copy(&g1[7],&g1[3]);
+       FP48_YYY_mul(&g1[7],&q[3]);     // q[0].q[1].q[2].q[3]
+
+// Use Frobenius
+
+       for (i=0;i<8;i++)
+       {
+               FP48_YYY_copy(&g2[i],&g1[i]);
+               FP48_YYY_frob(&g2[i],&X,4);
+
+               FP48_YYY_copy(&g3[i],&g2[i]);
+               FP48_YYY_frob(&g3[i],&X,4);
+
+               FP48_YYY_copy(&g4[i],&g3[i]);
+               FP48_YYY_frob(&g4[i],&X,4);
+       }
+
+// Make them odd
+       pb1=1-BIG_XXX_parity(t[0]);
+       BIG_XXX_inc(t[0],pb1);
+       BIG_XXX_norm(t[0]);
+
+       pb2=1-BIG_XXX_parity(t[4]);
+       BIG_XXX_inc(t[4],pb2);
+       BIG_XXX_norm(t[4]);
+
+       pb3=1-BIG_XXX_parity(t[8]);
+       BIG_XXX_inc(t[8],pb3);
+       BIG_XXX_norm(t[8]);
+
+       pb4=1-BIG_XXX_parity(t[12]);
+       BIG_XXX_inc(t[12],pb4);
+       BIG_XXX_norm(t[12]);
+
+// Number of bits
+    BIG_XXX_zero(mt);
+    for (i=0; i<16; i++)
+    {
+        BIG_XXX_or(mt,mt,t[i]);
+    }
+    nb=1+BIG_XXX_nbits(mt);
+
+// Sign pivot 
+       s1[nb-1]=1;
+       s2[nb-1]=1;
+       s3[nb-1]=1;
+       s4[nb-1]=1;
+       for (i=0;i<nb-1;i++)
+       {
+        BIG_XXX_fshr(t[0],1);
+               s1[i]=2*BIG_XXX_parity(t[0])-1;
+        BIG_XXX_fshr(t[4],1);
+               s2[i]=2*BIG_XXX_parity(t[4])-1;
+        BIG_XXX_fshr(t[8],1);
+               s3[i]=2*BIG_XXX_parity(t[8])-1;
+        BIG_XXX_fshr(t[12],1);
+               s4[i]=2*BIG_XXX_parity(t[12])-1;
+       }
+
+// Recoded exponents
+    for (i=0; i<nb; i++)
+    {
+               w1[i]=0;
+               k=1;
+               for (j=1; j<4; j++)
+               {
+                       bt=s1[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w1[i]+=bt*k;
+                       k*=2;
+        }
+
+               w2[i]=0;
+               k=1;
+               for (j=5; j<8; j++)
+               {
+                       bt=s2[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w2[i]+=bt*k;
+                       k*=2;
+        }
+
+               w3[i]=0;
+               k=1;
+               for (j=9; j<12; j++)
+               {
+                       bt=s3[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w3[i]+=bt*k;
+                       k*=2;
+        }
+
+               w4[i]=0;
+               k=1;
+               for (j=13; j<16; j++)
+               {
+                       bt=s4[i]*BIG_XXX_parity(t[j]);
+                       BIG_XXX_fshr(t[j],1);
+
+                       BIG_XXX_dec(t[j],(bt>>1));
+                       BIG_XXX_norm(t[j]);
+                       w4[i]+=bt*k;
+                       k*=2;
+        }
+    }  
+
+// Main loop
+       FP48_YYY_select(p,g1,2*w1[nb-1]+1);
+       FP48_YYY_select(&r,g2,2*w2[nb-1]+1);
+       FP48_YYY_mul(p,&r);
+       FP48_YYY_select(&r,g3,2*w3[nb-1]+1);
+       FP48_YYY_mul(p,&r);
+       FP48_YYY_select(&r,g4,2*w4[nb-1]+1);
+       FP48_YYY_mul(p,&r);
+    for (i=nb-2; i>=0; i--)
+    {
+               FP48_YYY_usqr(p,p);
+        FP48_YYY_select(&r,g1,2*w1[i]+s1[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g2,2*w2[i]+s2[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g3,2*w3[i]+s3[i]);
+        FP48_YYY_mul(p,&r);
+        FP48_YYY_select(&r,g4,2*w4[i]+s4[i]);
+        FP48_YYY_mul(p,&r);
+    }
+
+// apply correction
+       FP48_YYY_conj(&r,&q[0]);   
+       FP48_YYY_mul(&r,p);
+       FP48_YYY_cmove(p,&r,pb1);
+       FP48_YYY_conj(&r,&q[4]);   
+       FP48_YYY_mul(&r,p);
+       FP48_YYY_cmove(p,&r,pb2);
+
+       FP48_YYY_conj(&r,&q[8]);   
+       FP48_YYY_mul(&r,p);
+       FP48_YYY_cmove(p,&r,pb3);
+       FP48_YYY_conj(&r,&q[12]);   
+       FP48_YYY_mul(&r,p);
+       FP48_YYY_cmove(p,&r,pb4);
+
+       FP48_YYY_reduce(p);
+}
+
+/* Set w=w^p using Frobenius */
+/* SU= 160 */
+void FP48_YYY_frob(FP48_YYY *w,FP2_YYY  *f,int n)
+{
+       int i;
+       FP8_YYY X2,X4;
+       FP4_YYY F;
+    FP2_YYY  f3,f2;                            // f=(1+i)^(p-19)/24
+    FP2_YYY_sqr(&f2,f);     // 
+    FP2_YYY_mul(&f3,&f2,f); // f3=f^3=(1+i)^(p-19)/8
+
+       FP2_YYY_mul_ip(&f3);
+       FP2_YYY_norm(&f3);
+       FP2_YYY_mul_ip(&f3);    // f3 = (1+i)^16/8.(1+i)^(p-19)/8 = 
(1+i)^(p-3)/8 
+       FP2_YYY_norm(&f3);
+
+       for (i=0;i<n;i++)
+       {
+               FP16_YYY_frob(&(w->a),&f3);   // a=a^p
+               FP16_YYY_frob(&(w->b),&f3);   // b=b^p
+               FP16_YYY_frob(&(w->c),&f3);   // c=c^p
+  
+               FP16_YYY_qmul(&(w->b),&(w->b),f); FP16_YYY_times_i4(&(w->b)); 
FP16_YYY_times_i2(&(w->b)); 
+               FP16_YYY_qmul(&(w->c),&(w->c),&f2); FP16_YYY_times_i4(&(w->c)); 
FP16_YYY_times_i4(&(w->c)); FP16_YYY_times_i4(&(w->c)); 
+
+       }
+}
+
+/* SU= 8 */
+/* normalise all components of w */
+void FP48_YYY_norm(FP48_YYY *w)
+{
+    FP16_YYY_norm(&(w->a));
+    FP16_YYY_norm(&(w->b));
+    FP16_YYY_norm(&(w->c));
+}
+
+/* SU= 8 */
+/* reduce all components of w */
+void FP48_YYY_reduce(FP48_YYY *w)
+{
+    FP16_YYY_reduce(&(w->a));
+    FP16_YYY_reduce(&(w->b));
+    FP16_YYY_reduce(&(w->c));
+}
+
+/* trace function w=trace(x) */
+/* SU= 8 */
+void FP48_YYY_trace(FP16_YYY *w,FP48_YYY *x)
+{
+    FP16_YYY_imul(w,&(x->a),3);
+    FP16_YYY_reduce(w);
+}
+
+/* SU= 8 */
+/* Output w in hex */
+void FP48_YYY_output(FP48_YYY *w)
+{
+    printf("[");
+    FP16_YYY_output(&(w->a));
+    printf(",");
+    FP16_YYY_output(&(w->b));
+    printf(",");
+    FP16_YYY_output(&(w->c));
+    printf("]");
+}
+
+/* Convert g to octet string w */
+void FP48_YYY_toOctet(octet *W,FP48_YYY *g)
+{
+    BIG_XXX a;
+    W->len=48*MODBYTES_XXX;
+
+    FP_YYY_redc(a,&(g->a.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[0]),a);
+    FP_YYY_redc(a,&(g->a.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[MODBYTES_XXX]),a);
+    
+       FP_YYY_redc(a,&(g->a.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[2*MODBYTES_XXX]),a);
+       FP_YYY_redc(a,&(g->a.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[3*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[4*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[5*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[6*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[7*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[8*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[9*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[10*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[11*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[12*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[13*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->a.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[14*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->a.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[15*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[16*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[17*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[18*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[19*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[20*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[21*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[22*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[23*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[24*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[25*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[26*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[27*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[28*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[29*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->b.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[30*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->b.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[31*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.a.a.a));
+    BIG_XXX_toBytes(&(W->val[32*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.a.a.b));
+    BIG_XXX_toBytes(&(W->val[33*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.a.b.a));
+    BIG_XXX_toBytes(&(W->val[34*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.a.b.b));
+    BIG_XXX_toBytes(&(W->val[35*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.b.a.a));
+    BIG_XXX_toBytes(&(W->val[36*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.a.b));
+    BIG_XXX_toBytes(&(W->val[37*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.a.b.b.a));
+    BIG_XXX_toBytes(&(W->val[38*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.a.b.b.b));
+    BIG_XXX_toBytes(&(W->val[39*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.a.a.a));
+    BIG_XXX_toBytes(&(W->val[40*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.a.b));
+    BIG_XXX_toBytes(&(W->val[41*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.a.b.a));
+    BIG_XXX_toBytes(&(W->val[42*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.a.b.b));
+    BIG_XXX_toBytes(&(W->val[43*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.b.a.a));
+    BIG_XXX_toBytes(&(W->val[44*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.a.b));
+    BIG_XXX_toBytes(&(W->val[45*MODBYTES_XXX]),a);
+
+    FP_YYY_redc(a,&(g->c.b.b.b.a));
+    BIG_XXX_toBytes(&(W->val[46*MODBYTES_XXX]),a);
+    FP_YYY_redc(a,&(g->c.b.b.b.b));
+    BIG_XXX_toBytes(&(W->val[47*MODBYTES_XXX]),a);
+
+}
+
+/* Restore g from octet string w */
+void FP48_YYY_fromOctet(FP48_YYY *g,octet *W)
+{
+       BIG_XXX b;
+
+    BIG_XXX_fromBytes(b,&W->val[0]);
+    FP_YYY_nres(&(g->a.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[2*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[3*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[4*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[5*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[6*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[7*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[8*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[9*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[10*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[11*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[12*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[13*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[14*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[15*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->a.b.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[16*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[17*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[18*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[19*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[20*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[21*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[22*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[23*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[24*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[25*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[26*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[27*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[28*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[29*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[30*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[31*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->b.b.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[32*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[33*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[34*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[35*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[36*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[37*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[38*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[39*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.a.b.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[40*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[41*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[42*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[43*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.a.b.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[44*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.a.a),b);
+    BIG_XXX_fromBytes(b,&W->val[45*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.a.b),b);
+
+    BIG_XXX_fromBytes(b,&W->val[46*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.b.a),b);
+    BIG_XXX_fromBytes(b,&W->val[47*MODBYTES_XXX]);
+    FP_YYY_nres(&(g->c.b.b.b.b),b);
+
+}
+
+/* Move b to a if d=1 */
+void FP48_YYY_cmove(FP48_YYY *f,FP48_YYY *g,int d)
+{
+    FP16_YYY_cmove(&(f->a),&(g->a),d);
+    FP16_YYY_cmove(&(f->b),&(g->b),d);
+    FP16_YYY_cmove(&(f->c),&(g->c),d);
+}

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp48.h
----------------------------------------------------------------------
diff --git a/version3/c/fp48.h b/version3/c/fp48.h
new file mode 100644
index 0000000..75065b5
--- /dev/null
+++ b/version3/c/fp48.h
@@ -0,0 +1,196 @@
+#ifndef FP48_YYY_H
+#define FP48_YYY_H
+
+#include "fp16_YYY.h"
+
+/**
+       @brief FP12 Structure - towered over three FP16
+*/
+
+typedef struct
+{
+    FP16_YYY a; /**< first part of FP12 */
+    FP16_YYY b; /**< second part of FP12 */
+    FP16_YYY c; /**< third part of FP12 */
+} FP48_YYY;
+
+extern const BIG_XXX Fra_YYY; /**< real part of BN curve Frobenius Constant */
+extern const BIG_XXX Frb_YYY; /**< imaginary part of BN curve Frobenius 
Constant */
+
+/* FP48 prototypes */
+/**    @brief Tests for FP48 equal to zero
+ *
+       @param x FP48 number to be tested
+       @return 1 if zero, else returns 0
+ */
+extern int FP48_YYY_iszilch(FP48_YYY *x);
+/**    @brief Tests for FP48 equal to unity
+ *
+       @param x FP48 number to be tested
+       @return 1 if unity, else returns 0
+ */
+extern int FP48_YYY_isunity(FP48_YYY *x);
+/**    @brief Copy FP48 to another FP48
+ *
+       @param x FP48 instance, on exit = y
+       @param y FP48 instance to be copied
+ */
+extern void FP48_YYY_copy(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Set FP48 to unity
+ *
+       @param x FP48 instance to be set to one
+ */
+extern void FP48_YYY_one(FP48_YYY *x);
+/**    @brief Tests for equality of two FP48s
+ *
+       @param x FP48 instance to be compared
+       @param y FP48 instance to be compared
+       @return 1 if x=y, else returns 0
+ */
+extern int FP48_YYY_equals(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Conjugation of FP48
+ *
+       If y=(a,b,c) (where a,b,c are its three FP16 components) on exit 
x=(conj(a),-conj(b),conj(c))
+       @param x FP48 instance, on exit = conj(y)
+       @param y FP48 instance
+ */
+extern void FP48_YYY_conj(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Initialise FP48 from single FP16
+ *
+       Sets first FP16 component of an FP48, other components set to zero
+       @param x FP48 instance to be initialised
+       @param a FP16 to form first part of FP48
+ */
+extern void FP48_YYY_from_FP16(FP48_YYY *x,FP16_YYY *a);
+/**    @brief Initialise FP48 from three FP16s
+ *
+       @param x FP48 instance to be initialised
+       @param a FP16 to form first part of FP48
+       @param b FP16 to form second part of FP48
+       @param c FP16 to form third part of FP48
+ */
+extern void FP48_YYY_from_FP16s(FP48_YYY *x,FP16_YYY *a,FP16_YYY* b,FP16_YYY 
*c);
+/**    @brief Fast Squaring of an FP48 in "unitary" form
+ *
+       @param x FP48 instance, on exit = y^2
+       @param y FP16 instance, must be unitary
+ */
+extern void FP48_YYY_usqr(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Squaring an FP48
+ *
+       @param x FP48 instance, on exit = y^2
+       @param y FP48 instance
+ */
+extern void FP48_YYY_sqr(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Fast multiplication of an FP48 by an FP48 that arises from an 
ATE pairing line function
+ *
+       Here the multiplier has a special form that can be exploited
+       @param x FP48 instance, on exit = x*y
+       @param y FP48 instance, of special form
+       @param t D_TYPE or M_TYPE twist
+ */
+extern void FP48_YYY_smul(FP48_YYY *x,FP48_YYY *y,int t);
+/**    @brief Multiplication of two FP48s
+ *
+       @param x FP48 instance, on exit = x*y
+       @param y FP48 instance, the multiplier
+ */
+extern void FP48_YYY_mul(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Inverting an FP48
+ *
+       @param x FP48 instance, on exit = 1/y
+       @param y FP48 instance
+ */
+extern void FP48_YYY_inv(FP48_YYY *x,FP48_YYY *y);
+/**    @brief Raises an FP48 to the power of a BIG
+ *
+       @param r FP48 instance, on exit = y^b
+       @param x FP48 instance
+       @param b BIG number
+ */
+extern void FP48_YYY_pow(FP48_YYY *r,FP48_YYY *x,BIG_XXX b);
+
+//extern void FP48_ppow(FP48 *r,FP48 *x,BIG b);
+
+/**    @brief Raises an FP48 instance x to a small integer power, side-channel 
resistant
+ *
+       @param x FP48 instance, on exit = x^i
+       @param i small integer exponent
+       @param b maximum number of bits in exponent
+ */
+extern void FP48_YYY_pinpow(FP48_YYY *x,int i,int b);
+
+/**    @brief Raises an FP48 instance x to a BIG_XXX power, compressed to FP16 
+ *
+       @param c FP16 instance, on exit = x^(e mod r) as FP16
+       @param x FP48 input
+       @param e BIG exponent
+       @param r BIG group order
+ */
+extern void FP48_YYY_compow(FP16_YYY *c,FP48_YYY *x,BIG_XXX e,BIG_XXX r);
+
+/**    @brief Calculate Pi x[i]^b[i] for i=0 to 15, side-channel resistant
+ *
+       @param r FP48 instance, on exit = Pi x[i]^b[i] for i=0 to 15
+       @param x FP48 array with 16 FP48s
+       @param b BIG array of 16 exponents
+ */
+extern void FP48_YYY_pow16(FP48_YYY *r,FP48_YYY *x,BIG_XXX *b);
+
+
+/**    @brief Raises an FP48 to the power of the internal modulus p, using the 
Frobenius
+ *
+       @param x FP48 instance, on exit = x^p^n
+       @param f FP2 precalculated Frobenius constant
+       @param n power of p
+ */
+extern void FP48_YYY_frob(FP48_YYY *x,FP2_YYY *f,int n);
+
+/**    @brief Reduces all components of possibly unreduced FP48 mod Modulus
+ *
+       @param x FP48 instance, on exit reduced mod Modulus
+ */
+extern void FP48_YYY_reduce(FP48_YYY *x);
+/**    @brief Normalises the components of an FP48
+ *
+       @param x FP48 instance to be normalised
+ */
+extern void FP48_YYY_norm(FP48_YYY *x);
+/**    @brief Formats and outputs an FP48 to the console
+ *
+       @param x FP48 instance to be printed
+ */
+extern void FP48_YYY_output(FP48_YYY *x);
+/**    @brief Formats and outputs an FP48 instance to an octet string
+ *
+       Serializes the components of an FP48 to big-endian base 256 form.
+       @param S output octet string
+       @param x FP48 instance to be converted to an octet string
+ */
+extern void FP48_YYY_toOctet(octet *S,FP48_YYY *x);
+/**    @brief Creates an FP48 instance from an octet string
+ *
+       De-serializes the components of an FP48 to create an FP48 from 
big-endian base 256 components.
+       @param x FP48 instance to be created from an octet string
+       @param S input octet string
+
+ */
+extern void FP48_YYY_fromOctet(FP48_YYY *x,octet *S);
+/**    @brief Calculate the trace of an FP48
+ *
+       @param t FP16 trace of x, on exit = tr(x)
+       @param x FP48 instance
+
+ */
+extern void FP48_YYY_trace(FP16_YYY *t,FP48_YYY *x);
+
+/**    @brief Conditional copy of FP48 number
+ *
+       Conditionally copies second parameter to the first (without branching)
+       @param x FP48 instance, set to y if s!=0
+       @param y another FP48 instance
+       @param s copy only takes place if not equal to 0
+ */
+extern void FP48_YYY_cmove(FP48_YYY *x,FP48_YYY *y,int s);
+
+#endif

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp8.c
----------------------------------------------------------------------
diff --git a/version3/c/fp8.c b/version3/c/fp8.c
new file mode 100644
index 0000000..3786a7a
--- /dev/null
+++ b/version3/c/fp8.c
@@ -0,0 +1,677 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/* AMCL Fp^8 functions */
+
+/* FP8 elements are of the form a+ib, where i is sqrt(sqrt(-1+sqrt(-1))) */
+
+#include "fp8_YYY.h"
+
+
+/* test x==0 ? */
+int FP8_YYY_iszilch(FP8_YYY *x)
+{
+    if (FP4_YYY_iszilch(&(x->a)) && FP4_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* test x==1 ? */
+int FP8_YYY_isunity(FP8_YYY *x)
+{
+    if (FP4_YYY_isunity(&(x->a)) && FP4_YYY_iszilch(&(x->b))) return 1;
+    return 0;
+}
+
+/* test is w real? That is in a+ib test b is zero */
+int FP8_YYY_isreal(FP8_YYY *w)
+{
+    return FP4_YYY_iszilch(&(w->b));
+}
+
+/* return 1 if x==y, else 0 */
+int FP8_YYY_equals(FP8_YYY *x,FP8_YYY *y)
+{
+    if (FP4_YYY_equals(&(x->a),&(y->a)) && FP4_YYY_equals(&(x->b),&(y->b)))
+        return 1;
+    return 0;
+}
+
+/* set FP8 from two FP4s */
+void FP8_YYY_from_FP4s(FP8_YYY *w,FP4_YYY * x,FP4_YYY* y)
+{
+    FP4_YYY_copy(&(w->a), x);
+    FP4_YYY_copy(&(w->b), y);
+}
+
+/* set FP8 from FP4 */
+void FP8_YYY_from_FP4(FP8_YYY *w,FP4_YYY *x)
+{
+    FP4_YYY_copy(&(w->a), x);
+    FP4_YYY_zero(&(w->b));
+}
+
+/* set high part of FP8 from FP4 */
+void FP8_YYY_from_FP4H(FP8_YYY *w,FP4_YYY *x)
+{
+    FP4_YYY_copy(&(w->b), x);
+    FP4_YYY_zero(&(w->a));
+}
+
+/* FP8 copy w=x */
+void FP8_YYY_copy(FP8_YYY *w,FP8_YYY *x)
+{
+    if (w==x) return;
+    FP4_YYY_copy(&(w->a), &(x->a));
+    FP4_YYY_copy(&(w->b), &(x->b));
+}
+
+/* FP8 w=0 */
+void FP8_YYY_zero(FP8_YYY *w)
+{
+    FP4_YYY_zero(&(w->a));
+    FP4_YYY_zero(&(w->b));
+}
+
+/* FP8 w=1 */
+void FP8_YYY_one(FP8_YYY *w)
+{
+    FP4_YYY_one(&(w->a));
+    FP4_YYY_zero(&(w->b));
+}
+
+/* Set w=-x */
+void FP8_YYY_neg(FP8_YYY *w,FP8_YYY *x)
+{
+    /* Just one field neg */
+    FP4_YYY m,t;
+       FP8_YYY_norm(x);
+    FP4_YYY_add(&m,&(x->a),&(x->b));
+       FP4_YYY_norm(&m);
+    FP4_YYY_neg(&m,&m);
+    FP4_YYY_add(&t,&m,&(x->b));
+    FP4_YYY_add(&(w->b),&m,&(x->a));
+    FP4_YYY_copy(&(w->a),&t);
+       FP8_YYY_norm(w);
+}
+
+/* Set w=conj(x) */
+void FP8_YYY_conj(FP8_YYY *w,FP8_YYY *x)
+{
+    FP4_YYY_copy(&(w->a), &(x->a));
+    FP4_YYY_neg(&(w->b), &(x->b));
+       FP8_YYY_norm(w);
+}
+
+/* Set w=-conj(x) */
+void FP8_YYY_nconj(FP8_YYY *w,FP8_YYY *x)
+{
+    FP4_YYY_copy(&(w->b),&(x->b));
+    FP4_YYY_neg(&(w->a), &(x->a));
+       FP8_YYY_norm(w);
+}
+
+/* Set w=x+y */
+void FP8_YYY_add(FP8_YYY *w,FP8_YYY *x,FP8_YYY *y)
+{
+    FP4_YYY_add(&(w->a), &(x->a), &(y->a));
+    FP4_YYY_add(&(w->b), &(x->b), &(y->b));
+}
+
+/* Set w=x-y */
+/* Input y MUST be normed */
+void FP8_YYY_sub(FP8_YYY *w,FP8_YYY *x,FP8_YYY *y)
+{
+    FP8_YYY my;
+
+    FP8_YYY_neg(&my, y);
+    FP8_YYY_add(w, x, &my);
+
+}
+
+/* reduce all components of w mod Modulus */
+void FP8_YYY_reduce(FP8_YYY *w)
+{
+    FP4_YYY_reduce(&(w->a));
+    FP4_YYY_reduce(&(w->b));
+}
+
+/* normalise all elements of w */
+void FP8_YYY_norm(FP8_YYY *w)
+{
+    FP4_YYY_norm(&(w->a));
+    FP4_YYY_norm(&(w->b));
+}
+
+/* Set w=s*x, where s is FP4 */
+void FP8_YYY_pmul(FP8_YYY *w,FP8_YYY *x,FP4_YYY *s)
+{
+    FP4_YYY_mul(&(w->a),&(x->a),s);
+    FP4_YYY_mul(&(w->b),&(x->b),s);
+}
+
+/* Set w=s*x, where s is FP2 */
+void FP8_YYY_qmul(FP8_YYY *w,FP8_YYY *x,FP2_YYY *s)
+{
+    FP4_YYY_pmul(&(w->a),&(x->a),s);
+    FP4_YYY_pmul(&(w->b),&(x->b),s);
+}
+
+/* Set w=s*x, where s is FP2 */
+void FP8_YYY_tmul(FP8_YYY *w,FP8_YYY *x,FP_YYY *s)
+{
+    FP4_YYY_qmul(&(w->a),&(x->a),s);
+    FP4_YYY_qmul(&(w->b),&(x->b),s);
+}
+
+/* Set w=s*x, where s is int */
+void FP8_YYY_imul(FP8_YYY *w,FP8_YYY *x,int s)
+{
+    FP4_YYY_imul(&(w->a),&(x->a),s);
+    FP4_YYY_imul(&(w->b),&(x->b),s);
+}
+
+/* Set w=x^2 */
+/* Input MUST be normed  */
+void FP8_YYY_sqr(FP8_YYY *w,FP8_YYY *x)
+{
+    FP4_YYY t1,t2,t3;
+
+    FP4_YYY_mul(&t3,&(x->a),&(x->b)); /* norms x */
+    FP4_YYY_copy(&t2,&(x->b));
+    FP4_YYY_add(&t1,&(x->a),&(x->b));
+    FP4_YYY_times_i(&t2);
+
+    FP4_YYY_add(&t2,&(x->a),&t2);
+
+       FP4_YYY_norm(&t1);  // 2
+       FP4_YYY_norm(&t2);  // 2
+
+    FP4_YYY_mul(&(w->a),&t1,&t2);
+
+    FP4_YYY_copy(&t2,&t3);
+    FP4_YYY_times_i(&t2);
+
+    FP4_YYY_add(&t2,&t2,&t3);
+
+       FP4_YYY_norm(&t2);  // 2
+    FP4_YYY_neg(&t2,&t2);
+    FP4_YYY_add(&(w->a),&(w->a),&t2);  /* a=(a+b)(a+i^2.b)-i^2.ab-ab = 
a*a+ib*ib */
+    FP4_YYY_add(&(w->b),&t3,&t3);  /* b=2ab */
+
+    FP8_YYY_norm(w);
+}
+
+/* Set w=x*y */
+/* Inputs MUST be normed  */
+void FP8_YYY_mul(FP8_YYY *w,FP8_YYY *x,FP8_YYY *y)
+{
+
+    FP4_YYY t1,t2,t3,t4;
+    FP4_YYY_mul(&t1,&(x->a),&(y->a)); 
+    FP4_YYY_mul(&t2,&(x->b),&(y->b)); 
+
+    FP4_YYY_add(&t3,&(y->b),&(y->a));
+    FP4_YYY_add(&t4,&(x->b),&(x->a));
+
+       FP4_YYY_norm(&t4); // 2
+       FP4_YYY_norm(&t3); // 2
+
+    FP4_YYY_mul(&t4,&t4,&t3); /* (xa+xb)(ya+yb) */
+
+       FP4_YYY_neg(&t3,&t1);  // 1
+       FP4_YYY_add(&t4,&t4,&t3);  //t4E=3
+    FP4_YYY_norm(&t4);
+
+       FP4_YYY_neg(&t3,&t2);  // 1
+       FP4_YYY_add(&(w->b),&t4,&t3); //wbE=3
+
+    FP4_YYY_times_i(&t2);
+    FP4_YYY_add(&(w->a),&t2,&t1);
+
+    FP8_YYY_norm(w);
+}
+
+/* output FP8 in format [a,b] */
+void FP8_YYY_output(FP8_YYY *w)
+{
+    printf("[");
+    FP4_YYY_output(&(w->a));
+    printf(",");
+    FP4_YYY_output(&(w->b));
+    printf("]");
+}
+
+void FP8_YYY_rawoutput(FP8_YYY *w)
+{
+    printf("[");
+    FP4_YYY_rawoutput(&(w->a));
+    printf(",");
+    FP4_YYY_rawoutput(&(w->b));
+    printf("]");
+}
+
+/* Set w=1/x */
+void FP8_YYY_inv(FP8_YYY *w,FP8_YYY *x)
+{
+    FP4_YYY t1,t2;
+    FP4_YYY_sqr(&t1,&(x->a));
+    FP4_YYY_sqr(&t2,&(x->b));
+    FP4_YYY_times_i(&t2);
+       FP4_YYY_norm(&t2);
+
+    FP4_YYY_sub(&t1,&t1,&t2);
+       FP4_YYY_norm(&t1);
+    FP4_YYY_inv(&t1,&t1);
+
+    FP4_YYY_mul(&(w->a),&t1,&(x->a));
+    FP4_YYY_neg(&t1,&t1);
+       FP4_YYY_norm(&t1);
+    FP4_YYY_mul(&(w->b),&t1,&(x->b));
+}
+
+/* w*=i where i = sqrt(sqrt(-1+sqrt(-1))) */
+void FP8_YYY_times_i(FP8_YYY *w)
+{
+       FP4_YYY s,t;
+       FP4_YYY_copy(&s,&(w->b));
+       FP4_YYY_copy(&t,&(w->a));
+       FP4_YYY_times_i(&s);
+       FP4_YYY_copy(&(w->a),&s);
+       FP4_YYY_copy(&(w->b),&t);
+       FP8_YYY_norm(w);
+}
+
+void FP8_YYY_times_i2(FP8_YYY *w)
+{
+       FP4_YYY_times_i(&(w->a));
+       FP4_YYY_times_i(&(w->b));
+}
+
+/* Set w=w^p using Frobenius */
+void FP8_YYY_frob(FP8_YYY *w,FP2_YYY *f)
+{ // f=(i+1)^(p-3)/4
+       FP2_YYY ff;
+       FP2_YYY_sqr(&ff,f);  // (i+1)^(p-3)/2
+       FP2_YYY_mul_ip(&ff); // (i+1)^(p-1)/2
+       FP2_YYY_norm(&ff);
+       FP4_YYY_frob(&(w->a),&ff);
+       FP4_YYY_frob(&(w->b),&ff);
+       FP4_YYY_pmul(&(w->b),&(w->b),f);  // times (1+i)^(p-3)/4
+       FP4_YYY_times_i(&(w->b));               // (i+1)^(p-1)/4
+}
+
+/* Set r=a^b mod m */
+void FP8_YYY_pow(FP8_YYY *r,FP8_YYY* a,BIG_XXX b)
+{
+    FP8_YYY w;
+    BIG_XXX z,zilch;
+    int bt;
+
+    BIG_XXX_zero(zilch);
+
+    BIG_XXX_copy(z,b);
+    FP8_YYY_copy(&w,a);
+       FP8_YYY_norm(&w);
+    FP8_YYY_one(r);
+    BIG_XXX_norm(z);
+    while(1)
+    {
+        bt=BIG_XXX_parity(z);
+        BIG_XXX_shr(z,1);
+        if (bt) FP8_YYY_mul(r,r,&w);
+        if (BIG_XXX_comp(z,zilch)==0) break;
+        FP8_YYY_sqr(&w,&w);
+    }
+    FP8_YYY_reduce(r);
+}
+
+#if CURVE_SECURITY_ZZZ == 192
+
+/* XTR xtr_a function */
+void FP8_YYY_xtr_A(FP8_YYY *r,FP8_YYY *w,FP8_YYY *x,FP8_YYY *y,FP8_YYY *z)
+{
+    FP8_YYY t1,t2;
+
+    FP8_YYY_copy(r,x);
+    FP8_YYY_sub(&t1,w,y);
+       FP8_YYY_norm(&t1);
+    FP8_YYY_pmul(&t1,&t1,&(r->a));
+    FP8_YYY_add(&t2,w,y);
+       FP8_YYY_norm(&t2);
+    FP8_YYY_pmul(&t2,&t2,&(r->b));
+    FP8_YYY_times_i(&t2);
+
+    FP8_YYY_add(r,&t1,&t2);
+    FP8_YYY_add(r,r,z);
+
+    FP8_YYY_reduce(r);
+}
+
+/* XTR xtr_d function */
+void FP8_YYY_xtr_D(FP8_YYY *r,FP8_YYY *x)
+{
+    FP8_YYY w;
+    FP8_YYY_copy(r,x);
+    FP8_YYY_conj(&w,r);
+    FP8_YYY_add(&w,&w,&w);
+    FP8_YYY_sqr(r,r);
+       FP8_YYY_norm(&w);
+    FP8_YYY_sub(r,r,&w);
+    FP8_YYY_reduce(r);    /* reduce here as multiple calls trigger automatic 
reductions */
+}
+
+/* r=x^n using XTR method on traces of FP12s */
+void FP8_YYY_xtr_pow(FP8_YYY *r,FP8_YYY *x,BIG_XXX n)
+{
+    int i,par,nb;
+    BIG_XXX v;
+    FP2_YYY w2;
+       FP4_YYY w4;
+    FP8_YYY t,a,b,c,sf;
+
+    BIG_XXX_zero(v);
+    BIG_XXX_inc(v,3);
+       BIG_XXX_norm(v);
+    FP2_YYY_from_BIG(&w2,v);
+    FP4_YYY_from_FP2(&w4,&w2);
+    FP8_YYY_from_FP4(&a,&w4);
+       FP8_YYY_copy(&sf,x);
+       FP8_YYY_norm(&sf);
+       FP8_YYY_copy(&b,&sf);
+    FP8_YYY_xtr_D(&c,&sf);
+
+    par=BIG_XXX_parity(n);
+    BIG_XXX_copy(v,n);
+    BIG_XXX_norm(v);
+    BIG_XXX_shr(v,1);
+    if (par==0)
+    {
+        BIG_XXX_dec(v,1);
+        BIG_XXX_norm(v);
+    }
+
+    nb=BIG_XXX_nbits(v);
+    for (i=nb-1; i>=0; i--)
+    {
+        if (!BIG_XXX_bit(v,i))
+        {
+            FP8_YYY_copy(&t,&b);
+            FP8_YYY_conj(&sf,&sf);
+            FP8_YYY_conj(&c,&c);
+            FP8_YYY_xtr_A(&b,&a,&b,&sf,&c);
+            FP8_YYY_conj(&sf,&sf);
+            FP8_YYY_xtr_D(&c,&t);
+            FP8_YYY_xtr_D(&a,&a);
+        }
+        else
+        {
+            FP8_YYY_conj(&t,&a);
+            FP8_YYY_xtr_D(&a,&b);
+            FP8_YYY_xtr_A(&b,&c,&b,&sf,&t);
+            FP8_YYY_xtr_D(&c,&c);
+        }
+    }
+
+    if (par==0) FP8_YYY_copy(r,&c);
+    else FP8_YYY_copy(r,&b);
+    FP8_YYY_reduce(r);
+}
+
+/* r=ck^a.cl^n using XTR double exponentiation method on traces of FP12s. See 
Stam thesis. */
+void FP8_YYY_xtr_pow2(FP8_YYY *r,FP8_YYY *ck,FP8_YYY *cl,FP8_YYY *ckml,FP8_YYY 
*ckm2l,BIG_XXX a,BIG_XXX b)
+{
+    int i,f2;
+    BIG_XXX d,e,w;
+    FP8_YYY t,cu,cv,cumv,cum2v;
+
+
+    BIG_XXX_copy(e,a);
+    BIG_XXX_copy(d,b);
+    BIG_XXX_norm(e);
+       BIG_XXX_norm(d);
+    FP8_YYY_copy(&cu,ck);
+    FP8_YYY_copy(&cv,cl);
+    FP8_YYY_copy(&cumv,ckml);
+    FP8_YYY_copy(&cum2v,ckm2l);
+
+    f2=0;
+    while (BIG_XXX_parity(d)==0 && BIG_XXX_parity(e)==0)
+    {
+        BIG_XXX_shr(d,1);
+        BIG_XXX_shr(e,1);
+        f2++;
+    }
+    while (BIG_XXX_comp(d,e)!=0)
+    {
+        if (BIG_XXX_comp(d,e)>0)
+        {
+            BIG_XXX_imul(w,e,4);
+            BIG_XXX_norm(w);
+            if (BIG_XXX_comp(d,w)<=0)
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_sub(e,w,e);
+                BIG_XXX_norm(e);
+                FP8_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP8_YYY_conj(&cum2v,&cumv);
+                FP8_YYY_copy(&cumv,&cv);
+                FP8_YYY_copy(&cv,&cu);
+                FP8_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(d)==0)
+            {
+                BIG_XXX_shr(d,1);
+                FP8_YYY_conj(r,&cum2v);
+                FP8_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
+                FP8_YYY_xtr_D(&cum2v,&cumv);
+                FP8_YYY_copy(&cumv,&t);
+                FP8_YYY_xtr_D(&cu,&cu);
+            }
+            else if (BIG_XXX_parity(e)==1)
+            {
+                BIG_XXX_sub(d,d,e);
+                BIG_XXX_norm(d);
+                BIG_XXX_shr(d,1);
+                FP8_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP8_YYY_xtr_D(&cu,&cu);
+                FP8_YYY_xtr_D(&cum2v,&cv);
+                FP8_YYY_conj(&cum2v,&cum2v);
+                FP8_YYY_copy(&cv,&t);
+            }
+            else
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_shr(d,1);
+                BIG_XXX_copy(e,w);
+                FP8_YYY_xtr_D(&t,&cumv);
+                FP8_YYY_conj(&cumv,&cum2v);
+                FP8_YYY_conj(&cum2v,&t);
+                FP8_YYY_xtr_D(&t,&cv);
+                FP8_YYY_copy(&cv,&cu);
+                FP8_YYY_copy(&cu,&t);
+            }
+        }
+        if (BIG_XXX_comp(d,e)<0)
+        {
+            BIG_XXX_imul(w,d,4);
+            BIG_XXX_norm(w);
+            if (BIG_XXX_comp(e,w)<=0)
+            {
+                BIG_XXX_sub(e,e,d);
+                BIG_XXX_norm(e);
+                FP8_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP8_YYY_copy(&cum2v,&cumv);
+                FP8_YYY_copy(&cumv,&cu);
+                FP8_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(e)==0)
+            {
+                BIG_XXX_copy(w,d);
+                BIG_XXX_copy(d,e);
+                BIG_XXX_shr(d,1);
+                BIG_XXX_copy(e,w);
+                FP8_YYY_xtr_D(&t,&cumv);
+                FP8_YYY_conj(&cumv,&cum2v);
+                FP8_YYY_conj(&cum2v,&t);
+                FP8_YYY_xtr_D(&t,&cv);
+                FP8_YYY_copy(&cv,&cu);
+                FP8_YYY_copy(&cu,&t);
+            }
+            else if (BIG_XXX_parity(d)==1)
+            {
+                BIG_XXX_copy(w,e);
+                BIG_XXX_copy(e,d);
+                BIG_XXX_sub(w,w,d);
+                BIG_XXX_norm(w);
+                BIG_XXX_copy(d,w);
+                BIG_XXX_shr(d,1);
+                FP8_YYY_xtr_A(&t,&cu,&cv,&cumv,&cum2v);
+                FP8_YYY_conj(&cumv,&cumv);
+                FP8_YYY_xtr_D(&cum2v,&cu);
+                FP8_YYY_conj(&cum2v,&cum2v);
+                FP8_YYY_xtr_D(&cu,&cv);
+                FP8_YYY_copy(&cv,&t);
+            }
+            else
+            {
+                BIG_XXX_shr(d,1);
+                FP8_YYY_conj(r,&cum2v);
+                FP8_YYY_xtr_A(&t,&cu,&cumv,&cv,r);
+                FP8_YYY_xtr_D(&cum2v,&cumv);
+                FP8_YYY_copy(&cumv,&t);
+                FP8_YYY_xtr_D(&cu,&cu);
+            }
+        }
+    }
+    FP8_YYY_xtr_A(r,&cu,&cv,&cumv,&cum2v);
+    for (i=0; i<f2; i++)       FP8_YYY_xtr_D(r,r);
+    FP8_YYY_xtr_pow(r,r,d);
+}
+
+#endif
+
+
+/* New stuff for ECp8 support */
+
+/* Move b to a if d=1 */
+void FP8_YYY_cmove(FP8_YYY *f,FP8_YYY *g,int d)
+{
+    FP4_YYY_cmove(&(f->a),&(g->a),d);
+    FP4_YYY_cmove(&(f->b),&(g->b),d);
+}
+
+#if CURVE_SECURITY_ZZZ == 256
+
+/* sqrt(a+xb) = 
sqrt((a+sqrt(a*a-n*b*b))/2)+x.b/(2*sqrt((a+sqrt(a*a-n*b*b))/2)) */
+/* returns true if x is QR */
+int FP8_YYY_sqrt(FP8_YYY *r,FP8_YYY* x)
+{
+       FP4_YYY a,s,t;
+
+       FP8_YYY_copy(r,x);
+       if (FP8_YYY_iszilch(x))
+               return 1;
+       
+       FP4_YYY_copy(&a,&(x->a));
+       FP4_YYY_copy(&s,&(x->b));
+
+       if (FP4_YYY_iszilch(&s))
+       {
+               if (FP4_YYY_sqrt(&t,&a))
+               {
+                       FP8_YYY_from_FP4(r,&t);
+               }
+               else
+               {
+                       FP4_YYY_div_i(&a);
+                       FP4_YYY_sqrt(&t,&a);
+                       FP8_YYY_from_FP4H(r,&t);
+               }
+               return 1;
+       }
+
+       FP4_YYY_sqr(&s,&s);  // s*=s
+       FP4_YYY_sqr(&a,&a);  // a*=a
+       FP4_YYY_times_i(&s);
+       FP4_YYY_norm(&s);
+       FP4_YYY_sub(&a,&a,&s); // a-=txx(s)
+
+       if (!FP4_YYY_sqrt(&s,&a)) return 0;
+
+       FP4_YYY_sqr(&t,&s);
+
+
+       FP4_YYY_copy(&t,&(x->a));
+       FP4_YYY_add(&a,&t,&s);
+       FP4_YYY_norm(&a);
+       FP4_YYY_div2(&a,&a);
+
+       if (!FP4_YYY_sqrt(&a,&a))
+       {
+               FP4_YYY_sub(&a,&t,&s);
+               FP4_YYY_norm(&a);
+               FP4_YYY_div2(&a,&a);
+               if (!FP4_YYY_sqrt(&a,&a)) return 0;
+       }
+
+       FP4_YYY_copy(&t,&(x->b));
+       FP4_YYY_add(&s,&a,&a);
+       FP4_YYY_inv(&s,&s);
+
+       FP4_YYY_mul(&t,&t,&s);
+       FP8_YYY_from_FP4s(r,&a,&t);
+
+       return 1;
+
+}
+
+
+void FP8_YYY_div_i(FP8_YYY *f)
+{
+       FP4_YYY u,v;
+       FP4_YYY_copy(&u,&(f->a));
+       FP4_YYY_copy(&v,&(f->b));
+       FP4_YYY_div_i(&u);
+       FP4_YYY_copy(&(f->a),&v);
+       FP4_YYY_copy(&(f->b),&u);
+}
+
+void FP8_YYY_div_i2(FP8_YYY *f)
+{
+       FP4_YYY_div_i(&(f->a));
+       FP4_YYY_div_i(&(f->b));
+}
+
+
+void FP8_YYY_div_2i(FP8_YYY *f)
+{
+       FP4_YYY u,v;
+       FP4_YYY_copy(&u,&(f->a));
+       FP4_YYY_copy(&v,&(f->b));
+       FP4_YYY_div_2i(&u);
+       FP4_YYY_add(&v,&v,&v);
+       FP4_YYY_norm(&v);
+       FP4_YYY_copy(&(f->a),&v);
+       FP4_YYY_copy(&(f->b),&u);
+}
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/fp8.h
----------------------------------------------------------------------
diff --git a/version3/c/fp8.h b/version3/c/fp8.h
new file mode 100644
index 0000000..82b543d
--- /dev/null
+++ b/version3/c/fp8.h
@@ -0,0 +1,294 @@
+#ifndef FP8_YYY_H
+#define FP8_YYY_H
+
+#include "fp4_YYY.h"
+#include "config_curve_ZZZ.h"
+
+
+/**
+       @brief FP8 Structure - towered over two FP4
+*/
+
+typedef struct
+{
+    FP4_YYY a; /**< real part of FP8 */
+    FP4_YYY b; /**< imaginary part of FP8 */
+} FP8_YYY;
+
+
+/* FP8 prototypes */
+/**    @brief Tests for FP8 equal to zero
+ *
+       @param x FP8 number to be tested
+       @return 1 if zero, else returns 0
+ */
+extern int FP8_YYY_iszilch(FP8_YYY *x);
+/**    @brief Tests for FP8 equal to unity
+ *
+       @param x FP8 number to be tested
+       @return 1 if unity, else returns 0
+ */
+extern int FP8_YYY_isunity(FP8_YYY *x);
+/**    @brief Tests for equality of two FP8s
+ *
+       @param x FP8 instance to be compared
+       @param y FP8 instance to be compared
+       @return 1 if x=y, else returns 0
+ */
+extern int FP8_YYY_equals(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Tests for FP8 having only a real part and no imaginary part
+ *
+       @param x FP8 number to be tested
+       @return 1 if real, else returns 0
+ */
+extern int FP8_YYY_isreal(FP8_YYY *x);
+/**    @brief Initialise FP8 from two FP4s
+ *
+       @param x FP8 instance to be initialised
+       @param a FP4 to form real part of FP8
+       @param b FP4 to form imaginary part of FP8
+ */
+extern void FP8_YYY_from_FP4s(FP8_YYY *x,FP4_YYY *a,FP4_YYY *b);
+/**    @brief Initialise FP8 from single FP4
+ *
+       Imaginary part is set to zero
+       @param x FP8 instance to be initialised
+       @param a FP4 to form real part of FP8
+ */
+extern void FP8_YYY_from_FP4(FP8_YYY *x,FP4_YYY *a);
+
+/**    @brief Initialise FP8 from single FP4
+ *
+       real part is set to zero
+       @param x FP8 instance to be initialised
+       @param a FP4 to form imaginary part of FP8
+ */
+extern void FP8_YYY_from_FP4H(FP8_YYY *x,FP4_YYY *a);
+
+
+/**    @brief Copy FP8 to another FP8
+ *
+       @param x FP8 instance, on exit = y
+       @param y FP8 instance to be copied
+ */
+extern void FP8_YYY_copy(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Set FP8 to zero
+ *
+       @param x FP8 instance to be set to zero
+ */
+extern void FP8_YYY_zero(FP8_YYY *x);
+/**    @brief Set FP8 to unity
+ *
+       @param x FP8 instance to be set to one
+ */
+extern void FP8_YYY_one(FP8_YYY *x);
+/**    @brief Negation of FP8
+ *
+       @param x FP8 instance, on exit = -y
+       @param y FP8 instance
+ */
+extern void FP8_YYY_neg(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Conjugation of FP8
+ *
+       If y=(a,b) on exit x=(a,-b)
+       @param x FP8 instance, on exit = conj(y)
+       @param y FP8 instance
+ */
+extern void FP8_YYY_conj(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Negative conjugation of FP8
+ *
+       If y=(a,b) on exit x=(-a,b)
+       @param x FP8 instance, on exit = -conj(y)
+       @param y FP8 instance
+ */
+extern void FP8_YYY_nconj(FP8_YYY *x,FP8_YYY *y);
+/**    @brief addition of two FP8s
+ *
+       @param x FP8 instance, on exit = y+z
+       @param y FP8 instance
+       @param z FP8 instance
+ */
+extern void FP8_YYY_add(FP8_YYY *x,FP8_YYY *y,FP8_YYY *z);
+/**    @brief subtraction of two FP8s
+ *
+       @param x FP8 instance, on exit = y-z
+       @param y FP8 instance
+       @param z FP8 instance
+ */
+extern void FP8_YYY_sub(FP8_YYY *x,FP8_YYY *y,FP8_YYY *z);
+/**    @brief Multiplication of an FP8 by an FP4
+ *
+       @param x FP8 instance, on exit = y*a
+       @param y FP8 instance
+       @param a FP4 multiplier
+ */
+extern void FP8_YYY_pmul(FP8_YYY *x,FP8_YYY *y,FP4_YYY *a);
+
+/**    @brief Multiplication of an FP8 by an FP2
+ *
+       @param x FP8 instance, on exit = y*a
+       @param y FP8 instance
+       @param a FP2 multiplier
+ */
+extern void FP8_YYY_qmul(FP8_YYY *x,FP8_YYY *y,FP2_YYY *a);
+
+/**    @brief Multiplication of an FP8 by an FP
+ *
+       @param x FP8 instance, on exit = y*a
+       @param y FP8 instance
+       @param a FP multiplier
+ */
+extern void FP8_YYY_tmul(FP8_YYY *x,FP8_YYY *y,FP_YYY *a);
+
+/**    @brief Multiplication of an FP8 by a small integer
+ *
+       @param x FP8 instance, on exit = y*i
+       @param y FP8 instance
+       @param i an integer
+ */
+extern void FP8_YYY_imul(FP8_YYY *x,FP8_YYY *y,int i);
+/**    @brief Squaring an FP8
+ *
+       @param x FP8 instance, on exit = y^2
+       @param y FP8 instance
+ */
+extern void FP8_YYY_sqr(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Multiplication of two FP8s
+ *
+       @param x FP8 instance, on exit = y*z
+       @param y FP8 instance
+       @param z FP8 instance
+ */
+extern void FP8_YYY_mul(FP8_YYY *x,FP8_YYY *y,FP8_YYY *z);
+/**    @brief Inverting an FP8
+ *
+       @param x FP8 instance, on exit = 1/y
+       @param y FP8 instance
+ */
+extern void FP8_YYY_inv(FP8_YYY *x,FP8_YYY *y);
+/**    @brief Formats and outputs an FP8 to the console
+ *
+       @param x FP8 instance to be printed
+ */
+extern void FP8_YYY_output(FP8_YYY *x);
+/**    @brief Formats and outputs an FP8 to the console in raw form (for 
debugging)
+ *
+       @param x FP8 instance to be printed
+ */
+extern void FP8_YYY_rawoutput(FP8_YYY *x);
+/**    @brief multiplies an FP8 instance by irreducible polynomial 
sqrt(1+sqrt(-1))
+ *
+       @param x FP8 instance, on exit = sqrt(1+sqrt(-1)*x
+ */
+extern void FP8_YYY_times_i(FP8_YYY *x);
+/**    @brief multiplies an FP8 instance by irreducible polynomial (1+sqrt(-1))
+ *
+       @param x FP8 instance, on exit = (1+sqrt(-1)*x
+ */
+extern void FP8_YYY_times_i2(FP8_YYY *x);
+
+/**    @brief Normalises the components of an FP8
+ *
+       @param x FP8 instance to be normalised
+ */
+extern void FP8_YYY_norm(FP8_YYY *x);
+/**    @brief Reduces all components of possibly unreduced FP8 mod Modulus
+ *
+       @param x FP8 instance, on exit reduced mod Modulus
+ */
+extern void FP8_YYY_reduce(FP8_YYY *x);
+/**    @brief Raises an FP8 to the power of a BIG
+ *
+       @param x FP8 instance, on exit = y^b
+       @param y FP8 instance
+       @param b BIG number
+ */
+extern void FP8_YYY_pow(FP8_YYY *x,FP8_YYY *y,BIG_XXX b);
+/**    @brief Raises an FP8 to the power of the internal modulus p, using the 
Frobenius
+ *
+       @param x FP8 instance, on exit = x^p
+       @param f FP2 precalculated Frobenius constant
+ */
+extern void FP8_YYY_frob(FP8_YYY *x,FP2_YYY *f);
+/**    @brief Calculates the XTR addition function r=w*x-conj(x)*y+z
+ *
+       @param r FP8 instance, on exit = w*x-conj(x)*y+z
+       @param w FP8 instance
+       @param x FP8 instance
+       @param y FP8 instance
+       @param z FP8 instance
+ */
+extern void FP8_YYY_xtr_A(FP8_YYY *r,FP8_YYY *w,FP8_YYY *x,FP8_YYY *y,FP8_YYY 
*z);
+/**    @brief Calculates the XTR doubling function r=x^2-2*conj(x)
+ *
+       @param r FP8 instance, on exit = x^2-2*conj(x)
+       @param x FP8 instance
+ */
+extern void FP8_YYY_xtr_D(FP8_YYY *r,FP8_YYY *x);
+/**    @brief Calculates FP8 trace of an FP12 raised to the power of a BIG 
number
+ *
+       XTR single exponentiation
+       @param r FP8 instance, on exit = trace(w^b)
+       @param x FP8 instance, trace of an FP12 w
+       @param b BIG number
+ */
+extern void FP8_YYY_xtr_pow(FP8_YYY *r,FP8_YYY *x,BIG_XXX b);
+/**    @brief Calculates FP8 trace of c^a.d^b, where c and d are derived from 
FP8 traces of FP12s
+ *
+       XTR double exponentiation
+       Assumes c=tr(x^m), d=tr(x^n), e=tr(x^(m-n)), f=tr(x^(m-2n))
+       @param r FP8 instance, on exit = trace(c^a.d^b)
+       @param c FP8 instance, trace of an FP12
+       @param d FP8 instance, trace of an FP12
+       @param e FP8 instance, trace of an FP12
+       @param f FP8 instance, trace of an FP12
+       @param a BIG number
+       @param b BIG number
+ */
+extern void FP8_YYY_xtr_pow2(FP8_YYY *r,FP8_YYY *c,FP8_YYY *d,FP8_YYY 
*e,FP8_YYY *f,BIG_XXX a,BIG_XXX b);
+
+
+/**    @brief Calculate square root of an FP8
+ *
+       Square root
+       @param r FP8 instance, on exit = sqrt(x)
+       @param x FP8 instance
+       @return 1 x is a QR, otherwise 0
+ */
+extern int  FP8_YYY_sqrt(FP8_YYY *r,FP8_YYY *x);
+
+
+/**    @brief Conditional copy of FP8 number
+ *
+       Conditionally copies second parameter to the first (without branching)
+       @param x FP8 instance, set to y if s!=0
+       @param y another FP8 instance
+       @param s copy only takes place if not equal to 0
+ */
+extern void FP8_YYY_cmove(FP8_YYY *x,FP8_YYY *y,int s);
+
+
+/**    @brief Divide FP8 number by QNR
+ *
+       Divide FP8 by the QNR
+       @param x FP8 instance
+ */
+extern void FP8_YYY_div_i(FP8_YYY *x);
+
+/**    @brief Divide FP8 number by QNR twice
+ *
+       Divide FP8 by the QNR twice
+       @param x FP8 instance
+ */
+extern void FP8_YYY_div_i2(FP8_YYY *x);
+
+/**    @brief Divide FP8 number by QNR/2
+ *
+       Divide FP8 by the QNR/2
+       @param x FP8 instance
+ */
+extern void FP8_YYY_div_2i(FP8_YYY *x);
+
+
+#endif
+

http://git-wip-us.apache.org/repos/asf/incubator-milagro-crypto/blob/c25f9e5c/version3/c/gcm.c
----------------------------------------------------------------------
diff --git a/version3/c/gcm.c b/version3/c/gcm.c
new file mode 100644
index 0000000..3bd9b8d
--- /dev/null
+++ b/version3/c/gcm.c
@@ -0,0 +1,411 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+*/
+
+/*
+ * Implementation of the AES-GCM Encryption/Authentication
+ *
+ * Some restrictions..
+ * 1. Only for use with AES
+ * 2. Returned tag is always 128-bits. Truncate at your own risk.
+ * 3. The order of function calls must follow some rules
+ *
+ * Typical sequence of calls..
+ * 1. call GCM_init
+ * 2. call GCM_add_header any number of times, as long as length of header is 
multiple of 16 bytes (block size)
+ * 3. call GCM_add_header one last time with any length of header
+ * 4. call GCM_add_cipher any number of times, as long as length of 
cipher/plaintext is multiple of 16 bytes
+ * 5. call GCM_add_cipher one last time with any length of cipher/plaintext
+ * 6. call GCM_finish to extract the tag.
+ *
+ * See http://www.mindspring.com/~dmcgrew/gcm-nist-6.pdf
+ */
+/* SU=m, m is Stack Usage */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arch.h"
+#include "amcl.h"
+
+#define NB 4
+#define MR_TOBYTE(x) ((uchar)((x)))
+
+static unsign32 pack(const uchar *b)
+{
+    /* pack bytes into a 32-bit Word */
+    return 
((unsign32)b[0]<<24)|((unsign32)b[1]<<16)|((unsign32)b[2]<<8)|(unsign32)b[3];
+}
+
+static void unpack(unsign32 a,uchar *b)
+{
+    /* unpack bytes from a word */
+    b[3]=MR_TOBYTE(a);
+    b[2]=MR_TOBYTE(a>>8);
+    b[1]=MR_TOBYTE(a>>16);
+    b[0]=MR_TOBYTE(a>>24);
+}
+
+static void precompute(gcm *g,uchar *H)
+{
+    /* precompute small 2k bytes gf2m table of x^n.H */
+    int i,j;
+    unsign32 *last,*next,b;
+
+    for (i=j=0; i<NB; i++,j+=4) g->table[0][i]=pack((uchar *)&H[j]);
+
+    for (i=1; i<128; i++)
+    {
+        next=g->table[i];
+        last=g->table[i-1];
+        b=0;
+        for (j=0; j<NB; j++)
+        {
+            next[j]=b|(last[j])>>1;
+            b=last[j]<<31;
+        }
+        if (b) next[0]^=0xE1000000; /* irreducible polynomial */
+    }
+}
+
+/* SU= 32 */
+static void gf2mul(gcm *g)
+{
+    /* gf2m mul - Z=H*X mod 2^128 */
+    int i,j,m,k;
+    unsign32 P[4];
+    unsign32 b;
+
+    P[0]=P[1]=P[2]=P[3]=0;
+    j=8;
+    m=0;
+    for (i=0; i<128; i++)
+    {
+        b=(unsign32)(g->stateX[m]>>(--j))&1;
+        b=~b+1;
+        for (k=0; k<NB; k++) P[k]^=(g->table[i][k]&b);
+        if (j==0)
+        {
+            j=8;
+            m++;
+            if (m==16) break;
+        }
+    }
+    for (i=j=0; i<NB; i++,j+=4) unpack(P[i],(uchar *)&g->stateX[j]);
+}
+
+/* SU= 32 */
+static void GCM_wrap(gcm *g)
+{
+    /* Finish off GHASH */
+    int i,j;
+    unsign32 F[4];
+    uchar L[16];
+
+    /* convert lengths from bytes to bits */
+    F[0]=(g->lenA[0]<<3)|(g->lenA[1]&0xE0000000)>>29;
+    F[1]=g->lenA[1]<<3;
+    F[2]=(g->lenC[0]<<3)|(g->lenC[1]&0xE0000000)>>29;
+    F[3]=g->lenC[1]<<3;
+    for (i=j=0; i<NB; i++,j+=4) unpack(F[i],(uchar *)&L[j]);
+
+    for (i=0; i<16; i++) g->stateX[i]^=L[i];
+    gf2mul(g);
+}
+
+static int GCM_ghash(gcm *g,char *plain,int len)
+{
+    int i,j=0;
+    if (g->status==GCM_ACCEPTING_HEADER) g->status=GCM_ACCEPTING_CIPHER;
+    if (g->status!=GCM_ACCEPTING_CIPHER) return 0;
+
+    while (j<len)
+    {
+        for (i=0; i<16 && j<len; i++)
+        {
+            g->stateX[i]^=plain[j++];
+            g->lenC[1]++;
+            if (g->lenC[1]==0) g->lenC[0]++;
+        }
+        gf2mul(g);
+    }
+    if (len%16!=0) g->status=GCM_NOT_ACCEPTING_MORE;
+    return 1;
+}
+
+/* SU= 48 */
+/* Initialize GCM mode */
+void GCM_init(gcm* g,int nk,char *key,int niv,char *iv)
+{
+    /* iv size niv is usually 12 bytes (96 bits). AES key size nk can be 16,24 
or 32 bytes */
+    int i;
+    uchar H[16];
+    for (i=0; i<16; i++)
+    {
+        H[i]=0;
+        g->stateX[i]=0;
+    }
+
+    AES_init(&(g->a),ECB,nk,key,iv);
+    AES_ecb_encrypt(&(g->a),H);     /* E(K,0) */
+    precompute(g,H);
+
+    g->lenA[0]=g->lenC[0]=g->lenA[1]=g->lenC[1]=0;
+    if (niv==12)
+    {
+        for (i=0; i<12; i++) g->a.f[i]=iv[i];
+        unpack((unsign32)1,(uchar *)&(g->a.f[12]));  /* initialise IV */
+        for (i=0; i<16; i++) g->Y_0[i]=g->a.f[i];
+    }
+    else
+    {
+        g->status=GCM_ACCEPTING_CIPHER;
+        GCM_ghash(g,iv,niv); /* GHASH(H,0,IV) */
+        GCM_wrap(g);
+        for (i=0; i<16; i++)
+        {
+            g->a.f[i]=g->stateX[i];
+            g->Y_0[i]=g->a.f[i];
+            g->stateX[i]=0;
+        }
+        g->lenA[0]=g->lenC[0]=g->lenA[1]=g->lenC[1]=0;
+    }
+    g->status=GCM_ACCEPTING_HEADER;
+}
+
+/* SU= 24 */
+/* Add Header data - included but not encrypted */
+int GCM_add_header(gcm* g,char *header,int len)
+{
+    /* Add some header. Won't be encrypted, but will be authenticated. len is 
length of header */
+    int i,j=0;
+    if (g->status!=GCM_ACCEPTING_HEADER) return 0;
+
+    while (j<len)
+    {
+        for (i=0; i<16 && j<len; i++)
+        {
+            g->stateX[i]^=header[j++];
+            g->lenA[1]++;
+            if (g->lenA[1]==0) g->lenA[0]++;
+        }
+        gf2mul(g);
+    }
+    if (len%16!=0) g->status=GCM_ACCEPTING_CIPHER;
+    return 1;
+}
+
+/* SU= 48 */
+/* Add Plaintext - included and encrypted */
+int GCM_add_plain(gcm *g,char *cipher,char *plain,int len)
+{
+    /* Add plaintext to extract ciphertext, len is length of plaintext.  */
+    int i,j=0;
+    unsign32 counter;
+    uchar B[16];
+    if (g->status==GCM_ACCEPTING_HEADER) g->status=GCM_ACCEPTING_CIPHER;
+    if (g->status!=GCM_ACCEPTING_CIPHER) return 0;
+
+    while (j<len)
+    {
+        counter=pack((uchar *)&(g->a.f[12]));
+        counter++;
+        unpack(counter,(uchar *)&(g->a.f[12]));  /* increment counter */
+        for (i=0; i<16; i++) B[i]=g->a.f[i];
+        AES_ecb_encrypt(&(g->a),B);        /* encrypt it  */
+
+        for (i=0; i<16 && j<len; i++)
+        {
+            cipher[j]=plain[j]^B[i];
+            g->stateX[i]^=cipher[j++];
+            g->lenC[1]++;
+            if (g->lenC[1]==0) g->lenC[0]++;
+        }
+        gf2mul(g);
+    }
+    if (len%16!=0) g->status=GCM_NOT_ACCEPTING_MORE;
+    return 1;
+}
+
+/* SU= 48 */
+/* Add Ciphertext - decrypts to plaintext */
+int GCM_add_cipher(gcm *g,char *plain,char *cipher,int len)
+{
+    /* Add ciphertext to extract plaintext, len is length of ciphertext. */
+    int i,j=0;
+    unsign32 counter;
+    char oc;
+    uchar B[16];
+    if (g->status==GCM_ACCEPTING_HEADER) g->status=GCM_ACCEPTING_CIPHER;
+    if (g->status!=GCM_ACCEPTING_CIPHER) return 0;
+
+    while (j<len)
+    {
+        counter=pack((uchar *)&(g->a.f[12]));
+        counter++;
+        unpack(counter,(uchar *)&(g->a.f[12]));  /* increment counter */
+        for (i=0; i<16; i++) B[i]=g->a.f[i];
+        AES_ecb_encrypt(&(g->a),B);        /* encrypt it  */
+        for (i=0; i<16 && j<len; i++)
+        {
+            oc=cipher[j];
+            plain[j]=cipher[j]^B[i];
+            g->stateX[i]^=oc;
+            j++;
+            g->lenC[1]++;
+            if (g->lenC[1]==0) g->lenC[0]++;
+        }
+        gf2mul(g);
+    }
+    if (len%16!=0) g->status=GCM_NOT_ACCEPTING_MORE;
+    return 1;
+}
+
+/* SU= 16 */
+/* Finish and extract Tag */
+void GCM_finish(gcm *g,char *tag)
+{
+    /* Finish off GHASH and extract tag (MAC) */
+    int i;
+
+    GCM_wrap(g);
+
+    /* extract tag */
+    if (tag!=NULL)
+    {
+        AES_ecb_encrypt(&(g->a),g->Y_0);        /* E(K,Y0) */
+        for (i=0; i<16; i++) g->Y_0[i]^=g->stateX[i];
+        for (i=0; i<16; i++)
+        {
+            tag[i]=g->Y_0[i];
+            g->Y_0[i]=g->stateX[i]=0;
+        }
+    }
+    g->status=GCM_FINISHED;
+    AES_end(&(g->a));
+}
+
+
+// Compile with
+// gcc -O2 gcm.c aes.c -o gcm.exe
+/* SU= 16
+*/
+
+/* static void hex2bytes(char *hex,char *bin) */
+/* { */
+/*     int i; */
+/*     char v; */
+/*     int len=strlen(hex); */
+/*     for (i = 0; i < len/2; i++) { */
+/*         char c = hex[2*i]; */
+/*         if (c >= '0' && c <= '9') { */
+/*             v = c - '0'; */
+/*         } else if (c >= 'A' && c <= 'F') { */
+/*             v = c - 'A' + 10; */
+/*         } else if (c >= 'a' && c <= 'f') { */
+/*             v = c - 'a' + 10; */
+/*         } else { */
+/*             v = 0; */
+/*         } */
+/*         v <<= 4; */
+/*         c = hex[2*i + 1]; */
+/*         if (c >= '0' && c <= '9') { */
+/*             v += c - '0'; */
+/*         } else if (c >= 'A' && c <= 'F') { */
+/*             v += c - 'A' + 10; */
+/*         } else if (c >= 'a' && c <= 'f') { */
+/*             v += c - 'a' + 10; */
+/*         } else { */
+/*             v = 0; */
+/*         } */
+/*         bin[i] = v; */
+/*     } */
+/* } */
+
+/*
+int main()
+{
+       int i;
+
+//     char* KT="feffe9928665731c6d6a8f9467308308";
+//     char* 
MT="d9313225f88406e5a55909c5aff5269a86a7a9531534f7da2e4c303d8a318a721c3c0c95956809532fcf0e2449a6b525b16aedf5aa0de657ba637b39";
+//     char* HT="feedfacedeadbeeffeedfacedeadbeefabaddad2";
+//     char* NT="cafebabefacedbaddecaf888";
+// Tag should be 5bc94fbc3221a5db94fae95ae7121a47
+//     char* 
NT="9313225df88406e555909c5aff5269aa6a7a9538534f7da1e4c303d2a318a728c3c0c95156809539fcf0e2429a6b525416aedbf5a0de6a57a637b39b";
+// Tag should be 619cc5aefffe0bfa462af43c1699d050
+
+  char* KT="6dfb5dc68af6ae2f3242e9184f100918";
+  char* MT="47809d16c2c6ec685962c90e53fe1bba";
+  char* HT="dd0fa6e494031139d71ee45f00d56fa4";
+  char* NT="37d36f5c54d53479d4745dd1";
+
+
+       int len=strlen(MT)/2;
+       int lenH=strlen(HT)/2;
+       int lenK=strlen(KT)/2;
+       int lenIV=strlen(NT)/2;
+
+       char T[16];   // Tag
+       char K[16];   // AES Key
+       char H[64];   // Header - to be included in Authentication, but not 
encrypted
+       char N[100];   // IV - Initialisation vector
+       char M[100];  // Plaintext to be encrypted/authenticated
+       char C[100];  // Ciphertext
+       char P[100];  // Recovered Plaintext
+
+       gcm g;
+
+    hex2bytes(MT, M);
+    hex2bytes(HT, H);
+    hex2bytes(NT, N);
+       hex2bytes(KT, K);
+
+       printf("lenK= %d\n",lenK);
+
+       printf("Plaintext=\n");
+       for (i=0;i<len;i++) printf("%02x",(unsigned char)M[i]);
+       printf("\n");
+
+       GCM_init(&g,16,K,lenIV,N);
+       GCM_add_header(&g,H,lenH);
+       GCM_add_plain(&g,C,M,len);
+       GCM_finish(&g,T);
+
+       printf("Ciphertext=\n");
+       for (i=0;i<len;i++) printf("%02x",(unsigned char)C[i]);
+       printf("\n");
+
+       printf("Tag=\n");
+       for (i=0;i<16;i++) printf("%02x",(unsigned char)T[i]);
+       printf("\n");
+
+       GCM_init(&g,16,K,lenIV,N);
+       GCM_add_header(&g,H,lenH);
+       GCM_add_cipher(&g,P,C,len);
+       GCM_finish(&g,T);
+
+       printf("Plaintext=\n");
+       for (i=0;i<len;i++) printf("%02x",(unsigned char)P[i]);
+       printf("\n");
+
+       printf("Tag=\n");
+       for (i=0;i<16;i++) printf("%02x",(unsigned char)T[i]);
+       printf("\n");
+}
+
+*/

Reply via email to