Module Name: src
Committed By: isaki
Date: Fri Apr 19 13:31:11 UTC 2013
Modified Files:
src/sys/arch/m68k/fpe: files.fpe fpu_emulate.h fpu_hyperb.c fpu_trig.c
Added Files:
src/sys/arch/m68k/fpe: fpu_cordic.c
Log Message:
Introduce the CORDIC algorithm.
o sine and cosine (e.g., FSIN, FCOS and FSINCOS instructions) is now
calculated in the CORDIC instead of Taylor expansion.
o tangent (FTAN) is not touched from a viewpoint of the code size.
o The CORDIC is applicable for hyperbolic functions (e.g., FSINH,
FCOSH, FTANH instructions), but I didn't use it because its working
range is poor.
o The CORDIC is also usable for inverse trigonometric functions,
I will commit it at next phase.
o The code size becomes a bit big. I cannot evaluate speed on m68k
for some reasons, but in test on i386 the CORDIC is approximately
100 times faster in sin/cos.
To generate a diff of this commit:
cvs rdiff -u -r1.2 -r1.3 src/sys/arch/m68k/fpe/files.fpe
cvs rdiff -u -r0 -r1.1 src/sys/arch/m68k/fpe/fpu_cordic.c
cvs rdiff -u -r1.23 -r1.24 src/sys/arch/m68k/fpe/fpu_emulate.h
cvs rdiff -u -r1.8 -r1.9 src/sys/arch/m68k/fpe/fpu_hyperb.c
cvs rdiff -u -r1.10 -r1.11 src/sys/arch/m68k/fpe/fpu_trig.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/m68k/fpe/files.fpe
diff -u src/sys/arch/m68k/fpe/files.fpe:1.2 src/sys/arch/m68k/fpe/files.fpe:1.3
--- src/sys/arch/m68k/fpe/files.fpe:1.2 Fri Nov 3 04:51:51 1995
+++ src/sys/arch/m68k/fpe/files.fpe Fri Apr 19 13:31:11 2013
@@ -1,10 +1,11 @@
-# $NetBSD: files.fpe,v 1.2 1995/11/03 04:51:51 briggs Exp $
+# $NetBSD: files.fpe,v 1.3 2013/04/19 13:31:11 isaki Exp $
# Config(.new) file for m68k floating point emulator.
# Included by ports that need it.
file arch/m68k/fpe/fpu_add.c fpu_emulate
file arch/m68k/fpe/fpu_calcea.c fpu_emulate
+file arch/m68k/fpe/fpu_cordic.c fpu_emulate
file arch/m68k/fpe/fpu_div.c fpu_emulate
file arch/m68k/fpe/fpu_emulate.c fpu_emulate
file arch/m68k/fpe/fpu_exp.c fpu_emulate
Index: src/sys/arch/m68k/fpe/fpu_emulate.h
diff -u src/sys/arch/m68k/fpe/fpu_emulate.h:1.23 src/sys/arch/m68k/fpe/fpu_emulate.h:1.24
--- src/sys/arch/m68k/fpe/fpu_emulate.h:1.23 Thu Apr 11 13:27:11 2013
+++ src/sys/arch/m68k/fpe/fpu_emulate.h Fri Apr 19 13:31:11 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_emulate.h,v 1.23 2013/04/11 13:27:11 isaki Exp $ */
+/* $NetBSD: fpu_emulate.h,v 1.24 2013/04/19 13:31:11 isaki Exp $ */
/*
* Copyright (c) 1995 Gordon Ross
@@ -246,7 +246,13 @@ int fpu_emul_fscale(struct fpemu *, stru
int fpu_emulate(struct frame *, struct fpframe *, ksiginfo_t *);
struct fpn *fpu_cmp(struct fpemu *);
-struct fpn *fpu_sincos_taylor(struct fpemu *, struct fpn *, uint32_t, int);
+/* fpu_cordic.c */
+extern const struct fpn fpu_cordic_inv_gain1;
+extern const struct fpn fpu_cordic_inv_gain2;
+void fpu_cordit1(struct fpemu *,
+ struct fpn *, struct fpn *, struct fpn *, const struct fpn *);
+void fpu_cordit2(struct fpemu *,
+ struct fpn *, struct fpn *, struct fpn *, const struct fpn *);
/*
* "helper" functions
@@ -254,6 +260,7 @@ struct fpn *fpu_sincos_taylor(struct fpe
/* return values from constant rom */
struct fpn *fpu_const(struct fpn *, uint32_t);
#define FPU_CONST_PI (0x00) /* pi */
+#define FPU_CONST_0 (0x0f) /* 0.0 */
#define FPU_CONST_LN_2 (0x30) /* ln(2) */
#define FPU_CONST_LN_10 (0x31) /* ln(10) */
#define FPU_CONST_1 (0x32) /* 1.0 */
Index: src/sys/arch/m68k/fpe/fpu_hyperb.c
diff -u src/sys/arch/m68k/fpe/fpu_hyperb.c:1.8 src/sys/arch/m68k/fpe/fpu_hyperb.c:1.9
--- src/sys/arch/m68k/fpe/fpu_hyperb.c:1.8 Thu Apr 11 13:27:11 2013
+++ src/sys/arch/m68k/fpe/fpu_hyperb.c Fri Apr 19 13:31:11 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_hyperb.c,v 1.8 2013/04/11 13:27:11 isaki Exp $ */
+/* $NetBSD: fpu_hyperb.c,v 1.9 2013/04/19 13:31:11 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -57,7 +57,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.8 2013/04/11 13:27:11 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.9 2013/04/19 13:31:11 isaki Exp $");
#include "fpu_emulate.h"
@@ -74,12 +74,72 @@ fpu_atanh(struct fpemu *fe)
return &fe->fe_f2;
}
+/*
+ * taylor expansion used by sinh(), cosh().
+ */
+static struct fpn *
+__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
+{
+ struct fpn res;
+ struct fpn x2;
+ struct fpn *s1;
+ struct fpn *r;
+ int sign;
+ uint32_t k;
+
+ /* x2 := x * x */
+ CPYFPN(&fe->fe_f1, &fe->fe_f2);
+ r = fpu_mul(fe);
+ CPYFPN(&x2, r);
+
+ /* res := s0 */
+ CPYFPN(&res, s0);
+
+ sign = 1; /* sign := (-1)^n */
+
+ for (;;) {
+ /* (f1 :=) s0 * x^2 */
+ CPYFPN(&fe->fe_f1, s0);
+ CPYFPN(&fe->fe_f2, &x2);
+ r = fpu_mul(fe);
+ CPYFPN(&fe->fe_f1, r);
+
+ /*
+ * for sin(), s1 := s0 * x^2 / (2n+1)2n
+ * for cos(), s1 := s0 * x^2 / 2n(2n-1)
+ */
+ k = f * (f + 1);
+ fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
+ s1 = fpu_div(fe);
+
+ /* break if s1 is enough small */
+ if (ISZERO(s1))
+ break;
+ if (res.fp_exp - s1->fp_exp >= FP_NMANT)
+ break;
+
+ /* s0 := s1 for next loop */
+ CPYFPN(s0, s1);
+
+ /* res += s1 */
+ CPYFPN(&fe->fe_f2, s1);
+ CPYFPN(&fe->fe_f1, &res);
+ r = fpu_add(fe);
+ CPYFPN(&res, r);
+
+ f += 2;
+ sign ^= 1;
+ }
+
+ CPYFPN(&fe->fe_f2, &res);
+ return &fe->fe_f2;
+}
+
struct fpn *
fpu_cosh(struct fpemu *fe)
{
struct fpn s0;
struct fpn *r;
- int hyperb = 1;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -90,7 +150,7 @@ fpu_cosh(struct fpemu *fe)
}
fpu_const(&s0, FPU_CONST_1);
- r = fpu_sincos_taylor(fe, &s0, 1, hyperb);
+ r = __fpu_sinhcosh_taylor(fe, &s0, 1);
CPYFPN(&fe->fe_f2, r);
return &fe->fe_f2;
@@ -101,7 +161,6 @@ fpu_sinh(struct fpemu *fe)
{
struct fpn s0;
struct fpn *r;
- int hyperb = 1;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -109,7 +168,7 @@ fpu_sinh(struct fpemu *fe)
return &fe->fe_f2;
CPYFPN(&s0, &fe->fe_f2);
- r = fpu_sincos_taylor(fe, &s0, 2, hyperb);
+ r = __fpu_sinhcosh_taylor(fe, &s0, 2);
CPYFPN(&fe->fe_f2, r);
return &fe->fe_f2;
Index: src/sys/arch/m68k/fpe/fpu_trig.c
diff -u src/sys/arch/m68k/fpe/fpu_trig.c:1.10 src/sys/arch/m68k/fpe/fpu_trig.c:1.11
--- src/sys/arch/m68k/fpe/fpu_trig.c:1.10 Thu Apr 18 13:40:25 2013
+++ src/sys/arch/m68k/fpe/fpu_trig.c Fri Apr 19 13:31:11 2013
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_trig.c,v 1.10 2013/04/18 13:40:25 isaki Exp $ */
+/* $NetBSD: fpu_trig.c,v 1.11 2013/04/19 13:31:11 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -57,13 +57,10 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_trig.c,v 1.10 2013/04/18 13:40:25 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_trig.c,v 1.11 2013/04/19 13:31:11 isaki Exp $");
#include "fpu_emulate.h"
-static inline struct fpn *fpu_cos_halfpi(struct fpemu *);
-static inline struct fpn *fpu_sin_halfpi(struct fpemu *);
-
struct fpn *
fpu_acos(struct fpemu *fe)
{
@@ -85,127 +82,23 @@ fpu_atan(struct fpemu *fe)
return &fe->fe_f2;
}
-/*
- * taylor expansion used by sin(), cos(), sinh(), cosh().
- * hyperb is for sinh(), cosh().
- */
-struct fpn *
-fpu_sincos_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f, int hyperb)
-{
- struct fpn res;
- struct fpn x2;
- struct fpn *s1;
- struct fpn *r;
- int sign;
- uint32_t k;
-
- /* x2 := x * x */
- CPYFPN(&fe->fe_f1, &fe->fe_f2);
- r = fpu_mul(fe);
- CPYFPN(&x2, r);
-
- /* res := s0 */
- CPYFPN(&res, s0);
-
- sign = 1; /* sign := (-1)^n */
-
- for (;;) {
- /* (f1 :=) s0 * x^2 */
- CPYFPN(&fe->fe_f1, s0);
- CPYFPN(&fe->fe_f2, &x2);
- r = fpu_mul(fe);
- CPYFPN(&fe->fe_f1, r);
-
- /*
- * for sin(), s1 := s0 * x^2 / (2n+1)2n
- * for cos(), s1 := s0 * x^2 / 2n(2n-1)
- */
- k = f * (f + 1);
- fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
- s1 = fpu_div(fe);
-
- /* break if s1 is enough small */
- if (ISZERO(s1))
- break;
- if (res.fp_exp - s1->fp_exp >= FP_NMANT)
- break;
-
- /* s0 := s1 for next loop */
- CPYFPN(s0, s1);
-
- CPYFPN(&fe->fe_f2, s1);
- if (!hyperb) {
- /* (-1)^n */
- fe->fe_f2.fp_sign = sign;
- }
-
- /* res += s1 */
- CPYFPN(&fe->fe_f1, &res);
- r = fpu_add(fe);
- CPYFPN(&res, r);
-
- f += 2;
- sign ^= 1;
- }
-
- CPYFPN(&fe->fe_f2, &res);
- return &fe->fe_f2;
-}
-
-/*
- * inf (-1)^n 2n
- * cos(x) = \sum { ------ * x }
- * n=0 2n!
- *
- * x^2 x^4 x^6
- * = 1 - --- + --- - --- + ...
- * 2! 4! 6!
- *
- * = s0 - s1 + s2 - s3 + ...
- *
- * x*x x*x x*x
- * s0 = 1, s1 = ---*s0, s2 = ---*s1, s3 = ---*s2, ...
- * 1*2 3*4 5*6
- *
- * here 0 <= x < pi/2
- */
-static inline struct fpn *
-fpu_cos_halfpi(struct fpemu *fe)
-{
- struct fpn s0;
-
- /* s0 := 1 */
- fpu_const(&s0, FPU_CONST_1);
-
- return fpu_sincos_taylor(fe, &s0, 1, 0);
-}
/*
- * inf (-1)^n 2n+1
- * sin(x) = \sum { ------- * x }
- * n=0 (2n+1)!
- *
- * x^3 x^5 x^7
- * = x - --- + --- - --- + ...
- * 3! 5! 7!
- *
- * = s0 - s1 + s2 - s3 + ...
- *
- * x*x x*x x*x
- * s0 = x, s1 = ---*s0, s2 = ---*s1, s3 = ---*s2, ...
- * 2*3 4*5 6*7
- *
- * here 0 <= x < pi/2
+ * fe_f1 := sin(in)
+ * fe_f2 := cos(in)
*/
-static inline struct fpn *
-fpu_sin_halfpi(struct fpemu *fe)
+static void
+__fpu_sincos_cordic(struct fpemu *fe, const struct fpn *in)
{
- struct fpn s0;
-
- /* s0 := x */
- CPYFPN(&s0, &fe->fe_f2);
+ struct fpn a;
+ struct fpn v;
- return fpu_sincos_taylor(fe, &s0, 2, 0);
+ CPYFPN(&a, in);
+ fpu_const(&fe->fe_f1, FPU_CONST_0);
+ CPYFPN(&fe->fe_f2, &fpu_cordic_inv_gain1);
+ fpu_const(&v, FPU_CONST_1);
+ v.fp_sign = 1;
+ fpu_cordit1(fe, &fe->fe_f2, &fe->fe_f1, &a, &v);
}
/*
@@ -298,18 +191,15 @@ fpu_cos(struct fpemu *fe)
fe->fe_f2.fp_sign = 1;
r = fpu_add(fe);
if (r->fp_sign == 0) {
- CPYFPN(&fe->fe_f2, r);
- r = fpu_sin_halfpi(fe);
+ __fpu_sincos_cordic(fe, r);
+ r = &fe->fe_f1;
sign ^= 1;
} else {
- CPYFPN(&fe->fe_f2, &x);
- r = fpu_cos_halfpi(fe);
+ __fpu_sincos_cordic(fe, &x);
+ r = &fe->fe_f2;
}
-
- CPYFPN(&fe->fe_f2, r);
- fe->fe_f2.fp_sign = sign;
-
- return &fe->fe_f2;
+ r->fp_sign = sign;
+ return r;
}
/*
@@ -402,17 +292,14 @@ fpu_sin(struct fpemu *fe)
fe->fe_f2.fp_sign = 1;
r = fpu_add(fe);
if (r->fp_sign == 0) {
- CPYFPN(&fe->fe_f2, r);
- r = fpu_cos_halfpi(fe);
+ __fpu_sincos_cordic(fe, r);
+ r = &fe->fe_f2;
} else {
- CPYFPN(&fe->fe_f2, &x);
- r = fpu_sin_halfpi(fe);
+ __fpu_sincos_cordic(fe, &x);
+ r = &fe->fe_f1;
}
-
- CPYFPN(&fe->fe_f2, r);
- fe->fe_f2.fp_sign = sign;
-
- return &fe->fe_f2;
+ r->fp_sign = sign;
+ return r;
}
/*
@@ -453,17 +340,11 @@ fpu_tan(struct fpemu *fe)
struct fpn *
fpu_sincos(struct fpemu *fe, int regc)
{
- struct fpn x;
- struct fpn *r;
-
- CPYFPN(&x, &fe->fe_f2);
+ __fpu_sincos_cordic(fe, &fe->fe_f2);
/* cos(x) */
- r = fpu_cos(fe);
- fpu_implode(fe, r, FTYPE_EXT, &fe->fe_fpframe->fpf_regs[regc]);
+ fpu_implode(fe, &fe->fe_f2, FTYPE_EXT, &fe->fe_fpframe->fpf_regs[regc]);
/* sin(x) */
- CPYFPN(&fe->fe_f2, &x);
- r = fpu_sin(fe);
- return r;
+ return &fe->fe_f1;
}
Added files:
Index: src/sys/arch/m68k/fpe/fpu_cordic.c
diff -u /dev/null src/sys/arch/m68k/fpe/fpu_cordic.c:1.1
--- /dev/null Fri Apr 19 13:31:11 2013
+++ src/sys/arch/m68k/fpe/fpu_cordic.c Fri Apr 19 13:31:11 2013
@@ -0,0 +1,646 @@
+/* $NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $ */
+
+/*
+ * Copyright (c) 2013 Tetsuya Isaki. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $");
+
+#include <machine/ieee.h>
+
+#include "fpu_emulate.h"
+
+/*
+ * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same.
+ * The most significant byte of sp_m0 is EXP (signed byte) and the rest
+ * of sp_m0 is fp_mant[0].
+ */
+struct sfpn {
+ uint32_t sp_m0;
+ uint32_t sp_m1;
+ uint32_t sp_m2;
+};
+
+#if defined(CORDIC_BOOTSTRAP)
+/*
+ * This is a bootstrap code to generate a pre-calculated tables such as
+ * atan_table[] and atanh_table[]. However, it's just for reference.
+ * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP
+ * and modify these files as a userland application.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <float.h>
+
+static void prepare_cordic_const(struct fpemu *);
+static struct fpn *fpu_gain1_cordic(struct fpemu *);
+static struct fpn *fpu_gain2_cordic(struct fpemu *);
+static struct fpn *fpu_atan_tayler(struct fpemu *);
+static void printf_fpn(const struct fpn *);
+static void printf_sfpn(const struct sfpn *);
+static void fpn_to_sfpn(struct sfpn *, const struct fpn *);
+
+static struct sfpn atan_table[EXT_FRACBITS];
+static struct sfpn atanh_table[EXT_FRACBITS];
+static struct fpn inv_gain1;
+static struct fpn inv_gain2;
+
+int
+main(int argc, char *argv[])
+{
+ struct fpemu dummyfe;
+ int i;
+ struct fpn fp;
+
+ memset(&dummyfe, 0, sizeof(dummyfe));
+ prepare_cordic_const(&dummyfe);
+
+ /* output as source code */
+ printf("static const struct sfpn atan_table[] = {\n");
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ printf("\t");
+ printf_sfpn(&atan_table[i]);
+ printf(",\n");
+ }
+ printf("};\n\n");
+
+ printf("static const struct sfpn atanh_table[] = {\n");
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ printf("\t");
+ printf_sfpn(&atanh_table[i]);
+ printf(",\n");
+ }
+ printf("};\n\n");
+
+ printf("const struct fpn fpu_cordic_inv_gain1 =\n\t");
+ printf_fpn(&inv_gain1);
+ printf(";\n\n");
+
+ printf("const struct fpn fpu_cordic_inv_gain2 =\n\t");
+ printf_fpn(&inv_gain2);
+ printf(";\n\n");
+}
+
+/*
+ * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn()
+ * and fpu_atan_tayler() as bootstrap.
+ */
+static void
+prepare_cordic_const(struct fpemu *fe)
+{
+ struct fpn t;
+ struct fpn x;
+ struct fpn *r;
+ int i;
+
+ /* atan_table and atanh_table */
+ fpu_const(&t, FPU_CONST_1);
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ /* atan(t) */
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_atan_tayler(fe);
+ fpn_to_sfpn(&atan_table[i], r);
+
+ /* t /= 2 */
+ t.fp_exp--;
+
+ /* (1-t) */
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, &t);
+ fe->fe_f2.fp_sign = 1;
+ r = fpu_add(fe);
+ CPYFPN(&x, r);
+
+ /* (1+t) */
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_add(fe);
+
+ /* r = (1+t)/(1-t) */
+ CPYFPN(&fe->fe_f1, r);
+ CPYFPN(&fe->fe_f2, &x);
+ r = fpu_div(fe);
+
+ /* r = log(r) */
+ CPYFPN(&fe->fe_f2, r);
+ r = fpu_logn(fe);
+
+ /* r /= 2 */
+ r->fp_exp--;
+
+ fpn_to_sfpn(&atanh_table[i], r);
+ }
+
+ /* inv_gain1 = 1 / gain1cordic() */
+ r = fpu_gain1_cordic(fe);
+ CPYFPN(&fe->fe_f2, r);
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ r = fpu_div(fe);
+ CPYFPN(&inv_gain1, r);
+
+ /* inv_gain2 = 1 / gain2cordic() */
+ r = fpu_gain2_cordic(fe);
+ CPYFPN(&fe->fe_f2, r);
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ r = fpu_div(fe);
+ CPYFPN(&inv_gain2, r);
+}
+
+static struct fpn *
+fpu_gain1_cordic(struct fpemu *fe)
+{
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn v;
+
+ fpu_const(&x, FPU_CONST_1);
+ fpu_const(&y, FPU_CONST_0);
+ fpu_const(&z, FPU_CONST_0);
+ CPYFPN(&v, &x);
+ v.fp_sign = !v.fp_sign;
+
+ fpu_cordit1(fe, &x, &y, &z, &v);
+ CPYFPN(&fe->fe_f2, &x);
+ return &fe->fe_f2;
+}
+
+static struct fpn *
+fpu_gain2_cordic(struct fpemu *fe)
+{
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn v;
+
+ fpu_const(&x, FPU_CONST_1);
+ fpu_const(&y, FPU_CONST_0);
+ fpu_const(&z, FPU_CONST_0);
+ CPYFPN(&v, &x);
+ v.fp_sign = !v.fp_sign;
+
+ fpu_cordit2(fe, &x, &y, &z, &v);
+ CPYFPN(&fe->fe_f2, &x);
+ return &fe->fe_f2;
+}
+
+/*
+ * arctan(x) = pi/4 (for |x| = 1)
+ *
+ * x^3 x^5 x^7
+ * arctan(x) = x - --- + --- - --- + ... (for |x| < 1)
+ * 3 5 7
+ */
+static struct fpn *
+fpu_atan_tayler(struct fpemu *fe)
+{
+ struct fpn res;
+ struct fpn x2;
+ struct fpn s0;
+ struct fpn *s1;
+ struct fpn *r;
+ uint32_t k;
+
+ /* arctan(1) is pi/4 */
+ if (fe->fe_f2.fp_exp == 0) {
+ fpu_const(&fe->fe_f2, FPU_CONST_PI);
+ fe->fe_f2.fp_exp -= 2;
+ return &fe->fe_f2;
+ }
+
+ /* s0 := x */
+ CPYFPN(&s0, &fe->fe_f2);
+
+ /* res := x */
+ CPYFPN(&res, &fe->fe_f2);
+
+ /* x2 := x * x */
+ CPYFPN(&fe->fe_f1, &fe->fe_f2);
+ r = fpu_mul(fe);
+ CPYFPN(&x2, r);
+
+ k = 3;
+ for (;;) {
+ /* s1 := -s0 * x2 */
+ CPYFPN(&fe->fe_f1, &s0);
+ CPYFPN(&fe->fe_f2, &x2);
+ s1 = fpu_mul(fe);
+ s1->fp_sign ^= 1;
+ CPYFPN(&fe->fe_f1, s1);
+
+ /* s0 := s1 for next loop */
+ CPYFPN(&s0, s1);
+
+ /* s1 := s1 / k */
+ fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
+ s1 = fpu_div(fe);
+
+ /* break if s1 is enough small */
+ if (ISZERO(s1))
+ break;
+ if (res.fp_exp - s1->fp_exp >= FP_NMANT)
+ break;
+
+ /* res += s1 */
+ CPYFPN(&fe->fe_f2, s1);
+ CPYFPN(&fe->fe_f1, &res);
+ r = fpu_add(fe);
+ CPYFPN(&res, r);
+
+ k += 2;
+ }
+
+ CPYFPN(&fe->fe_f2, &res);
+ return &fe->fe_f2;
+}
+
+static void
+printf_fpn(const struct fpn *fp)
+{
+ printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }",
+ fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0,
+ fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]);
+}
+
+static void
+printf_sfpn(const struct sfpn *sp)
+{
+ printf("{ 0x%08x, 0x%08x, 0x%08x, }",
+ sp->sp_m0, sp->sp_m1, sp->sp_m2);
+}
+
+static void
+fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp)
+{
+ sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0];
+ sp->sp_m1 = fp->fp_mant[1];
+ sp->sp_m2 = fp->fp_mant[2];
+}
+
+#else /* CORDIC_BOOTSTRAP */
+
+static const struct sfpn atan_table[] = {
+ { 0xff06487e, 0xd5110b46, 0x11a80000, },
+ { 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, },
+ { 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, },
+ { 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, },
+ { 0xfb07fd56, 0xedcb3f7a, 0x71b65937, },
+ { 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, },
+ { 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, },
+ { 0xf807fff5, 0x556eeea5, 0xcb403117, },
+ { 0xf707fffd, 0x5556eeed, 0xca5d8956, },
+ { 0xf607ffff, 0x55556eee, 0xea5ca6ab, },
+ { 0xf507ffff, 0xd55556ee, 0xeedca5c8, },
+ { 0xf407ffff, 0xf555556e, 0xeeeea5c8, },
+ { 0xf307ffff, 0xfd555556, 0xeeeeedc8, },
+ { 0xf207ffff, 0xff555555, 0x6eeeeee8, },
+ { 0xf107ffff, 0xffd55555, 0x56eeeeed, },
+ { 0xf007ffff, 0xfff55555, 0x556eeeed, },
+ { 0xef07ffff, 0xfffd5555, 0x5556eeed, },
+ { 0xee07ffff, 0xffff5555, 0x55556eed, },
+ { 0xed07ffff, 0xffffd555, 0x555556ed, },
+ { 0xec07ffff, 0xfffff555, 0x5555556d, },
+ { 0xeb07ffff, 0xfffffd55, 0x55555555, },
+ { 0xea07ffff, 0xffffff55, 0x55555554, },
+ { 0xe907ffff, 0xffffffd5, 0x55555554, },
+ { 0xe807ffff, 0xfffffff5, 0x55555554, },
+ { 0xe707ffff, 0xfffffffd, 0x55555554, },
+ { 0xe607ffff, 0xffffffff, 0x55555554, },
+ { 0xe507ffff, 0xffffffff, 0xd5555554, },
+ { 0xe407ffff, 0xffffffff, 0xf5555554, },
+ { 0xe307ffff, 0xffffffff, 0xfd555554, },
+ { 0xe207ffff, 0xffffffff, 0xff555554, },
+ { 0xe107ffff, 0xffffffff, 0xffd55554, },
+ { 0xe007ffff, 0xffffffff, 0xfff55554, },
+ { 0xdf07ffff, 0xffffffff, 0xfffd5554, },
+ { 0xde07ffff, 0xffffffff, 0xffff5554, },
+ { 0xdd07ffff, 0xffffffff, 0xffffd554, },
+ { 0xdc07ffff, 0xffffffff, 0xfffff554, },
+ { 0xdb07ffff, 0xffffffff, 0xfffffd54, },
+ { 0xda07ffff, 0xffffffff, 0xffffff54, },
+ { 0xd907ffff, 0xffffffff, 0xffffffd4, },
+ { 0xd807ffff, 0xffffffff, 0xfffffff4, },
+ { 0xd707ffff, 0xffffffff, 0xfffffffc, },
+ { 0xd7040000, 0x00000000, 0x00000000, },
+ { 0xd6040000, 0x00000000, 0x00000000, },
+ { 0xd5040000, 0x00000000, 0x00000000, },
+ { 0xd4040000, 0x00000000, 0x00000000, },
+ { 0xd3040000, 0x00000000, 0x00000000, },
+ { 0xd2040000, 0x00000000, 0x00000000, },
+ { 0xd1040000, 0x00000000, 0x00000000, },
+ { 0xd0040000, 0x00000000, 0x00000000, },
+ { 0xcf040000, 0x00000000, 0x00000000, },
+ { 0xce040000, 0x00000000, 0x00000000, },
+ { 0xcd040000, 0x00000000, 0x00000000, },
+ { 0xcc040000, 0x00000000, 0x00000000, },
+ { 0xcb040000, 0x00000000, 0x00000000, },
+ { 0xca040000, 0x00000000, 0x00000000, },
+ { 0xc9040000, 0x00000000, 0x00000000, },
+ { 0xc8040000, 0x00000000, 0x00000000, },
+ { 0xc7040000, 0x00000000, 0x00000000, },
+ { 0xc6040000, 0x00000000, 0x00000000, },
+ { 0xc5040000, 0x00000000, 0x00000000, },
+ { 0xc4040000, 0x00000000, 0x00000000, },
+ { 0xc3040000, 0x00000000, 0x00000000, },
+ { 0xc2040000, 0x00000000, 0x00000000, },
+ { 0xc1040000, 0x00000000, 0x00000000, },
+};
+
+static const struct sfpn atanh_table[] = {
+ { 0xff0464fa, 0x9eab40c2, 0xa5dc43f6, },
+ { 0xfe04162b, 0xbea04514, 0x69ca8e4a, },
+ { 0xfd040562, 0x4727abbd, 0xda654b67, },
+ { 0xfc040156, 0x22b4dd6b, 0x372a679c, },
+ { 0xfb040055, 0x62246bb8, 0x92d60b35, },
+ { 0xfa040015, 0x56222b47, 0x2637d656, },
+ { 0xf9040005, 0x55622246, 0xb4dcf86e, },
+ { 0xf8040001, 0x55562222, 0xb46bb307, },
+ { 0xf7040000, 0x55556222, 0x246b45cd, },
+ { 0xf6040000, 0x15555622, 0x222b465b, },
+ { 0xf5040000, 0x05555562, 0x2222467f, },
+ { 0xf4040000, 0x01555556, 0x22221eaf, },
+ { 0xf3040000, 0x00555555, 0x62222213, },
+ { 0xf2040000, 0x00155555, 0x56221221, },
+ { 0xf1040000, 0x00055555, 0x556221a2, },
+ { 0xf0040000, 0x00015555, 0x5556221e, },
+ { 0xef040000, 0x00005555, 0x55552222, },
+ { 0xee040000, 0x00001555, 0x55555222, },
+ { 0xed040000, 0x00000555, 0x55555522, },
+ { 0xec040000, 0x00000155, 0x55555552, },
+ { 0xeb040000, 0x00000055, 0x554d5555, },
+ { 0xea040000, 0x00000015, 0x55545555, },
+ { 0xe9040000, 0x00000005, 0x55553555, },
+ { 0xe8040000, 0x00000001, 0x55555155, },
+ { 0xe7040000, 0x00000000, 0x555554d5, },
+ { 0xe6040000, 0x00000000, 0x15555545, },
+ { 0xe5040000, 0x00000000, 0x05555553, },
+ { 0xe307ffff, 0xffffffff, 0xfaaaaaaa, },
+ { 0xe207ffff, 0xffffffff, 0xfeaaaaaa, },
+ { 0xe107ffff, 0xffffffff, 0xffaaaaaa, },
+ { 0xe007ffff, 0xffffffff, 0xffeaaaaa, },
+ { 0xdf07ffff, 0xffffffff, 0xfffaaaaa, },
+ { 0xde07ffff, 0xffffffff, 0xfffeaaaa, },
+ { 0xdd07ffff, 0xffffffff, 0xffffaaaa, },
+ { 0xdc07ffff, 0xffffffff, 0xffffeaaa, },
+ { 0xdb07ffff, 0xffffffff, 0xfffffaaa, },
+ { 0xda07ffff, 0xffffffff, 0xfffffeaa, },
+ { 0xd907ffff, 0xffffffff, 0xffffffaa, },
+ { 0xd807ffff, 0xffffffff, 0xffffffea, },
+ { 0xd707ffff, 0xffffffff, 0xfffffffa, },
+ { 0xd607ffff, 0xffffffff, 0xfffffffe, },
+ { 0xd507ffff, 0xfffffe00, 0x00000000, },
+ { 0xd407ffff, 0xffffff00, 0x00000000, },
+ { 0xd307ffff, 0xffffff80, 0x00000000, },
+ { 0xd207ffff, 0xffffffc0, 0x00000000, },
+ { 0xd107ffff, 0xffffffe0, 0x00000000, },
+ { 0xd007ffff, 0xfffffff0, 0x00000000, },
+ { 0xcf07ffff, 0xfffffff8, 0x00000000, },
+ { 0xce07ffff, 0xfffffffc, 0x00000000, },
+ { 0xcd07ffff, 0xfffffffe, 0x00000000, },
+ { 0xcc07ffff, 0xffffffff, 0x00000000, },
+ { 0xcb07ffff, 0xffffffff, 0x80000000, },
+ { 0xca07ffff, 0xffffffff, 0xc0000000, },
+ { 0xc907ffff, 0xffffffff, 0xe0000000, },
+ { 0xc807ffff, 0xffffffff, 0xf0000000, },
+ { 0xc707ffff, 0xffffffff, 0xf8000000, },
+ { 0xc607ffff, 0xffffffff, 0xfc000000, },
+ { 0xc507ffff, 0xffffffff, 0xfe000000, },
+ { 0xc407ffff, 0xffffffff, 0xff000000, },
+ { 0xc307ffff, 0xffffffff, 0xff800000, },
+ { 0xc207ffff, 0xffffffff, 0xffc00000, },
+ { 0xc107ffff, 0xffffffff, 0xffe00000, },
+ { 0xc007ffff, 0xffffffff, 0xfff00000, },
+ { 0xbf07ffff, 0xffffffff, 0xfff80000, },
+};
+
+const struct fpn fpu_cordic_inv_gain1 =
+ { 1, 0, -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, };
+
+const struct fpn fpu_cordic_inv_gain2 =
+ { 1, 0, 0, 1, { 0x0004d483, 0xec3803fc, 0xc5ff12f8, }, };
+
+#endif /* CORDIC_BOOTSTRAP */
+
+static inline void
+sfpn_to_fpn(struct fpn *fp, const struct sfpn *s)
+{
+ fp->fp_class = FPC_NUM;
+ fp->fp_sign = 0;
+ fp->fp_sticky = 0;
+ fp->fp_exp = s->sp_m0 >> 24;
+ if (fp->fp_exp & 0x80) {
+ fp->fp_exp |= 0xffffff00;
+ }
+ fp->fp_mant[0] = s->sp_m0 & 0x000fffff;
+ fp->fp_mant[1] = s->sp_m1;
+ fp->fp_mant[2] = s->sp_m2;
+}
+
+void
+fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
+ const struct fpn *vecmode)
+{
+ struct fpn t;
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn *r;
+ int i;
+ int sign;
+
+ fpu_const(&t, FPU_CONST_1);
+ CPYFPN(&x, x0);
+ CPYFPN(&y, y0);
+ CPYFPN(&z, z0);
+
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ struct fpn x1;
+
+ /* y < vecmode */
+ CPYFPN(&fe->fe_f1, &y);
+ CPYFPN(&fe->fe_f2, vecmode);
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ r = fpu_add(fe);
+
+ if ((vecmode->fp_sign == 0 && r->fp_sign) ||
+ (vecmode->fp_sign && z.fp_sign == 0)) {
+ sign = 1;
+ } else {
+ sign = 0;
+ }
+
+ /* y * t */
+ CPYFPN(&fe->fe_f1, &y);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_mul(fe);
+
+ /*
+ * x1 = x - y*t (if sign)
+ * x1 = x + y*t
+ */
+ CPYFPN(&fe->fe_f2, r);
+ if (sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ CPYFPN(&fe->fe_f1, &x);
+ r = fpu_add(fe);
+ CPYFPN(&x1, r);
+
+ /* x * t */
+ CPYFPN(&fe->fe_f1, &x);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_mul(fe);
+
+ /*
+ * y = y + x*t (if sign)
+ * y = y - x*t
+ */
+ CPYFPN(&fe->fe_f2, r);
+ if (!sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ CPYFPN(&fe->fe_f1, &y);
+ r = fpu_add(fe);
+ CPYFPN(&y, r);
+
+ /*
+ * z = z - atan_table[i] (if sign)
+ * z = z + atan_table[i]
+ */
+ CPYFPN(&fe->fe_f1, &z);
+ sfpn_to_fpn(&fe->fe_f2, &atan_table[i]);
+ if (sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ r = fpu_add(fe);
+ CPYFPN(&z, r);
+
+ /* x = x1 */
+ CPYFPN(&x, &x1);
+
+ /* t /= 2 */
+ t.fp_exp--;
+ }
+
+ CPYFPN(x0, &x);
+ CPYFPN(y0, &y);
+ CPYFPN(z0, &z);
+}
+
+void
+fpu_cordit2(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0,
+ const struct fpn *vecmode)
+{
+ struct fpn t;
+ struct fpn x;
+ struct fpn y;
+ struct fpn z;
+ struct fpn *r;
+ int i;
+ int k;
+ int sign;
+
+ /* t = 0.5 */
+ fpu_const(&t, FPU_CONST_1);
+ t.fp_exp--;
+
+ CPYFPN(&x, x0);
+ CPYFPN(&y, y0);
+ CPYFPN(&z, z0);
+
+ k = 3;
+ for (i = 0; i < EXT_FRACBITS; i++) {
+ struct fpn x1;
+ int j;
+
+ for (j = 0; j < 2; j++) {
+ if ((vecmode->fp_sign == 0 && y.fp_sign) ||
+ (vecmode->fp_sign && z.fp_sign == 0)) {
+ sign = 0;
+ } else {
+ sign = 1;
+ }
+
+ /* y * t */
+ CPYFPN(&fe->fe_f1, &y);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_mul(fe);
+
+ /*
+ * x1 = x + y*t
+ * x1 = x - y*t (if sign)
+ */
+ CPYFPN(&fe->fe_f2, r);
+ if (sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ CPYFPN(&fe->fe_f1, &x);
+ r = fpu_add(fe);
+ CPYFPN(&x1, r);
+
+ /* x * t */
+ CPYFPN(&fe->fe_f1, &x);
+ CPYFPN(&fe->fe_f2, &t);
+ r = fpu_mul(fe);
+
+ /*
+ * y = y + x*t
+ * y = y - x*t (if sign)
+ */
+ CPYFPN(&fe->fe_f2, r);
+ if (sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ CPYFPN(&fe->fe_f1, &y);
+ r = fpu_add(fe);
+ CPYFPN(&y, r);
+
+ /*
+ * z = z + atanh_table[i] (if sign)
+ * z = z - atanh_table[i]
+ */
+ CPYFPN(&fe->fe_f1, &z);
+ sfpn_to_fpn(&fe->fe_f2, &atanh_table[i]);
+ if (!sign)
+ fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign;
+ r = fpu_add(fe);
+ CPYFPN(&z, r);
+
+ /* x = x1 */
+ CPYFPN(&x, &x1);
+
+ if (k > 0) {
+ k--;
+ break;
+ } else {
+ k = 3;
+ }
+ }
+
+ /* t /= 2 */
+ t.fp_exp--;
+ }
+
+ CPYFPN(x0, &x);
+ CPYFPN(y0, &y);
+ CPYFPN(z0, &z);
+}