Module Name: src
Committed By: isaki
Date: Mon Dec 5 15:31:01 UTC 2016
Modified Files:
src/sys/arch/m68k/fpe: fpu_exp.c fpu_hyperb.c
Log Message:
Improve the exponential and hyperbolic function's performance
10..100 times faster.
PR port-m68k/51645 from rin@ (and modified by me)
To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/arch/m68k/fpe/fpu_exp.c
cvs rdiff -u -r1.16 -r1.17 src/sys/arch/m68k/fpe/fpu_hyperb.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/m68k/fpe/fpu_exp.c
diff -u src/sys/arch/m68k/fpe/fpu_exp.c:1.8 src/sys/arch/m68k/fpe/fpu_exp.c:1.9
--- src/sys/arch/m68k/fpe/fpu_exp.c:1.8 Sat Apr 20 04:54:22 2013
+++ src/sys/arch/m68k/fpe/fpu_exp.c Mon Dec 5 15:31:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $ */
+/* $NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.8 2013/04/20 04:54:22 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_exp.c,v 1.9 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
@@ -100,12 +100,16 @@ fpu_etox_taylor(struct fpemu *fe)
}
/*
- * exp(x)
+ * exp(x) = 2^k * exp(r) with k = round(x / ln2) and r = x - k * ln2
+ *
+ * Algorithm partially taken from libm, where exp(r) is approximated by a
+ * rational function of r. We use the Taylor expansion instead.
*/
struct fpn *
fpu_etox(struct fpemu *fe)
{
- struct fpn *fp;
+ struct fpn x, *fp;
+ int j, k;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -115,19 +119,47 @@ fpu_etox(struct fpemu *fe)
return &fe->fe_f2;
}
- if (fe->fe_f2.fp_sign == 0) {
- /* exp(x) */
- fp = fpu_etox_taylor(fe);
- } else {
- /* 1/exp(-x) */
- fe->fe_f2.fp_sign = 0;
- fp = fpu_etox_taylor(fe);
+ CPYFPN(&x, &fe->fe_f2);
- CPYFPN(&fe->fe_f2, fp);
- fpu_const(&fe->fe_f1, FPU_CONST_1);
- fp = fpu_div(fe);
+ /* k = round(x / ln2) */
+ CPYFPN(&fe->fe_f1, &fe->fe_f2);
+ fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+ fp = fpu_div(fe);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_int(fe);
+ if (ISZERO(fp)) {
+ /* k = 0 */
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_etox_taylor(fe);
+ return fp;
+ }
+ /* extract k as integer format from fpn format */
+ j = FP_LG - fp->fp_exp;
+ if (j < 0) {
+ if (fp->fp_sign)
+ fp->fp_class = FPC_ZERO; /* k < -2^18 */
+ else
+ fp->fp_class = FPC_INF; /* k > 2^18 */
+ return fp;
}
-
+ k = fp->fp_mant[0] >> j;
+ if (fp->fp_sign)
+ k *= -1;
+
+ /* exp(r) = exp(x - k * ln2) */
+ CPYFPN(&fe->fe_f1, fp);
+ fpu_const(&fe->fe_f2, FPU_CONST_LN_2);
+ fp = fpu_mul(fe);
+ fp->fp_sign = !fp->fp_sign;
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_etox_taylor(fe);
+
+ /* 2^k */
+ fp->fp_exp += k;
+
return fp;
}
Index: src/sys/arch/m68k/fpe/fpu_hyperb.c
diff -u src/sys/arch/m68k/fpe/fpu_hyperb.c:1.16 src/sys/arch/m68k/fpe/fpu_hyperb.c:1.17
--- src/sys/arch/m68k/fpe/fpu_hyperb.c:1.16 Fri Oct 11 03:37:08 2013
+++ src/sys/arch/m68k/fpe/fpu_hyperb.c Mon Dec 5 15:31:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $ */
+/* $NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $ */
/*
* Copyright (c) 1995 Ken Nakata
@@ -57,15 +57,12 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.16 2013/10/11 03:37:08 isaki Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fpu_hyperb.c,v 1.17 2016/12/05 15:31:01 isaki Exp $");
#include <machine/ieee.h>
#include "fpu_emulate.h"
-/* The number of items to terminate the Taylor expansion */
-#define MAX_ITEMS (2000)
-
/*
* fpu_hyperb.c: defines the following functions
*
@@ -137,71 +134,14 @@ fpu_atanh(struct fpemu *fe)
}
/*
- * taylor expansion used by sinh(), cosh().
+ * exp(x) + exp(-x)
+ * cosh(x) = ------------------
+ * 2
*/
-static struct fpn *
-__fpu_sinhcosh_taylor(struct fpemu *fe, struct fpn *s0, uint32_t f)
-{
- struct fpn res;
- struct fpn x2;
- struct fpn *s1;
- struct fpn *r;
- int sign;
- uint32_t k;
-
- /* x2 := x * x */
- CPYFPN(&fe->fe_f1, &fe->fe_f2);
- r = fpu_mul(fe);
- CPYFPN(&x2, r);
-
- /* res := s0 */
- CPYFPN(&res, s0);
-
- sign = 1; /* sign := (-1)^n */
-
- for (; f < (2 * MAX_ITEMS); ) {
- /* (f1 :=) s0 * x^2 */
- CPYFPN(&fe->fe_f1, s0);
- CPYFPN(&fe->fe_f2, &x2);
- r = fpu_mul(fe);
- CPYFPN(&fe->fe_f1, r);
-
- /*
- * for sinh(), s1 := s0 * x^2 / (2n+1)2n
- * for cosh(), s1 := s0 * x^2 / 2n(2n-1)
- */
- k = f * (f + 1);
- fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k);
- s1 = fpu_div(fe);
-
- /* break if s1 is enough small */
- if (ISZERO(s1))
- break;
- if (res.fp_exp - s1->fp_exp >= EXT_FRACBITS)
- break;
-
- /* s0 := s1 for next loop */
- CPYFPN(s0, s1);
-
- /* res += s1 */
- CPYFPN(&fe->fe_f2, s1);
- CPYFPN(&fe->fe_f1, &res);
- r = fpu_add(fe);
- CPYFPN(&res, r);
-
- f += 2;
- sign ^= 1;
- }
-
- CPYFPN(&fe->fe_f2, &res);
- return &fe->fe_f2;
-}
-
struct fpn *
fpu_cosh(struct fpemu *fe)
{
- struct fpn s0;
- struct fpn *r;
+ struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -211,17 +151,37 @@ fpu_cosh(struct fpemu *fe)
return &fe->fe_f2;
}
- fpu_const(&s0, FPU_CONST_1);
- r = __fpu_sinhcosh_taylor(fe, &s0, 1);
+ /* if x is +0/-0, return 1 */ /* XXX is this necessary? */
+ if (ISZERO(&fe->fe_f2)) {
+ fpu_const(&fe->fe_f2, FPU_CONST_1);
+ return &fe->fe_f2;
+ }
+
+ fp = fpu_etox(fe);
+ CPYFPN(&x, fp);
- return r;
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_div(fe);
+
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+
+ fp->fp_exp--;
+
+ return fp;
}
+/*
+ * exp(x) - exp(-x)
+ * sinh(x) = ------------------
+ * 2
+ */
struct fpn *
fpu_sinh(struct fpemu *fe)
{
- struct fpn s0;
- struct fpn *r;
+ struct fpn x, *fp;
if (ISNAN(&fe->fe_f2))
return &fe->fe_f2;
@@ -232,12 +192,28 @@ fpu_sinh(struct fpemu *fe)
if (ISZERO(&fe->fe_f2))
return &fe->fe_f2;
- CPYFPN(&s0, &fe->fe_f2);
- r = __fpu_sinhcosh_taylor(fe, &s0, 2);
+ fp = fpu_etox(fe);
+ CPYFPN(&x, fp);
- return r;
+ fpu_const(&fe->fe_f1, FPU_CONST_1);
+ CPYFPN(&fe->fe_f2, fp);
+ fp = fpu_div(fe);
+
+ fp->fp_sign = 1;
+ CPYFPN(&fe->fe_f1, fp);
+ CPYFPN(&fe->fe_f2, &x);
+ fp = fpu_add(fe);
+
+ fp->fp_exp--;
+
+ return fp;
}
+/*
+ * sinh(x)
+ * tanh(x) = ---------
+ * cosh(x)
+ */
struct fpn *
fpu_tanh(struct fpemu *fe)
{