[RFC] libm replacement for Busybox

Loïc Grenié Fri, 10 Oct 2008 07:22:38 -0700

     I've noticed that busybox uses few functions from libm.
  I've written some replacements for those functions. Right
  now this is just the test program and I've not made the
  effort of including them in Busybox. However I'd like to
  have your opinion: would it be a good idea to include
  them ?


    They are neither very fast nor very precise (the
  trigonometric functions are awful, up to 16 bits are
  false) but libm is rather large and usually a user of
  Busybox on an initrd or embedded platform does not
  need neither speed nor excellent precision. Otherwise
  it would always be possible to disable them anyway.

     Quick and dirty way to have a look:

cc -Wall -O2 -o libmtest libmtest.c -lm && ./libmtest
for f in err*
do
  tail -1 $f | sed "s/^/$f: /"|cut -f1-2
done

  The program does 10000 random tests for each of the 7
  functions included in the program (log,exp, pow, sin, cos,
  tan, atan2) and saves them in a (sorted) file. The first
  column is the log2() of the worst relative error of libmtest
  function against the same function of libm (i.e. the number
  of the first error bit in the mantissa) and the second field is
  the value of the arg. that provided the error (the error files are
  more complete -- for pow and atan2 there is a second
  argument but the for loop above does not print it even though
  it is present in the error file).

      Thanks,

           Loïc Grenié

/*
 * Copyright LoÃ¯c GreniÃ©, <[EMAIL PROTECTED]>, 2008
 *
 * You can use this file according to version 2 or 3 of the General Public
 * Licence distributed by the
 * Free Software Foundation, Inc,.
 * 59 Temple Place, Suite 330
 * Boston, MA 0211-1307
 * USA
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>

#define COMPARE_SIZE	10000

#ifndef INFINITY
#define INFINITY	HUGE_VAL
#endif

#ifndef NAN
#if BB_BIG_ENDIAN
static unsigned char _nan[4] = { 0x7F, 0xF0, 0, 0};
#else
static unsigned char _nan[4] = { 0, 0, 0xF0, 0x7F};
#endif
#define	NAN	(*(float *)&_nan)
#endif

typedef union {
    unsigned char	c[8];
    unsigned short	s[4];
    double		d;
} _internal_double_t;

/* Position of the high-order bits of the doubles (in the "short" array) */
#if BB_BIG_ENDIAN
#define _DOUBLE_BITS48	0
#else
#define _DOUBLE_BITS48	3
#endif

double
mylog(double _x)
{
    _internal_double_t x = *(_internal_double_t *)&_x;
    double t, t2, t2n = 1., y = 0;
    int n, i;

    if (_x == 0)
	return -INFINITY;
    if (_x < 0)
	return NAN;
    n = (x.s[_DOUBLE_BITS48] >> 4) - 1023;
    x.s[_DOUBLE_BITS48] &= 0x000F;
    x.s[_DOUBLE_BITS48] |= 1023 << 4;
    if (x.d>M_SQRT2) {
	x.d /= 2;
	n++;
    }
    t = 1-2/(x.d+1);	/* t = (x_mant-1)/(x_mant+1) */
    t2 = t*t;
    for (i = 1; i <= 21; i += 2) {
	y += t2n/i;
	t2n *= t2;
    }
    return n/M_LOG2E+2*t*y;
}

double
myexp(double x)
{
    _internal_double_t res;
    double y;
    int inv = 0, n, i;

    if (x == 0)	/* Improbable but possible ! */
	return 1;
    if (x < 0) {
	x = -x;
	inv = 1;
    }
    /* Here x > 0 */
    y = x*M_LOG2E;
    n = (int)y;
    if (n >= 1024)
	return inv ? 0 : INFINITY;
    if (y-n > .5)
	n++;
    x -= n/M_LOG2E;
    for (y = 1, res.d = 1, i = 1; i < 15; i++) {
	y *= x/i;
	res.d += y;
    }
    if (inv) {
	res.d = 1/res.d;
	n = -n;
    }
    res.s[_DOUBLE_BITS48] += n*16;
    
    return res.d;
}

double mypow(double x, double y) { return myexp(y*mylog(x)); }

double
mycos_or_sin(int iscos, double x)
{
    int i;
    int n = (int)(x/M_PI);
    double x2, y, res;

    x -= n*M_PI;
    if (x < 0) {
	x = -x;
	if (!iscos)
	    n++;
    }
    /* 0 <= x < M_PI */
    if (x == 0)
	return n & 1 ? -1 : 1;
    if (x == M_PI/2)
	return 0;
    if (x > M_PI/2) {
	x -= M_PI;
	n++;
    }
    /* -M_PI/2 < x <= M_PI/2 */
    if (x < 0) {
	x = -x;
	if (!iscos)
	    n++;
    }
    /* 0 <= x <= M_PI/2 */
#if 0
    if (x > M_PI/4) {
	x = M_PI/2 - x;
	iscos = !iscos;
    }
    /* 0 <= x <= M_PI/4 */
#endif
    if (iscos) {
	y = 1;
	i = 2;
    }
    else {
	y = x;
	i = 3;
    }
    res = y;
    x2 = -x*x;
    for (; i <= 42; i += 2) {
	y *= x2/(i*(i-1));
	res += y;
    }
    return n & 1 ? -res : res;
}

double mycos(double x) { return mycos_or_sin(1 ,x); }
double mysin(double x) { return mycos_or_sin(0, x); }
double mytan(double x) { return mycos_or_sin(0, x)/mycos_or_sin(1, x); }

double
mysqrt(double _x)
{
    _internal_double_t x = *(_internal_double_t *)&_x, res;
    double y = 1, x3;
    short expo;
    int i;

    if (_x == 1 || _x == 0)
	return _x;
    if (_x < 0)
	return NAN;
    expo = x.s[_DOUBLE_BITS48] >> 4;
    expo -= 1023;
    expo /= 2;
    x.s[_DOUBLE_BITS48] -= expo << 5;
    if (x.d > 8./5) {
	expo++;
	x.d /= 4;
    }
    else if (x.d < 2./5) {
	expo--;
	x.d *= 4;
    }
    x.d -= 1;
    x3 = 3 * x.d;
    res.d = 1;
    for (i = 2; i <= 120; i += 2)
    {
	y *= x3/i - x.d;
	res.d += y;
    }
    res.s[_DOUBLE_BITS48] += expo << 4;
    return res.d;
}

double
myatan2(double y, double x)
{
    int dbl, i;
    double t, t2n, res, mt2;

    if (y == 0) {
	if (x == 0)
	    return NAN;
	if (x < 0)
	    return -M_PI;
	return M_PI;
    }
    if (x == 0) {
	if (y > 0)
	    return M_PI/2;
	return -M_PI/2;
    }

    t = y/x;

    for (dbl = 0; t > M_SQRT2-1 || t < 1-M_SQRT2; dbl++)
	t = (sqrt(1+t*t)-1)/t;

    t2n = t;
    mt2 = -t*t;
    res = t;

    for (i = 3; i <= 41; i += 2) {
	t2n *= mt2;
	res += t2n/i;
    }
    for (; dbl > 0; dbl--)
	res *= 2;

    if (x < 0) {
	if (res < 0)
	    res = res + M_PI;
	else
	    res = res - M_PI;
    }

    return res;
}

void
printerr(char *type, double _r1, double _r2)
{
    int i;
    _internal_double_t r1, r2;

    r1.d = _r1;
    r2.d = _r2;
    printf("%s: ", type);
    for (i = 7; i >= 0; i--)
	printf("%02X", r2.c[i] ^ r1.c[i]);
    printf("\n");
}

int
main()
{
    double comp_log[4*COMPARE_SIZE];
    double comp_exp[4*COMPARE_SIZE];
    double comp_pow[5*COMPARE_SIZE];
    double comp_sin[4*COMPARE_SIZE];
    double comp_cos[4*COMPARE_SIZE];
    double comp_tan[4*COMPARE_SIZE];
    double comp_sqrt[4*COMPARE_SIZE];
    double comp_atan2[5*COMPARE_SIZE];
    int i;
    long seed;
    FILE *f;

    f = fopen("/dev/random", "r");
    fread(&seed, sizeof seed, 1, f);
    fclose(f);
    srand48(seed);
    for (i = 0; i < COMPARE_SIZE; i++) {
	comp_log[4*i] = drand48() * 1000;
	comp_log[4*i+1] = mylog(comp_log[4*i]);
	comp_log[4*i+2] = log(comp_log[4*i]);
	comp_log[4*i+3] = log2(fabs((comp_log[4*i+2]-comp_log[4*i+1])/comp_log[4*i+2]));
    }

    f = fopen("_errlog", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_log[4*i+3]) && !isinf(comp_log[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_log[4*i+3], comp_log[4*i], comp_log[4*i+1], comp_log[4*i+2]);
    system("sort -g _errlog > errlog; rm _errlog");
    fclose(f);

    for (i = 0; i < COMPARE_SIZE; i++) {
	comp_exp[4*i] = drand48() * 100 - 50;
	comp_exp[4*i+1] = myexp(comp_exp[4*i]);
	comp_exp[4*i+2] = exp(comp_exp[4*i]);
	comp_exp[4*i+3] = log2(fabs((comp_exp[4*i+2]-comp_exp[4*i+1])/comp_exp[4*i+2]));
    }

    f = fopen("_errexp", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_exp[4*i+3]) && !isinf(comp_exp[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_exp[4*i+3], comp_exp[4*i], comp_exp[4*i+1], comp_exp[4*i+2]);
    system("sort -g _errexp > errexp; rm _errexp");
    fclose(f);

    for (i = 0; i <     COMPARE_SIZE/4; i++) {
	comp_sin[4*i] = drand48() * 10;
	comp_sin[4*i+1] = mysin(comp_sin[4*i]);
	comp_sin[4*i+2] = sin(comp_sin[4*i]);
	comp_sin[4*i+3] = log2(fabs((comp_sin[4*i+2]-comp_sin[4*i+1])/comp_sin[4*i+2]));
    }
    for (     ; i < 2 * COMPARE_SIZE/4; i++) {
	comp_sin[4*i] = M_PI/2 + drand48() / 2;
	comp_sin[4*i+1] = mysin(comp_sin[4*i]);
	comp_sin[4*i+2] = sin(comp_sin[4*i]);
	comp_sin[4*i+3] = log2(fabs((comp_sin[4*i+2]-comp_sin[4*i+1])/comp_sin[4*i+2]));
    }
    for (     ; i < 4 * COMPARE_SIZE/4; i++) {
	comp_sin[4*i] = M_PI/2 + drand48() / 2;
	comp_sin[4*i+1] = mysin(comp_sin[4*i]);
	comp_sin[4*i+2] = sin(comp_sin[4*i]);
	comp_sin[4*i+3] = log2(fabs((comp_sin[4*i+2]-comp_sin[4*i+1])/comp_sin[4*i+2]));
    }
    for (     ; i < 4 * COMPARE_SIZE/4; i++) {
	comp_sin[4*i] = 3*M_PI/2 + drand48() / 2;
	comp_sin[4*i+1] = mysin(comp_sin[4*i]);
	comp_sin[4*i+2] = sin(comp_sin[4*i]);
	comp_sin[4*i+3] = log2(fabs((comp_sin[4*i+2]-comp_sin[4*i+1])/comp_sin[4*i+2]));
    }

    f = fopen("_errsin", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_sin[4*i+3]) && !isinf(comp_sin[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_sin[4*i+3], comp_sin[4*i], comp_sin[4*i+1], comp_sin[4*i+2]);
    system("sort -g _errsin > errsin; rm _errsin");
    fclose(f);

    for (i = 0; i <     COMPARE_SIZE/4; i++) {
	comp_cos[4*i] = drand48() * 10;
	comp_cos[4*i+1] = mycos(comp_cos[4*i]);
	comp_cos[4*i+2] = cos(comp_cos[4*i]);
	comp_cos[4*i+3] = log2(fabs((comp_cos[4*i+2]-comp_cos[4*i+1])/comp_cos[4*i+2]));
    }
    for (     ; i < 2 * COMPARE_SIZE/4; i++) {
	comp_cos[4*i] = M_PI/2 + drand48() / 2;
	comp_cos[4*i+1] = mycos(comp_cos[4*i]);
	comp_cos[4*i+2] = cos(comp_cos[4*i]);
	comp_cos[4*i+3] = log2(fabs((comp_cos[4*i+2]-comp_cos[4*i+1])/comp_cos[4*i+2]));
    }
    for (     ; i < 3 * COMPARE_SIZE/4; i++) {
	comp_cos[4*i] = M_PI/2 + drand48() / 2;
	comp_cos[4*i+1] = mycos(comp_cos[4*i]);
	comp_cos[4*i+2] = cos(comp_cos[4*i]);
	comp_cos[4*i+3] = log2(fabs((comp_cos[4*i+2]-comp_cos[4*i+1])/comp_cos[4*i+2]));
    }
    for (     ; i < 4 * COMPARE_SIZE/4; i++) {
	comp_cos[4*i] = 3*M_PI/2 + drand48() / 2;
	comp_cos[4*i+1] = mycos(comp_cos[4*i]);
	comp_cos[4*i+2] = cos(comp_cos[4*i]);
	comp_cos[4*i+3] = log2(fabs((comp_cos[4*i+2]-comp_cos[4*i+1])/comp_cos[4*i+2]));
    }

    f = fopen("_errcos", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_cos[4*i+3]) && !isinf(comp_cos[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_cos[4*i+3], comp_cos[4*i], comp_cos[4*i+1], comp_cos[4*i+2]);
    system("sort -g _errcos > errcos; rm _errcos");
    fclose(f);

    for (i = 0; i <     COMPARE_SIZE/4; i++) {
	comp_tan[4*i] = drand48() * 10;
	comp_tan[4*i+1] = mytan(comp_tan[4*i]);
	comp_tan[4*i+2] = tan(comp_tan[4*i]);
	comp_tan[4*i+3] = log2(fabs((comp_tan[4*i+2]-comp_tan[4*i+1])/comp_tan[4*i+2]));
    }
    for (     ; i < 2 * COMPARE_SIZE/4; i++) {
	comp_tan[4*i] = M_PI/2 + drand48() / 2;
	comp_tan[4*i+1] = mytan(comp_tan[4*i]);
	comp_tan[4*i+2] = tan(comp_tan[4*i]);
	comp_tan[4*i+3] = log2(fabs((comp_tan[4*i+2]-comp_tan[4*i+1])/comp_tan[4*i+2]));
    }
    for (     ; i < 3 * COMPARE_SIZE/4; i++) {
	comp_tan[4*i] = M_PI/2 + drand48() / 2;
	comp_tan[4*i+1] = mytan(comp_tan[4*i]);
	comp_tan[4*i+2] = tan(comp_tan[4*i]);
	comp_tan[4*i+3] = log2(fabs((comp_tan[4*i+2]-comp_tan[4*i+1])/comp_tan[4*i+2]));
    }
    for (     ; i < 4 * COMPARE_SIZE/4; i++) {
	comp_tan[4*i] = 3*M_PI/2 + drand48() / 2;
	comp_tan[4*i+1] = mytan(comp_tan[4*i]);
	comp_tan[4*i+2] = tan(comp_tan[4*i]);
	comp_tan[4*i+3] = log2(fabs((comp_tan[4*i+2]-comp_tan[4*i+1])/comp_tan[4*i+2]));
    }

    f = fopen("_errtan", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_tan[4*i+3]) && !isinf(comp_tan[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_tan[4*i+3], comp_tan[4*i], comp_tan[4*i+1], comp_tan[4*i+2]);
    system("sort -g _errtan > errtan; rm _errtan");
    fclose(f);

    for (i = 0; i < COMPARE_SIZE; i++) {
	comp_sqrt[4*i] = drand48() * 1000;
	comp_sqrt[4*i+1] = mysqrt(comp_sqrt[4*i]);
	comp_sqrt[4*i+2] = sqrt(comp_sqrt[4*i]);
	comp_sqrt[4*i+3] = log2(fabs((comp_sqrt[4*i+2]-comp_sqrt[4*i+1])/comp_sqrt[4*i+2]));
    }

    f = fopen("_errsqrt", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_sqrt[4*i+3]) && !isinf(comp_sqrt[4*i+3]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\n", comp_sqrt[4*i+3], comp_sqrt[4*i], comp_sqrt[4*i+1], comp_sqrt[4*i+2]);
    system("sort -g _errsqrt > errsqrt; rm _errsqrt");
    fclose(f);

    for (i = 0; i < COMPARE_SIZE; i++) {
	comp_pow[5*i]   = drand48() * 50;
	comp_pow[5*i+1] = drand48() * 100 - 50;
	comp_pow[5*i+2] = mypow(comp_pow[5*i], comp_pow[5*i+1]);
	comp_pow[5*i+3] = pow(comp_pow[5*i], comp_pow[5*i+1]);
	comp_pow[5*i+4] = log2(fabs((comp_pow[5*i+3]-comp_pow[5*i+2])/comp_pow[5*i+3]));
    }

    f = fopen("_errpow", "w");
    for (i = 0; i < COMPARE_SIZE; i++)
	if (!isnan(comp_pow[5*i+4]) && !isinf(comp_pow[5*i+4]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\t%.20g\n", comp_pow[5*i+4], comp_pow[5*i], comp_pow[5*i+1], comp_pow[5*i+2], comp_pow[5*i+3]);
    system("sort -g _errpow > errpow; rm _errpow");
    fclose(f);

    for (i = 0; i < COMPARE_SIZE; i++) {
	comp_atan2[5*i]   = drand48() * 100 - 50;
	comp_atan2[5*i+1] = drand48() * 100 - 50;
	comp_atan2[5*i+2] = myatan2(comp_atan2[5*i], comp_atan2[5*i+1]);
	comp_atan2[5*i+3] = atan2(comp_atan2[5*i], comp_atan2[5*i+1]);
	comp_atan2[5*i+4] = log2(fabs((comp_atan2[5*i+3]-comp_atan2[5*i+2])/comp_atan2[5*i+3]));
    }

    f = fopen("_erratan2", "w");
    for (i = 0; i < COMPARE_SIZE; i++) {
	if (!isnan(comp_atan2[5*i+4]) && !isinf(comp_atan2[5*i+4]))
	    fprintf(f, "%.20g\t%.20g\t%.20g\t%.20g\t%.20g\n", comp_atan2[5*i+4], comp_atan2[5*i], comp_atan2[5*i+1], comp_atan2[5*i+2], comp_atan2[5*i+3]);
    }
    system("sort -g _erratan2 > erratan2; rm _erratan2");
    fclose(f);

    exit(0);
}

_______________________________________________
busybox mailing list
[email protected]
http://busybox.net/cgi-bin/mailman/listinfo/busybox

[RFC] libm replacement for Busybox

Reply via email to