[Open-graphics] Re: Measuring the reciprocal error

Lourens Veen Fri, 11 Feb 2005 09:21:17 -0800

On Friday 11 February 2005 08:26, Daniel Phillips wrote:
> Hi Lourens,
>
> This may replicate some of the analysis work you've done, but perhaps
> there is some fresh perspective in it as well.  The attached program
> implements the reciprocal interpolation across the interval [1, 2) and
> measures the maximum positive and negative error for every possible
> input.


I've attached my little program as well. It's rather messy at the moment, 
being a prototype "let's try this" kind of thing. I'm only barely getting 17 
bits though (if you special-case the two cases where difference = 2).

As you can see I'm using integers everywhere (except for in the statistics 
calculation and one "perfect" routine for comparison). I'm a bit worried 
about the double precision floating point divide in your inner loop...

Also, why is frac_bits 2? If you have a 16-bit input, and a 10-bit table 
index, then surely you have a 6-bit fraction for interpolation? I'm not sure 
I quite understand the code.

> As I mentioned earlier, it might be nice to arrange for the error to
> always truncate towards zero, which could be a help in fighting some
> artifacts and overflows.  Or the error can be balanced by rounding,
> perhaps giving better numerical stability.

I guess we'll have to try and see. The attached code has a difference of 
either 0 or 1 with respect to the correct 18-bit solution, so if you chop off 
the final bit you get a perfect 17-bit reciprocal.

Lourens

/*
 * float24/float25 reciprocal experimentation program
 *
 * Given 1K 18-bit words of RAM and an 18-bit multiplier,
 * calculate the 16-bit mantissa of 1/x as precisely as
 * possible.
 * 
 * Try and get rid of the multiplier if possible
 *
 * Copyright (c) 2005 Lourens Veen
 *
 * May be distributed under the terms of the
 * GNU General Public License version 2 or above
 */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

/* helper functions */
/* these won't work on big endian machines */
int mantissa16(float x) {
	return (((*((int*)&x)) >> 7) & 0xFFFF);
}

int mantissa17(float x) {
	return (((*((int*)&x)) >> 6) & 0x1FFFF);
}

int fullmantissa16(float x) {
	return ((((*((int*)&x)) >> 7) & 0xFFFF) | 0x10000);
}

int mantissa16rounded(float x) {
	int i = mantissa17(x);
	++i;
	return i >> 1;
}

int exponent(float x) {
	return ((((*((int*)&x)) >> 23) & 0xFF) - 127);
}

/* all these functions get a 16-bit mantissa and return a 16-bit mantissa */

/* using the FPU (double precision) and rounding */
unsigned short int recpf(unsigned short int x) {
	double xx;
	xx = (65536.0 + x) / 131072.0;
	long long *y = (long long*)&xx;
	xx = 1.0 / xx;
	return ((*y & 0xFFFFFFFFFFFFFll) + (1ll << 35)) >> 36;
}

/* integer with correct rounding (17 bits precision) */
unsigned int recpt17(unsigned short int x) {
	int rmantissa;
	
	rmantissa = ((1ll << 35) / (x | 0x10000ll));
	rmantissa += 1;
	rmantissa >>= 1;
	return rmantissa;
}

/* integer with correct rounding (18 bits precision) */
unsigned int recpf18(unsigned short int x) {
	int rmantissa;
	
	rmantissa = ((1ll << 36) / (x | 0x10000ll));
	rmantissa += 1;
	rmantissa >>= 1;
	return rmantissa;
}

/* integer without correct rounding (15 bits precision) */
unsigned short int recpt1(unsigned short int xx) {
	int rmantissa;
	
	rmantissa = ((1ll << 33) / (xx | 0x10000ll));
	return rmantissa;
}

/* integer with correct rounding (16 bits precision) */
unsigned short int recpt2(unsigned short int xx) {
	int rmantissa;
	
	rmantissa = ((1ll << 34) / (xx | 0x10000ll));
	rmantissa += 1;
	rmantissa >>= 1;
	return rmantissa;
}

/* 10-bit LUT (9 bits precision) */
int table3[1024];	/* only lower 18 bits are used */

/* note the adjust by 0.5 (0x20) so that we can truncate the offset */
void init3() {
	int i = 0;
	for (i = 0; i < 1024; ++i) {
		table3[i] = recpf((i << 6) + 0x20) + 1;		// +1 ??
	}
}

unsigned short int recpt3(unsigned short int xx) {
	int rmantissa;
	if (xx == 0)
		return 0;
	rmantissa = table3[xx >> 6];
	return rmantissa;
}

/* 10-bit LUT with linear interpolation (17 bits precision) */
/* note that the values for the first and the last table entry
 * are 2.0 and 0.499999, so they won't fit in the 16-bit
 * mantissa. I'm using an 18-bit unsigned register instead,
 * and hard-code the values here. We could store one of them
 * in the table as well, and do away with one of the branches.
 * We can't do both however, since we'd need 1025 words then */
int table4[1024];	/* only lower 18 bits are used */

void init4() {
	int i = 0;
	
	for (i = 1; i < 1024; ++i) {
		table4[i] = recpf18(i << 6);
	}
}

unsigned int recpt4(unsigned short int xx) {
	unsigned int rmantissa1, rmantissa2, rmantissa;
	if ((xx & ~0x3F) == 0)	/* first entry, denormalised */
		rmantissa1 = 0x80000;
	else
		rmantissa1 = table4[xx >> 6] | 0x40000;
	if (xx > (1023 << 6)) {	/* last entry, denormalised */
		rmantissa2 = 0x3FFFF;
	}
	else
		rmantissa2 = table4[(xx >> 6) + 1] | 0x40000;
//printf("rma1: %u, rma2: %u, xx & 0x3F: %u\n", rmantissa1, rmantissa2, xx & 0x3F);
	rmantissa = rmantissa1 - (((rmantissa1 - rmantissa2) * (xx & 0x3F)) >> 6);
	
//	rmantissa = (rmantissa + 2) >> 2;
	
	return rmantissa;
}

/* 10-bit 14/4 value/difference LUT (12 bits precision) */
/* this has been tweaked... */
int table5[1024];	/* only lower 18 bits are used */

void init5() {
	int i = 0;
	int v = 0, v2 = 0;
	int d = 0, d2 = 0;
	int adjust = 0;

	table5[0] = (16352 << 4) | 15;
	for (i = 1; i < 1024; ++i) {
		v = recpf((i << 6) + 0x40);
		d = recpf(i << 6) - v;
//printf("%d, %d\n", ((recpf(i << 6) - recpf((i << 6) + 0x40))/2) + recpf((i << 6) + 0x40), recpf((i << 6) + 0x20));
		v -= (((recpf(i << 6) - recpf((i << 6) + 0x40))/2) + recpf((i << 6) + 0x40) - recpf((i << 6) + 0x20)) * 2 / 3;
/*
		if (d > 123) {
			d = 123;
			v += 2;
		}
*/
		switch (d & 0x7) {
			case 0:
				break;
			case 1:
				v += 0;	
				break;
			case 2:
				v += 1;
				break;
			case 3:
				v += 2;
				break;
			case 4:
				v -= 2;
				break;
			case 5:
				v -= 2;
				break;
			case 6:
				v -= 1;
				break;
			case 7:
				break;
		}
		
		v2 = (v + 2) >> 2;
		d2 = ((d + 4) >> 3) - 1;

		table5[i] = (v2 << 4) | d2;
//		printf("i = %d, v = %d, d = %d, v2 = %d, d2 = %d, t = %d\n", i, v, d, v2, d2, table5[i]);
	}
}

unsigned short int recpt5(unsigned short int xx) {
	int rmantissa1, difference, rmantissa, invfrac;
	int t;

	if (xx == 0) return 0;

	t = table5[xx >> 6];
	rmantissa1 = ((t >> 2) & ~0x3);
	difference = (t & 0xF) + 1;

	invfrac = (((xx & 0x3F) ^ 0x3F) + 1);
	rmantissa = rmantissa1 + ((difference * invfrac) >> 3);

//	printf("xx = %d, t = %d, rma1 = %d, d = %d, rma = %d\n", xx, t, rmantissa1, difference, rmantissa);
	return rmantissa;
}

/* initialise tables */
void init() {
	init3();
	init4();
	init5();
	return;
}

/* main */
int main() {
	int difference;
	long long sumdifference = 0, sumsqdifference = 0, sumabsdifference = 0;
	int equalbits;
	int maxdifference = 0, mindifference = 65536;
	
	unsigned int i;

	/* initialise */
	init();

	/* run */
	for (i = 0; i < 65536; ++i) {
		difference = (int)recpt4(i) - (int)recpf18(i);
		printf("i = %d, recpf18(i) = %d, recpt4(i) = %d, difference = %d\n", i, recpf18(i), recpt4(i), difference);
		if (difference > maxdifference)
			maxdifference = difference;
		if (difference < mindifference)
			mindifference = difference;
		sumsqdifference += (difference * difference);
		sumdifference += difference;
		sumabsdifference += abs(difference);
//		if ((i & 0x3F) == 0x0) printf("%d: %d", i >> 6, difference);
//		if ((i & 0x3F) == 0x3F) printf(", %d\n", difference);
//		if ((i & 0x3F) == 0x3F) printf("\n");
	}
	printf("maxdifference = %d\n", maxdifference);
	printf("mindifference = %d\n", mindifference);
	printf("avgdifference = %f\n", sumdifference / 65536.0f);
	printf("avgabsdifference = %f\n", sumabsdifference / 65536.0f);
	printf("rmsdifference = %f\n", sqrt(sumsqdifference / 65536.0f));
	
	/* display statistics */
	difference = abs(maxdifference - mindifference);
	equalbits = 18;
	while (difference) {
		equalbits--;
		difference >>= 1;
	}

	printf("precision1 = %d bits\n", equalbits);
	printf("precision2 = %f bits\n", log(262144.0f / (maxdifference - mindifference + 1)) / log(2));

	
	return 0;
}

_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

[Open-graphics] Re: Measuring the reciprocal error

Reply via email to