Re: [Open-graphics] Ieee-compatible reciprocal

Lourens Veen Sat, 19 Feb 2005 05:40:27 -0800

On Tuesday 15 February 2005 17:17, Lourens Veen wrote:
> On Monday 14 February 2005 05:55, Daniel Phillips wrote:
> > Hi all,
> >
> > This puts together the reciprocal table interpolation with ieee
> > pack/unpack functions to create an ieee-compatible reciprocal
> > approximation suitable for use in the simulation.  The input, output
> > and table widths are generalized.  The ieee format is hard-wired to
> > float32 at the moment.  Denormals, nans and overflow aren't handled.
> >
> > The accuracy estimation code isn't very good in that it supplies test
> > points that can't actually be represented in the target precision,
> > which makes accuracy seem a little bit worse than it actually is.
> >
> > Accuracy is about +-2**-17, using straight interpolation.  Later, I'll
> > try incorporating some fancy rounding to improve accuracy by an
> > additional bit.
>
> Be sure to try every possible input for the mantissa. It gets
> interesting...
>
> Here's my latest attempt. It gives an 18-bit mantissa, with maximum errors
> -0 and +1. That is +2**-18 in your notation I think. Rounding by adding
> 0x3E and truncating is just lucky. If you add 0x3D or 0x3F the maximum
> error becomes one larger (or twice as big, depending on how you look at
> it).


Well, attached is what I'm going to dub version 1.0 of Lourens' cheap 18/17 
bit reciprocal mantissa algorithm :-). I worked around a wrapping problem 
that doesn't exist in the previous version (duh, algebra isn't my strongest 
point), which caused most powers of two to be off by one. As Daniel 
explained, this is a bad thing, so I changed things around. Hardware-wise, 
one adder changed to a subtractor (no extra logic), an additional (simple) 
adder disappeared, and 8 XORs dropped out of the equation. Accuracy is still 
the same, but powers of two now always give correct results.

I may be able to tweak the table a little bit further to improve the RMS 
error, but for now I'll settle for this. It seems that the interpolation 
stuff is currently a more important problem, so let's see if we can fix that 
first.

Lourens

/*
 * 1024x18x2 table interpolation reciprocal calculation device
 *
 * Version 1.0
 *
 * Copyright (c) 2005 Lourens Veen
 * May be freely distributed under the terms of the GNU GPL
 * version 2.0 or above.
 */

#include <iostream>

// reference reciprocal
// in: 16 bits
// out: 18 bits
unsigned int recpr18(unsigned int in) {
	unsigned int value = ((1ll << 35) / (0x10000ll + in));
	if (((1ll << 35) % (0x10000ll + in)) > ((0x10000ll + in) >> 1)) value++;
	return value & 0x3FFFF;
}

// reference reciprocal
// in: 16 bits
// out: 17 bits
unsigned int recpr17(unsigned int in) {
	unsigned int value = ((1ll << 34) / (0x10000ll + in));
	if (((1ll << 34) % (0x10000ll + in)) > ((0x10000ll + in) >> 1)) value++;
	return value & 0x1FFFF;
}

unsigned int block0[1024], block1[1024];

// initialises the tables
// note the special cases for i = 320 and i = 84
// maybe this trick could be expanded to more of the numbers to improve RMS error a bit
void inittables() {
	for (unsigned int j = 0; j < 1024; ++j) {
		unsigned int i = j;
		unsigned int value = recpr18(i << 6);
		unsigned int difference;
		if (i == 0)
			difference = 512;
		else
			difference = (recpr18(i << 6) - recpr18((i << 6) + 0x40));

		if ((i == 320) || (i == 84))
			difference++;
		unsigned int difference1 = difference - 128;
		unsigned int difference3 = difference * 3;
		block0[j] = value & 0x3FFFF;
		block1[j] = ((difference1 & 0x1FF) << 9) | ((difference3 >> 2) & 0x1FF);
	}
}

// to ease notation, returns bits i-j inclusive from the input value (i > j)
unsigned int b(unsigned int value, unsigned int i, unsigned int j) {
	return (value & ((1 << (i + 1)) - 1)) >> j;
}

// guards against register overflows, sets all bits larger than i to 0
unsigned int chop(unsigned int value, unsigned int i) {
	return value & ((1 << (i + 1)) - 1);
}

// interpolated reciprocal
// in: 16 bits
// out: 18 bits (17 bits effective)
unsigned int recp(unsigned int in) {
	// 1) Get data from RAM
	// 1.1) value[17:0] = block0(in[9:0])					1 RAM read
	unsigned int value = block0[b(in, 15, 6)];
	chop(value, 17);
	
	// 1.2) differences[17:0] = block1(in[9:0])				1 RAM read
	unsigned int differences = block1[b(in, 15, 6)];
	chop(differences, 17);
	
	// 2) Process data from RAM
	// 2.1) difference1[9:0] = differences[17:9] + 128			1 NOT, 1 XOR, 1 AND
	unsigned int difference1 = (b(differences, 16, 16) & b(differences, 17, 17)) << 9 |
				   (b(differences, 16, 16) ^ b(differences, 17, 17)) << 8 |
				   (!b(differences, 16, 16) << 7) |
				   b(differences, 15, 9);
	chop(difference1, 9);

	// 2.2) difference3[10:0] = differences[8:0] differences[10:10]^differences[9:9] differences[9:9]
	// 									1 XOR
	unsigned int difference3 = (b(differences, 8, 0) << 2) |
				   ((b(differences, 10, 10) ^ b(differences, 9, 9)) << 1) |
				   b(differences, 9, 9);
	chop(difference3, 10);
	
	// 3-5) Multiply the difference by the fraction
	// 3.1) m1[10:0] = in[1:0] * difference					1 2x11 MUX
	unsigned int m1;
	switch (b(in, 1, 0)) {
		case 0: m1 = 0; break;
		case 1: m1 = difference1; break;
		case 2: m1 = difference1 << 1; break;
		case 3: m1 = difference3; break;			
	}
	chop(m1, 10);
	
	// 3.2) m2[10:0] = in[3:2] * difference					1 2x11 MUX
	unsigned int m2;
	switch (b(in, 3, 2)) {
		case 0: m2 = 0; break;
		case 1: m2 = difference1; break;
		case 2: m2 = difference1 << 1; break;
		case 3: m2 = difference3; break;			
	}
	chop(m2, 10);
	
	// 3.3) m3[10:0] = in[5:4] * difference					1 2x11 MUX
	unsigned int m3;
	switch (b(in, 5, 4)) {
		case 0: m3 = 0; break;
		case 1: m3 = difference1; break;
		case 2: m3 = difference1 << 1; break;
		case 3: m3 = difference3; break;			
	}
	chop(m3, 10);
	
	// 4) ma[13:0] = m1 + (m2 00)						1 9 + 11 -> 12 adder
	unsigned int ma = (b(m1, 10, 2) + m2) << 2 | b(m1, 1, 0);
	chop(ma, 13);
	
	// 5) mb[9:0] = (ma + m3 0000) >> 6					1 10 + 11 -> 10 (drop LSB's) adder
	unsigned int mb = (b(ma, 13, 4) + m3) >> 2;
	chop(mb, 9);
		
	// 6) Subtract fraction from the base value
	// 6) m[17:0] = value - mb						1 18 - 10 -> 18 subtractor
	unsigned int m = value - mb;
	chop(m, 17);
	
	return b(m, 17, 0);
}

int main() {
	inittables();
	std::cout.precision(12);
	std::cerr.precision(12);
	
	int poserr = 0, negerr = 0;
	double fposerr = 0.0, fnegerr = 0.0;
	int sumerr = 0, sumsqerr = 0;
// swap this to test powers of two
#if 1	
	for (int j = 0; j < 65536; ++j) {
		int i = j;
#else
	for (int j = 0; j < 16; ++j) {
		int i = 1 << j;
#endif
		unsigned int ref = recpr18(i);
		unsigned int apx = recp(i);
		int dif = apx - ref;
		if (dif > poserr) poserr = dif;
		if (dif < negerr) negerr = dif;
		double fapx = (double)block0[i >> 6] - ((double)((block1[i >> 6] >> 9) + 128.0) * (double)(i & 0x3F)) / 64.0;
		double real = (((double)(1ll << 35) / (65536.0 + i)) - 262144.0);
		double rdif = fapx - real;
		// wrap
		if (rdif <= -262144.0) rdif += 262144.0;
		if (rdif > fposerr) fposerr = rdif;
		if (rdif < fnegerr) fnegerr = rdif;
		sumerr += dif;
		sumsqerr += dif * dif;

//		std::cerr << i << ": ref = " << ref << ", apx = " << apx << ", dif = " << dif << ", rdif = " << rdif << std::endl;
	}
	std::cout << "maximum positive error: " << poserr << std::endl;
	std::cout << "maximum negative error: " << negerr << std::endl;
	std::cout << "maximum positive error real: " << fposerr << std::endl;
	std::cout << "maximum negative error real: " << fnegerr << std::endl;
	std::cout << "average error: " << (double)sumerr / 65536.0 << std::endl;
	std::cout << "rms error: " << sqrt((double)sumsqerr / 65536.0) << std::endl;
	return 0;
}

_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Re: [Open-graphics] Ieee-compatible reciprocal

Reply via email to