On Monday 14 February 2005 05:55, Daniel Phillips wrote:
> Hi all,
>
> This puts together the reciprocal table interpolation with ieee
> pack/unpack functions to create an ieee-compatible reciprocal
> approximation suitable for use in the simulation.  The input, output
> and table widths are generalized.  The ieee format is hard-wired to
> float32 at the moment.  Denormals, nans and overflow aren't handled.
>
> The accuracy estimation code isn't very good in that it supplies test
> points that can't actually be represented in the target precision,
> which makes accuracy seem a little bit worse than it actually is.
>
> Accuracy is about +-2**-17, using straight interpolation.  Later, I'll
> try incorporating some fancy rounding to improve accuracy by an
> additional bit.

Be sure to try every possible input for the mantissa. It gets interesting...

Here's my latest attempt. It gives an 18-bit mantissa, with maximum errors -0 
and +1. That is +2**-18 in your notation I think. Rounding by adding 0x3E and 
truncating is just lucky. If you add 0x3D or 0x3F the maximum error becomes 
one larger (or twice as big, depending on how you look at it).

I've also written it in a sort of hardware style, which makes it easier to 
find out how much hardware we need. Here's the list assuming 1 + 8 + 16 
float25 (probably in more detail than is realistic for an FPGA, but what the 
heck):

* For the mantissa
2 1024x18 tables
8 XOR gates
1 NOT gate
1 AND gate
3 2x11 MUXes
1 9 + 11 -> 12 adder
1 2 + 11 -> 12 adder
1 14 + 16 -> 11 adder (drops the 6 least significant bits)
1 18 + 11 -> 18 adder (drops the most significant bit, and the least 
significant bit as well if we truncate to 17 bits)

* For the exponent (not in the attached code)
8 NOT gates
Something that subtracts 2 from (or adds 254 to) an 8-bit number

I think it would be 3 stages, plus 4 times whatever you need for an adder. 
These adders are a bit simpler than the full adders Timothy posted a little 
while ago, so maybe they'll fit into 3 stages, making a total of 15 stages. 
But we don't care about latency anyway.

Cheers,

Lourens
/*
 * 1024x18x2 table interpolation reciprocal calculation device
 *
 * Copyright (c) 2005 Lourens Veen
 * May be freely distributed under the terms of the GNU GPL
 * version 2.0 or above.
 */

#include <iostream>

// reference reciprocal
// in: 16 bits
// out: 18 bits
unsigned int recpr18(unsigned int in) {
	unsigned int value = ((1ll << 35) / (0x10000ll + in));
	if (((1ll << 35) % (0x10000ll + in)) > ((0x10000ll + in) >> 1)) value++;
	return value & 0x3FFFF;
}

// reference reciprocal
// in: 16 bits
// out: 17 bits
unsigned int recpr17(unsigned int in) {
	unsigned int value = ((1ll << 34) / (0x10000ll + in));
	if (((1ll << 34) % (0x10000ll + in)) > ((0x10000ll + in) >> 1)) value++;
	return value & 0x1FFFF;
}

unsigned int block0[1024], block1[1024];

void inittables() {
	for (unsigned int i = 0; i < 1024; ++i) {
		unsigned int value = recpr18((i << 6) + 0x3F);
		unsigned int difference = (recpr18((i << 6) - 1) - recpr18((i << 6) + 0x3F));
		unsigned int difference1 = difference - 128;
		unsigned int difference3 = difference * 3;
		block0[i] = value & 0x3FFFF;
		block1[i] = ((difference1 & 0x1FF) << 9) | ((difference3 >> 2) & 0x1FF);
	}
}

unsigned int b(unsigned int value, unsigned int i, unsigned int j) {
	return (value & ((1 << (i + 1)) - 1)) >> j;
}

// interpolated reciprocal
// in: 16 bits
// out: 18 bits (17 bits effective)
unsigned int recp(unsigned int in) {
	// 1.1) value[17:0] = block0(in[9:0])					1 RAM read
	unsigned int value = block0[b(in, 15, 6)];
	// 1.2) differences[17:0] = block1(in[9:0])				1 RAM read
	unsigned int differences = block1[b(in, 15, 6)];

	// 2.1) difference1[9:0] = differences[17:9] + 128			1 NOT, 1 XOR, 1 AND
	unsigned int difference1 = b(differences, 17, 9) + 128;
	// 2.2) difference3[10:0] = differences[8:0] differences[10:10]^differences[9:9] differences[9:9]
	// 									1 XOR
	unsigned int difference3 = (b(differences, 8, 0) << 2) |
				   ((b(differences, 10, 10) ^ b(differences, 9, 9)) << 1) |
				   b(differences, 9, 9);

	if (difference1 * 3 != difference3) {
		std::cerr << "Aargh! " << difference1 << " * 3 != " << difference3 << " ds = " << differences << std::endl;
	}
	// 2.3) fraction[5:0] = 63 - in[5:0]					6 XOR
	unsigned int fraction = b(in, 5, 0)^0x3F;

	// 3.1) m1[10:0] = fraction[1:0] * difference				1 2x11 MUX
	unsigned int m1;
	switch (b(fraction, 1, 0)) {
		case 0: m1 = 0; break;
		case 1: m1 = difference1; break;
		case 2: m1 = difference1 << 1; break;
		case 3: m1 = difference3; break;			
	}
	// 3.2) m2[10:0] = fraction[3:2] * difference				1 2x11 MUX
	unsigned int m2;
	switch (b(fraction, 3, 2)) {
		case 0: m2 = 0; break;
		case 1: m2 = difference1; break;
		case 2: m2 = difference1 << 1; break;
		case 3: m2 = difference3; break;			
	}
	// 3.3) m3[10:0] = fraction[5:4] * difference				1 2x11 MUX
	unsigned int m3;
	switch (b(fraction, 5, 4)) {
		case 0: m3 = 0; break;
		case 1: m3 = difference1; break;
		case 2: m3 = difference1 << 1; break;
		case 3: m3 = difference3; break;			
	}

	// 4) m[17:0] = value + ((m1 + (m2 00) + (m3 0000)) >> 6)
	// 4.1) ma[13:0] = m1 + (m2 00)						1 9 + 11 -> 12 adder
	unsigned int ma = (b(m1, 10, 2) + m2) << 2 | b(m1, 1, 0);
	// 4.2) mb[15:0] = (m3 << 4) + 0x3E					1 2 + 11 -> 12 adder
	unsigned int mb = ((m3 + 3) << 4) | 0xE;

	// 5) mc[10:0] = (ma + mb) >> 6						1 14 + 16 -> 11 (drop LSB's) adder
	unsigned int mc = b(ma + mb, 16, 6);

	// 6) m[17:0] = value + mc						1 18 + 11 -> 18 (drop MSB) adder
	unsigned int m = value + mc;

	return b(m, 17, 0);
}

int main() {
	inittables();
	std::cout.precision(12);
	std::cerr.precision(12);
	
	int poserr = 0, negerr = 0;
	double fposerr = 0.0, fnegerr = 0.0;
	
	for (int i = 0; i < 65536; ++i) {
		unsigned int ref = recpr18(i);
		unsigned int apx = recp(i);
		int dif = apx - ref;
		if (dif > poserr) poserr = dif;
		if (dif < negerr) negerr = dif;
		double fapx = (double)block0[i >> 6] + ((double)((block1[i >> 6] >> 9) + 128.0) * (double)(63 - (i & 0x3F)) / 64);
		double real = (((double)(1ll << 35) / (65536.0 + i)) - 262144.0);
		double rdif = fapx - real;
		if (rdif > fposerr) fposerr = rdif;
		if (rdif < fnegerr) fnegerr = rdif;
	}
	std::cout << "maximum positive error: " << poserr << std::endl;
	std::cout << "maximum negative error: " << negerr << std::endl;
	std::cout << "maximum positive error real: " << fposerr << std::endl;
	std::cout << "maximum negative error real: " << fnegerr << std::endl;
	return 0;
}

_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)

Reply via email to