On Friday 11 February 2005 08:26, Daniel Phillips wrote:
> Hi Lourens,
>
> This may replicate some of the analysis work you've done, but perhaps
> there is some fresh perspective in it as well. The attached program
> implements the reciprocal interpolation across the interval [1, 2) and
> measures the maximum positive and negative error for every possible
> input.
I've attached my little program as well. It's rather messy at the moment,
being a prototype "let's try this" kind of thing. I'm only barely getting 17
bits though (if you special-case the two cases where difference = 2).
As you can see I'm using integers everywhere (except for in the statistics
calculation and one "perfect" routine for comparison). I'm a bit worried
about the double precision floating point divide in your inner loop...
Also, why is frac_bits 2? If you have a 16-bit input, and a 10-bit table
index, then surely you have a 6-bit fraction for interpolation? I'm not sure
I quite understand the code.
> As I mentioned earlier, it might be nice to arrange for the error to
> always truncate towards zero, which could be a help in fighting some
> artifacts and overflows. Or the error can be balanced by rounding,
> perhaps giving better numerical stability.
I guess we'll have to try and see. The attached code has a difference of
either 0 or 1 with respect to the correct 18-bit solution, so if you chop off
the final bit you get a perfect 17-bit reciprocal.
Lourens
/*
* float24/float25 reciprocal experimentation program
*
* Given 1K 18-bit words of RAM and an 18-bit multiplier,
* calculate the 16-bit mantissa of 1/x as precisely as
* possible.
*
* Try and get rid of the multiplier if possible
*
* Copyright (c) 2005 Lourens Veen
*
* May be distributed under the terms of the
* GNU General Public License version 2 or above
*/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
/* helper functions */
/* these won't work on big endian machines */
int mantissa16(float x) {
return (((*((int*)&x)) >> 7) & 0xFFFF);
}
int mantissa17(float x) {
return (((*((int*)&x)) >> 6) & 0x1FFFF);
}
int fullmantissa16(float x) {
return ((((*((int*)&x)) >> 7) & 0xFFFF) | 0x10000);
}
int mantissa16rounded(float x) {
int i = mantissa17(x);
++i;
return i >> 1;
}
int exponent(float x) {
return ((((*((int*)&x)) >> 23) & 0xFF) - 127);
}
/* all these functions get a 16-bit mantissa and return a 16-bit mantissa */
/* using the FPU (double precision) and rounding */
unsigned short int recpf(unsigned short int x) {
double xx;
xx = (65536.0 + x) / 131072.0;
long long *y = (long long*)&xx;
xx = 1.0 / xx;
return ((*y & 0xFFFFFFFFFFFFFll) + (1ll << 35)) >> 36;
}
/* integer with correct rounding (17 bits precision) */
unsigned int recpt17(unsigned short int x) {
int rmantissa;
rmantissa = ((1ll << 35) / (x | 0x10000ll));
rmantissa += 1;
rmantissa >>= 1;
return rmantissa;
}
/* integer with correct rounding (18 bits precision) */
unsigned int recpf18(unsigned short int x) {
int rmantissa;
rmantissa = ((1ll << 36) / (x | 0x10000ll));
rmantissa += 1;
rmantissa >>= 1;
return rmantissa;
}
/* integer without correct rounding (15 bits precision) */
unsigned short int recpt1(unsigned short int xx) {
int rmantissa;
rmantissa = ((1ll << 33) / (xx | 0x10000ll));
return rmantissa;
}
/* integer with correct rounding (16 bits precision) */
unsigned short int recpt2(unsigned short int xx) {
int rmantissa;
rmantissa = ((1ll << 34) / (xx | 0x10000ll));
rmantissa += 1;
rmantissa >>= 1;
return rmantissa;
}
/* 10-bit LUT (9 bits precision) */
int table3[1024]; /* only lower 18 bits are used */
/* note the adjust by 0.5 (0x20) so that we can truncate the offset */
void init3() {
int i = 0;
for (i = 0; i < 1024; ++i) {
table3[i] = recpf((i << 6) + 0x20) + 1; // +1 ??
}
}
unsigned short int recpt3(unsigned short int xx) {
int rmantissa;
if (xx == 0)
return 0;
rmantissa = table3[xx >> 6];
return rmantissa;
}
/* 10-bit LUT with linear interpolation (17 bits precision) */
/* note that the values for the first and the last table entry
* are 2.0 and 0.499999, so they won't fit in the 16-bit
* mantissa. I'm using an 18-bit unsigned register instead,
* and hard-code the values here. We could store one of them
* in the table as well, and do away with one of the branches.
* We can't do both however, since we'd need 1025 words then */
int table4[1024]; /* only lower 18 bits are used */
void init4() {
int i = 0;
for (i = 1; i < 1024; ++i) {
table4[i] = recpf18(i << 6);
}
}
unsigned int recpt4(unsigned short int xx) {
unsigned int rmantissa1, rmantissa2, rmantissa;
if ((xx & ~0x3F) == 0) /* first entry, denormalised */
rmantissa1 = 0x80000;
else
rmantissa1 = table4[xx >> 6] | 0x40000;
if (xx > (1023 << 6)) { /* last entry, denormalised */
rmantissa2 = 0x3FFFF;
}
else
rmantissa2 = table4[(xx >> 6) + 1] | 0x40000;
//printf("rma1: %u, rma2: %u, xx & 0x3F: %u\n", rmantissa1, rmantissa2, xx & 0x3F);
rmantissa = rmantissa1 - (((rmantissa1 - rmantissa2) * (xx & 0x3F)) >> 6);
// rmantissa = (rmantissa + 2) >> 2;
return rmantissa;
}
/* 10-bit 14/4 value/difference LUT (12 bits precision) */
/* this has been tweaked... */
int table5[1024]; /* only lower 18 bits are used */
void init5() {
int i = 0;
int v = 0, v2 = 0;
int d = 0, d2 = 0;
int adjust = 0;
table5[0] = (16352 << 4) | 15;
for (i = 1; i < 1024; ++i) {
v = recpf((i << 6) + 0x40);
d = recpf(i << 6) - v;
//printf("%d, %d\n", ((recpf(i << 6) - recpf((i << 6) + 0x40))/2) + recpf((i << 6) + 0x40), recpf((i << 6) + 0x20));
v -= (((recpf(i << 6) - recpf((i << 6) + 0x40))/2) + recpf((i << 6) + 0x40) - recpf((i << 6) + 0x20)) * 2 / 3;
/*
if (d > 123) {
d = 123;
v += 2;
}
*/
switch (d & 0x7) {
case 0:
break;
case 1:
v += 0;
break;
case 2:
v += 1;
break;
case 3:
v += 2;
break;
case 4:
v -= 2;
break;
case 5:
v -= 2;
break;
case 6:
v -= 1;
break;
case 7:
break;
}
v2 = (v + 2) >> 2;
d2 = ((d + 4) >> 3) - 1;
table5[i] = (v2 << 4) | d2;
// printf("i = %d, v = %d, d = %d, v2 = %d, d2 = %d, t = %d\n", i, v, d, v2, d2, table5[i]);
}
}
unsigned short int recpt5(unsigned short int xx) {
int rmantissa1, difference, rmantissa, invfrac;
int t;
if (xx == 0) return 0;
t = table5[xx >> 6];
rmantissa1 = ((t >> 2) & ~0x3);
difference = (t & 0xF) + 1;
invfrac = (((xx & 0x3F) ^ 0x3F) + 1);
rmantissa = rmantissa1 + ((difference * invfrac) >> 3);
// printf("xx = %d, t = %d, rma1 = %d, d = %d, rma = %d\n", xx, t, rmantissa1, difference, rmantissa);
return rmantissa;
}
/* initialise tables */
void init() {
init3();
init4();
init5();
return;
}
/* main */
int main() {
int difference;
long long sumdifference = 0, sumsqdifference = 0, sumabsdifference = 0;
int equalbits;
int maxdifference = 0, mindifference = 65536;
unsigned int i;
/* initialise */
init();
/* run */
for (i = 0; i < 65536; ++i) {
difference = (int)recpt4(i) - (int)recpf18(i);
printf("i = %d, recpf18(i) = %d, recpt4(i) = %d, difference = %d\n", i, recpf18(i), recpt4(i), difference);
if (difference > maxdifference)
maxdifference = difference;
if (difference < mindifference)
mindifference = difference;
sumsqdifference += (difference * difference);
sumdifference += difference;
sumabsdifference += abs(difference);
// if ((i & 0x3F) == 0x0) printf("%d: %d", i >> 6, difference);
// if ((i & 0x3F) == 0x3F) printf(", %d\n", difference);
// if ((i & 0x3F) == 0x3F) printf("\n");
}
printf("maxdifference = %d\n", maxdifference);
printf("mindifference = %d\n", mindifference);
printf("avgdifference = %f\n", sumdifference / 65536.0f);
printf("avgabsdifference = %f\n", sumabsdifference / 65536.0f);
printf("rmsdifference = %f\n", sqrt(sumsqdifference / 65536.0f));
/* display statistics */
difference = abs(maxdifference - mindifference);
equalbits = 18;
while (difference) {
equalbits--;
difference >>= 1;
}
printf("precision1 = %d bits\n", equalbits);
printf("precision2 = %f bits\n", log(262144.0f / (maxdifference - mindifference + 1)) / log(2));
return 0;
}
_______________________________________________
Open-graphics mailing list
[email protected]
http://lists.duskglow.com/mailman/listinfo/open-graphics
List service provided by Duskglow Consulting, LLC (www.duskglow.com)