On Mon, Jul 20, 2009 at 8:37 PM, Tom Lane<[email protected]> wrote:
> Anyone want to see if they can beat that? Some testing on other
> architectures would help too.
Hm, I took the three implementations so far (normal, unrolled, and
clz) as well as the two from
http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogObvious
and got some very strange results:
normal: 1.494s
clz: 2.214s
unrolled: 2.966s
lookup table: 0.001s
float hack: 11.930s
I can't see why the unrolled implementation is slower than the
non-unrolled so I'm suspecting something's wrong with my #ifdefs but I
don't see it.
I do think the code I grabbed from the stanford page might be
off-by-one for our purposes but I haven't looked closely at that.
I also wonder if this microbenchmark is actually ok because it's
testing the same value over and over so any branch-prediction will
shine unrealistically well.
--
greg
http://mit.edu/~gsstark/resume.pdf
/*
* usage: time ./testit N
* N is a repeat count, set it large enough to get repeatable timings
*/
#include <stdio.h>
#include <stdlib.h>
#define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS 11
/* number of calls to AllocSetFreeIndex with each result category */
static const int numoccurs[ALLOCSET_NUM_FREELISTS] = {
#ifdef USE_64BIT_COUNTS
2139534,
5994692,
5711479,
3289034,
4550931,
2573389,
487566,
588470,
155148,
52750,
202597
#else
5190113,
5663980,
3573261,
4476398,
4246178,
1100634,
386501,
601654,
44884,
52372,
202801
#endif
};
extern int AllocSetFreeIndex(size_t size);
int
main(int argc, char **argv)
{
int repeat = atoi(argv[1]);
while (repeat-- > 0)
{
int k;
for (k = 0; k < ALLOCSET_NUM_FREELISTS; k++)
{
int n = numoccurs[k];
size_t siz = (1 << (ALLOC_MINBITS + k));
while (n-- > 0)
{
AllocSetFreeIndex(siz);
}
}
}
return 0;
}
#include <stdio.h>
#include <stdlib.h>
#define ALLOC_MINBITS 3 /* smallest chunk size is 8 bytes */
#define ALLOCSET_NUM_FREELISTS 11
#define BITS_PER_BYTE 8
/* ----------
* AllocSetFreeIndex -
*
* Depending on the size of an allocation compute which freechunk
* list of the alloc set it belongs to. Caller must have verified
* that size <= ALLOC_CHUNK_LIMIT.
* ----------
*/
int
AllocSetFreeIndex(size_t size)
{
int idx = 0;
if (size > (1 << ALLOC_MINBITS))
{
size = (size - 1) >> ALLOC_MINBITS;
#if HAVE_BUILTIN_CLZ
idx = sizeof(unsigned int) * BITS_PER_BYTE -
__builtin_clz((unsigned int)size);
#elif TEST_FLOAT_HACK
union { unsigned int u[2]; double d; } t; // temp
t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = size;
t.d -= 4503599627370496.0;
idx = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FF;
#elif TEST_LOOKUP_TABLE
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
static const char LogTable256[256] =
{
-1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};
unsigned int t, tt; // temporaries
if (tt = size >> 16)
{
idx = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else
{
idx = (t = size >> 8) ? 8 + LogTable256[t] : LogTable256[size];
}
#elif TEST_UNROLL
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
if (size != 0)
{
idx++;
size >>= 1;
while (size != 0)
{
idx++;
size >>= 1;
}
}
}
}
#else
do
{
idx++;
size >>= 1;
}
while (size != 0);
#endif
}
return idx;
}
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers