I wrote:
> [ we should use an index array ]

Just to prove the point, I threw together the attached trivial test case,
which time-trials the existing fmgr_isbuiltin implementation against both
the proposed hash implementation and a simple index array.  On my machine,
with a repeat count of 10000, I get

NOTICE:  bsearch runtime 4234.087 ms
NOTICE:  hash runtime 2542.636 ms
NOTICE:  index runtime 165.184 ms

(These numbers are repeatable within 1% or so.)

It could be argued that trialling OIDs sequentially gives a bit of an
unfair advantage to the bsearch and index methods over the hash method,
because the former are going to suffer fewer cache misses that way.
But I don't see a randomized lookup order changing the conclusion much.

                        regards, tom lane

#include "postgres.h"

#include "fmgr.h"
#include "access/transam.h"
#include "portability/instr_time.h"
#include "utils/fmgrtab.h"
#include "utils/hashutils.h"

PG_MODULE_MAGIC;


static const FmgrBuiltin *
fmgr_isbuiltin_bsearch(Oid id)
{
	int			low = 0;
	int			high = fmgr_nbuiltins - 1;

	/*
	 * Loop invariant: low is the first index that could contain target entry,
	 * and high is the last index that could contain it.
	 */
	while (low <= high)
	{
		int			i = (high + low) / 2;
		const FmgrBuiltin *ptr = &fmgr_builtins[i];

		if (id == ptr->foid)
			return ptr;
		else if (id > ptr->foid)
			low = i + 1;
		else
			high = i - 1;
	}
	return NULL;
}


/*
 * Hashtable for fast lookup builtin functions.
 */
typedef struct BuiltinOidLookupEntry
{
	Oid			foid;
	int			status;
	const FmgrBuiltin *builtin;
} BuiltinOidLookupEntry;

/* define hashtable mapping block numbers to PagetableEntry's */
#define SH_PREFIX oid2builtins
#define SH_ELEMENT_TYPE BuiltinOidLookupEntry
#define SH_KEY_TYPE Oid
#define SH_KEY foid
#define SH_HASH_KEY(tb, key) murmurhash32(key)
#define SH_EQUAL(tb, a, b) a == b
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"

static oid2builtins_hash * oid2builtins = 0;

static const FmgrBuiltin *
fmgr_isbuiltin_hash(Oid id)
{
	BuiltinOidLookupEntry *entry;

	entry = oid2builtins_lookup(oid2builtins, id);
	if (entry)
		return entry->builtin;
	else
		return NULL;
}

static void
hash_setup(void)
{
	int			i;

	oid2builtins = oid2builtins_create(TopMemoryContext,
									   fmgr_nbuiltins,
									   NULL);
	for (i = 0; i < fmgr_nbuiltins; i++)
	{
		const FmgrBuiltin *ptr = &fmgr_builtins[i];
		BuiltinOidLookupEntry *entry;
		bool		found;

		entry = oid2builtins_insert(oid2builtins, ptr->foid, &found);
		Assert(!found);
		entry->builtin = ptr;
	}
}


static int16 builtins_index[FirstBootstrapObjectId];

static const FmgrBuiltin *
fmgr_isbuiltin_index(Oid id)
{
	int			i;

	if (id >= FirstBootstrapObjectId)
		return NULL;
	i = builtins_index[id];
	if (i < 0)
		return NULL;
	return &fmgr_builtins[i];
}

static void
index_setup(void)
{
	int			i;

	memset(builtins_index, -1, sizeof(builtins_index));
	for (i = 0; i < fmgr_nbuiltins; i++)
	{
		const FmgrBuiltin *ptr = &fmgr_builtins[i];

		builtins_index[ptr->foid] = i;
	}
}


PG_FUNCTION_INFO_V1(test_lookups);

Datum
test_lookups(PG_FUNCTION_ARGS)
{
	int			rep_count = PG_GETARG_INT32(0);
	instr_time	start_time;
	instr_time	end_time;
	int			i;

	INSTR_TIME_SET_CURRENT(start_time);

	for (i = 0; i < rep_count; i++)
	{
		int			ct = 0;
		Oid			fo;

		for (fo = 1; fo < 10000; fo++)
		{
			if (fmgr_isbuiltin_bsearch(fo))
				ct++;
		}

		if (ct != fmgr_nbuiltins)
			elog(ERROR, "got %d builtins instead of %d", ct, fmgr_nbuiltins);
	}

	INSTR_TIME_SET_CURRENT(end_time);
	INSTR_TIME_SUBTRACT(end_time, start_time);
	elog(NOTICE, "bsearch runtime %.3f ms",
		 1000.0 * INSTR_TIME_GET_DOUBLE(end_time));

	hash_setup();

	INSTR_TIME_SET_CURRENT(start_time);

	for (i = 0; i < rep_count; i++)
	{
		int			ct = 0;
		Oid			fo;

		for (fo = 1; fo < 10000; fo++)
		{
			if (fmgr_isbuiltin_hash(fo))
				ct++;
		}

		if (ct != fmgr_nbuiltins)
			elog(ERROR, "got %d builtins instead of %d", ct, fmgr_nbuiltins);
	}

	INSTR_TIME_SET_CURRENT(end_time);
	INSTR_TIME_SUBTRACT(end_time, start_time);
	elog(NOTICE, "hash runtime %.3f ms",
		 1000.0 * INSTR_TIME_GET_DOUBLE(end_time));

	index_setup();

	INSTR_TIME_SET_CURRENT(start_time);

	for (i = 0; i < rep_count; i++)
	{
		int			ct = 0;
		Oid			fo;

		for (fo = 1; fo < 10000; fo++)
		{
			if (fmgr_isbuiltin_index(fo))
				ct++;
		}

		if (ct != fmgr_nbuiltins)
			elog(ERROR, "got %d builtins instead of %d", ct, fmgr_nbuiltins);
	}

	INSTR_TIME_SET_CURRENT(end_time);
	INSTR_TIME_SUBTRACT(end_time, start_time);
	elog(NOTICE, "index runtime %.3f ms",
		 1000.0 * INSTR_TIME_GET_DOUBLE(end_time));

	PG_RETURN_VOID();
}
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to