On 18.01.2012 14:07, Heikki Linnakangas wrote:
For 9.2, I think we should change gist so
that the user-defined distance function can return any scalar data type,
not just float8 (as long as it has b-tree comparison operators).
I took a shot at doing that. Patch attached. It needs some cleanup; I
think we'll need to bump the version of the btree_gist extension, and I
only changed the btree_int8 distance function to do that, even though it
would now make a lot of sense to adjust the others, too. Also, I think
it'll leak memory (or crash..) if the distance data type is pass-by-ref.
If someone has a test suite at hand for performance testing this, that
would be good. Now that I'm calling the b-tree comparison function for
every comparison operation in the red-black tree, instead of a direct
float8 <= instruction, this could be quite a bit slower. That could be
mitigated somewhat by using the sort-support stuff Tom committed recently.
If we bend things a bit, this might be back-patchable to 9.1. We can't
add a new am support function easily, that would require a catalog
update to increment pg_am.amsupport entry, but we could hardwire the
support for int8 distances, like I hardwired the knowledge of float8's
comparsion function in the attached patch.
--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
diff --git a/contrib/btree_gist/btree_gist--1.0.sql b/contrib/btree_gist/btree_gist--1.0.sql
index c5c9587..a0906f2 100644
--- a/contrib/btree_gist/btree_gist--1.0.sql
+++ b/contrib/btree_gist/btree_gist--1.0.sql
@@ -460,7 +460,7 @@ AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
CREATE FUNCTION gbt_int8_distance(internal,int8,int2,oid)
-RETURNS float8
+RETURNS int8
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
@@ -510,7 +510,8 @@ AS
ALTER OPERATOR FAMILY gist_int8_ops USING gist ADD
OPERATOR 6 <> (int8, int8) ,
OPERATOR 15 <-> (int8, int8) FOR ORDER BY pg_catalog.integer_ops ,
- FUNCTION 8 (int8, int8) gbt_int8_distance (internal, int8, int2, oid) ;
+ FUNCTION 8 (int8, int8) gbt_int8_distance (internal, int8, int2, oid),
+ FUNCTION 9 (int8, int8) btint8cmp (int8, int8) ;
--
diff --git a/contrib/btree_gist/btree_int8.c b/contrib/btree_gist/btree_int8.c
index c05d868..bc21c20 100644
--- a/contrib/btree_gist/btree_int8.c
+++ b/contrib/btree_gist/btree_int8.c
@@ -166,14 +166,16 @@ gbt_int8_distance(PG_FUNCTION_ARGS)
/* Oid subtype = PG_GETARG_OID(3); */
int64KEY *kkk = (int64KEY *) DatumGetPointer(entry->key);
- GBT_NUMKEY_R key;
+ int64 result;
- key.lower = (GBT_NUMKEY *) &kkk->lower;
- key.upper = (GBT_NUMKEY *) &kkk->upper;
+ if (query <= kkk->lower)
+ result = Abs(query - kkk->lower);
+ else if (query >= kkk->upper)
+ result = Abs(query - kkk->upper);
+ else
+ result = 0;
- PG_RETURN_FLOAT8(
- gbt_num_distance(&key, (void *) &query, GIST_LEAF(entry), &tinfo)
- );
+ PG_RETURN_INT64(result);
}
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index ae6309d..14ab70c 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -18,9 +18,12 @@
#include "access/gist_private.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/indexfsm.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -1270,13 +1273,6 @@ initGISTstate(Relation index)
fmgr_info_copy(&(giststate->equalFn[i]),
index_getprocinfo(index, i + 1, GIST_EQUAL_PROC),
scanCxt);
- /* opclasses are not required to provide a Distance method */
- if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
- fmgr_info_copy(&(giststate->distanceFn[i]),
- index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
- scanCxt);
- else
- giststate->distanceFn[i].fn_oid = InvalidOid;
/*
* If the index column has a specified collation, we should honor that
@@ -1293,6 +1289,36 @@ initGISTstate(Relation index)
giststate->supportCollation[i] = index->rd_indcollation[i];
else
giststate->supportCollation[i] = DEFAULT_COLLATION_OID;
+
+ /* opclasses are not required to provide a Distance method */
+ if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC)))
+ {
+ Oid distanceType;
+
+ fmgr_info_copy(&(giststate->distanceFn[i]),
+ index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC),
+ scanCxt);
+
+ distanceType = get_func_rettype(giststate->distanceFn[i].fn_oid);
+
+ if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_CMP_PROC)))
+ {
+ fmgr_info_copy(&(giststate->distanceCmpFn[i]),
+ index_getprocinfo(index, i + 1, GIST_DISTANCE_CMP_PROC),
+ scanCxt);
+ }
+ /*
+ * The distance function used to be defined to always return a
+ * float8, and there was no distance comparison function. For
+ * backwards-compatibility, if the distance data type is float8,
+ * and no distance comparison function is given, we use the
+ * built-in float8cmp function as the comparison function.
+ */
+ else if (distanceType == FLOAT8OID)
+ fmgr_info(F_BTFLOAT8CMP, &giststate->distanceCmpFn[i]);
+ else
+ elog(ERROR, "distance comparison function is missing");
+ }
}
MemoryContextSwitchTo(oldCxt);
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 73551ec..a078ebb 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -34,7 +34,8 @@
* request it. recheck is not interesting when examining a non-leaf entry,
* since we must visit the lower index page if there's any doubt.
*
- * If we are doing an ordered scan, so->distances[] is filled with distance
+ * If we are doing an ordered scan, so->distances[] (and so->distance_kinds[])
+ * is filled with distance
* data from the distance() functions before returning success.
*
* We must decompress the key in the IndexTuple before passing it to the
@@ -55,7 +56,8 @@ gistindex_keytest(IndexScanDesc scan,
GISTSTATE *giststate = so->giststate;
ScanKey key = scan->keyData;
int keySize = scan->numberOfKeys;
- double *distance_p;
+ Datum *distance_p;
+ char *distance_kind_p;
Relation r = scan->indexRelation;
*recheck_p = false;
@@ -72,7 +74,10 @@ gistindex_keytest(IndexScanDesc scan,
if (GistPageIsLeaf(page)) /* shouldn't happen */
elog(ERROR, "invalid GiST tuple found on leaf page");
for (i = 0; i < scan->numberOfOrderBys; i++)
- so->distances[i] = -get_float8_infinity();
+ {
+ so->distances[i] = (Datum) 0;
+ so->distance_kinds[i] = DISTANCE_MIN_INF;
+ }
return true;
}
@@ -156,6 +161,7 @@ gistindex_keytest(IndexScanDesc scan,
/* OK, it passes --- now let's compute the distances */
key = scan->orderByData;
distance_p = so->distances;
+ distance_kind_p = so->distance_kinds;
keySize = scan->numberOfOrderBys;
while (keySize > 0)
{
@@ -170,7 +176,7 @@ gistindex_keytest(IndexScanDesc scan,
if ((key->sk_flags & SK_ISNULL) || isNull)
{
/* Assume distance computes as null and sorts to the end */
- *distance_p = get_float8_infinity();
+ *distance_kind_p = DISTANCE_MAX_INF;
}
else
{
@@ -202,11 +208,13 @@ gistindex_keytest(IndexScanDesc scan,
Int32GetDatum(key->sk_strategy),
ObjectIdGetDatum(key->sk_subtype));
- *distance_p = DatumGetFloat8(dist);
+ *distance_p = dist;
+ *distance_kind_p = DISTANCE_NORMAL;
}
key++;
distance_p++;
+ distance_kind_p++;
keySize--;
}
@@ -234,7 +242,8 @@ gistindex_keytest(IndexScanDesc scan,
* sibling will be processed next.
*/
static void
-gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
+gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem,
+ Datum *myDistances, char *myDistance_kinds,
TIDBitmap *tbm, int64 *ntids)
{
GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
@@ -284,7 +293,9 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
tmpItem->head = item;
tmpItem->lastHeap = NULL;
memcpy(tmpItem->distances, myDistances,
- sizeof(double) * scan->numberOfOrderBys);
+ sizeof(Datum) * scan->numberOfOrderBys);
+ memcpy(GSTI_DISTANCE_KINDS(tmpItem, scan->numberOfOrderBys),
+ myDistance_kinds, sizeof(char) * scan->numberOfOrderBys);
(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
@@ -370,7 +381,10 @@ gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
tmpItem->head = item;
tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL;
memcpy(tmpItem->distances, so->distances,
- sizeof(double) * scan->numberOfOrderBys);
+ sizeof(Datum) * scan->numberOfOrderBys);
+ memcpy(GSTI_DISTANCE_KINDS(tmpItem, scan->numberOfOrderBys),
+ so->distance_kinds,
+ sizeof(char) * scan->numberOfOrderBys);
(void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew);
@@ -453,7 +467,8 @@ getNextNearest(IndexScanDesc scan)
/* visit an index page, extract its items into queue */
CHECK_FOR_INTERRUPTS();
- gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
+ gistScanPage(scan, item, so->curTreeItem->distances,
+ GSTI_DISTANCE_KINDS(so->curTreeItem, scan->numberOfOrderBys), NULL, NULL);
}
pfree(item);
@@ -491,7 +506,7 @@ gistgettuple(PG_FUNCTION_ARGS)
fakeItem.blkno = GIST_ROOT_BLKNO;
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
- gistScanPage(scan, &fakeItem, NULL, NULL, NULL);
+ gistScanPage(scan, &fakeItem, NULL, NULL, NULL, NULL);
}
if (scan->numberOfOrderBys > 0)
@@ -529,7 +544,8 @@ gistgettuple(PG_FUNCTION_ARGS)
* this page, we fall out of the inner "do" and loop around to
* return them.
*/
- gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL);
+ gistScanPage(scan, item, so->curTreeItem->distances,
+ GSTI_DISTANCE_KINDS(so->curTreeItem, scan->numberOfOrderBys), NULL, NULL);
pfree(item);
} while (so->nPageData == 0);
@@ -562,7 +578,7 @@ gistgetbitmap(PG_FUNCTION_ARGS)
fakeItem.blkno = GIST_ROOT_BLKNO;
memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN));
- gistScanPage(scan, &fakeItem, NULL, tbm, &ntids);
+ gistScanPage(scan, &fakeItem, NULL, NULL, tbm, &ntids);
/*
* While scanning a leaf page, ItemPointers of matching heap tuples will
@@ -577,7 +593,8 @@ gistgetbitmap(PG_FUNCTION_ARGS)
CHECK_FOR_INTERRUPTS();
- gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids);
+ gistScanPage(scan, item, so->curTreeItem->distances,
+ GSTI_DISTANCE_KINDS(so->curTreeItem, scan->numberOfOrderBys), tbm, &ntids);
pfree(item);
}
diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c
index bf139de..696325b 100644
--- a/src/backend/access/gist/gistscan.c
+++ b/src/backend/access/gist/gistscan.c
@@ -31,13 +31,28 @@ GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg)
const GISTSearchTreeItem *sa = (const GISTSearchTreeItem *) a;
const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b;
IndexScanDesc scan = (IndexScanDesc) arg;
+ GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
int i;
/* Order according to distance comparison */
for (i = 0; i < scan->numberOfOrderBys; i++)
{
- if (sa->distances[i] != sb->distances[i])
- return (sa->distances[i] > sb->distances[i]) ? 1 : -1;
+ char kind_a = GSTI_DISTANCE_KINDS(sa, scan->numberOfOrderBys)[i];
+ char kind_b = GSTI_DISTANCE_KINDS(sb, scan->numberOfOrderBys)[i];
+ int diff;
+
+ if (kind_a != 0 || kind_b != 0)
+ {
+ if (kind_a != kind_b)
+ return (kind_a > kind_b) ? 1 : -1;
+ }
+
+ diff = DatumGetInt32(FunctionCall2Coll(&so->giststate->distanceCmpFn[i],
+ so->giststate->supportCollation[i],
+ sa->distances[i],
+ sb->distances[i]));
+ if (diff != 0)
+ return diff;
}
return 0;
@@ -83,7 +98,9 @@ GISTSearchTreeItemAllocator(void *arg)
{
IndexScanDesc scan = (IndexScanDesc) arg;
- return palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
+ return palloc(GSTIHDRSZ +
+ sizeof(Datum) * scan->numberOfOrderBys +
+ sizeof(char) * scan->numberOfOrderBys);
}
static void
@@ -127,8 +144,11 @@ gistbeginscan(PG_FUNCTION_ARGS)
so->queueCxt = giststate->scanCxt; /* see gistrescan */
/* workspaces with size dependent on numberOfOrderBys: */
- so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys);
+ so->tmpTreeItem = palloc(GSTIHDRSZ +
+ sizeof(Datum) * scan->numberOfOrderBys +
+ sizeof(char) * scan->numberOfOrderBys);
so->distances = palloc(sizeof(double) * scan->numberOfOrderBys);
+ so->distance_kinds = palloc(sizeof(char) * scan->numberOfOrderBys);
so->qual_ok = true; /* in case there are zero keys */
scan->opaque = so;
@@ -188,7 +208,9 @@ gistrescan(PG_FUNCTION_ARGS)
/* create new, empty RBTree for search queue */
oldCxt = MemoryContextSwitchTo(so->queueCxt);
- so->queue = rb_create(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys,
+ so->queue = rb_create(GSTIHDRSZ +
+ sizeof(Datum) * scan->numberOfOrderBys +
+ sizeof(char) * scan->numberOfOrderBys,
GISTSearchTreeItemComparator,
GISTSearchTreeItemCombiner,
GISTSearchTreeItemAllocator,
diff --git a/src/include/access/gist.h b/src/include/access/gist.h
index 42ac63a..8f19253 100644
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -33,7 +33,8 @@
#define GIST_PICKSPLIT_PROC 6
#define GIST_EQUAL_PROC 7
#define GIST_DISTANCE_PROC 8
-#define GISTNProcs 8
+#define GIST_DISTANCE_CMP_PROC 9
+#define GISTNProcs 9
/*
* strategy numbers for GiST opclasses that want to implement the old
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index 0d6b625..048e70b 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -21,6 +21,7 @@
#include "storage/buffile.h"
#include "utils/rbtree.h"
#include "utils/hsearch.h"
+#include "utils/sortsupport.h"
/* Buffer lock modes */
#define GIST_SHARE BUFFER_LOCK_SHARE
@@ -71,6 +72,7 @@ typedef struct GISTSTATE
FmgrInfo picksplitFn[INDEX_MAX_KEYS];
FmgrInfo equalFn[INDEX_MAX_KEYS];
FmgrInfo distanceFn[INDEX_MAX_KEYS];
+ FmgrInfo distanceCmpFn[INDEX_MAX_KEYS];
/* Collations to pass to the support functions */
Oid supportCollation[INDEX_MAX_KEYS];
@@ -120,6 +122,17 @@ typedef struct GISTSearchItem
#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber)
/*
+ * We need a few special distance values to represent before/after anything
+ * else. We don't know anything about the data type used for measuring
+ * distance, so we carry around a 'kind' associated with each distance value,
+ * similar to how we carry around an 'isnull' value together with a Datum
+ * in many places in the backend.
+ */
+#define DISTANCE_MIN_INF -1 /* sorts before everything else */
+#define DISTANCE_NORMAL 0 /* normal distance value */
+#define DISTANCE_MAX_INF 1 /* sorts after everything else */
+
+/*
* Within a GISTSearchTreeItem's chain, heap items always appear before
* index-page items, since we want to visit heap items first. lastHeap points
* to the last heap item in the chain, or is NULL if there are none.
@@ -129,9 +142,16 @@ typedef struct GISTSearchTreeItem
RBNode rbnode; /* this is an RBTree item */
GISTSearchItem *head; /* first chain member */
GISTSearchItem *lastHeap; /* last heap-tuple member, if any */
- double distances[1]; /* array with numberOfOrderBys entries */
+ Datum distances[1]; /* array with numberOfOrderBys entries */
+ /*
+ * char distance_kinds[] array follows. Same size as distances array.
+ * Use GSTI_DISTANCE_KINDS to access it.
+ */
} GISTSearchTreeItem;
+#define GSTI_DISTANCE_KINDS(item, numberOfOrderBys) \
+ (((char *) &(item)->distances) + sizeof(Datum) * (numberOfOrderBys))
+
#define GSTIHDRSZ offsetof(GISTSearchTreeItem, distances)
/*
@@ -149,7 +169,8 @@ typedef struct GISTScanOpaqueData
/* pre-allocated workspace arrays */
GISTSearchTreeItem *tmpTreeItem; /* workspace to pass to rb_insert */
- double *distances; /* output area for gistindex_keytest */
+ Datum *distances; /* output area for gistindex_keytest */
+ char *distance_kinds;
/* In a non-ordered search, returnable heap items are stored here: */
GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)];
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 9aac9e9..df56445 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -123,7 +123,7 @@ DESCR("b-tree index access method");
DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbuildempty hashbulkdelete hashvacuumcleanup - hashcostestimate hashoptions ));
DESCR("hash index access method");
#define HASH_AM_OID 405
-DATA(insert OID = 783 ( gist 0 8 f t f f t t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions ));
+DATA(insert OID = 783 ( gist 0 9 f t f f t t f t t t f 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbuildempty gistbulkdelete gistvacuumcleanup - gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
DATA(insert OID = 2742 ( gin 0 5 f f f f t t f f t f f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbuildempty ginbulkdelete ginvacuumcleanup - gincostestimate ginoptions ));
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index ac45ee6..2883549 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -247,7 +247,7 @@
* Enable debugging print statements for WAL-related operations; see
* also the wal_debug GUC var.
*/
-/* #define WAL_DEBUG */
+#define WAL_DEBUG
/*
* Enable tracing of resource consumption during sort operations;
--
Sent via pgsql-bugs mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs