This patch stores only the hash code in the index tuple.
It is based on Neil Conway's patch against an older version of PostgreSQL.
It passes the regression tests, but I haven't tested the performance yet.
Anyone interested is welcome to run a performance test ;-)
You can undefine the macro HASHVALUE_ONLY in hash.h to get the
original implementation.
It's a preliminary implementation and I'm looking for input here.
Hope to hear from you.
--
Best Regards,
Xiao Meng
DKERC, Harbin Institute of Technology, China
Gtalk: [EMAIL PROTECTED]
MSN: [EMAIL PROTECTED]
http://xiaomeng.yo2.cn
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 6a5c000..416bf57 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -129,7 +129,11 @@ hashbuildCallback(Relation index,
IndexTuple itup;
/* form an index tuple and point it at the heap tuple */
+#ifdef HASHVALUE_ONLY
+ itup = _hash_form_tuple(index, values,isnull);
+#else
itup = index_form_tuple(RelationGetDescr(index), values, isnull);
+#endif
itup->t_tid = htup->t_self;
/* Hash indexes don't index nulls, see notes in hashinsert */
@@ -171,7 +175,12 @@ hashinsert(PG_FUNCTION_ARGS)
IndexTuple itup;
/* generate an index tuple */
+#ifdef HASHVALUE_ONLY
+ itup = _hash_form_tuple(rel, values, isnull);
+#else
itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
+#endif
+
itup->t_tid = *ht_ctid;
/*
@@ -212,7 +221,11 @@ hashgettuple(PG_FUNCTION_ARGS)
bool res;
/* Hash indexes are never lossy (at the moment anyway) */
- scan->xs_recheck = false;
+#ifdef HASHVALUE_ONLY
+ scan->xs_recheck = true;
+#else
+ scan->xs_recheck = false;
+#endif
/*
* We hold pin but not lock on current buffer while outside the hash AM.
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 3eb226a..b79d4f8 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -52,9 +52,15 @@ _hash_doinsert(Relation rel, IndexTuple itup)
*/
if (rel->rd_rel->relnatts != 1)
elog(ERROR, "hash indexes support only one index key");
+#ifdef HASHVALUE_ONLY
+ datum = index_getattr(itup, 1, _create_hash_desc(), &isnull);
+ Assert(!isnull);
+ hashkey = DatumGetUInt32(datum);
+#else
datum = index_getattr(itup, 1, RelationGetDescr(rel), &isnull);
Assert(!isnull);
hashkey = _hash_datum2hashkey(rel, datum);
+#endif
/* compute item size too */
itemsz = IndexTupleDSize(*itup);
@@ -195,13 +201,26 @@ _hash_pgaddtup(Relation rel,
Size itemsize,
IndexTuple itup)
{
- OffsetNumber itup_off;
- Page page;
+ OffsetNumber itup_off;
+ Page page;
+ Datum datum;
+ bool isNull;
+ uint32 hashkey;
_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
page = BufferGetPage(buf);
+#ifdef HASHVALUE_ONLY
+ datum = index_getattr(itup,
+ 1,
+ _create_hash_desc(),
+ &isNull);
+ Assert(!isNull);
+ hashkey = DatumGetUInt32(datum);
+ itup_off = _hash_binsearch(page, hashkey);
+#else
itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page));
+#endif
if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
== InvalidOffsetNumber)
elog(ERROR, "failed to add index item to \"%s\"",
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index b0b5874..bba64c4 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -785,7 +785,12 @@ _hash_splitbucket(Relation rel,
OffsetNumber omaxoffnum;
Page opage;
Page npage;
- TupleDesc itupdesc = RelationGetDescr(rel);
+ TupleDesc itupdesc;
+#ifdef HASHVALUE_ONLY
+ itupdesc = _create_hash_desc();
+#else
+ itupdesc = RelationGetDescr(rel);
+#endif
/*
* It should be okay to simultaneously write-lock pages from each bucket,
@@ -854,9 +859,13 @@ _hash_splitbucket(Relation rel,
itup = (IndexTuple) PageGetItem(opage, PageGetItemId(opage, ooffnum));
datum = index_getattr(itup, 1, itupdesc, &null);
Assert(!null);
-
+#ifdef HASHVALUE_ONLY
+ bucket = _hash_hashkey2bucket(DatumGetUInt32(datum),
+ maxbucket, highmask, lowmask);
+#else
bucket = _hash_hashkey2bucket(_hash_datum2hashkey(rel, datum),
maxbucket, highmask, lowmask);
+#endif
if (bucket == nbucket)
{
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 258526b..b9a0307 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -178,6 +178,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument,
cur->sk_subtype);
+ so->hashso_sk_hash = hashkey;
/*
* Acquire shared split lock so we can compute the target bucket safely
* (see README).
@@ -289,6 +290,57 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
* continue to step through tuples until: 1) we get to the end of the
* bucket chain or 2) we find a valid tuple.
*/
+#ifdef HASHVALUE_ONLY
+ for (;;)
+ {
+ if (offnum == InvalidOffsetNumber)
+ {
+ /*
+ * This is the first time we're scanning this particular
+ * page of the bucket, so jump to the right spot via
+ * binary search.
+ */
+ offnum = _hash_binsearch(page, so->hashso_sk_hash);
+ }
+ else
+ {
+ /* Advance to the next tuple */
+ offnum = OffsetNumberNext(offnum);
+ }
+
+
+ if (offnum <= maxoff) itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
+ if (offnum <= maxoff && _hash_checkqual(scan, itup))
+ {
+
+ /* Found a matching tuple */
+ *bufP = so->hashso_curbuf = buf;
+ ItemPointerSet(current, BufferGetBlockNumber(buf), offnum);
+ return true;
+ }
+ else
+ {
+ /* No more matches on this page, so go on to next page */
+ if (ScanDirectionIsForward(dir))
+ _hash_readnext(rel, &buf, &page, &opaque);
+ else
+ _hash_readprev(rel, &buf, &page, &opaque);
+
+ if (BufferIsValid(buf))
+ {
+ maxoff = PageGetMaxOffsetNumber(page);
+ offnum = InvalidOffsetNumber;
+ }
+ else
+ {
+ /* Ran out of pages, so we're done */
+ *bufP = so->hashso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(current);
+ return false;
+ }
+ }
+ }
+#else
do
{
switch (dir)
@@ -367,4 +419,5 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
*bufP = so->hashso_curbuf = buf;
ItemPointerSet(current, blkno, offnum);
return true;
+#endif
}
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index 41e2eef..757df08 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -20,7 +20,7 @@
#include "executor/execdebug.h"
#include "storage/bufmgr.h"
#include "utils/lsyscache.h"
-
+#include "catalog/pg_type.h"
/*
* _hash_checkqual -- does the index tuple satisfy the scan conditions?
@@ -28,16 +28,32 @@
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
- TupleDesc tupdesc = RelationGetDescr(scan->indexRelation);
+ TupleDesc tupdesc;
ScanKey key = scan->keyData;
int scanKeySize = scan->numberOfKeys;
+ Datum datum;
+ bool isNull;
+ HashScanOpaque so = scan->opaque;
IncrIndexProcessed();
+#ifdef HASHVALUE_ONLY
+ tupdesc = _create_hash_desc();
+ datum = index_getattr(itup,
+ key->sk_attno,
+ tupdesc,
+ &isNull);
+ Assert(!isNull);
+ if( so->hashso_sk_hash != DatumGetUInt32(datum) )
+ return false;
+ key++;
+ scanKeySize--;
+#else
+ tupdesc = RelationGetDescr(scan->indexRelation);
+#endif
+
while (scanKeySize > 0)
{
- Datum datum;
- bool isNull;
Datum test;
datum = index_getattr(itup,
@@ -222,3 +238,77 @@ hashoptions(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(result);
PG_RETURN_NULL();
}
+
+/*
+ * _create_hash_desc - build the one-column tuple descriptor used for
+ * index tuples that store only a hash code
+ *
+ * The single attribute, "hashcode", is declared with INT4OID because there
+ * is no pg_type entry for an unsigned 32-bit integer.  Callers nevertheless
+ * treat the stored value as a uint32, converting with UInt32GetDatum and
+ * DatumGetUInt32.
+ *
+ * A new descriptor is created on every call; nothing is cached here.
+ */
+TupleDesc
+_create_hash_desc(void)
+{
+	TupleDesc	tupdesc;
+
+	tupdesc = CreateTemplateTupleDesc(1, false);
+	TupleDescInitEntry(tupdesc, 1, "hashcode", INT4OID, -1, 0);
+	return tupdesc;
+}
+
+/*
+ * _hash_form_tuple - form an index tuple that stores only the hash code
+ *
+ *	rel		the hash index; used to compute the hash code of the key
+ *	values	the key datum(s); only values[0] is read, and the caller's
+ *			array is left unmodified
+ *	isnull	the matching null flags, passed through to index_form_tuple
+ *
+ * A null key is never handed to the hash function: its datum is not a
+ * valid value, so hashing it could dereference garbage for pass-by-reference
+ * types.  The tuple is still formed (with the null flag taken from isnull)
+ * so that the callers can apply their own handling of nulls afterwards
+ * (see the "Hash indexes don't index nulls" note in hashbuildCallback).
+ */
+IndexTuple
+_hash_form_tuple(Relation rel, Datum *values, bool *isnull)
+{
+	TupleDesc	hashdesc;
+	IndexTuple	itup;
+	Datum		hashdatum;
+
+	if (isnull[0])
+		hashdatum = (Datum) 0;
+	else
+		hashdatum = UInt32GetDatum(_hash_datum2hashkey(rel, values[0]));
+
+	/* use a local datum so the caller's values[] array is not clobbered */
+	hashdesc = _create_hash_desc();
+	itup = index_form_tuple(hashdesc, &hashdatum, isnull);
+
+	/* the descriptor was built just for this call; don't leak it */
+	FreeTupleDesc(hashdesc);
+
+	return itup;
+}
+
+/*
+ * _hash_binsearch - find the offset in the page where the specified hash
+ * value should be located
+ *
+ *	page		the page to search
+ *	hash_value	the hash code being looked up
+ *
+ * Returns the offset of the first item whose stored hash code is greater
+ * than or equal to hash_value (a lower-bound search).  If every item on the
+ * page has a smaller hash code, or the page is empty, the result is one
+ * past the page's max offset, so callers must compare it against
+ * PageGetMaxOffsetNumber before fetching an item.
+ *
+ * This assumes the items on the page are ordered by ascending hash code.
+ */
+OffsetNumber
+_hash_binsearch(Page page, uint32 hash_value)
+{
+	OffsetNumber lower = FirstOffsetNumber;
+	OffsetNumber upper = PageGetMaxOffsetNumber(page) + 1;
+	TupleDesc	hashdesc = _create_hash_desc();
+
+	/* invariant: the answer lies in [lower, upper] */
+	while (lower < upper)
+	{
+		OffsetNumber mid = lower + (upper - lower) / 2;
+		IndexTuple	midtup;
+		Datum		datum;
+		bool		isNull;
+
+		Assert(OffsetNumberIsValid(mid));
+
+		midtup = (IndexTuple) PageGetItem(page, PageGetItemId(page, mid));
+		datum = index_getattr(midtup, 1, hashdesc, &isNull);
+		Assert(!isNull);
+
+		if (DatumGetUInt32(datum) < hash_value)
+			lower = mid + 1;	/* answer is strictly to the right of mid */
+		else
+			upper = mid;		/* mid itself may be the answer */
+	}
+
+	/* the descriptor was built just for this call; don't leak it */
+	FreeTupleDesc(hashdesc);
+
+	return upper;
+}
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index ab0824d..0982c8f 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -100,6 +100,8 @@ typedef struct HashScanOpaqueData
/* Current and marked position of the scan */
ItemPointerData hashso_curpos;
ItemPointerData hashso_mrkpos;
+ /* Hash value of the scan key */
+ uint32 hashso_sk_hash;
} HashScanOpaqueData;
typedef HashScanOpaqueData *HashScanOpaque;
@@ -227,6 +229,10 @@ typedef HashMetaPageData *HashMetaPage;
*/
#define HASHPROC 1
+/*
+ * store hash value only in the bucket
+ */
+#define HASHVALUE_ONLY
/* public routines */
@@ -330,6 +336,9 @@ extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
uint32 highmask, uint32 lowmask);
extern uint32 _hash_log2(uint32 num);
extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
+extern TupleDesc _create_hash_desc(void);
+extern IndexTuple _hash_form_tuple(Relation rel, Datum *values, bool *isnull);
+extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
/* hash.c */
extern void hash_redo(XLogRecPtr lsn, XLogRecord *record);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers