Antonin Houska <[email protected]> wrote:
> I'm still testing it, especially the concurrent access. There are probably
> bugs in the code, but it can help to understand what I mean.
I've traced the most important cases of concurrent insertion into a bucket
whose split is in progress, and fixed a few related bugs. A tool to check the
index structure is still needed before performance testing makes sense; that
might include an enhancement of contrib/pageinspect.
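To make the intended behaviour easier to review, here is a tiny standalone
sketch (plain C, invented struct and flag names - not the real PostgreSQL page
layout or API) of the chain-walking rule the patch relies on: while a split is
in progress, the last page of the new bucket keeps a next-link that points into
the old bucket, so a backend walking the new bucket's chain for insertion must
stop at the page flagged as the bucket's last one instead of blindly following
the links.

#include <stdio.h>

#define SPLIT_LAST 0x01         /* stands in for LH_BUCKET_SPLIT_LAST */

typedef struct Pg
{
    int         flags;
    struct Pg  *next;           /* stands in for hasho_nextblkno */
} Pg;

/*
 * Walk the chain of the "new" bucket: stop at the end of the chain or at the
 * page marked as the bucket's last page, even though its next-link may still
 * point into the bucket being split.
 */
static Pg *
find_insert_target(Pg *p)
{
    while (p->next != NULL && !(p->flags & SPLIT_LAST))
        p = p->next;
    return p;
}

int
main(void)
{
    Pg  old_page = {0, NULL};               /* unprocessed page of old bucket */
    Pg  new_last = {SPLIT_LAST, &old_page}; /* last page of the new bucket */
    Pg  new_head = {0, &new_last};          /* primary page of the new bucket */

    printf("insert stops at page with flags 0x%x\n",
           find_insert_target(&new_head)->flags);
    return 0;
}

Compiled and run, this prints that the insert stops at the page carrying the
SPLIT_LAST flag rather than spilling over into the bucket being split.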
--
Antonin Houska
Cybertec Schönig & Schönig GmbH
Gröhrmühlgasse 26
A-2700 Wiener Neustadt
Web: http://www.postgresql-support.de, http://www.cybertec.at
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
new file mode 100644
index 24b06a5..149bbcf
*** a/src/backend/access/hash/hash.c
--- b/src/backend/access/hash/hash.c
*************** loop_top:
*** 572,577 ****
--- 572,599 ----
opaque = (HashPageOpaque) PageGetSpecialPointer(page);
Assert(opaque->hasho_bucket == cur_bucket);
+ /*
+ * If the bucket participates in a split, give up.
+ *
+ * (Unlike the metapage copy, the flags at bucket level should
+ * always be up-to-date.)
+ *
+ * TODO
+ *
+ * 1. Analyze whether skipping both buckets participating in the split
+ * imposes too severe a restriction, and whether it makes sense to
+ * introduce separate flags for the "old" and "new" buckets. Also,
+ * would such a restricted VACUUM still make sense?
+ *
+ * 2. Consider how statistics should reflect the fact that some
+ * buckets are skipped because of a split.
+ */
+ if (opaque->hasho_flag & LH_BUCKET_SPLIT)
+ {
+ _hash_relbuf(rel, buf);
+ break;
+ }
+
/* Scan each tuple in page */
maxoffno = PageGetMaxOffsetNumber(page);
for (offno = FirstOffsetNumber;
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
new file mode 100644
index 63aaec9..de445c9
*** a/src/backend/access/hash/hashinsert.c
--- b/src/backend/access/hash/hashinsert.c
*************** _hash_doinsert(Relation rel, IndexTuple
*** 37,42 ****
--- 37,43 ----
Page page;
HashPageOpaque pageopaque;
Size itemsz;
+ uint16 buckets_total;
bool do_expand;
uint32 hashkey;
Bucket bucket;
*************** _hash_doinsert(Relation rel, IndexTuple
*** 123,129 ****
*/
BlockNumber nextblkno = pageopaque->hasho_nextblkno;
! if (BlockNumberIsValid(nextblkno))
{
/*
* ovfl page exists; go get it. if it doesn't have room, we'll
--- 124,131 ----
*/
BlockNumber nextblkno = pageopaque->hasho_nextblkno;
! if (BlockNumberIsValid(nextblkno) &&
! !(pageopaque->hasho_flag & LH_BUCKET_SPLIT_LAST))
{
/*
* ovfl page exists; go get it. if it doesn't have room, we'll
*************** _hash_doinsert(Relation rel, IndexTuple
*** 136,142 ****
else
{
/*
! * we're at the end of the bucket chain and we haven't found a
* page with enough room. allocate a new overflow page.
*/
--- 138,145 ----
else
{
/*
! * we're at the end of the bucket chain, or (during a split) right
! * before redirection to the "old bucket", and we haven't found a
* page with enough room. allocate a new overflow page.
*/
*************** _hash_doinsert(Relation rel, IndexTuple
*** 151,157 ****
Assert(PageGetFreeSpace(page) >= itemsz);
}
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
! Assert(pageopaque->hasho_flag == LH_OVERFLOW_PAGE);
Assert(pageopaque->hasho_bucket == bucket);
}
--- 154,160 ----
Assert(PageGetFreeSpace(page) >= itemsz);
}
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
! Assert(pageopaque->hasho_flag & LH_OVERFLOW_PAGE);
Assert(pageopaque->hasho_bucket == bucket);
}
*************** _hash_doinsert(Relation rel, IndexTuple
*** 173,180 ****
metap->hashm_ntuples += 1;
/* Make sure this stays in sync with _hash_expandtable() */
! do_expand = metap->hashm_ntuples >
! (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1);
/* Write out the metapage and drop lock, but keep pin */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
--- 176,184 ----
metap->hashm_ntuples += 1;
/* Make sure this stays in sync with _hash_expandtable() */
! buckets_total = metap->hashm_maxbucket + 1 + metap->hashm_split_count;
! do_expand = metap->hashm_split_count < HASH_MAX_SPLITS &&
! metap->hashm_ntuples > (double) metap->hashm_ffactor * buckets_total;
/* Write out the metapage and drop lock, but keep pin */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
new file mode 100644
index b775164..4345f29
*** a/src/backend/access/hash/hashovfl.c
--- b/src/backend/access/hash/hashovfl.c
***************
*** 21,27 ****
#include "utils/rel.h"
- static Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
static uint32 _hash_firstfreebit(uint32 map);
--- 21,26 ----
*************** _hash_addovflpage(Relation rel, Buffer m
*** 127,133 ****
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
nextblkno = pageopaque->hasho_nextblkno;
! if (!BlockNumberIsValid(nextblkno))
break;
/* we assume we do not need to write the unmodified page */
--- 126,133 ----
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
nextblkno = pageopaque->hasho_nextblkno;
! if (!BlockNumberIsValid(nextblkno) ||
! (pageopaque->hasho_flag & LH_BUCKET_SPLIT_LAST))
break;
/* we assume we do not need to write the unmodified page */
*************** _hash_addovflpage(Relation rel, Buffer m
*** 140,150 ****
ovflpage = BufferGetPage(ovflbuf);
ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
- ovflopaque->hasho_nextblkno = InvalidBlockNumber;
ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
ovflopaque->hasho_page_id = HASHO_PAGE_ID;
MarkBufferDirty(ovflbuf);
/* logically chain overflow page to previous page */
--- 140,163 ----
ovflpage = BufferGetPage(ovflbuf);
ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
ovflopaque->hasho_prevblkno = BufferGetBlockNumber(buf);
ovflopaque->hasho_bucket = pageopaque->hasho_bucket;
ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
ovflopaque->hasho_page_id = HASHO_PAGE_ID;
+ if (pageopaque->hasho_flag & LH_BUCKET_SPLIT_LAST)
+ {
+ /*
+ * The bucket is being created, so its next-link may point into the
+ * bucket being split (the link may already be terminated - we don't
+ * care, just copy it).
+ */
+ ovflopaque->hasho_nextblkno = pageopaque->hasho_nextblkno;
+ ovflopaque->hasho_flag |= LH_BUCKET_SPLIT_LAST;
+ pageopaque->hasho_flag &= ~LH_BUCKET_SPLIT_LAST;
+ }
+ else
+ ovflopaque->hasho_nextblkno = InvalidBlockNumber;
+
MarkBufferDirty(ovflbuf);
/* logically chain overflow page to previous page */
*************** _hash_addovflpage(Relation rel, Buffer m
*** 164,170 ****
* The caller must hold a pin, but no lock, on the metapage buffer.
* That buffer is left in the same state at exit.
*/
! static Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
HashMetaPage metap;
--- 177,183 ----
* The caller must hold a pin, but no lock, on the metapage buffer.
* That buffer is left in the same state at exit.
*/
! Buffer
_hash_getovflpage(Relation rel, Buffer metabuf)
{
HashMetaPage metap;
*************** found:
*** 334,339 ****
--- 347,385 ----
}
/*
+ * Return the buffer containing the last page of a bucket being created
+ * (during a split). The search starts at the page contained in the 'start'
+ * buffer, which is expected to be pinned and write-locked (the caller must
+ * not unlock it). The result is also pinned and write-locked.
+ */
+ Buffer
+ _hash_get_new_bucket_end(Relation rel, Buffer start)
+ {
+ Buffer buf;
+ Page page;
+ HashPageOpaque pageopaque;
+ BlockNumber blkno = InvalidBlockNumber;
+
+ buf = start;
+ for (;;)
+ {
+ if (BlockNumberIsValid(blkno))
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+ page = BufferGetPage(buf);
+ pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ if (pageopaque->hasho_flag & LH_BUCKET_SPLIT_LAST)
+ return buf;
+
+ blkno = pageopaque->hasho_nextblkno;
+ /* If we reached the end of the old bucket, something is seriously wrong. */
+ Assert(BlockNumberIsValid(blkno));
+
+ if (buf != start)
+ _hash_relbuf(rel, buf);
+ }
+ }
+
+ /*
* _hash_firstfreebit()
*
* Return the number of the first bit that is not set in the word 'map'.
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
new file mode 100644
index 46c6c96..c169dcd
*** a/src/backend/access/hash/hashpage.c
--- b/src/backend/access/hash/hashpage.c
*************** _hash_metapinit(Relation rel, double num
*** 424,432 ****
--- 424,434 ----
*/
metap->hashm_maxbucket = metap->hashm_lowmask = num_buckets - 1;
metap->hashm_highmask = (num_buckets << 1) - 1;
+ metap->hashm_split_count = 0;
MemSet(metap->hashm_spares, 0, sizeof(metap->hashm_spares));
MemSet(metap->hashm_mapp, 0, sizeof(metap->hashm_mapp));
+ MemSet(metap->hashm_splits, 0, sizeof(metap->hashm_splits));
/* Set up mapping for one spare page after the initial splitpoints */
metap->hashm_spares[log2_num_buckets] = 1;
*************** void
*** 498,511 ****
--- 500,521 ----
_hash_expandtable(Relation rel, Buffer metabuf)
{
HashMetaPage metap;
+ uint16 buckets_total;
Bucket old_bucket;
Bucket new_bucket;
+ HashSplit new_split;
uint32 spare_ndx;
BlockNumber start_oblkno;
BlockNumber start_nblkno;
uint32 maxbucket;
uint32 highmask;
uint32 lowmask;
+ Buffer obuf;
+ Page opage, npage;
+ HashPageOpaque oopaque, nopaque;
+ uint16 i;
+ size_t tail;
+ Buffer nbuf;
/*
* Write-lock the meta page. It used to be necessary to acquire a
*************** _hash_expandtable(Relation rel, Buffer m
*** 517,529 ****
metap = HashPageGetMeta(BufferGetPage(metabuf));
/*
! * Check to see if split is still needed; someone else might have already
! * done one while we waited for the lock.
*
* Make sure this stays in sync with _hash_doinsert()
*/
! if (metap->hashm_ntuples <=
! (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1))
goto fail;
/*
--- 527,542 ----
metap = HashPageGetMeta(BufferGetPage(metabuf));
/*
! * Check to see if we can handle one more split and if it's still needed;
! * someone else might have already done one or more splits while we waited
! * for the lock. (At this point we assume that the splits in progress will
! * all succeed.)
*
* Make sure this stays in sync with _hash_doinsert()
*/
! buckets_total = metap->hashm_maxbucket + 1 + metap->hashm_split_count;
! if (metap->hashm_split_count >= HASH_MAX_SPLITS ||
! metap->hashm_ntuples <= (double) metap->hashm_ffactor * buckets_total)
goto fail;
/*
*************** _hash_expandtable(Relation rel, Buffer m
*** 545,569 ****
goto fail;
/*
! * Determine which bucket is to be split, and attempt to lock the old
! * bucket. If we can't get the lock, give up.
! *
! * The lock protects us against other backends, but not against our own
! * backend. Must check for active scans separately.
*/
new_bucket = metap->hashm_maxbucket + 1;
old_bucket = (new_bucket & metap->hashm_lowmask);
! start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
if (_hash_has_active_scan(rel, old_bucket))
goto fail;
if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
goto fail;
/*
* Likewise lock the new bucket (should never fail).
*
* Note: it is safe to compute the new bucket's blkno here, even though we
--- 558,619 ----
goto fail;
/*
! * Determine which bucket is to be split.
*/
new_bucket = metap->hashm_maxbucket + 1;
old_bucket = (new_bucket & metap->hashm_lowmask);
! /*
! * If the masks changed too recently, another new_bucket might map to the
! * same old_bucket.
! */
! for (i = 0; i < metap->hashm_split_count; i++)
! {
! if (metap->hashm_splits[i].old_bucket == old_bucket)
! goto fail;
! }
+ /*
+ * Lock the old bucket. If we can't get the lock, give up (else we could
+ * cause deadlock with backends looking for the bucket).
+ *
+ * The lock protects us against other backends, but not against our own
+ * backend. Must check for active scans separately.
+ */
if (_hash_has_active_scan(rel, old_bucket))
goto fail;
+ start_oblkno = BUCKET_TO_BLKNO(metap, old_bucket);
if (!_hash_try_getlock(rel, start_oblkno, HASH_EXCLUSIVE))
goto fail;
/*
+ * Make sure that backends needing old_bucket during the split are
+ * aware of the restrictions that a split in progress imposes.
+ */
+ obuf = _hash_getbuf(rel, start_oblkno, HASH_WRITE, LH_BUCKET_PAGE);
+ opage = BufferGetPage(obuf);
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
+ /*
+ * If hashm_splits indicates no split for this bucket, the bucket itself
+ * shouldn't be marked either: when the split ends, these flags are unset
+ * first, then the item is removed from hashm_splits.
+ */
+ Assert(!(oopaque->hasho_flag & LH_BUCKET_SPLIT));
+ oopaque->hasho_flag |= LH_BUCKET_SPLIT;
+
+ /*
+ * Do not drop the pin; we'll definitely need the buffer again soon.
+ */
+ _hash_chgbufaccess(rel, obuf, HASH_WRITE, HASH_NOLOCK);
+ /*
+ * It's OK to let other backends use the bucket now; LH_BUCKET_SPLIT
+ * ensures that they are aware of the relevant restrictions.
+ */
+ _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
+
+ /*
* Likewise lock the new bucket (should never fail).
*
* Note: it is safe to compute the new bucket's blkno here, even though we
*************** _hash_expandtable(Relation rel, Buffer m
*** 596,610 ****
*/
if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
{
! /* can't split due to BlockNumber overflow */
! _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
_hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
goto fail;
}
}
/*
! * Okay to proceed with split. Update the metapage bucket mapping info.
*
* Since we are scribbling on the metapage data right in the shared
* buffer, any failure in this next little bit leaves us with a big
--- 646,664 ----
*/
if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket))
{
! /*
! * can't split due to BlockNumber overflow
! *
! * The meta page is not updated yet, so no other backend should be
! * aware of the new bucket.
! */
_hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
goto fail;
}
}
/*
! * Okay to initiate the split. Update the metapage bucket mapping info.
*
* Since we are scribbling on the metapage data right in the shared
* buffer, any failure in this next little bit leaves us with a big
*************** _hash_expandtable(Relation rel, Buffer m
*** 613,619 ****
* establish a critical section.
*/
START_CRIT_SECTION();
-
metap->hashm_maxbucket = new_bucket;
if (new_bucket > metap->hashm_highmask)
--- 667,672 ----
*************** _hash_expandtable(Relation rel, Buffer m
*** 635,640 ****
--- 688,700 ----
metap->hashm_ovflpoint = spare_ndx;
}
+ /*
+ * Publish information about the split in progress.
+ */
+ new_split = &metap->hashm_splits[i];
+ new_split->new_bucket = new_bucket;
+ new_split->old_bucket = old_bucket;
+ metap->hashm_split_count += 1;
/* Done mucking with metapage */
END_CRIT_SECTION();
*************** _hash_expandtable(Relation rel, Buffer m
*** 649,665 ****
highmask = metap->hashm_highmask;
lowmask = metap->hashm_lowmask;
! /* Write out the metapage and drop lock, but keep pin */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
/* Relocate records to the new bucket */
_hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
! start_oblkno, start_nblkno,
! maxbucket, highmask, lowmask);
! /* Release bucket locks, allowing others to access them */
! _hash_droplock(rel, start_oblkno, HASH_EXCLUSIVE);
! _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
return;
--- 709,832 ----
highmask = metap->hashm_highmask;
lowmask = metap->hashm_lowmask;
! /*
! * Initialize the new bucket's primary page while holding the meta page
! * lock (_hash_getnewbuf should not be called concurrently).
! *
! * XXX This is rather a misuse of the metabuf content lock. Can we
! * synchronize this call in any better way?
! */
! nbuf = _hash_getnewbuf(rel, start_nblkno, MAIN_FORKNUM);
!
! /*
! * No need to hold the meta page lock while initializing page(s) of the
! * new bucket.
! *
! * Note: As soon as the meta page lock is released, any concurrent backend
! * can see the new bucket, but can't lock it. We hold the exclusive lock
! * on the new bucket until it's ready to accept both reads and writes.
! *
! * TODO (WAL)
! *
! * Before WAL-replaying the above meta page changes, an exclusive lock on
! * the new bucket must be acquired (probably before even locking the
! * metapage, to avoid deadlock with backends looking for the bucket; note
! * that conditional lock acquisition does not help here, we really need
! * the lock). The problem is that, if a crash happens right after the meta
! * page changes have been WAL-logged, recovery must not let other backends
! * know about (and possibly lock) the new bucket until the replaying
! * backend (startup process) has it locked exclusively (to complete its
! * initial setup).
! */
_hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
+ /*
+ * Neither initialization nor writing of the new page requires the metabuf
+ * lock.
+ *
+ * The last overflow page in the chain must point to the first page of the
+ * old bucket. During the split this ensures that scans of the new bucket
+ * will always succeed, although the new bucket has not received all the
+ * matching tuples from the old bucket yet.
+ */
+ npage = BufferGetPage(nbuf);
+ nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
+ nopaque->hasho_prevblkno = InvalidBlockNumber;
+ nopaque->hasho_nextblkno = start_oblkno;
+ nopaque->hasho_bucket = new_bucket;
+
+ /*
+ * LH_BUCKET_SPLIT_LAST - initially the primary page is also the last page
+ * of the bucket. This flag will be cleared as soon as the first overflow
+ * page is added.
+ */
+ nopaque->hasho_flag = LH_BUCKET_PAGE | LH_BUCKET_SPLIT |
+ LH_BUCKET_SPLIT_LAST;
+ nopaque->hasho_page_id = HASHO_PAGE_ID;
+ _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
+
+ /* Release lock of the new bucket, allowing others to use it. */
+ _hash_droplock(rel, start_nblkno, HASH_EXCLUSIVE);
+
/* Relocate records to the new bucket */
_hash_splitbucket(rel, metabuf, old_bucket, new_bucket,
! start_oblkno, start_nblkno, maxbucket,
! highmask, lowmask);
!
! /*
! * Mark both buckets as no longer participating in the split.
! */
! _hash_chgbufaccess(rel, nbuf, HASH_NOLOCK, HASH_WRITE);
! nopaque->hasho_flag &= ~LH_BUCKET_SPLIT;
! _hash_wrtbuf(rel, nbuf);
!
! _hash_chgbufaccess(rel, obuf, HASH_NOLOCK, HASH_WRITE);
! oopaque->hasho_flag &= ~LH_BUCKET_SPLIT;
! _hash_wrtbuf(rel, obuf);
!
! /*
! * Remove the "split record" from the metapage.
! *
! * (Use critical section for the same reasons as explained above.)
! */
! START_CRIT_SECTION();
! _hash_chgbufaccess(rel, metabuf, HASH_NOLOCK, HASH_WRITE);
! _hash_checkpage(rel, metabuf, LH_META_PAGE);
! metap = HashPageGetMeta(BufferGetPage(metabuf));
! Assert(metap->hashm_split_count > 0);
! for (i = 0; i < metap->hashm_split_count; i++)
! {
! if (metap->hashm_splits[i].old_bucket == old_bucket)
! break;
! }
! if (i == metap->hashm_split_count)
! /*
! * This should not happen unless the meta page is corrupt.
! */
! elog(ERROR, "could not find info on split of bucket %u", old_bucket);
!
! /* Number of items following the one we're about to delete. */
! tail = metap->hashm_split_count - (i + 1);
! if (tail > 0)
! {
! HashSplit current, next;
!
! /*
! * The item being removed is not the last one. Shift the following
! * items one position lower.
! */
! current = metap->hashm_splits + i;
! next = current + 1;
! memmove(current, next, tail * sizeof(HashSplitData));
! }
! metap->hashm_split_count -= 1;
! END_CRIT_SECTION();
!
! /*
! * Write the meta page.
! */
! _hash_chgbufaccess(rel, metabuf, HASH_WRITE, HASH_NOLOCK);
return;
*************** _hash_alloc_buckets(Relation rel, BlockN
*** 718,723 ****
--- 885,949 ----
return true;
}
+ /*
+ * Set the redirection from the end of the "new" bucket (i.e. the one just
+ * being created) to the appropriate page of the "old" one (i.e. the bucket
+ * being split). The search for the last page of the "new" bucket starts at
+ * the page contained in 'buf'.
+ *
+ * 'buf' must be pinned and write-locked, 'target' is the block number of the
+ * appropriate page of the "old" bucket. 'to_unlock' is a list of buffers
+ * that the caller will need to unlock later.
+ */
+ static void
+ _hash_set_new_bucket_end_link(Relation rel, Buffer buf, BlockNumber target,
+ List **to_unlock)
+ {
+ Page page;
+ HashPageOpaque opaque;
+
+ page = BufferGetPage(buf);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ if (!(opaque->hasho_flag & LH_BUCKET_SPLIT_LAST))
+ {
+ buf = _hash_get_new_bucket_end(rel, buf);
+ page = BufferGetPage(buf);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ /* It will need to be unlocked too. */
+ *to_unlock = lappend_int(*to_unlock, buf);
+ }
+
+ Assert(opaque->hasho_flag & LH_BUCKET_SPLIT_LAST);
+ opaque->hasho_nextblkno = target;
+ /* The special case of terminating the new bucket. */
+ if (!BlockNumberIsValid(opaque->hasho_nextblkno))
+ opaque->hasho_flag &= ~LH_BUCKET_SPLIT_LAST;
+ }
+
+
+ /*
+ * Write out and unlock buffers in the order we locked them. 'keep_pin', if
+ * valid, is a buffer that should only be written; its pin should not be
+ * dropped.
+ */
+ static void
+ _hash_write_buffers(Relation rel, List **bufs, Buffer keep_pin)
+ {
+ ListCell *nc;
+
+ foreach(nc, *bufs)
+ {
+ Buffer buf;
+
+ buf = (Buffer) lfirst_int(nc);
+ if (!BufferIsInvalid(keep_pin) && buf == keep_pin)
+ _hash_chgbufaccess(rel, buf, HASH_WRITE, HASH_NOLOCK);
+ else
+ _hash_wrtbuf(rel, buf);
+
+ }
+ list_free(*bufs);
+ *bufs = NIL;
+ }
+
/*
* _hash_splitbucket -- split 'obucket' into 'obucket' and 'nbucket'
*************** _hash_alloc_buckets(Relation rel, BlockN
*** 727,734 ****
* belong in the new bucket, and compress out any free space in the old
* bucket.
*
! * The caller must hold exclusive locks on both buckets to ensure that
! * no one else is trying to access them (see README).
*
* The caller must hold a pin, but no lock, on the metapage buffer.
* The buffer is returned in the same state. (The metapage is only
--- 953,961 ----
* belong in the new bucket, and compress out any free space in the old
* bucket.
*
! * Neither bucket needs to be locked. Any concurrent backend is expected to
! * check the flags on the buckets' primary pages and follow the related
! * restrictions.
*
* The caller must hold a pin, but no lock, on the metapage buffer.
* The buffer is returned in the same state. (The metapage is only
*************** _hash_splitbucket(Relation rel,
*** 748,796 ****
BlockNumber oblkno;
BlockNumber nblkno;
Buffer obuf;
- Buffer nbuf;
Page opage;
- Page npage;
HashPageOpaque oopaque;
! HashPageOpaque nopaque;
- /*
- * It should be okay to simultaneously write-lock pages from each bucket,
- * since no one else can be trying to acquire buffer lock on pages of
- * either bucket.
- */
oblkno = start_oblkno;
- obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_BUCKET_PAGE);
- opage = BufferGetPage(obuf);
- oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
-
nblkno = start_nblkno;
! nbuf = _hash_getnewbuf(rel, nblkno, MAIN_FORKNUM);
! npage = BufferGetPage(nbuf);
!
! /* initialize the new bucket's primary page */
! nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
! nopaque->hasho_prevblkno = InvalidBlockNumber;
! nopaque->hasho_nextblkno = InvalidBlockNumber;
! nopaque->hasho_bucket = nbucket;
! nopaque->hasho_flag = LH_BUCKET_PAGE;
! nopaque->hasho_page_id = HASHO_PAGE_ID;
/*
! * Partition the tuples in the old bucket between the old bucket and the
! * new bucket, advancing along the old bucket's overflow bucket chain and
! * adding overflow pages to the new bucket as needed. Outer loop iterates
! * once per page in old bucket.
*/
for (;;)
{
OffsetNumber ooffnum;
OffsetNumber omaxoffnum;
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
/* Scan each tuple in old page */
omaxoffnum = PageGetMaxOffsetNumber(opage);
for (ooffnum = FirstOffsetNumber;
ooffnum <= omaxoffnum;
ooffnum = OffsetNumberNext(ooffnum))
--- 975,1032 ----
BlockNumber oblkno;
BlockNumber nblkno;
Buffer obuf;
Page opage;
HashPageOpaque oopaque;
! Page npage = NULL;
! HashPageOpaque nopaque = NULL;
! char *tuples_new;
! Buffer nbuf = InvalidBuffer;
! int nbuf_inserts = 0;
oblkno = start_oblkno;
nblkno = start_nblkno;
! /* BLCKSZ is a safe size for any number of matching tuples. */
! tuples_new = (char *) palloc(BLCKSZ);
/*
! * Copy matching tuples from the old to the new bucket, advancing along
! * the old bucket's overflow bucket chain and adding overflow pages to the
! * new bucket as needed. Backends scanning the old bucket during the split
! * are aware that deletion of the tuples copied is not immediate, so they
! * do re-check each key before returning it.
! *
! * Outer loop iterates once per page in old bucket.
*/
for (;;)
{
OffsetNumber ooffnum;
OffsetNumber omaxoffnum;
+ BlockNumber oblkno_next;
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
+ char *tuple_copy;
+ IndexTuple itup;
+ Size itemsz;
+ int item_count, i;
+ Buffer keep_pin = InvalidBuffer;
+ List *new_write = NIL;
+
+ if (!BlockNumberIsValid(oblkno))
+ break;
+
+ /*
+ * 1. Copy the interesting tuples from the source page aside and
+ * release its content lock.
+ */
+ obuf = _hash_getbuf(rel, oblkno, HASH_READ,
+ LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ opage = BufferGetPage(obuf);
+ oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
/* Scan each tuple in old page */
omaxoffnum = PageGetMaxOffsetNumber(opage);
+ tuple_copy = tuples_new;
+ item_count = 0;
for (ooffnum = FirstOffsetNumber;
ooffnum <= omaxoffnum;
ooffnum = OffsetNumberNext(ooffnum))
*************** _hash_splitbucket(Relation rel,
*** 810,862 ****
if (bucket == nbucket)
{
- /*
- * insert the tuple into the new bucket. if it doesn't fit on
- * the current page in the new bucket, we must allocate a new
- * overflow page and place the tuple on that page instead.
- */
itemsz = IndexTupleDSize(*itup);
itemsz = MAXALIGN(itemsz);
! if (PageGetFreeSpace(npage) < itemsz)
{
! /* write out nbuf and drop lock, but keep pin */
! _hash_chgbufaccess(rel, nbuf, HASH_WRITE, HASH_NOLOCK);
! /* chain to a new overflow page */
! nbuf = _hash_addovflpage(rel, metabuf, nbuf);
! npage = BufferGetPage(nbuf);
! /* we don't need nblkno or nopaque within the loop */
}
/*
! * Insert tuple on new page, using _hash_pgaddtup to ensure
! * correct ordering by hashkey. This is a tad inefficient
! * since we may have to shuffle itempointers repeatedly.
! * Possible future improvement: accumulate all the items for
! * the new page and qsort them before insertion.
*/
! (void) _hash_pgaddtup(rel, nbuf, itemsz, itup);
/*
* Mark tuple for deletion from old page.
*/
deletable[ndeletable++] = ooffnum;
}
else
- {
- /*
- * the tuple stays on this page, so nothing to do.
- */
Assert(bucket == obucket);
- }
}
- oblkno = oopaque->hasho_nextblkno;
-
- /*
- * Done scanning this old page. If we moved any tuples, delete them
- * from the old page.
- */
if (ndeletable > 0)
{
PageIndexMultiDelete(opage, deletable, ndeletable);
--- 1046,1305 ----
if (bucket == nbucket)
{
itemsz = IndexTupleDSize(*itup);
itemsz = MAXALIGN(itemsz);
! memcpy(tuple_copy, itup, itemsz);
! tuple_copy += itemsz;
! item_count++;
! }
! else
! {
! /*
! * the tuple stays on this page, so nothing to do.
! */
! Assert(bucket == obucket);
! }
! }
!
! oblkno_next = oopaque->hasho_nextblkno;
! _hash_chgbufaccess(rel, obuf, HASH_READ, HASH_NOLOCK);
!
! /*
! * 2. Insert them into the new bucket.
! */
! if (BufferIsInvalid(nbuf))
! {
! nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE, LH_BUCKET_PAGE);
! npage = BufferGetPage(nbuf);
! nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
! }
! else
! _hash_chgbufaccess(rel, nbuf, HASH_NOLOCK, HASH_WRITE);
!
!
! if (item_count == 0)
! {
! /*
! * No matching tuples on this page.
! *
! * Write nbuf out even though no items were added to it: the link to
! * the old bucket will be updated.
! */
! new_write = lappend_int(new_write, nbuf);
! /*
! * If this is the last iteration of the outer loop, oblkno_next is
! * InvalidBlockNumber, so the link will be terminated. Otherwise
! * it will point to the next page of the old bucket.
! */
! _hash_set_new_bucket_end_link(rel, nbuf, oblkno_next, &new_write);
!
! /*
! * Write out, this time, only nbuf and - if distinct - the buffer
! * containing the link to the old bucket. Keep the pin on nbuf iff at
! * least one more outer loop iteration is expected.
! */
! keep_pin = BlockNumberIsValid(oblkno_next) ? nbuf : InvalidBuffer;
! _hash_write_buffers(rel, &new_write, keep_pin);
!
! _hash_dropbuf(rel, obuf);
! oblkno = oblkno_next;
! continue;
! }
!
! tuple_copy = tuples_new;
! for (i = 0; i < item_count;)
! {
! itup = (IndexTuple) tuple_copy;
! itemsz = IndexTupleDSize(*itup);
! itemsz = MAXALIGN(itemsz);
!
! /*
! * insert the tuple into the new bucket. if it doesn't fit on the
! * current page in the new bucket, try to proceed to the next one
! * - it's possible that an overflow page has already been added
! * for concurrent insertions.
! */
! if (PageGetFreeSpace(npage) < itemsz)
! {
! BlockNumber nblkno_next;
!
! nblkno_next = nopaque->hasho_nextblkno;
!
! /*
! * The split must have been initialized by now. Thus, even if
! * we are at the end of the new bucket, a valid link should be
! * there (to the old bucket).
! */
! Assert(BlockNumberIsValid(nblkno_next));
!
! if (nbuf_inserts > 0)
{
! /*
! * The page should be written, but not unlocked! Because the
! * new bucket still links to the current page of the old
! * bucket, the tuples already copied from that old page
! * could otherwise be found twice - once on this page and
! * once on the old one. (On the other hand, the link must
! * not be redirected to oblkno_next yet; that would make
! * readers miss some not-yet-copied tuples.)
! *
! * TODO The hash index seems to have no API to only sync a
! * buffer without unlocking it. Do nothing for now (the
! * buffers will be written once processing of the current old
! * page is done) and resolve this when implementing WAL.
! */
!
! /*
! * Remember to write and unlock when the current old
! * bucket page is processed.
! */
! new_write = lappend_int(new_write, nbuf);
}
/*
! * The current (already full) page links either to a page of
! * the new bucket (probably created for concurrent insertions)
! * or to one of the old bucket.
*/
! if (!(nopaque->hasho_flag & LH_BUCKET_SPLIT_LAST))
! {
! if (nbuf_inserts == 0)
! {
! /* No reason to prevent others from using the page. */
! _hash_relbuf(rel, nbuf);
! }
!
! /* Follow the link. */
! nbuf = _hash_getbuf(rel, nblkno_next, HASH_WRITE,
! LH_OVERFLOW_PAGE);
! }
! else
! {
! /*
! * A link to the old bucket - add a new page before it.
! */
! Buffer ovflbuf;
! Page ovflpage;
! HashPageOpaque ovflopaque;
!
! /*
! * _hash_addovflpage can't be used here because it expects
! * the buffer to be unlocked. We can't afford to unlock it,
! * as explained above.
! */
! ovflbuf = _hash_getovflpage(rel, metabuf);
! ovflpage = BufferGetPage(ovflbuf);
! ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage);
! ovflopaque->hasho_bucket = nopaque->hasho_bucket;
! ovflopaque->hasho_flag = LH_OVERFLOW_PAGE;
! ovflopaque->hasho_page_id = HASHO_PAGE_ID;
! /* ovflpage takes the link over. */
! ovflopaque->hasho_nextblkno = nopaque->hasho_nextblkno;
! nopaque->hasho_flag &= ~LH_BUCKET_SPLIT_LAST;
! ovflopaque->hasho_flag |= LH_BUCKET_SPLIT_LAST;
! ovflopaque->hasho_prevblkno = nblkno;
! /* The current new page is no longer the last. */
! nopaque->hasho_nextblkno = BufferGetBlockNumber(ovflbuf);
!
! if (nbuf_inserts == 0)
! {
! /*
! * No reason to prevent others from using the
! * page. (We could not do this earlier because we
! * needed some info from nopaque to set up the links.)
! */
! _hash_relbuf(rel, nbuf);
! }
! nbuf = ovflbuf;
! }
! /* Keep nblkno always in-sync with nbuf. */
! nblkno = BufferGetBlockNumber(nbuf);
! npage = BufferGetPage(nbuf);
! nopaque = (HashPageOpaque) PageGetSpecialPointer(npage);
! nbuf_inserts = 0;
! continue;
! }
!
! /*
! * Insert tuple on new page, using _hash_pgaddtup to ensure
! * correct ordering by hashkey. This is a tad inefficient
! * since we may have to shuffle itempointers repeatedly.
! * Possible future improvement: accumulate all the items for
! * the new page and qsort them before insertion.
! */
! (void) _hash_pgaddtup(rel, nbuf, itemsz, itup);
!
! nbuf_inserts++;
! tuple_copy += itemsz;
! i++;
! }
!
! Assert(nbuf_inserts > 0);
! new_write = lappend_int(new_write, nbuf);
! /*
! * If the next outer loop iteration adds more items to 'nbuf', they
! * should trigger writing again.
! */
! nbuf_inserts = 0;
!
! /*
! * 3. Before the affected pages of the new bucket get unlocked,
! * redirect scans of the new bucket to the first not-yet-processed
! * page of the old bucket.
! *
! * If we're at the last page of the old bucket, this effectively
! * terminates the new bucket.
! */
! _hash_set_new_bucket_end_link(rel, nbuf, oblkno_next, &new_write);
!
! /*
! * Write all buffers modified for the current old bucket page.
! *
! * Modification means tuple insertion or update of the link to the old
! * bucket.
! *
! * nbuf (the last buffer we inserted data into) might be needed for
! * the next outer loop. Keep its pin unless this is the last outer
! * loop. (However the page _hash_set_new_bucket_end_link might have
! * added can be dropped for now.)
! */
! keep_pin = BlockNumberIsValid(oblkno_next) ? nbuf : InvalidBuffer;
! _hash_write_buffers(rel, &new_write, keep_pin);
!
! /*
! * 4. Now that the copying has succeeded, write-lock the page of the old
! * bucket and delete the tuples just copied.
! */
! _hash_chgbufaccess(rel, obuf, HASH_NOLOCK, HASH_WRITE);
! omaxoffnum = PageGetMaxOffsetNumber(opage);
! for (ooffnum = FirstOffsetNumber;
! ooffnum <= omaxoffnum;
! ooffnum = OffsetNumberNext(ooffnum))
! {
! IndexTuple itup;
! Bucket bucket;
!
! /*
! * Fetch the item's hash key (conveniently stored in the item) and
! * determine which bucket it now belongs in.
! */
! itup = (IndexTuple) PageGetItem(opage,
! PageGetItemId(opage, ooffnum));
! bucket = _hash_hashkey2bucket(_hash_get_indextuple_hashkey(itup),
! maxbucket, highmask, lowmask);
+ if (bucket == nbucket)
+ {
/*
* Mark tuple for deletion from old page.
*/
deletable[ndeletable++] = ooffnum;
}
else
Assert(bucket == obucket);
}
if (ndeletable > 0)
{
PageIndexMultiDelete(opage, deletable, ndeletable);
*************** _hash_splitbucket(Relation rel,
*** 865,887 ****
else
_hash_relbuf(rel, obuf);
! /* Exit loop if no more overflow pages in old bucket */
! if (!BlockNumberIsValid(oblkno))
! break;
!
! /* Else, advance to next old page */
! obuf = _hash_getbuf(rel, oblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
! opage = BufferGetPage(obuf);
! oopaque = (HashPageOpaque) PageGetSpecialPointer(opage);
}
/*
! * We're at the end of the old bucket chain, so we're done partitioning
! * the tuples. Before quitting, call _hash_squeezebucket to ensure the
! * tuples remaining in the old bucket (including the overflow pages) are
! * packed as tightly as possible. The new bucket is already tight.
*/
- _hash_wrtbuf(rel, nbuf);
-
- _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
}
--- 1308,1343 ----
else
_hash_relbuf(rel, obuf);
! /* Go for the next unprocessed page. */
! oblkno = oblkno_next;
}
+ pfree(tuples_new);
+
/*
! * TODO Squeeze the old bucket
! *
! * Whether it should happen here or elsewhere, consider how to interact
! * with concurrent calls of _hash_addovflpage. That function claims: "at
! * least share lock on the bucket, to ensure that no one else tries to
! * compact the bucket meanwhile. This guarantees that 'buf' won't stop
! * being part of the bucket while it's unlocked."
! *
! * The problem is that we don't hold exclusive lock on either bucket
! * during the split anymore (except for short time during the
! * preparation), so the shared lock held by caller of _hash_addovflpage
! * is useless. We can either:
! *
! * 1. Squeeze the bucket here, but consider that LH_BUCKET_SPLIT is still
! * set. That is, be "careful enough" not to disrupt any concurrent
! * execution of _hash_addovflpage (during insertion). (And remove the
! * mentioned comment about "at least share lock"). Does such a "tentative"
! * squeezing still make sense?
! *
! * 2. Move the "squeeze" phase elsewhere (to VACUUM ?) and be careful
! * about LH_BUCKET_SPLIT flag before doing anything.
! *
! * Note: whichever approach we take, VACUUM must also respect
! * LH_BUCKET_SPLIT.
*/
}
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
new file mode 100644
index 3d66216..2321890
*** a/src/backend/access/hash/hashutil.c
--- b/src/backend/access/hash/hashutil.c
*************** _hash_checkpage(Relation rel, Buffer buf
*** 185,192 ****
if (flags)
{
HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
! if ((opaque->hasho_flag & flags) == 0)
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" contains corrupted page at block %u",
--- 185,196 ----
if (flags)
{
HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ uint16 oflags = opaque->hasho_flag;
! if (((oflags & flags) == 0) ||
! ((oflags & LH_BUCKET_SPLIT) && !(oflags & LH_BUCKET_PAGE)) ||
! ((oflags & LH_BUCKET_SPLIT_LAST) &&
! !(oflags & (LH_BUCKET_PAGE | LH_OVERFLOW_PAGE))))
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg("index \"%s\" contains corrupted page at block %u",
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
new file mode 100644
index fc3c7f4..956e5a2
*** a/src/include/access/hash.h
--- b/src/include/access/hash.h
*************** typedef uint32 Bucket;
*** 46,57 ****
* available to other buckets by calling _hash_freeovflpage(). If all
* the tuples are deleted from a bucket page, no additional action is
* necessary.
*/
#define LH_UNUSED_PAGE (0)
#define LH_OVERFLOW_PAGE (1 << 0)
#define LH_BUCKET_PAGE (1 << 1)
! #define LH_BITMAP_PAGE (1 << 2)
! #define LH_META_PAGE (1 << 3)
typedef struct HashPageOpaqueData
{
--- 46,77 ----
* available to other buckets by calling _hash_freeovflpage(). If all
* the tuples are deleted from a bucket page, no additional action is
* necessary.
+ *
+ * LH_BUCKET_SPLIT can only be set if LH_BUCKET_PAGE is already set. It
+ * indicates that the bucket participates in a split - whether as the bucket
+ * being split or as a new one receiving tuples from an existing bucket. This
+ * flag compensates for the fact that no lock is held at bucket level for the
+ * major part of the split. Whoever wants to move tuples across pages (VACUUM,
+ * _hash_squeezebucket) should check for this flag *in addition* to having
+ * acquired an exclusive lock on the bucket. (The exclusive lock itself is
+ * sufficient protection for running scans.)
+ *
+ * XXX Do we need 2 separate flags, one for the "old" and one for the "new"
+ * bucket?
+ *
+ * LH_BUCKET_SPLIT_LAST can be set either on the primary page of a bucket
+ * being created during a split (such a page already has LH_BUCKET_SPLIT set),
+ * or - if the bucket already contains overflow pages - on the last overflow
+ * page of that bucket. It indicates that the page's hasho_nextblkno points to
+ * the first not-yet-processed page of the bucket being split.
*/
#define LH_UNUSED_PAGE (0)
#define LH_OVERFLOW_PAGE (1 << 0)
#define LH_BUCKET_PAGE (1 << 1)
! #define LH_BUCKET_SPLIT (1 << 2)
! #define LH_BUCKET_SPLIT_LAST (1 << 3)
! #define LH_BITMAP_PAGE (1 << 4)
! #define LH_META_PAGE (1 << 5)
typedef struct HashPageOpaqueData
{
*************** typedef struct HashScanOpaqueData
*** 88,94 ****
bool hashso_bucket_valid;
/*
! * If we have a share lock on the bucket, we record it here. When
* hashso_bucket_blkno is zero, we have no such lock.
*/
BlockNumber hashso_bucket_blkno;
--- 108,114 ----
bool hashso_bucket_valid;
/*
! * If we have a share lock on the bucket, we record it here. When
* hashso_bucket_blkno is zero, we have no such lock.
*/
BlockNumber hashso_bucket_blkno;
*************** typedef struct HashScanOpaqueData
*** 111,116 ****
--- 131,152 ----
typedef HashScanOpaqueData *HashScanOpaque;
/*
+ * Details of a split in progress.
+ *
+ * old_bucket is the bucket being split; some of its tuples are moved into
+ * new_bucket.
+ *
+ * TODO Consider renaming to BucketSplitData.
+ */
+ typedef struct HashSplitData
+ {
+ Bucket old_bucket;
+ Bucket new_bucket;
+ } HashSplitData;
+
+ typedef HashSplitData *HashSplit;
+
+ /*
* Definitions for metapage.
*/
*************** typedef HashScanOpaqueData *HashScanOpaq
*** 138,145 ****
--- 174,185 ----
* There is no particular upper limit on the size of mapp[], other than
* needing to fit into the metapage. (With 8K block size, 128 bitmaps
* limit us to 64 Gb of overflow space...)
+ *
+ * splits contains the list of bucket splits in progress. Concurrent scans
+ * and insertions need to be aware of them.
*/
#define HASH_MAX_SPLITPOINTS 32
+ #define HASH_MAX_SPLITS 8 /* TODO How many can we afford? */
#define HASH_MAX_BITMAPS 128
typedef struct HashMetaPageData
*************** typedef struct HashMetaPageData
*** 153,158 ****
--- 193,199 ----
* of 2 */
uint16 hashm_bmshift; /* log2(bitmap array size in BITS) */
uint32 hashm_maxbucket; /* ID of maximum bucket in use */
+ uint16 hashm_split_count; /* number of splits in progress */
uint32 hashm_highmask; /* mask to modulo into entire table */
uint32 hashm_lowmask; /* mask to modulo into lower half of table */
uint32 hashm_ovflpoint;/* splitpoint from which ovflpgs being
*************** typedef struct HashMetaPageData
*** 163,168 ****
--- 204,210 ----
uint32 hashm_spares[HASH_MAX_SPLITPOINTS]; /* spare pages before
* each splitpoint */
BlockNumber hashm_mapp[HASH_MAX_BITMAPS]; /* blknos of ovfl bitmaps */
+ HashSplitData hashm_splits[HASH_MAX_SPLITS]; /* splits in progress */
} HashMetaPageData;
typedef HashMetaPageData *HashMetaPage;
*************** extern OffsetNumber _hash_pgaddtup(Relat
*** 291,296 ****
--- 333,340 ----
/* hashovfl.c */
extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
+ extern Buffer _hash_getovflpage(Relation rel, Buffer metabuf);
+ extern Buffer _hash_get_new_bucket_end(Relation rel, Buffer start);
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
BufferAccessStrategy bstrategy);
extern void _hash_initbitmap(Relation rel, HashMetaPage metap,