We can make markpos fast if we make the copy lazily in _bt_steppage; see the attached patch.
I did some micro-benchmarking of merge join performance; see the attached test. Test results on my laptop:
8_1_STABLE: 1.77 s
HEAD, with patch: 1.65 s
HEAD, without patch: 2.46 s

The results are pretty stable, within 0.1 s.

--
Heikki Linnakangas
EnterpriseDB http://www.enterprisedb.com
mergejointest.sh
Description: application/shellscript
Index: src/backend/access/nbtree/nbtree.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtree.c,v
retrieving revision 1.149
diff -c -r1.149 nbtree.c
*** src/backend/access/nbtree/nbtree.c 10 May 2006 23:18:39 -0000 1.149
--- src/backend/access/nbtree/nbtree.c 23 Aug 2006 11:54:09 -0000
***************
*** 368,373 ****
--- 368,374 ----
{
so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
so->currPos.buf = so->markPos.buf = InvalidBuffer;
+ so->markItemIndex = -1;
if (scan->numberOfKeys > 0)
so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
else
***************
*** 392,397 ****
--- 393,399 ----
ReleaseBuffer(so->markPos.buf);
so->markPos.buf = InvalidBuffer;
}
+ so->markItemIndex = -1;
/*
* Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
***************
*** 430,435 ****
--- 432,438 ----
ReleaseBuffer(so->markPos.buf);
so->markPos.buf = InvalidBuffer;
}
+ so->markItemIndex = -1;
if (so->killedItems != NULL)
pfree(so->killedItems);
***************
*** 456,469 ****
so->markPos.buf = InvalidBuffer;
}
! /* bump pin on current buffer for assignment to mark buffer */
if (BTScanPosIsValid(so->currPos))
{
! IncrBufferRefCount(so->currPos.buf);
! memcpy(&so->markPos, &so->currPos,
! offsetof(BTScanPosData, items[1]) +
! so->currPos.lastItem * sizeof(BTScanPosItem));
! }
PG_RETURN_VOID();
}
--- 459,472 ----
so->markPos.buf = InvalidBuffer;
}
! /* Record the current itemIndex we're on. If we later step to next page
! * before releasing the marked position, _bt_steppage makes a full copy
! * of the currPos-struct. */
if (BTScanPosIsValid(so->currPos))
{
! so->markItemIndex = so->currPos.itemIndex;
! } else
! so->markItemIndex = -1;
PG_RETURN_VOID();
}
***************
*** 477,500 ****
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
BTScanOpaque so = (BTScanOpaque) scan->opaque;
! /* we aren't holding any read locks, but gotta drop the pin */
! if (BTScanPosIsValid(so->currPos))
{
! /* Before leaving current page, deal with any killed items */
! if (so->numKilled > 0 &&
! so->currPos.buf != so->markPos.buf)
! _bt_killitems(scan, false);
! ReleaseBuffer(so->currPos.buf);
! so->currPos.buf = InvalidBuffer;
! }
!
! /* bump pin on marked buffer */
! if (BTScanPosIsValid(so->markPos))
{
! IncrBufferRefCount(so->markPos.buf);
! memcpy(&so->currPos, &so->markPos,
! offsetof(BTScanPosData, items[1]) +
! so->markPos.lastItem * sizeof(BTScanPosItem));
}
PG_RETURN_VOID();
--- 480,512 ----
IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
BTScanOpaque so = (BTScanOpaque) scan->opaque;
! if (so->markItemIndex != -1)
{
! /* The restore position was on the same page.
! * Just restore the itemIndex */
! so->currPos.itemIndex = so->markItemIndex;
! }
! else
{
! /* we aren't holding any read locks, but gotta drop the pin */
! if (BTScanPosIsValid(so->currPos))
! {
! /* Before leaving current page, deal with any killed items */
! if (so->numKilled > 0 &&
! so->currPos.buf != so->markPos.buf)
! _bt_killitems(scan, false);
! ReleaseBuffer(so->currPos.buf);
! so->currPos.buf = InvalidBuffer;
! }
!
! if (BTScanPosIsValid(so->markPos))
! {
! /* bump pin on marked buffer for assignment to current buffer */
! IncrBufferRefCount(so->markPos.buf);
! memcpy(&so->currPos, &so->markPos,
! offsetof(BTScanPosData, items[1]) +
! so->markPos.lastItem * sizeof(BTScanPosItem));
! }
}
PG_RETURN_VOID();
Index: src/backend/access/nbtree/nbtsearch.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/nbtree/nbtsearch.c,v
retrieving revision 1.105
diff -c -r1.105 nbtsearch.c
*** src/backend/access/nbtree/nbtsearch.c 7 May 2006 01:21:30 -0000 1.105
--- src/backend/access/nbtree/nbtsearch.c 23 Aug 2006 11:11:45 -0000
***************
*** 1055,1060 ****
--- 1055,1074 ----
rel = scan->indexRelation;
+ /* Before we modify currPos, make a copy of the items if there
+ * was a marked position that needs them. */
+ if (so->markItemIndex != -1)
+ {
+ /* bump pin on current buffer for assignment to marked buffer */
+ IncrBufferRefCount(so->currPos.buf);
+ memcpy(&so->markPos, &so->currPos,
+ offsetof(BTScanPosData, items[1]) +
+ so->currPos.lastItem * sizeof(BTScanPosItem));
+
+ so->markPos.itemIndex = so->markItemIndex;
+ so->markItemIndex = -1;
+ }
+
if (ScanDirectionIsForward(dir))
{
/* Walk right to the next page with data */
Index: src/include/access/nbtree.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/nbtree.h,v
retrieving revision 1.103
diff -c -r1.103 nbtree.h
*** src/include/access/nbtree.h 7 Aug 2006 16:57:57 -0000 1.103
--- src/include/access/nbtree.h 23 Aug 2006 11:07:47 -0000
***************
*** 441,446 ****
--- 441,450 ----
/* keep these last in struct for efficiency */
BTScanPosData currPos; /* current position data */
BTScanPosData markPos; /* marked position, if any */
+ int markItemIndex; /* if the marked position is on the same
+ page as current position, we don't use
+ markPos, but just keep the marked
+ itemIndex in markItemIndex */
} BTScanOpaqueData;
typedef BTScanOpaqueData *BTScanOpaque;
---------------------------(end of broadcast)---------------------------
TIP 4: Have you searched our list archives?
http://archives.postgresql.org
