Re: [PATCHES] rtree: improve performance, tuple killing
On Tue, 2005-01-18 at 16:43 +1100, Neil Conway wrote: Barring any objections, I intend to apply this patch tomorrow. Applied. -Neil ---(end of broadcast)--- TIP 3: if posting/reading through Usenet, please send an appropriate subscribe-nomail command to [EMAIL PROTECTED] so that your message can get through to the mailing list cleanly
Re: [PATCHES] rtree: improve performance, tuple killing
Barring any objections, I intend to apply this patch tomorrow. The patch, as well as the original -patches email, are included below. -Neil On Wed, 2004-11-24 at 11:15 +1100, Neil Conway wrote: This patch makes some improvements to the rtree index implementation: (1) Keep a pin on the scan's current buffer and mark buffer. This avoids the need to do a ReadBuffer() for each tuple produced by the scan. (2) Convert a ReleaseBuffer() ; ReadBuffer() pair into ReleaseAndReadBuffer(). Surely not a huge win, but it saves a lock acquire/release... (3) Remove a bunch of duplicated code in rtget.c; make rtnext() handle both the initial result and subsequent result cases. (4) Add support for index tuple killing (5) Remove rtscancache(): it is dead code, for the same reason that gistscancache() is dead code (an index scan ought not be invoked with NoMovementScanDirection). The end result is about a 10% improvement in index scan performance, according to contrib/rtree_gist/bench. These changes (with the exception of #2) are analogous to changes I've already made for GiST (it's clear that GiST was started as a fork of rtree). I'm not hugely interested in further improvements to rtree; I just did this stuff because it is low-hanging fruit and I've already made the same changes for GiST. # # patch src/backend/access/rtree/rtget.c # from [3db9a1c0391d9e319ac8455167f23215e00bf832] #to [ebfe121a3bcd3e2a15da91e14a84b69c87afac67] # # patch src/backend/access/rtree/rtree.c # from [274583f574bc483a709b46912a567c58c6abb98c] #to [8df26216af5a60db10afd38e3f1dc5c6355ec79c] # # patch src/backend/access/rtree/rtscan.c # from [9ace8f57b28397296b386955fdf3b7b712178a9a] #to [92b177352fd28ffab208ac9abf502cb4c81d2740] # # patch src/include/access/rtree.h # from [d3b048ba826b36bc766de324779c0b5bbb143b42] #to [e4947131a99641d8f43c4b7b770485c8c1043094] # --- src/backend/access/rtree/rtget.c +++ src/backend/access/rtree/rtget.c @@ -19,10 +19,8 @@ #include access/relscan.h #include access/rtree.h -static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n, +static OffsetNumber findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir); -static bool rtscancache(IndexScanDesc s, ScanDirection dir); -static bool rtfirst(IndexScanDesc s, ScanDirection dir); static bool rtnext(IndexScanDesc s, ScanDirection dir); @@ -31,138 +29,106 @@ { IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - bool res; - - /* if we have it cached in the scan desc, just return the value */ - if (rtscancache(s, dir)) - PG_RETURN_BOOL(true); - - /* not cached, so we'll have to do some work */ - if (ItemPointerIsValid((s-currentItemData))) - res = rtnext(s, dir); - else - res = rtfirst(s, dir); - PG_RETURN_BOOL(res); -} - -static bool -rtfirst(IndexScanDesc s, ScanDirection dir) -{ - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RTreePageOpaque po; + Page page; + OffsetNumber offnum; RTreeScanOpaque so; - RTSTACK*stk; - BlockNumber blk; - IndexTuple it; - b = ReadBuffer(s-indexRelation, P_ROOT); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); so = (RTreeScanOpaque) s-opaque; - for (;;) + /* + * If we've already produced a tuple and the executor has informed + * us that it should be marked killed, do so know. + */ + if (s-kill_prior_tuple ItemPointerIsValid((s-currentItemData))) { - maxoff = PageGetMaxOffsetNumber(p); - if (ScanDirectionIsBackward(dir)) - n = findnext(s, p, maxoff, dir); - else - n = findnext(s, p, FirstOffsetNumber, dir); + offnum = ItemPointerGetOffsetNumber((s-currentItemData)); + page = BufferGetPage(so-curbuf); + PageGetItemId(page, offnum)-lp_flags |= LP_DELETE; + SetBufferCommitInfoNeedsSave(so-curbuf); + } - while (n FirstOffsetNumber || n maxoff) - { - ReleaseBuffer(b); - if (so-s_stack == NULL) -return false; + /* + * Get the next tuple that matches the search key; if asked to + * skip killed tuples, find the first non-killed tuple that + * matches. Return as soon as we've run out of matches or we've + * found an acceptable match. + */ + for (;;) + { + bool res = rtnext(s, dir); - stk = so-s_stack; - b = ReadBuffer(s-indexRelation, stk-rts_blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - maxoff = PageGetMaxOffsetNumber(p); - - if (ScanDirectionIsBackward(dir)) -n = OffsetNumberPrev(stk-rts_child); - else -n = OffsetNumberNext(stk-rts_child); - so-s_stack = stk-rts_parent; - pfree(stk); - - n = findnext(s, p, n, dir); - } - if (po-flags F_LEAF) + if (res == true s-ignore_killed_tuples) { - ItemPointerSet((s-currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - s-xs_ctup.t_self = it-t_tid; - - ReleaseBuffer(b); - return true; + offnum =
[PATCHES] rtree: improve performance, tuple killing
This patch makes some improvements to the rtree index implementation: (1) Keep a pin on the scan's current buffer and mark buffer. This avoids the need to do a ReadBuffer() for each tuple produced by the scan. (2) Convert a ReleaseBuffer() ; ReadBuffer() pair into ReleaseAndReadBuffer(). Surely not a huge win, but it saves a lock acquire/release... (3) Remove a bunch of duplicated code in rtget.c; make rtnext() handle both the initial result and subsequent result cases. (4) Add support for index tuple killing (5) Remove rtscancache(): it is dead code, for the same reason that gistscancache() is dead code (an index scan ought not be invoked with NoMovementScanDirection). The end result is about a 10% improvement in index scan performance, according to contrib/rtree_gist/bench. These changes (with the exception of #2) are analogous to changes I've already made for GiST (it's clear that GiST was started as a fork of rtree). I'm not hugely interested in further improvements to rtree; I just did this stuff because it is low-hanging fruit and I've already made the same changes for GiST. -Neil # # patch src/backend/access/rtree/rtget.c # from [3db9a1c0391d9e319ac8455167f23215e00bf832] #to [ebfe121a3bcd3e2a15da91e14a84b69c87afac67] # # patch src/backend/access/rtree/rtree.c # from [274583f574bc483a709b46912a567c58c6abb98c] #to [8df26216af5a60db10afd38e3f1dc5c6355ec79c] # # patch src/backend/access/rtree/rtscan.c # from [9ace8f57b28397296b386955fdf3b7b712178a9a] #to [92b177352fd28ffab208ac9abf502cb4c81d2740] # # patch src/include/access/rtree.h # from [d3b048ba826b36bc766de324779c0b5bbb143b42] #to [e4947131a99641d8f43c4b7b770485c8c1043094] # --- src/backend/access/rtree/rtget.c +++ src/backend/access/rtree/rtget.c @@ -19,10 +19,8 @@ #include access/relscan.h #include access/rtree.h -static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n, +static OffsetNumber findnext(IndexScanDesc s, OffsetNumber n, ScanDirection dir); -static bool rtscancache(IndexScanDesc s, ScanDirection dir); -static bool rtfirst(IndexScanDesc s, ScanDirection dir); static bool rtnext(IndexScanDesc s, ScanDirection dir); @@ -31,138 +29,106 @@ { IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - bool res; - - /* if we have it cached in the scan desc, just return the value */ - if (rtscancache(s, dir)) - PG_RETURN_BOOL(true); - - /* not cached, so we'll have to do some work */ - if (ItemPointerIsValid((s-currentItemData))) - res = rtnext(s, dir); - else - res = rtfirst(s, dir); - PG_RETURN_BOOL(res); -} - -static bool -rtfirst(IndexScanDesc s, ScanDirection dir) -{ - Buffer b; - Page p; - OffsetNumber n; - OffsetNumber maxoff; - RTreePageOpaque po; + Page page; + OffsetNumber offnum; RTreeScanOpaque so; - RTSTACK*stk; - BlockNumber blk; - IndexTuple it; - b = ReadBuffer(s-indexRelation, P_ROOT); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); so = (RTreeScanOpaque) s-opaque; - for (;;) + /* + * If we've already produced a tuple and the executor has informed + * us that it should be marked killed, do so know. + */ + if (s-kill_prior_tuple ItemPointerIsValid((s-currentItemData))) { - maxoff = PageGetMaxOffsetNumber(p); - if (ScanDirectionIsBackward(dir)) - n = findnext(s, p, maxoff, dir); - else - n = findnext(s, p, FirstOffsetNumber, dir); + offnum = ItemPointerGetOffsetNumber((s-currentItemData)); + page = BufferGetPage(so-curbuf); + PageGetItemId(page, offnum)-lp_flags |= LP_DELETE; + SetBufferCommitInfoNeedsSave(so-curbuf); + } - while (n FirstOffsetNumber || n maxoff) - { - ReleaseBuffer(b); - if (so-s_stack == NULL) -return false; + /* + * Get the next tuple that matches the search key; if asked to + * skip killed tuples, find the first non-killed tuple that + * matches. Return as soon as we've run out of matches or we've + * found an acceptable match. + */ + for (;;) + { + bool res = rtnext(s, dir); - stk = so-s_stack; - b = ReadBuffer(s-indexRelation, stk-rts_blk); - p = BufferGetPage(b); - po = (RTreePageOpaque) PageGetSpecialPointer(p); - maxoff = PageGetMaxOffsetNumber(p); - - if (ScanDirectionIsBackward(dir)) -n = OffsetNumberPrev(stk-rts_child); - else -n = OffsetNumberNext(stk-rts_child); - so-s_stack = stk-rts_parent; - pfree(stk); - - n = findnext(s, p, n, dir); - } - if (po-flags F_LEAF) + if (res == true s-ignore_killed_tuples) { - ItemPointerSet((s-currentItemData), BufferGetBlockNumber(b), n); - - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - - s-xs_ctup.t_self = it-t_tid; - - ReleaseBuffer(b); - return true; + offnum = ItemPointerGetOffsetNumber((s-currentItemData)); + page = BufferGetPage(so-curbuf); + if (ItemIdDeleted(PageGetItemId(page, offnum))) +continue; } - else - { - stk = (RTSTACK *) palloc(sizeof(RTSTACK)); - stk-rts_child = n; -