Hi hackers.
_bt_readpage performs a key check for each item on the page, trying to
locate the upper boundary.
While comparison of simple integer keys is very fast, comparison of
long strings can be quite expensive.
We can first check the largest key on the page and, if it is not
larger than the upper boundary, skip the checks for all other elements.
In this quite artificial example, such an optimization gives a 3x speed-up:
create table t(t text primary key);
insert into t values ('primary key-'||generate_series(1,10000000)::text);
select count(*) from t where t between 'primary key-1000000' and 'primary key-2000000';
On my notebook with large enough shared buffers and concurrency disabled,
the difference is 83 vs. 247 msec
For integer keys the difference is much smaller: 69 vs. 82 msec
Certainly I realize that this example is quite exotic: most DBAs
prefer integer keys, and such large ranges are quite rare.
But still such large range queries are used.
And I have checked that the proposed patch doesn't cause a slowdown of
exact search.

diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 263f75fce9..0f6a767409 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -888,7 +888,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* Examine the scan keys and eliminate any redundant keys; also mark the
* keys that must be matched to continue the scan.
*/
- _bt_preprocess_keys(scan);
+ _bt_preprocess_keys(scan, dir);
/*
* Quit now if _bt_preprocess_keys() discovered that the scan keys can
@@ -1531,6 +1531,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
int itemIndex;
bool continuescan;
int indnatts;
+ IndexTuple itup;
+ bool all_fit = false;
/*
* We must have the buffer pinned and locked, but the usual macro can't be
@@ -1584,6 +1586,17 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
*/
Assert(BTScanPosIsPinned(so->currPos));
+ /*
+  * Do not perform this optimization for the first index page, to avoid
+  * slowing down point (exact-match) queries. so->has_matches is set at the
+  * end of _bt_readpage, so we do not try to perform this optimization on
+  * the first invocation of _bt_readpage. This also ensures that we have
+  * already found items matching the low boundary, so only the upper
+  * boundary has to be checked.
+  */
+ if (so->is_range_search && so->has_matches) {
+ bool temp;
+ /* First try to compare the minimal/maximal key on the page, to avoid checks of all other items on the page */
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, ScanDirectionIsForward(dir) ? maxoff : minoff));
+ all_fit = _bt_checkkeys(scan, itup, indnatts, dir, &temp);
+ }
+
if (ScanDirectionIsForward(dir))
{
/* load items[] in ascending order */
@@ -1594,7 +1607,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
while (offnum <= maxoff)
{
ItemId iid = PageGetItemId(page, offnum);
- IndexTuple itup;
/*
* If the scan specifies not to return killed tuples, then we
@@ -1608,7 +1620,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
itup = (IndexTuple) PageGetItem(page, iid);
- if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
+ if (all_fit || _bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
{
/* tuple passes all scan key conditions */
if (!BTreeTupleIsPosting(itup))
@@ -1661,9 +1673,9 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
if (continuescan && !P_RIGHTMOST(opaque))
{
ItemId iid = PageGetItemId(page, P_HIKEY);
- IndexTuple itup = (IndexTuple) PageGetItem(page, iid);
int truncatt;
+ itup = (IndexTuple) PageGetItem(page, iid);
truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
}
@@ -1686,7 +1698,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
while (offnum >= minoff)
{
ItemId iid = PageGetItemId(page, offnum);
- IndexTuple itup;
bool tuple_alive;
bool passes_quals;
@@ -1716,8 +1727,8 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
itup = (IndexTuple) PageGetItem(page, iid);
- passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
- &continuescan);
+ passes_quals = all_fit || _bt_checkkeys(scan, itup, indnatts, dir,
+ &continuescan);
if (passes_quals && tuple_alive)
{
/* tuple passes all scan key conditions */
@@ -1771,8 +1782,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
so->currPos.lastItem = MaxTIDsPerBTreePage - 1;
so->currPos.itemIndex = MaxTIDsPerBTreePage - 1;
}
-
- return (so->currPos.firstItem <= so->currPos.lastItem);
+ return so->has_matches = so->currPos.firstItem <= so->currPos.lastItem;
}
/* Save an index item into so->currPos.items[itemIndex] */
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 05abf36032..50701dc9d9 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -654,7 +654,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
*/
if (changed)
{
- _bt_preprocess_keys(scan);
+ _bt_preprocess_keys(scan, NoMovementScanDirection);
/* The mark should have been set on a consistent set of keys... */
Assert(so->qual_ok);
}
@@ -746,7 +746,7 @@ _bt_restore_array_keys(IndexScanDesc scan)
* new elements of array keys. Therefore we can't overwrite the source data.
*/
void
-_bt_preprocess_keys(IndexScanDesc scan)
+_bt_preprocess_keys(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int numberOfKeys = scan->numberOfKeys;
@@ -761,9 +761,12 @@ _bt_preprocess_keys(IndexScanDesc scan)
int i,
j;
AttrNumber attno;
+ bool low_boundary, high_boundary;
/* initialize result variables */
so->qual_ok = true;
+ so->is_range_search = true;
+ so->has_matches = false;
so->numberOfKeys = 0;
if (numberOfKeys < 1)
@@ -794,6 +797,11 @@ _bt_preprocess_keys(IndexScanDesc scan)
/* We can mark the qual as required if it's for first index col */
if (cur->sk_attno == 1)
_bt_mark_scankey_required(outkeys);
+
+ /* Check if we can perform range search optimization */
+ so->is_range_search =
+ (ScanDirectionIsForward(dir) & ((outkeys->sk_flags & (SK_ISNULL|SK_ROW_HEADER|SK_BT_REQFWD)) == SK_BT_REQFWD)) |
+ (ScanDirectionIsBackward(dir) & ((outkeys->sk_flags & (SK_ISNULL|SK_ROW_HEADER|SK_BT_REQBKWD)) == SK_BT_REQBKWD));
return;
}
@@ -1009,6 +1017,21 @@ _bt_preprocess_keys(IndexScanDesc scan)
}
}
+ /* Check if we can perform range search optimization */
+ if (new_numberOfKeys == 2)
+ {
+ low_boundary = false;
+ high_boundary = false;
+ for (i = 0; i < 2; i++)
+ {
+ ScanKey key = &outkeys[i];
+ if ((key->sk_flags & (SK_ISNULL|SK_ROW_HEADER|SK_BT_REQFWD)) == SK_BT_REQFWD)
+ high_boundary = true;
+ else if ((key->sk_flags & (SK_ISNULL|SK_ROW_HEADER|SK_BT_REQBKWD)) == SK_BT_REQBKWD)
+ low_boundary = true;
+ }
+ so->is_range_search = low_boundary & high_boundary;
+ }
so->numberOfKeys = new_numberOfKeys;
}
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index d684786095..160f45f857 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1035,6 +1035,8 @@ typedef struct BTScanOpaqueData
{
/* these fields are set by _bt_preprocess_keys(): */
bool qual_ok; /* false if qual can never be satisfied */
+ bool is_range_search;/* true if the scan is limited by an upper bound */
+ bool has_matches; /* true if index scan already found some matches */
int numberOfKeys; /* number of preprocessed scan keys */
ScanKey keyData; /* array of preprocessed scan keys */
@@ -1250,7 +1252,7 @@ extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
extern void _bt_mark_array_keys(IndexScanDesc scan);
extern void _bt_restore_array_keys(IndexScanDesc scan);
-extern void _bt_preprocess_keys(IndexScanDesc scan);
+extern void _bt_preprocess_keys(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple,
int tupnatts, ScanDirection dir, bool *continuescan);
extern void _bt_killitems(IndexScanDesc scan);