Hi, Our customer experienced a significant slowdown on queries involving Index Only Scan. As it turned out, the problem was constant pin-unpin of the visibility map page. IOS caches only one vm page, which corresponds to 8192 * 8 / 2 * 8192 bytes = 256 MB of data; if the table is larger and the order of access (index) doesn't match the order of data, the cached vm page will be replaced for each tuple processed. That's costly. Attached ios.sql script emulates this worst case behaviour. In current master, select takes
[local]:5432 ars@postgres:21052=# explain analyse select * from test order by id; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------- Index Only Scan using test_idx on test (cost=0.44..1159381.24 rows=59013120 width=8) (actual time=0.015..9094.532 rows=59013120 loops=1) Heap Fetches: 0 Planning Time: 0.043 ms Execution Time: 10508.576 ms Attached straightforward patch increases the cache to store 64 pages (16 GB of data). With it, we get [local]:5432 ars@postgres:17427=# explain analyse select * from test order by id; QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------- Index Only Scan using test_idx on test (cost=0.44..1159381.24 rows=59013120 width=8) (actual time=0.040..3469.299 rows=59013120 loops=1) Heap Fetches: 0 Planning Time: 0.118 ms Execution Time: 4871.124 ms (I believe the whole index is cached in these tests) You might say 16GB is also somewhat arbitrary border. Well, it is. We could make it GUC-able, but I'm not sure here as the setting is rather low-level, and at the same time having several dozens of additionally pinned buffers doesn't sound too criminal, i.e. I doubt there is a real risk of "no unpinned buffers available" or something (e.g. even default 32MB shared_buffers contain 4096 pages). However, forcing IOS to be inefficient if the table is larger is also silly. Any thoughts? (the code is by K. Knizhnik, testing by M. Zhilin and R. Zharkov, I've only polished the things up)
ios.sql
Description: application/sql
commit 3726b6ffdba5461dcbf82cce5de13760a2642468 Author: Konstantin Knizhnik <k.knizh...@postgrespro.ru> Date: Fri Jan 15 12:57:46 2021 +0300 Increase IOS vm cache. diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index e198df65d82..98e249b0c2a 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -99,24 +99,6 @@ /*#define TRACE_VISIBILITYMAP */ -/* - * Size of the bitmap on each visibility map page, in bytes. There's no - * extra headers, so the whole page minus the standard page header is - * used for the bitmap. - */ -#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData)) - -/* Number of heap blocks we can represent in one byte */ -#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK) - -/* Number of heap blocks we can represent in one visibility map page. */ -#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE) - -/* Mapping from heap block number to the right bit in the visibility map */ -#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE) -#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) -#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) - /* Masks for counting subsets of bits in the visibility map. 
*/ #define VISIBLE_MASK64 UINT64CONST(0x5555555555555555) /* The lower bit of each * bit pair */ diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 0754e28a9aa..47c6e8929a3 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -101,7 +101,9 @@ IndexOnlyNext(IndexOnlyScanState *node) /* Set it up for index-only scan */ node->ioss_ScanDesc->xs_want_itup = true; - node->ioss_VMBuffer = InvalidBuffer; + + for (int i = 0; i < VMBUF_SIZE; i++) + node->ioss_VMBuffer[i] = InvalidBuffer; /* * If no run-time keys to calculate or they are ready, go ahead and @@ -121,6 +123,7 @@ IndexOnlyNext(IndexOnlyScanState *node) while ((tid = index_getnext_tid(scandesc, direction)) != NULL) { bool tuple_from_heap = false; + Buffer *vm_buf; CHECK_FOR_INTERRUPTS(); @@ -158,9 +161,11 @@ IndexOnlyNext(IndexOnlyScanState *node) * It's worth going through this complexity to avoid needing to lock * the VM buffer, which could cause significant contention. */ + vm_buf = &node->ioss_VMBuffer[HEAPBLK_TO_MAPBLOCK( + ItemPointerGetBlockNumber(tid)) % VMBUF_SIZE]; if (!VM_ALL_VISIBLE(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), - &node->ioss_VMBuffer)) + vm_buf)) { /* * Rats, we have to visit the heap to check visibility. @@ -377,11 +382,14 @@ ExecEndIndexOnlyScan(IndexOnlyScanState *node) indexRelationDesc = node->ioss_RelationDesc; indexScanDesc = node->ioss_ScanDesc; - /* Release VM buffer pin, if any. */ - if (node->ioss_VMBuffer != InvalidBuffer) + /* Release VM buffer pins, if any. 
*/ + for (int i = 0; i < VMBUF_SIZE; i++) { - ReleaseBuffer(node->ioss_VMBuffer); - node->ioss_VMBuffer = InvalidBuffer; + if (node->ioss_VMBuffer[i] != InvalidBuffer) + { + ReleaseBuffer(node->ioss_VMBuffer[i]); + node->ioss_VMBuffer[i] = InvalidBuffer; + } } /* @@ -680,7 +688,8 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node, node->ioss_NumOrderByKeys, piscan); node->ioss_ScanDesc->xs_want_itup = true; - node->ioss_VMBuffer = InvalidBuffer; + for (int i = 0; i < VMBUF_SIZE; i++) + node->ioss_VMBuffer[i] = InvalidBuffer; /* * If no run-time keys to calculate or they are ready, go ahead and pass diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h index 57362c36876..0c9bda8de17 100644 --- a/src/include/access/visibilitymap.h +++ b/src/include/access/visibilitymap.h @@ -28,6 +28,24 @@ #define VISIBILITYMAP_VALID_BITS 0x03 /* OR of all valid visibilitymap * flags bits */ +/* + * Size of the bitmap on each visibility map page, in bytes. There's no + * extra headers, so the whole page minus the standard page header is + * used for the bitmap. + */ +#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData)) + +/* Number of heap blocks we can represent in one byte */ +#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK) + +/* Number of heap blocks we can represent in one visibility map page. 
*/ +#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE) + +/* Mapping from heap block number to the right bit in the visibility map */ +#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE) +#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) +#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK) + /* Macros for visibilitymap test */ #define VM_ALL_VISIBLE(r, b, v) \ ((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_VISIBLE) != 0) diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index e31ad6204e6..15290be11c6 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1439,6 +1439,8 @@ typedef struct IndexScanState Size iss_PscanLen; } IndexScanState; +#define VMBUF_SIZE 64 + /* ---------------- * IndexOnlyScanState information * @@ -1473,7 +1475,7 @@ typedef struct IndexOnlyScanState Relation ioss_RelationDesc; struct IndexScanDescData *ioss_ScanDesc; TupleTableSlot *ioss_TableSlot; - Buffer ioss_VMBuffer; + Buffer ioss_VMBuffer[VMBUF_SIZE]; Size ioss_PscanLen; } IndexOnlyScanState;
-- Arseny Sher Postgres Professional: http://www.postgrespro.com The Russian Postgres Company