Hi,

Our customer experienced a significant slowdown on queries involving
Index Only Scan. As it turned out, the problem was constant pin-unpin of
the visibility map page. IOS caches only one vm page, which covers
8192 * 8 / 2 heap blocks of 8192 bytes each = 256 MB of data; if the table is larger
and the order of access (index) doesn't match the order of data, vm page
will be replaced on each tuple processing. That's costly. Attached
ios.sql script emulates this worst case behaviour. In current master,
select takes

[local]:5432 ars@postgres:21052=# explain analyse select * from test order by 
id;
                                                                QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------
 Index Only Scan using test_idx on test  (cost=0.44..1159381.24 rows=59013120 
width=8) (actual time=0.015..9094.532 rows=59013120 loops=1)
   Heap Fetches: 0
 Planning Time: 0.043 ms
 Execution Time: 10508.576 ms


Attached straightforward patch increases the cache to store 64 pages (16
GB of data). With it, we get

[local]:5432 ars@postgres:17427=# explain analyse select * from test order by 
id;
                                                                QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------
 Index Only Scan using test_idx on test  (cost=0.44..1159381.24 rows=59013120 
width=8) (actual time=0.040..3469.299 rows=59013120 loops=1)
   Heap Fetches: 0
 Planning Time: 0.118 ms
 Execution Time: 4871.124 ms

(I believe the whole index is cached in these tests)

You might say 16GB is also a somewhat arbitrary boundary. Well, it is. We
could make it GUC-able, but I'm not sure here as the setting is rather
low-level, and at the same time having several dozens of additionally
pinned buffers doesn't sound too criminal, i.e. I doubt there is a real
risk of "no unpinned buffers available" or something (e.g. even default
32MB shared_buffers contain 4096 pages). However, forcing IOS to be
inefficient if the table is larger is also silly. Any thoughts?

(the code is by K. Knizhnik, testing by M. Zhilin and R. Zharkov, I've
only polished the things up)

Attachment: ios.sql
Description: application/sql

commit 3726b6ffdba5461dcbf82cce5de13760a2642468
Author: Konstantin Knizhnik <k.knizh...@postgrespro.ru>
Date:   Fri Jan 15 12:57:46 2021 +0300

    Increase IOS vm cache.

diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index e198df65d82..98e249b0c2a 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -99,24 +99,6 @@
 
 /*#define TRACE_VISIBILITYMAP */
 
-/*
- * Size of the bitmap on each visibility map page, in bytes. There's no
- * extra headers, so the whole page minus the standard page header is
- * used for the bitmap.
- */
-#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-
-/* Number of heap blocks we can represent in one byte */
-#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
-
-/* Number of heap blocks we can represent in one visibility map page. */
-#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
-
-/* Mapping from heap block number to the right bit in the visibility map */
-#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
-#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
-#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
-
 /* Masks for counting subsets of bits in the visibility map. */
 #define VISIBLE_MASK64	UINT64CONST(0x5555555555555555) /* The lower bit of each
 														 * bit pair */
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 0754e28a9aa..47c6e8929a3 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -101,7 +101,9 @@ IndexOnlyNext(IndexOnlyScanState *node)
 
 		/* Set it up for index-only scan */
 		node->ioss_ScanDesc->xs_want_itup = true;
-		node->ioss_VMBuffer = InvalidBuffer;
+
+		for (int i = 0; i < VMBUF_SIZE; i++)
+			node->ioss_VMBuffer[i] = InvalidBuffer;
 
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
@@ -121,6 +123,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
 	{
 		bool		tuple_from_heap = false;
+		Buffer		*vm_buf;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -158,9 +161,11 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 * It's worth going through this complexity to avoid needing to lock
 		 * the VM buffer, which could cause significant contention.
 		 */
+		vm_buf = &node->ioss_VMBuffer[HEAPBLK_TO_MAPBLOCK(
+				ItemPointerGetBlockNumber(tid)) % VMBUF_SIZE];
 		if (!VM_ALL_VISIBLE(scandesc->heapRelation,
 							ItemPointerGetBlockNumber(tid),
-							&node->ioss_VMBuffer))
+							vm_buf))
 		{
 			/*
 			 * Rats, we have to visit the heap to check visibility.
@@ -377,11 +382,14 @@ ExecEndIndexOnlyScan(IndexOnlyScanState *node)
 	indexRelationDesc = node->ioss_RelationDesc;
 	indexScanDesc = node->ioss_ScanDesc;
 
-	/* Release VM buffer pin, if any. */
-	if (node->ioss_VMBuffer != InvalidBuffer)
+	/* Release VM buffer pins, if any. */
+	for (int i = 0; i < VMBUF_SIZE; i++)
 	{
-		ReleaseBuffer(node->ioss_VMBuffer);
-		node->ioss_VMBuffer = InvalidBuffer;
+		if (node->ioss_VMBuffer[i] != InvalidBuffer)
+		{
+			ReleaseBuffer(node->ioss_VMBuffer[i]);
+			node->ioss_VMBuffer[i] = InvalidBuffer;
+		}
 	}
 
 	/*
@@ -680,7 +688,8 @@ ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
 								 node->ioss_NumOrderByKeys,
 								 piscan);
 	node->ioss_ScanDesc->xs_want_itup = true;
-	node->ioss_VMBuffer = InvalidBuffer;
+	for (int i = 0; i < VMBUF_SIZE; i++)
+		node->ioss_VMBuffer[i] = InvalidBuffer;
 
 	/*
 	 * If no run-time keys to calculate or they are ready, go ahead and pass
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 57362c36876..0c9bda8de17 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -28,6 +28,24 @@
 #define VISIBILITYMAP_VALID_BITS	0x03	/* OR of all valid visibilitymap
 											 * flags bits */
 
+/*
+ * Size of the bitmap on each visibility map page, in bytes. There's no
+ * extra headers, so the whole page minus the standard page header is
+ * used for the bitmap.
+ */
+#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
+
+/* Number of heap blocks we can represent in one byte */
+#define HEAPBLOCKS_PER_BYTE (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)
+
+/* Number of heap blocks we can represent in one visibility map page. */
+#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
+
+/* Mapping from heap block number to the right bit in the visibility map */
+#define HEAPBLK_TO_MAPBLOCK(x) ((x) / HEAPBLOCKS_PER_PAGE)
+#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
+#define HEAPBLK_TO_OFFSET(x) (((x) % HEAPBLOCKS_PER_BYTE) * BITS_PER_HEAPBLOCK)
+
 /* Macros for visibilitymap test */
 #define VM_ALL_VISIBLE(r, b, v) \
 	((visibilitymap_get_status((r), (b), (v)) & VISIBILITYMAP_ALL_VISIBLE) != 0)
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e31ad6204e6..15290be11c6 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1439,6 +1439,8 @@ typedef struct IndexScanState
 	Size		iss_PscanLen;
 } IndexScanState;
 
+#define VMBUF_SIZE 64
+
 /* ----------------
  *	 IndexOnlyScanState information
  *
@@ -1473,7 +1475,7 @@ typedef struct IndexOnlyScanState
 	Relation	ioss_RelationDesc;
 	struct IndexScanDescData *ioss_ScanDesc;
 	TupleTableSlot *ioss_TableSlot;
-	Buffer		ioss_VMBuffer;
+	Buffer		ioss_VMBuffer[VMBUF_SIZE];
 	Size		ioss_PscanLen;
 } IndexOnlyScanState;
 

--
Arseny Sher
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company

Reply via email to