io_combine_limit 1, effective_io_concurrency 16, read ahead kb 16

On Fri, Feb 14, 2025 at 12:18 PM Tomas Vondra <to...@vondra.me> wrote:
>
> Based on off-list discussion with Melanie, I ran a modified version of
> the benchmark, with these changes:

Thanks! It looks like the worst offender is io_combine_limit 1 (128
kB), effective_io_concurrency 16, read_ahead_kb 16. This is a
combination that people shouldn't run in practice, I think -- an
io_combine_limit larger than the read-ahead size.

But we do still see regressions with io_combine_limit 1,
effective_io_concurrency 16 at other read_ahead_kb values. Therefore
I'd be interested to see that subset (ioc 1, eic 16, different
read_ahead_kb values) rerun with the attached patch.

> FWIW this does not change anything in the detection of sequential access
> patterns, discussed nearby, because the benchmarks started before Andres
> looked into that. If needed, I can easily rerun these tests, I just need
> a patch to apply.

Attached is a patch with all the commits squashed + the sequential
detection removed.

- Melanie
From 0617aebfdd635ba75f3cc7bc44cc9e72431aa9c5 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Fri, 17 Jan 2025 16:42:25 -0500
Subject: [PATCH] Streaming BHS

---
 src/backend/access/gin/ginget.c           |  39 +-
 src/backend/access/gin/ginscan.c          |   2 +-
 src/backend/access/heap/heapam.c          |  70 ++++
 src/backend/access/heap/heapam_handler.c  | 366 ++++++++++---------
 src/backend/access/table/tableamapi.c     |   2 -
 src/backend/executor/nodeBitmapHeapscan.c | 424 ++--------------------
 src/backend/nodes/tidbitmap.c             | 101 +++---
 src/backend/optimizer/util/plancat.c      |   2 +-
 src/backend/storage/aio/read_stream.c     |   4 +-
 src/include/access/gin_private.h          |   7 +-
 src/include/access/relscan.h              |   8 +-
 src/include/access/tableam.h              | 121 ++----
 src/include/nodes/execnodes.h             |  23 +-
 src/include/nodes/tidbitmap.h             |  18 +-
 14 files changed, 411 insertions(+), 776 deletions(-)

diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 63dd1f3679f..ac1b749ed98 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -332,10 +332,11 @@ restartScanEntry:
 	entry->list = NULL;
 	entry->nlist = 0;
 	entry->matchBitmap = NULL;
-	entry->matchResult = NULL;
+	entry->matchResult.blockno = InvalidBlockNumber;
 	entry->reduceResult = false;
 	entry->predictNumberResult = 0;
 
+
 	/*
 	 * we should find entry, and begin scan of posting tree or just store
 	 * posting list in memory
@@ -824,19 +825,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 		{
 			/*
 			 * If we've exhausted all items on this block, move to next block
-			 * in the bitmap.
+			 * in the bitmap. tbm_private_iterate() sets matchResult.blockno
+			 * to InvalidBlockNumber when the bitmap is exhausted.
 			 */
-			while (entry->matchResult == NULL ||
-				   (entry->matchResult->ntuples >= 0 &&
-					entry->offset >= entry->matchResult->ntuples) ||
-				   entry->matchResult->blockno < advancePastBlk ||
+			while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
+				   (entry->matchResult.ntuples >= 0 &&
+					entry->offset >= entry->matchResult.ntuples) ||
+				   entry->matchResult.blockno < advancePastBlk ||
 				   (ItemPointerIsLossyPage(&advancePast) &&
-					entry->matchResult->blockno == advancePastBlk))
+					entry->matchResult.blockno == advancePastBlk))
 			{
-				entry->matchResult =
-					tbm_private_iterate(entry->matchIterator);
+				tbm_private_iterate(entry->matchIterator, &entry->matchResult);
 
-				if (entry->matchResult == NULL)
+				if (!BlockNumberIsValid(entry->matchResult.blockno))
 				{
 					ItemPointerSetInvalid(&entry->curItem);
 					tbm_end_private_iterate(entry->matchIterator);
@@ -847,7 +848,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 
 				/*
 				 * Reset counter to the beginning of entry->matchResult. Note:
-				 * entry->offset is still greater than matchResult->ntuples if
+				 * entry->offset is still greater than matchResult.ntuples if
 				 * matchResult is lossy.  So, on next call we will get next
 				 * result from TIDBitmap.
 				 */
@@ -860,10 +861,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 			 * We're now on the first page after advancePast which has any
 			 * items on it. If it's a lossy result, return that.
 			 */
-			if (entry->matchResult->ntuples < 0)
+			if (entry->matchResult.ntuples < 0)
 			{
 				ItemPointerSetLossyPage(&entry->curItem,
-										entry->matchResult->blockno);
+										entry->matchResult.blockno);
 
 				/*
 				 * We might as well fall out of the loop; we could not
@@ -877,27 +878,27 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
 			 * Not a lossy page. Skip over any offsets <= advancePast, and
 			 * return that.
 			 */
-			if (entry->matchResult->blockno == advancePastBlk)
+			if (entry->matchResult.blockno == advancePastBlk)
 			{
 				/*
 				 * First, do a quick check against the last offset on the
 				 * page. If that's > advancePast, so are all the other
 				 * offsets, so just go back to the top to get the next page.
 				 */
-				if (entry->matchResult->offsets[entry->matchResult->ntuples - 1] <= advancePastOff)
+				if (entry->matchResult.offsets[entry->matchResult.ntuples - 1] <= advancePastOff)
 				{
-					entry->offset = entry->matchResult->ntuples;
+					entry->offset = entry->matchResult.ntuples;
 					continue;
 				}
 
 				/* Otherwise scan to find the first item > advancePast */
-				while (entry->matchResult->offsets[entry->offset] <= advancePastOff)
+				while (entry->matchResult.offsets[entry->offset] <= advancePastOff)
 					entry->offset++;
 			}
 
 			ItemPointerSet(&entry->curItem,
-						   entry->matchResult->blockno,
-						   entry->matchResult->offsets[entry->offset]);
+						   entry->matchResult.blockno,
+						   entry->matchResult.offsets[entry->offset]);
 			entry->offset++;
 
 			/* Done unless we need to reduce the result */
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 7d1e6615260..625140fdf25 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -106,7 +106,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
 	ItemPointerSetMin(&scanEntry->curItem);
 	scanEntry->matchBitmap = NULL;
 	scanEntry->matchIterator = NULL;
-	scanEntry->matchResult = NULL;
+	scanEntry->matchResult.blockno = InvalidBlockNumber;
 	scanEntry->list = NULL;
 	scanEntry->nlist = 0;
 	scanEntry->offset = InvalidOffsetNumber;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ea0a12b39af..45f419f849f 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -279,6 +279,62 @@ heap_scan_stream_read_next_serial(ReadStream *stream,
 	return scan->rs_prefetch_block;
 }
 
+/*
+ * Read stream API callback for bitmap heap scans.
+ * Returns the next block the caller wants from the read stream or
+ * InvalidBlockNumber when done.
+ */
+static BlockNumber
+bitmapheap_stream_read_next(ReadStream *pgsr, void *private_data,
+							void *per_buffer_data)
+{
+	TBMIterateResult *tbmres = per_buffer_data;
+	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) private_data;
+	HeapScanDesc hscan = (HeapScanDesc) bscan;
+	TableScanDesc sscan = &hscan->rs_base;
+
+	for (;;)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		tbm_iterate(&sscan->st.bitmap.rs_iterator, tbmres);
+
+		/* no more entries in the bitmap */
+		if (!BlockNumberIsValid(tbmres->blockno))
+			return InvalidBlockNumber;
+
+		/*
+		 * Ignore any claimed entries past what we think is the end of the
+		 * relation. It may have been extended after the start of our scan (we
+		 * only hold an AccessShareLock, and it could be inserts from this
+		 * backend).  We don't take this optimization in SERIALIZABLE
+		 * isolation though, as we need to examine all invisible tuples
+		 * reachable by the index.
+		 */
+		if (!IsolationIsSerializable() &&
+			tbmres->blockno >= hscan->rs_nblocks)
+			continue;
+
+		/*
+		 * We can skip fetching the heap page if we don't need any fields from
+		 * the heap, the bitmap entries don't need rechecking, and all tuples
+		 * on the page are visible to our transaction.
+		 */
+		if (!(sscan->rs_flags & SO_NEED_TUPLES) &&
+			!tbmres->recheck &&
+			VM_ALL_VISIBLE(sscan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer))
+		{
+			bscan->rs_empty_tuples_pending += tbmres->ntuples;
+			continue;
+		}
+
+		return tbmres->blockno;
+	}
+
+	/* not reachable */
+	Assert(false);
+}
+
 /* ----------------
  *		initscan - scan code common to heap_beginscan and heap_rescan
  * ----------------
@@ -1067,6 +1123,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
 	scan->rs_strategy = NULL;	/* set in initscan */
+	scan->rs_cbuf = InvalidBuffer;
 
 	/*
 	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
@@ -1146,6 +1203,16 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 														  scan,
 														  0);
 	}
+	else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
+	{
+		scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_DEFAULT,
+														  scan->rs_strategy,
+														  scan->rs_base.rs_rd,
+														  MAIN_FORKNUM,
+														  bitmapheap_stream_read_next,
+														  scan,
+														  sizeof(TBMIterateResult));
+	}
 
 
 	return (TableScanDesc) scan;
@@ -1180,7 +1247,10 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 	 * unpin scan buffers
 	 */
 	if (BufferIsValid(scan->rs_cbuf))
+	{
 		ReleaseBuffer(scan->rs_cbuf);
+		scan->rs_cbuf = InvalidBuffer;
+	}
 
 	if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
 	{
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index c0bec014154..f6387f60e3e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -56,6 +56,11 @@ static bool SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 
 static BlockNumber heapam_scan_get_blocks_done(HeapScanDesc hscan);
 
+static bool BitmapHeapScanNextBlock(TableScanDesc scan,
+									bool *recheck,
+									uint64 *lossy_pages, uint64 *exact_pages);
+
+
 
 /* ------------------------------------------------------------------------
  * Slot related callbacks for heap AM
@@ -2116,199 +2121,44 @@ heapam_estimate_rel_size(Relation rel, int32 *attr_widths,
  */
 
 static bool
-heapam_scan_bitmap_next_block(TableScanDesc scan,
-							  BlockNumber *blockno, bool *recheck,
-							  uint64 *lossy_pages, uint64 *exact_pages)
+heapam_scan_bitmap_next_tuple(TableScanDesc scan,
+							  TupleTableSlot *slot,
+							  bool *recheck,
+							  uint64 *lossy_pages,
+							  uint64 *exact_pages)
 {
 	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
 	HeapScanDesc hscan = (HeapScanDesc) bscan;
-	BlockNumber block;
-	Buffer		buffer;
-	Snapshot	snapshot;
-	int			ntup;
-	TBMIterateResult *tbmres;
-
-	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
-
-	hscan->rs_cindex = 0;
-	hscan->rs_ntuples = 0;
-
-	*blockno = InvalidBlockNumber;
-	*recheck = true;
-
-	do
-	{
-		CHECK_FOR_INTERRUPTS();
-
-		tbmres = tbm_iterate(&scan->st.rs_tbmiterator);
-
-		if (tbmres == NULL)
-			return false;
-
-		/*
-		 * Ignore any claimed entries past what we think is the end of the
-		 * relation. It may have been extended after the start of our scan (we
-		 * only hold an AccessShareLock, and it could be inserts from this
-		 * backend).  We don't take this optimization in SERIALIZABLE
-		 * isolation though, as we need to examine all invisible tuples
-		 * reachable by the index.
-		 */
-	} while (!IsolationIsSerializable() &&
-			 tbmres->blockno >= hscan->rs_nblocks);
-
-	/* Got a valid block */
-	*blockno = tbmres->blockno;
-	*recheck = tbmres->recheck;
-
-	/*
-	 * We can skip fetching the heap page if we don't need any fields from the
-	 * heap, the bitmap entries don't need rechecking, and all tuples on the
-	 * page are visible to our transaction.
-	 */
-	if (!(scan->rs_flags & SO_NEED_TUPLES) &&
-		!tbmres->recheck &&
-		VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer))
-	{
-		/* can't be lossy in the skip_fetch case */
-		Assert(tbmres->ntuples >= 0);
-		Assert(bscan->rs_empty_tuples_pending >= 0);
-
-		bscan->rs_empty_tuples_pending += tbmres->ntuples;
-
-		return true;
-	}
-
-	block = tbmres->blockno;
-
-	/*
-	 * Acquire pin on the target heap page, trading in any pin we held before.
-	 */
-	hscan->rs_cbuf = ReleaseAndReadBuffer(hscan->rs_cbuf,
-										  scan->rs_rd,
-										  block);
-	hscan->rs_cblock = block;
-	buffer = hscan->rs_cbuf;
-	snapshot = scan->rs_snapshot;
-
-	ntup = 0;
-
-	/*
-	 * Prune and repair fragmentation for the whole page, if possible.
-	 */
-	heap_page_prune_opt(scan->rs_rd, buffer);
-
-	/*
-	 * We must hold share lock on the buffer content while examining tuple
-	 * visibility.  Afterwards, however, the tuples we have found to be
-	 * visible are guaranteed good as long as we hold the buffer pin.
-	 */
-	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+	OffsetNumber targoffset;
+	Page		page;
+	ItemId		lp;
 
 	/*
-	 * We need two separate strategies for lossy and non-lossy cases.
+	 * Out of range?  If so, nothing more to look at on this page
 	 */
-	if (tbmres->ntuples >= 0)
-	{
-		/*
-		 * Bitmap is non-lossy, so we just look through the offsets listed in
-		 * tbmres; but we have to follow any HOT chain starting at each such
-		 * offset.
-		 */
-		int			curslot;
-
-		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
-		{
-			OffsetNumber offnum = tbmres->offsets[curslot];
-			ItemPointerData tid;
-			HeapTupleData heapTuple;
-
-			ItemPointerSet(&tid, block, offnum);
-			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
-									   &heapTuple, NULL, true))
-				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
-		}
-	}
-	else
+	while (hscan->rs_cindex >= hscan->rs_ntuples)
 	{
 		/*
-		 * Bitmap is lossy, so we must examine each line pointer on the page.
-		 * But we can ignore HOT chains, since we'll check each tuple anyway.
+		 * Emit empty tuples before advancing to the next block
 		 */
-		Page		page = BufferGetPage(buffer);
-		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
-		OffsetNumber offnum;
-
-		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
+		if (bscan->rs_empty_tuples_pending > 0)
 		{
-			ItemId		lp;
-			HeapTupleData loctup;
-			bool		valid;
-
-			lp = PageGetItemId(page, offnum);
-			if (!ItemIdIsNormal(lp))
-				continue;
-			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
-			loctup.t_len = ItemIdGetLength(lp);
-			loctup.t_tableOid = scan->rs_rd->rd_id;
-			ItemPointerSet(&loctup.t_self, block, offnum);
-			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
-			if (valid)
-			{
-				hscan->rs_vistuples[ntup++] = offnum;
-				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
-								 HeapTupleHeaderGetXmin(loctup.t_data));
-			}
-			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
-												buffer, snapshot);
+			/*
+			 * If we don't have to fetch the tuple, just return nulls.
+			 */
+			ExecStoreAllNullTuple(slot);
+			bscan->rs_empty_tuples_pending--;
+			return true;
 		}
-	}
-
-	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-
-	Assert(ntup <= MaxHeapTuplesPerPage);
-	hscan->rs_ntuples = ntup;
-
-	if (tbmres->ntuples >= 0)
-		(*exact_pages)++;
-	else
-		(*lossy_pages)++;
-
-	/*
-	 * Return true to indicate that a valid block was found and the bitmap is
-	 * not exhausted. If there are no visible tuples on this page,
-	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
-	 * return false returning control to this function to advance to the next
-	 * block in the bitmap.
-	 */
-	return true;
-}
-
-static bool
-heapam_scan_bitmap_next_tuple(TableScanDesc scan,
-							  TupleTableSlot *slot)
-{
-	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
-	HeapScanDesc hscan = (HeapScanDesc) bscan;
-	OffsetNumber targoffset;
-	Page		page;
-	ItemId		lp;
 
-	if (bscan->rs_empty_tuples_pending > 0)
-	{
 		/*
-		 * If we don't have to fetch the tuple, just return nulls.
+		 * Returns false if the bitmap is exhausted and there are no further
+		 * blocks we need to scan.
 		 */
-		ExecStoreAllNullTuple(slot);
-		bscan->rs_empty_tuples_pending--;
-		return true;
+		if (!BitmapHeapScanNextBlock(scan, recheck, lossy_pages, exact_pages))
+			return false;
 	}
 
-	/*
-	 * Out of range?  If so, nothing more to look at on this page
-	 */
-	if (hscan->rs_cindex >= hscan->rs_ntuples)
-		return false;
-
 	targoffset = hscan->rs_vistuples[hscan->rs_cindex];
 	page = BufferGetPage(hscan->rs_cbuf);
 	lp = PageGetItemId(page, targoffset);
@@ -2615,6 +2465,167 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 	}
 }
 
+/*
+ * Helper function to get the next block of a bitmap heap scan. Returns true
+ * when it got the next block and saved it in the scan descriptor, and false
+ * when the bitmap or relation is exhausted.
+ */
+static bool
+BitmapHeapScanNextBlock(TableScanDesc scan,
+						bool *recheck,
+						uint64 *lossy_pages, uint64 *exact_pages)
+{
+	BitmapHeapScanDesc bscan = (BitmapHeapScanDesc) scan;
+	HeapScanDesc hscan = (HeapScanDesc) bscan;
+	BlockNumber block;
+	void	   *per_buffer_data;
+	Buffer		buffer;
+	Snapshot	snapshot;
+	int			ntup;
+	TBMIterateResult *tbmres;
+
+	Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
+	Assert(hscan->rs_read_stream);
+
+	hscan->rs_cindex = 0;
+	hscan->rs_ntuples = 0;
+
+	/* Release buffer containing previous block. */
+	if (BufferIsValid(hscan->rs_cbuf))
+	{
+		ReleaseBuffer(hscan->rs_cbuf);
+		hscan->rs_cbuf = InvalidBuffer;
+	}
+
+	hscan->rs_cbuf = read_stream_next_buffer(hscan->rs_read_stream, &per_buffer_data);
+
+	if (BufferIsInvalid(hscan->rs_cbuf))
+	{
+		if (BufferIsValid(bscan->rs_vmbuffer))
+		{
+			ReleaseBuffer(bscan->rs_vmbuffer);
+			bscan->rs_vmbuffer = InvalidBuffer;
+		}
+
+		/*
+		 * Bitmap is exhausted. Time to emit empty tuples if relevant. We emit
+		 * all empty tuples at the end instead of emitting them per block we
+		 * skip fetching. This is necessary because the streaming read API
+		 * will only return TBMIterateResults for blocks actually fetched.
+		 * When we skip fetching a block, we keep track of how many empty
+		 * tuples to emit at the end of the BitmapHeapScan. We do not recheck
+		 * all NULL tuples.
+		 */
+		*recheck = false;
+		return bscan->rs_empty_tuples_pending > 0;
+	}
+
+	Assert(per_buffer_data);
+
+	tbmres = per_buffer_data;
+
+	Assert(BufferGetBlockNumber(hscan->rs_cbuf) == tbmres->blockno);
+
+	*recheck = tbmres->recheck;
+
+	hscan->rs_cblock = tbmres->blockno;
+	hscan->rs_ntuples = tbmres->ntuples;
+
+	block = tbmres->blockno;
+	buffer = hscan->rs_cbuf;
+	snapshot = scan->rs_snapshot;
+
+	ntup = 0;
+
+	/*
+	 * Prune and repair fragmentation for the whole page, if possible.
+	 */
+	heap_page_prune_opt(scan->rs_rd, buffer);
+
+	/*
+	 * We must hold share lock on the buffer content while examining tuple
+	 * visibility.  Afterwards, however, the tuples we have found to be
+	 * visible are guaranteed good as long as we hold the buffer pin.
+	 */
+	LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+	/*
+	 * We need two separate strategies for lossy and non-lossy cases.
+	 */
+	if (tbmres->ntuples >= 0)
+	{
+		/*
+		 * Bitmap is non-lossy, so we just look through the offsets listed in
+		 * tbmres; but we have to follow any HOT chain starting at each such
+		 * offset.
+		 */
+		int			curslot;
+
+		for (curslot = 0; curslot < tbmres->ntuples; curslot++)
+		{
+			OffsetNumber offnum = tbmres->offsets[curslot];
+			ItemPointerData tid;
+			HeapTupleData heapTuple;
+
+			ItemPointerSet(&tid, block, offnum);
+			if (heap_hot_search_buffer(&tid, scan->rs_rd, buffer, snapshot,
+									   &heapTuple, NULL, true))
+				hscan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
+		}
+	}
+	else
+	{
+		/*
+		 * Bitmap is lossy, so we must examine each line pointer on the page.
+		 * But we can ignore HOT chains, since we'll check each tuple anyway.
+		 */
+		Page		page = BufferGetPage(buffer);
+		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+		OffsetNumber offnum;
+
+		for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
+		{
+			ItemId		lp;
+			HeapTupleData loctup;
+			bool		valid;
+
+			lp = PageGetItemId(page, offnum);
+			if (!ItemIdIsNormal(lp))
+				continue;
+			loctup.t_data = (HeapTupleHeader) PageGetItem(page, lp);
+			loctup.t_len = ItemIdGetLength(lp);
+			loctup.t_tableOid = scan->rs_rd->rd_id;
+			ItemPointerSet(&loctup.t_self, block, offnum);
+			valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+			if (valid)
+			{
+				hscan->rs_vistuples[ntup++] = offnum;
+				PredicateLockTID(scan->rs_rd, &loctup.t_self, snapshot,
+								 HeapTupleHeaderGetXmin(loctup.t_data));
+			}
+			HeapCheckForSerializableConflictOut(valid, scan->rs_rd, &loctup,
+												buffer, snapshot);
+		}
+	}
+
+	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
+	Assert(ntup <= MaxHeapTuplesPerPage);
+	hscan->rs_ntuples = ntup;
+
+	if (tbmres->ntuples >= 0)
+		(*exact_pages)++;
+	else
+		(*lossy_pages)++;
+
+	/*
+	 * Return true to indicate that a valid block was found and the bitmap is
+	 * not exhausted. If there are no visible tuples on this page,
+	 * hscan->rs_ntuples will be 0 and heapam_scan_bitmap_next_tuple() will
+	 * invoke this function again.
+	 */
+	return true;
+}
 
 /* ------------------------------------------------------------------------
  * Definition of the heap table access method.
@@ -2674,7 +2685,6 @@ static const TableAmRoutine heapam_methods = {
 
 	.relation_estimate_size = heapam_estimate_rel_size,
 
-	.scan_bitmap_next_block = heapam_scan_bitmap_next_block,
 	.scan_bitmap_next_tuple = heapam_scan_bitmap_next_tuple,
 	.scan_sample_next_block = heapam_scan_sample_next_block,
 	.scan_sample_next_tuple = heapam_scan_sample_next_tuple
diff --git a/src/backend/access/table/tableamapi.c b/src/backend/access/table/tableamapi.c
index 760a36fd2a1..56378007a81 100644
--- a/src/backend/access/table/tableamapi.c
+++ b/src/backend/access/table/tableamapi.c
@@ -92,8 +92,6 @@ GetTableAmRoutine(Oid amhandler)
 	Assert(routine->relation_estimate_size != NULL);
 
 	/* optional, but one callback implies presence of the other */
-	Assert((routine->scan_bitmap_next_block == NULL) ==
-		   (routine->scan_bitmap_next_tuple == NULL));
 	Assert(routine->scan_sample_next_block != NULL);
 	Assert(routine->scan_sample_next_tuple != NULL);
 
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index be0d24d901b..68499e3aa45 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -51,10 +51,6 @@
 static void BitmapTableScanSetup(BitmapHeapScanState *node);
 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
-static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node);
-static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
-static inline void BitmapPrefetch(BitmapHeapScanState *node,
-								  TableScanDesc scan);
 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
 
 
@@ -62,14 +58,6 @@ static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
  * Do the underlying index scan, build the bitmap, set up the parallel state
  * needed for parallel workers to iterate through the bitmap, and set up the
  * underlying table scan descriptor.
- *
- * For prefetching, we use *two* iterators, one for the pages we are actually
- * scanning and another that runs ahead of the first for prefetching.
- * node->prefetch_pages tracks exactly how many pages ahead the prefetch
- * iterator is.  Also, node->prefetch_target tracks the desired prefetch
- * distance, which starts small and increases up to the
- * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in a
- * scan that stops after a few tuples because of a LIMIT.
  */
 static void
 BitmapTableScanSetup(BitmapHeapScanState *node)
@@ -102,14 +90,6 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 		 */
 		pstate->tbmiterator = tbm_prepare_shared_iterate(node->tbm);
 
-#ifdef USE_PREFETCH
-		if (node->prefetch_maximum > 0)
-		{
-			pstate->prefetch_iterator =
-				tbm_prepare_shared_iterate(node->tbm);
-		}
-#endif							/* USE_PREFETCH */
-
 		/* We have initialized the shared state so wake up others. */
 		BitmapDoneInitializingSharedState(pstate);
 	}
@@ -119,15 +99,6 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 									pstate->tbmiterator :
 									InvalidDsaPointer);
 
-#ifdef USE_PREFETCH
-	if (node->prefetch_maximum > 0)
-		node->prefetch_iterator =
-			tbm_begin_iterate(node->tbm, dsa,
-							  pstate ?
-							  pstate->prefetch_iterator :
-							  InvalidDsaPointer);
-#endif							/* USE_PREFETCH */
-
 	/*
 	 * If this is the first scan of the underlying table, create the table
 	 * scan descriptor and begin the scan.
@@ -149,16 +120,16 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 		node->ss.ss_currentScanDesc =
 			table_beginscan_bm(node->ss.ss_currentRelation,
 							   node->ss.ps.state->es_snapshot,
+							   pstate,
 							   0,
 							   NULL,
 							   need_tuples);
 	}
 
-	node->ss.ss_currentScanDesc->st.rs_tbmiterator = tbmiterator;
+	node->ss.ss_currentScanDesc->st.bitmap.rs_iterator = tbmiterator;
 	node->initialized = true;
 }
 
-
 /* ----------------------------------------------------------------
  *		BitmapHeapNext
  *
@@ -168,120 +139,44 @@ BitmapTableScanSetup(BitmapHeapScanState *node)
 static TupleTableSlot *
 BitmapHeapNext(BitmapHeapScanState *node)
 {
-	ExprContext *econtext;
-	TableScanDesc scan;
-	TupleTableSlot *slot;
-
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-#endif
-
-	/*
-	 * extract necessary information from index scan node
-	 */
-	econtext = node->ss.ps.ps_ExprContext;
-	slot = node->ss.ss_ScanTupleSlot;
-	scan = node->ss.ss_currentScanDesc;
+	ExprContext *econtext = node->ss.ps.ps_ExprContext;
+	TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
 
 	/*
 	 * If we haven't yet performed the underlying index scan, do it, and begin
 	 * the iteration over the bitmap.
 	 */
 	if (!node->initialized)
-	{
 		BitmapTableScanSetup(node);
-		scan = node->ss.ss_currentScanDesc;
-		goto new_page;
-	}
 
-	for (;;)
+	while (table_scan_bitmap_next_tuple(node->ss.ss_currentScanDesc,
+										slot, &node->recheck,
+										&node->stats.lossy_pages,
+										&node->stats.exact_pages))
 	{
-		while (table_scan_bitmap_next_tuple(scan, slot))
-		{
-			/*
-			 * Continuing in previously obtained page.
-			 */
-
-			CHECK_FOR_INTERRUPTS();
-
-#ifdef USE_PREFETCH
-
-			/*
-			 * Try to prefetch at least a few pages even before we get to the
-			 * second page if we don't stop reading after the first tuple.
-			 */
-			if (!pstate)
-			{
-				if (node->prefetch_target < node->prefetch_maximum)
-					node->prefetch_target++;
-			}
-			else if (pstate->prefetch_target < node->prefetch_maximum)
-			{
-				/* take spinlock while updating shared state */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_target < node->prefetch_maximum)
-					pstate->prefetch_target++;
-				SpinLockRelease(&pstate->mutex);
-			}
-#endif							/* USE_PREFETCH */
-
-			/*
-			 * We issue prefetch requests *after* fetching the current page to
-			 * try to avoid having prefetching interfere with the main I/O.
-			 * Also, this should happen only when we have determined there is
-			 * still something to do on the current page, else we may
-			 * uselessly prefetch the same page we are just about to request
-			 * for real.
-			 */
-			BitmapPrefetch(node, scan);
-
-			/*
-			 * If we are using lossy info, we have to recheck the qual
-			 * conditions at every tuple.
-			 */
-			if (node->recheck)
-			{
-				econtext->ecxt_scantuple = slot;
-				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-				{
-					/* Fails recheck, so drop it and loop back for another */
-					InstrCountFiltered2(node, 1);
-					ExecClearTuple(slot);
-					continue;
-				}
-			}
-
-			/* OK to return this tuple */
-			return slot;
-		}
-
-new_page:
-
-		BitmapAdjustPrefetchIterator(node);
-
 		/*
-		 * Returns false if the bitmap is exhausted and there are no further
-		 * blocks we need to scan.
+		 * Continuing in previously obtained page.
 		 */
-		if (!table_scan_bitmap_next_block(scan, &node->blockno,
-										  &node->recheck,
-										  &node->stats.lossy_pages,
-										  &node->stats.exact_pages))
-			break;
+		CHECK_FOR_INTERRUPTS();
 
 		/*
-		 * If serial, we can error out if the prefetch block doesn't stay
-		 * ahead of the current block.
+		 * If we are using lossy info, we have to recheck the qual conditions
+		 * at every tuple.
 		 */
-		if (node->pstate == NULL &&
-			!tbm_exhausted(&node->prefetch_iterator) &&
-			node->prefetch_blockno < node->blockno)
-			elog(ERROR,
-				 "prefetch and main iterators are out of sync. pfblockno: %d. blockno: %d",
-				 node->prefetch_blockno, node->blockno);
-
-		/* Adjust the prefetch target */
-		BitmapAdjustPrefetchTarget(node);
+		if (node->recheck)
+		{
+			econtext->ecxt_scantuple = slot;
+			if (!ExecQualAndReset(node->bitmapqualorig, econtext))
+			{
+				/* Fails recheck, so drop it and loop back for another */
+				InstrCountFiltered2(node, 1);
+				ExecClearTuple(slot);
+				continue;
+			}
+		}
+
+		/* OK to return this tuple */
+		return slot;
 	}
 
 	/*
@@ -305,226 +200,6 @@ BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
 	ConditionVariableBroadcast(&pstate->cv);
 }
 
-/*
- *	BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
- *
- *	We keep track of how far the prefetch iterator is ahead of the main
- *	iterator in prefetch_pages. For each block the main iterator returns, we
- *	decrement prefetch_pages.
- */
-static inline void
-BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-	TBMIterateResult *tbmpre;
-
-	if (pstate == NULL)
-	{
-		TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-		if (node->prefetch_pages > 0)
-		{
-			/* The main iterator has closed the distance by one page */
-			node->prefetch_pages--;
-		}
-		else if (!tbm_exhausted(prefetch_iterator))
-		{
-			tbmpre = tbm_iterate(prefetch_iterator);
-			node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-				InvalidBlockNumber;
-		}
-		return;
-	}
-
-	/*
-	 * XXX: There is a known issue with keeping the prefetch and current block
-	 * iterators in sync for parallel bitmap table scans. This can lead to
-	 * prefetching blocks that have already been read. See the discussion
-	 * here:
-	 * https://postgr.es/m/20240315211449.en2jcmdqxv5o6tlz%40alap3.anarazel.de
-	 * Note that moving the call site of BitmapAdjustPrefetchIterator()
-	 * exacerbates the effects of this bug.
-	 */
-	if (node->prefetch_maximum > 0)
-	{
-		TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_pages > 0)
-		{
-			pstate->prefetch_pages--;
-			SpinLockRelease(&pstate->mutex);
-		}
-		else
-		{
-			/* Release the mutex before iterating */
-			SpinLockRelease(&pstate->mutex);
-
-			/*
-			 * In case of shared mode, we can not ensure that the current
-			 * blockno of the main iterator and that of the prefetch iterator
-			 * are same.  It's possible that whatever blockno we are
-			 * prefetching will be processed by another process.  Therefore,
-			 * we don't validate the blockno here as we do in non-parallel
-			 * case.
-			 */
-			if (!tbm_exhausted(prefetch_iterator))
-			{
-				tbmpre = tbm_iterate(prefetch_iterator);
-				node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-					InvalidBlockNumber;
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapAdjustPrefetchTarget - Adjust the prefetch target
- *
- * Increase prefetch target if it's not yet at the max.  Note that
- * we will increase it to zero after fetching the very first
- * page/tuple, then to one after the second tuple is fetched, then
- * it doubles as later pages are fetched.
- */
-static inline void
-BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		if (node->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (node->prefetch_target >= node->prefetch_maximum / 2)
-			node->prefetch_target = node->prefetch_maximum;
-		else if (node->prefetch_target > 0)
-			node->prefetch_target *= 2;
-		else
-			node->prefetch_target++;
-		return;
-	}
-
-	/* Do an unlocked check first to save spinlock acquisitions. */
-	if (pstate->prefetch_target < node->prefetch_maximum)
-	{
-		SpinLockAcquire(&pstate->mutex);
-		if (pstate->prefetch_target >= node->prefetch_maximum)
-			 /* don't increase any further */ ;
-		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
-			pstate->prefetch_target = node->prefetch_maximum;
-		else if (pstate->prefetch_target > 0)
-			pstate->prefetch_target *= 2;
-		else
-			pstate->prefetch_target++;
-		SpinLockRelease(&pstate->mutex);
-	}
-#endif							/* USE_PREFETCH */
-}
-
-/*
- * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
- */
-static inline void
-BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
-{
-#ifdef USE_PREFETCH
-	ParallelBitmapHeapState *pstate = node->pstate;
-
-	if (pstate == NULL)
-	{
-		TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-		if (!tbm_exhausted(prefetch_iterator))
-		{
-			while (node->prefetch_pages < node->prefetch_target)
-			{
-				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
-				bool		skip_fetch;
-
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					tbm_end_iterate(prefetch_iterator);
-					break;
-				}
-				node->prefetch_pages++;
-				node->prefetch_blockno = tbmpre->blockno;
-
-				/*
-				 * If we expect not to have to actually read this heap page,
-				 * skip this prefetch call, but continue to run the prefetch
-				 * logic normally.  (Would it be better not to increment
-				 * prefetch_pages?)
-				 */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-
-		return;
-	}
-
-	if (pstate->prefetch_pages < pstate->prefetch_target)
-	{
-		TBMIterator *prefetch_iterator = &node->prefetch_iterator;
-
-		if (!tbm_exhausted(prefetch_iterator))
-		{
-			while (1)
-			{
-				TBMIterateResult *tbmpre;
-				bool		do_prefetch = false;
-				bool		skip_fetch;
-
-				/*
-				 * Recheck under the mutex. If some other process has already
-				 * done enough prefetching then we need not to do anything.
-				 */
-				SpinLockAcquire(&pstate->mutex);
-				if (pstate->prefetch_pages < pstate->prefetch_target)
-				{
-					pstate->prefetch_pages++;
-					do_prefetch = true;
-				}
-				SpinLockRelease(&pstate->mutex);
-
-				if (!do_prefetch)
-					return;
-
-				tbmpre = tbm_iterate(prefetch_iterator);
-				if (tbmpre == NULL)
-				{
-					/* No more pages to prefetch */
-					tbm_end_iterate(prefetch_iterator);
-					break;
-				}
-
-				node->prefetch_blockno = tbmpre->blockno;
-
-				/* As above, skip prefetch if we expect not to need page */
-				skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-							  !tbmpre->recheck &&
-							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-											 tbmpre->blockno,
-											 &node->pvmbuffer));
-
-				if (!skip_fetch)
-					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
-			}
-		}
-	}
-#endif							/* USE_PREFETCH */
-}
-
 /*
  * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
  */
@@ -574,31 +249,19 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
 		 * End iteration on iterators saved in scan descriptor if they have
 		 * not already been cleaned up.
 		 */
-		if (!tbm_exhausted(&scan->st.rs_tbmiterator))
-			tbm_end_iterate(&scan->st.rs_tbmiterator);
+		if (!tbm_exhausted(&scan->st.bitmap.rs_iterator))
+			tbm_end_iterate(&scan->st.bitmap.rs_iterator);
 
 		/* rescan to release any page pin */
 		table_rescan(node->ss.ss_currentScanDesc, NULL);
 	}
 
-	/* If we did not already clean up the prefetch iterator, do so now. */
-	if (!tbm_exhausted(&node->prefetch_iterator))
-		tbm_end_iterate(&node->prefetch_iterator);
-
 	/* release bitmaps and buffers if any */
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 	node->tbm = NULL;
 	node->initialized = false;
-	node->pvmbuffer = InvalidBuffer;
 	node->recheck = true;
-	/* Only used for serial BHS */
-	node->blockno = InvalidBlockNumber;
-	node->prefetch_blockno = InvalidBlockNumber;
-	node->prefetch_pages = 0;
-	node->prefetch_target = -1;
 
 	ExecScanReScan(&node->ss);
 
@@ -658,8 +321,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		 * End iteration on iterators saved in scan descriptor if they have
 		 * not already been cleaned up.
 		 */
-		if (!tbm_exhausted(&scanDesc->st.rs_tbmiterator))
-			tbm_end_iterate(&scanDesc->st.rs_tbmiterator);
+		if (!tbm_exhausted(&scanDesc->st.bitmap.rs_iterator))
+			tbm_end_iterate(&scanDesc->st.bitmap.rs_iterator);
 
 		/*
 		 * close table scan
@@ -667,17 +330,11 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 		table_endscan(scanDesc);
 	}
 
-	/* If we did not already clean up the prefetch iterator, do so now. */
-	if (!tbm_exhausted(&node->prefetch_iterator))
-		tbm_end_iterate(&node->prefetch_iterator);
-
 	/*
 	 * release bitmaps and buffers if any
 	 */
 	if (node->tbm)
 		tbm_free(node->tbm);
-	if (node->pvmbuffer != InvalidBuffer)
-		ReleaseBuffer(node->pvmbuffer);
 }
 
 /* ----------------------------------------------------------------
@@ -710,18 +367,13 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;
 
 	scanstate->tbm = NULL;
-	scanstate->pvmbuffer = InvalidBuffer;
 
 	/* Zero the statistics counters */
 	memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
 
-	scanstate->prefetch_pages = 0;
-	scanstate->prefetch_target = -1;
 	scanstate->initialized = false;
 	scanstate->pstate = NULL;
 	scanstate->recheck = true;
-	scanstate->blockno = InvalidBlockNumber;
-	scanstate->prefetch_blockno = InvalidBlockNumber;
 
 	/*
 	 * Miscellaneous initialization
@@ -761,13 +413,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->bitmapqualorig =
 		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);
 
-	/*
-	 * Maximum number of prefetches for the tablespace if configured,
-	 * otherwise the current value of the effective_io_concurrency GUC.
-	 */
-	scanstate->prefetch_maximum =
-		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
-
 	scanstate->ss.ss_currentRelation = currentRelation;
 
 	/*
@@ -871,12 +516,9 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 		sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
 
 	pstate->tbmiterator = 0;
-	pstate->prefetch_iterator = 0;
 
 	/* Initialize the mutex */
 	SpinLockInit(&pstate->mutex);
-	pstate->prefetch_pages = 0;
-	pstate->prefetch_target = -1;
 	pstate->state = BM_INITIAL;
 
 	ConditionVariableInit(&pstate->cv);
@@ -913,17 +555,11 @@ ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
 		return;
 
 	pstate->state = BM_INITIAL;
-	pstate->prefetch_pages = 0;
-	pstate->prefetch_target = -1;
 
 	if (DsaPointerIsValid(pstate->tbmiterator))
 		tbm_free_shared_area(dsa, pstate->tbmiterator);
 
-	if (DsaPointerIsValid(pstate->prefetch_iterator))
-		tbm_free_shared_area(dsa, pstate->prefetch_iterator);
-
 	pstate->tbmiterator = InvalidDsaPointer;
-	pstate->prefetch_iterator = InvalidDsaPointer;
 }
 
 /* ----------------------------------------------------------------
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index 66b3c387d53..b7e4b4d9e1c 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -40,7 +40,6 @@
 
 #include <limits.h>
 
-#include "access/htup_details.h"
 #include "common/hashfn.h"
 #include "common/int.h"
 #include "nodes/bitmapset.h"
@@ -48,14 +47,6 @@
 #include "storage/lwlock.h"
 #include "utils/dsa.h"
 
-/*
- * The maximum number of tuples per page is not large (typically 256 with
- * 8K pages, or 1024 with 32K pages).  So there's not much point in making
- * the per-page bitmaps variable size.  We just legislate that the size
- * is this:
- */
-#define MAX_TUPLES_PER_PAGE  MaxHeapTuplesPerPage
-
 /*
  * When we have to switch over to lossy storage, we use a data structure
  * with one bit per page, where all pages having the same number DIV
@@ -67,7 +58,7 @@
  * table, using identical data structures.  (This is because the memory
  * management for hashtables doesn't easily/efficiently allow space to be
  * transferred easily from one hashtable to another.)  Therefore it's best
- * if PAGES_PER_CHUNK is the same as MAX_TUPLES_PER_PAGE, or at least not
+ * if PAGES_PER_CHUNK is the same as MaxHeapTuplesPerPage, or at least not
  * too different.  But we also want PAGES_PER_CHUNK to be a power of 2 to
  * avoid expensive integer remainder operations.  So, define it like this:
  */
@@ -79,7 +70,7 @@
 #define BITNUM(x)	((x) % BITS_PER_BITMAPWORD)
 
 /* number of active words for an exact page: */
-#define WORDS_PER_PAGE	((MAX_TUPLES_PER_PAGE - 1) / BITS_PER_BITMAPWORD + 1)
+#define WORDS_PER_PAGE	((MaxHeapTuplesPerPage - 1) / BITS_PER_BITMAPWORD + 1)
 /* number of active words for a lossy chunk: */
 #define WORDS_PER_CHUNK  ((PAGES_PER_CHUNK - 1) / BITS_PER_BITMAPWORD + 1)
 
@@ -181,7 +172,6 @@ struct TBMPrivateIterator
 	int			spageptr;		/* next spages index */
 	int			schunkptr;		/* next schunks index */
 	int			schunkbit;		/* next bit to check in current schunk */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
 };
 
 /*
@@ -222,7 +212,6 @@ struct TBMSharedIterator
 	PTEntryArray *ptbase;		/* pagetable element array */
 	PTIterationArray *ptpages;	/* sorted exact page index list */
 	PTIterationArray *ptchunks; /* sorted lossy page index list */
-	TBMIterateResult output;	/* MUST BE LAST (because variable-size) */
 };
 
 /* Local function prototypes */
@@ -390,7 +379,7 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
 					bitnum;
 
 		/* safety check to ensure we don't overrun bit array bounds */
-		if (off < 1 || off > MAX_TUPLES_PER_PAGE)
+		if (off < 1 || off > MaxHeapTuplesPerPage)
 			elog(ERROR, "tuple offset out of range: %u", off);
 
 		/*
@@ -696,9 +685,7 @@ tbm_begin_private_iterate(TIDBitmap *tbm)
 	 * Create the TBMPrivateIterator struct, with enough trailing space to
 	 * serve the needs of the TBMIterateResult sub-struct.
 	 */
-	iterator = (TBMPrivateIterator *) palloc(sizeof(TBMPrivateIterator) +
-											 MAX_TUPLES_PER_PAGE *
-											 sizeof(OffsetNumber));
+	iterator = (TBMPrivateIterator *) palloc(sizeof(TBMPrivateIterator));
 	iterator->tbm = tbm;
 
 	/*
@@ -959,20 +946,21 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
 /*
  * tbm_private_iterate - scan through next page of a TIDBitmap
  *
- * Returns a TBMIterateResult representing one page, or NULL if there are
- * no more pages to scan.  Pages are guaranteed to be delivered in numerical
- * order.  If result->ntuples < 0, then the bitmap is "lossy" and failed to
- * remember the exact tuples to look at on this page --- the caller must
- * examine all tuples on the page and check if they meet the intended
- * condition.  If result->recheck is true, only the indicated tuples need
- * be examined, but the condition must be rechecked anyway.  (For ease of
- * testing, recheck is always set true when ntuples < 0.)
+ * Caller must pass in a TBMIterateResult to be filled.
+ *
+ * Pages are guaranteed to be delivered in numerical order.  tbmres->blockno is
+ * set to InvalidBlockNumber when there are no more pages to scan. If
+ * tbmres->ntuples < 0, then the bitmap is "lossy" and failed to remember the
+ * exact tuples to look at on this page --- the caller must examine all tuples
+ * on the page and check if they meet the intended condition.  If
+ * tbmres->recheck is true, only the indicated tuples need be examined, but the
+ * condition must be rechecked anyway.  (For ease of testing, recheck is always
+ * set true when ntuples < 0.)
  */
-TBMIterateResult *
-tbm_private_iterate(TBMPrivateIterator *iterator)
+void
+tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres)
 {
 	TIDBitmap  *tbm = iterator->tbm;
-	TBMIterateResult *output = &(iterator->output);
 
 	Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
 
@@ -1010,11 +998,11 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
 			chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
 		{
 			/* Return a lossy page indicator from the chunk */
-			output->blockno = chunk_blockno;
-			output->ntuples = -1;
-			output->recheck = true;
+			tbmres->blockno = chunk_blockno;
+			tbmres->ntuples = -1;
+			tbmres->recheck = true;
 			iterator->schunkbit++;
-			return output;
+			return;
 		}
 	}
 
@@ -1030,16 +1018,16 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
 			page = tbm->spages[iterator->spageptr];
 
 		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
-		output->blockno = page->blockno;
-		output->ntuples = ntuples;
-		output->recheck = page->recheck;
+		ntuples = tbm_extract_page_tuple(page, tbmres);
+		tbmres->blockno = page->blockno;
+		tbmres->ntuples = ntuples;
+		tbmres->recheck = page->recheck;
 		iterator->spageptr++;
-		return output;
+		return;
 	}
 
 	/* Nothing more in the bitmap */
-	return NULL;
+	tbmres->blockno = InvalidBlockNumber;
 }
 
 /*
@@ -1049,10 +1037,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
  *	across multiple processes.  We need to acquire the iterator LWLock,
  *	before accessing the shared members.
  */
-TBMIterateResult *
-tbm_shared_iterate(TBMSharedIterator *iterator)
+void
+tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres)
 {
-	TBMIterateResult *output = &iterator->output;
 	TBMSharedIteratorState *istate = iterator->state;
 	PagetableEntry *ptbase = NULL;
 	int		   *idxpages = NULL;
@@ -1103,13 +1090,13 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 			chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
 		{
 			/* Return a lossy page indicator from the chunk */
-			output->blockno = chunk_blockno;
-			output->ntuples = -1;
-			output->recheck = true;
+			tbmres->blockno = chunk_blockno;
+			tbmres->ntuples = -1;
+			tbmres->recheck = true;
 			istate->schunkbit++;
 
 			LWLockRelease(&istate->lock);
-			return output;
+			return;
 		}
 	}
 
@@ -1119,21 +1106,21 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
 		int			ntuples;
 
 		/* scan bitmap to extract individual offset numbers */
-		ntuples = tbm_extract_page_tuple(page, output);
-		output->blockno = page->blockno;
-		output->ntuples = ntuples;
-		output->recheck = page->recheck;
+		ntuples = tbm_extract_page_tuple(page, tbmres);
+		tbmres->blockno = page->blockno;
+		tbmres->ntuples = ntuples;
+		tbmres->recheck = page->recheck;
 		istate->spageptr++;
 
 		LWLockRelease(&istate->lock);
 
-		return output;
+		return;
 	}
 
 	LWLockRelease(&istate->lock);
 
 	/* Nothing more in the bitmap */
-	return NULL;
+	tbmres->blockno = InvalidBlockNumber;
 }
 
 /*
@@ -1468,8 +1455,7 @@ tbm_attach_shared_iterate(dsa_area *dsa, dsa_pointer dp)
 	 * Create the TBMSharedIterator struct, with enough trailing space to
 	 * serve the needs of the TBMIterateResult sub-struct.
 	 */
-	iterator = (TBMSharedIterator *) palloc0(sizeof(TBMSharedIterator) +
-											 MAX_TUPLES_PER_PAGE * sizeof(OffsetNumber));
+	iterator = (TBMSharedIterator *) palloc0(sizeof(TBMSharedIterator));
 
 	istate = (TBMSharedIteratorState *) dsa_get_address(dsa, dp);
 
@@ -1610,13 +1596,14 @@ tbm_end_iterate(TBMIterator *iterator)
 /*
  * Get the next TBMIterateResult from the shared or private bitmap iterator.
  */
-TBMIterateResult *
-tbm_iterate(TBMIterator *iterator)
+void
+tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres)
 {
 	Assert(iterator);
+	Assert(tbmres);
 
 	if (iterator->shared)
-		return tbm_shared_iterate(iterator->i.shared_iterator);
+		return tbm_shared_iterate(iterator->i.shared_iterator, tbmres);
 	else
-		return tbm_private_iterate(iterator->i.private_iterator);
+		return tbm_private_iterate(iterator->i.private_iterator, tbmres);
 }
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 71abb01f655..0489ad36644 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -325,7 +325,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 				info->amcanparallel = amroutine->amcanparallel;
 				info->amhasgettuple = (amroutine->amgettuple != NULL);
 				info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
-					relation->rd_tableam->scan_bitmap_next_block != NULL;
+					relation->rd_tableam->scan_bitmap_next_tuple != NULL;
 				info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
 									  amroutine->amrestrpos != NULL);
 				info->amcostestimate = amroutine->amcostestimate;
diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index e4414b2e915..4dd58e96bf4 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -241,9 +241,7 @@ read_stream_start_pending_read(ReadStream *stream, bool suppress_advice)
 	 * If advice hasn't been suppressed, this system supports it, and this
 	 * isn't a strictly sequential pattern, then we'll issue advice.
 	 */
-	if (!suppress_advice &&
-		stream->advice_enabled &&
-		stream->pending_read_blocknum != stream->seq_blocknum)
+	if (!suppress_advice && stream->advice_enabled)
 		flags = READ_BUFFERS_ISSUE_ADVICE;
 	else
 		flags = 0;
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index dcd1ae3fc34..478e2a8a377 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -353,7 +353,12 @@ typedef struct GinScanEntryData
 	/* for a partial-match or full-scan query, we accumulate all TIDs here */
 	TIDBitmap  *matchBitmap;
 	TBMPrivateIterator *matchIterator;
-	TBMIterateResult *matchResult;
+
+	/*
+	 * If blockno is InvalidBlockNumber, all of the other fields in the
+	 * matchResult are meaningless.
+	 */
+	TBMIterateResult matchResult;
 
 	/* used for Posting list and one page in Posting tree */
 	ItemPointerData *list;
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index dc6e0184284..934b667ad78 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -43,8 +43,12 @@ typedef struct TableScanDescData
 	 */
 	union
 	{
-		/* Iterator for Bitmap Table Scans */
-		TBMIterator rs_tbmiterator;
+		/* State for Bitmap Table Scans */
+		struct
+		{
+			struct ParallelBitmapHeapState *rs_pstate;
+			TBMIterator rs_iterator;
+		}			bitmap;
 
 		/*
 		 * Range of ItemPointers for table_scan_getnextslot_tidrange() to
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index 131c050c15f..4206a6d0ab1 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -780,59 +780,23 @@ typedef struct TableAmRoutine
 	 */
 
 	/*
-	 * Prepare to fetch / check / return tuples from `blockno` as part of a
-	 * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
-	 * false if the bitmap is exhausted and true otherwise.
-	 *
-	 * This will typically read and pin the target block, and do the necessary
-	 * work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
-	 * make sense to perform tuple visibility checks at this time).
-	 *
-	 * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
-	 * incremented by the table AM to indicate whether or not the block's
-	 * representation in the bitmap is lossy.
-	 *
-	 * `recheck` is set by the table AM to indicate whether or not the tuples
-	 * from this block should be rechecked. Tuples from lossy pages will
-	 * always need to be rechecked, but some non-lossy pages' tuples may also
-	 * require recheck.
-	 *
-	 * `blockno` is the current block and is set by the table AM. The table AM
-	 * is responsible for advancing the main iterator, but the bitmap table
-	 * scan code still advances the prefetch iterator. `blockno` is used by
-	 * bitmap table scan code to validate that the prefetch block stays ahead
-	 * of the current block.
+	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
+	 * if a visible tuple was found, false otherwise.
 	 *
-	 * XXX: Currently this may only be implemented if the AM uses md.c as its
-	 * storage manager, and uses ItemPointer->ip_blkid in a manner that maps
-	 * blockids directly to the underlying storage. nodeBitmapHeapscan.c
-	 * performs prefetching directly using that interface.  This probably
-	 * needs to be rectified at a later point.
+	 * `lossy_pages` is incremented if the bitmap is lossy for the selected
+	 * page; otherwise, `exact_pages` is incremented. This is tracked for
+	 * display in EXPLAIN ANALYZE output.
 	 *
-	 * XXX: Currently this may only be implemented if the AM uses the
-	 * visibilitymap, as nodeBitmapHeapscan.c unconditionally accesses it to
-	 * perform prefetching.  This probably needs to be rectified at a later
-	 * point.
+	 * Prefetching additional data from the bitmap is left to the table AM.
 	 *
-	 * Optional callback, but either both scan_bitmap_next_block and
-	 * scan_bitmap_next_tuple need to exist, or neither.
+	 * This is an optional callback.
 	 */
-	bool		(*scan_bitmap_next_block) (TableScanDesc scan,
-										   BlockNumber *blockno,
+	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
+										   TupleTableSlot *slot,
 										   bool *recheck,
 										   uint64 *lossy_pages,
 										   uint64 *exact_pages);
 
-	/*
-	 * Fetch the next tuple of a bitmap table scan into `slot` and return true
-	 * if a visible tuple was found, false otherwise.
-	 *
-	 * Optional callback, but either both scan_bitmap_next_block and
-	 * scan_bitmap_next_tuple need to exist, or neither.
-	 */
-	bool		(*scan_bitmap_next_tuple) (TableScanDesc scan,
-										   TupleTableSlot *slot);
-
 	/*
 	 * Prepare to fetch tuples from the next block in a sample scan. Return
 	 * false if the sample scan is finished, true otherwise. `scan` was
@@ -956,15 +920,19 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
+				   struct ParallelBitmapHeapState *pstate,
 				   int nkeys, struct ScanKeyData *key, bool need_tuple)
 {
+	TableScanDesc result;
 	uint32		flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
 	if (need_tuple)
 		flags |= SO_NEED_TUPLES;
 
-	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
-									   NULL, flags);
+	result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys,
+										 key, NULL, flags);
+	result->st.bitmap.rs_pstate = pstate;
+	return result;
 }
 
 /*
@@ -1955,56 +1923,24 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
  */
 
 /*
- * Prepare to fetch / check / return tuples as part of a bitmap table scan.
- * `scan` needs to have been started via table_beginscan_bm(). Returns false
- * if there are no more blocks in the bitmap, true otherwise.
- *
- * `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
- * incremented by the table AM to indicate whether or not the block's
- * representation in the bitmap is lossy.
+ * Fetch / check / return tuples as part of a bitmap table scan. `scan` needs
+ * to have been started via table_beginscan_bm(). Fetch the next tuple of a
+ * bitmap table scan into `slot` and return true if a visible tuple was found,
+ * false otherwise.
  *
- * `recheck` is set by the table AM to indicate whether or not the tuples
- * from this block should be rechecked.
+ * `recheck` is set by the table AM to indicate whether or not the tuple in
+ * `slot` should be rechecked. Tuples from lossy pages will always need to be
+ * rechecked, but some non-lossy pages' tuples may also require recheck.
  *
- * `blockno` is the current block and is set by the table AM and is used by
- * bitmap table scan code to validate that the prefetch block stays ahead of
- * the current block.
- *
- * Note, this is an optionally implemented function, therefore should only be
- * used after verifying the presence (at plan time or such).
+ * `lossy_pages` is incremented if the block's representation in the bitmap is
+ * lossy; otherwise, `exact_pages` is incremented.
  */
 static inline bool
-table_scan_bitmap_next_block(TableScanDesc scan,
-							 BlockNumber *blockno,
+table_scan_bitmap_next_tuple(TableScanDesc scan,
+							 TupleTableSlot *slot,
 							 bool *recheck,
 							 uint64 *lossy_pages,
 							 uint64 *exact_pages)
-{
-	/*
-	 * We don't expect direct calls to table_scan_bitmap_next_block with valid
-	 * CheckXidAlive for catalog or regular tables.  See detailed comments in
-	 * xact.c where these variables are declared.
-	 */
-	if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan))
-		elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
-
-	return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
-														   blockno, recheck,
-														   lossy_pages,
-														   exact_pages);
-}
-
-/*
- * Fetch the next tuple of a bitmap table scan into `slot` and return true if
- * a visible tuple was found, false otherwise.
- * table_scan_bitmap_next_block() needs to previously have selected a
- * block (i.e. returned true), and no previous
- * table_scan_bitmap_next_tuple() for the same block may have
- * returned false.
- */
-static inline bool
-table_scan_bitmap_next_tuple(TableScanDesc scan,
-							 TupleTableSlot *slot)
 {
 	/*
 	 * We don't expect direct calls to table_scan_bitmap_next_tuple with valid
@@ -2015,7 +1951,10 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
 		elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
 
 	return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
-														   slot);
+														   slot,
+														   recheck,
+														   lossy_pages,
+														   exact_pages);
 }
 
 /*
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e2d1dc1e067..c08fa33ad83 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1822,11 +1822,7 @@ typedef enum
 /* ----------------
  *	 ParallelBitmapHeapState information
  *		tbmiterator				iterator for scanning current pages
- *		prefetch_iterator		iterator for prefetching ahead of current page
- *		mutex					mutual exclusion for the prefetching variable
- *								and state
- *		prefetch_pages			# pages prefetch iterator is ahead of current
- *		prefetch_target			current target prefetch distance
+ *		mutex					mutual exclusion for state
  *		state					current state of the TIDBitmap
  *		cv						conditional wait variable
  * ----------------
@@ -1834,10 +1830,7 @@ typedef enum
 typedef struct ParallelBitmapHeapState
 {
 	dsa_pointer tbmiterator;
-	dsa_pointer prefetch_iterator;
 	slock_t		mutex;
-	int			prefetch_pages;
-	int			prefetch_target;
 	SharedBitmapState state;
 	ConditionVariable cv;
 } ParallelBitmapHeapState;
@@ -1861,18 +1854,11 @@ typedef struct SharedBitmapHeapInstrumentation
  *
  *		bitmapqualorig	   execution state for bitmapqualorig expressions
  *		tbm				   bitmap obtained from child index scan(s)
- *		pvmbuffer		   buffer for visibility-map lookups of prefetched pages
  *		stats			   execution statistics
- *		prefetch_iterator  iterator for prefetching ahead of current page
- *		prefetch_pages	   # pages prefetch iterator is ahead of current
- *		prefetch_target    current target prefetch distance
- *		prefetch_maximum   maximum value for prefetch_target
  *		initialized		   is node is ready to iterate
  *		pstate			   shared state for parallel bitmap scan
  *		sinstrument		   statistics for parallel workers
  *		recheck			   do current page's tuples need recheck
- *		blockno			   used to validate pf and current block stay in sync
- *		prefetch_blockno   used to validate pf stays ahead of current block
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1880,18 +1866,11 @@ typedef struct BitmapHeapScanState
 	ScanState	ss;				/* its first field is NodeTag */
 	ExprState  *bitmapqualorig;
 	TIDBitmap  *tbm;
-	Buffer		pvmbuffer;
 	BitmapHeapScanInstrumentation stats;
-	TBMIterator prefetch_iterator;
-	int			prefetch_pages;
-	int			prefetch_target;
-	int			prefetch_maximum;
 	bool		initialized;
 	ParallelBitmapHeapState *pstate;
 	SharedBitmapHeapInstrumentation *sinstrument;
 	bool		recheck;
-	BlockNumber blockno;
-	BlockNumber prefetch_blockno;
 } BitmapHeapScanState;
 
 /* ----------------
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index a6ffeac90be..bc4adca3809 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -22,6 +22,7 @@
 #ifndef TIDBITMAP_H
 #define TIDBITMAP_H
 
+#include "access/htup_details.h"
 #include "storage/itemptr.h"
 #include "utils/dsa.h"
 
@@ -55,9 +56,16 @@ typedef struct TBMIterateResult
 {
 	BlockNumber blockno;		/* page number containing tuples */
 	int			ntuples;		/* -1 indicates lossy result */
-	bool		recheck;		/* should the tuples be rechecked? */
 	/* Note: recheck is always true if ntuples < 0 */
-	OffsetNumber offsets[FLEXIBLE_ARRAY_MEMBER];
+	bool		recheck;		/* should the tuples be rechecked? */
+
+	/*
+	 * The maximum number of tuples per page is not large (typically 256 with
+	 * 8K pages, or 1024 with 32K pages).  So there's not much point in making
+	 * the per-page bitmaps variable size.  We just legislate that the size is
+	 * this:
+	 */
+	OffsetNumber offsets[MaxHeapTuplesPerPage];
 } TBMIterateResult;
 
 /* function prototypes in nodes/tidbitmap.c */
@@ -78,8 +86,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
 
 extern TBMPrivateIterator *tbm_begin_private_iterate(TIDBitmap *tbm);
 extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
-extern TBMIterateResult *tbm_private_iterate(TBMPrivateIterator *iterator);
-extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
+extern void tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres);
+extern void tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres);
 extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
@@ -90,7 +98,7 @@ extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
 									 dsa_area *dsa, dsa_pointer dsp);
 extern void tbm_end_iterate(TBMIterator *iterator);
 
-extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
+extern void tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres);
 
 static inline bool
 tbm_exhausted(TBMIterator *iterator)
-- 
2.34.1
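
Editor's note, not part of the patch: to illustrate the revised tidbitmap
iteration contract visible in the prototypes above, here is a rough sketch of
a private-iterator loop. The caller now supplies the TBMIterateResult storage
instead of receiving a pointer owned by the iterator; the `tbm` variable (a
previously built TIDBitmap) and the BlockNumberIsValid() exhaustion check are
assumptions based on the new void return type, not code from the patch.

	TBMPrivateIterator *iterator = tbm_begin_private_iterate(tbm);
	TBMIterateResult tbmres;

	for (;;)
	{
		/* fills the caller-provided result rather than returning a pointer */
		tbm_private_iterate(iterator, &tbmres);

		/* assumption: an invalid blockno means the bitmap is exhausted */
		if (!BlockNumberIsValid(tbmres.blockno))
			break;

		if (tbmres.ntuples >= 0)
		{
			/* exact page: tbmres.offsets[0 .. tbmres.ntuples - 1] are set */
		}
		else
		{
			/*
			 * lossy page (ntuples == -1): every offset on tbmres.blockno must
			 * be visited, and recheck is always true in this case
			 */
		}
	}

	tbm_end_private_iterate(iterator);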
