Hi,

On Wed, 3 Apr 2024 at 23:44, Melanie Plageman <melanieplage...@gmail.com> wrote:
>
>
> I've reviewed the patches inline below and attached a patch that has
> some of my ideas on top of your patch.

Thank you!

>
> > From 8d396a42186325f920d5a05e7092d8e1b66f3cdf Mon Sep 17 00:00:00 2001
> > From: Nazir Bilal Yavuz <byavu...@gmail.com>
> > Date: Wed, 3 Apr 2024 15:14:15 +0300
> > Subject: [PATCH v6] Use streaming read API in ANALYZE
> >
> > ANALYZE command gets random tuples using BlockSampler algorithm. Use
> > streaming reads to get these tuples by using BlockSampler algorithm in
> > streaming read API prefetch logic.
> > ---
> >  src/include/access/heapam.h              |  6 +-
> >  src/backend/access/heap/heapam_handler.c | 22 +++---
> >  src/backend/commands/analyze.c           | 85 ++++++++----------------
> >  3 files changed, 42 insertions(+), 71 deletions(-)
> >
> > diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
> > index a307fb5f245..633caee9d95 100644
> > --- a/src/include/access/heapam.h
> > +++ b/src/include/access/heapam.h
> > @@ -25,6 +25,7 @@
> >  #include "storage/bufpage.h"
> >  #include "storage/dsm.h"
> >  #include "storage/lockdefs.h"
> > +#include "storage/read_stream.h"
> >  #include "storage/shm_toc.h"
> >  #include "utils/relcache.h"
> >  #include "utils/snapshot.h"
> > @@ -388,9 +389,8 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
> >                                                                 struct 
> > GlobalVisState *vistest);
> >
> >  /* in heap/heapam_handler.c*/
> > -extern void heapam_scan_analyze_next_block(TableScanDesc scan,
> > -                                                                           
> >      BlockNumber blockno,
> > -                                                                           
> >      BufferAccessStrategy bstrategy);
> > +extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
> > +                                                                           
> >      ReadStream *stream);
> >  extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
> >                                                                             
> >      TransactionId OldestXmin,
> >                                                                             
> >      double *liverows, double *deadrows,
> > diff --git a/src/backend/access/heap/heapam_handler.c 
> > b/src/backend/access/heap/heapam_handler.c
> > index 0952d4a98eb..d83fbbe6af3 100644
> > --- a/src/backend/access/heap/heapam_handler.c
> > +++ b/src/backend/access/heap/heapam_handler.c
> > @@ -1054,16 +1054,16 @@ heapam_relation_copy_for_cluster(Relation OldHeap, 
> > Relation NewHeap,
> >  }
> >
> >  /*
> > - * Prepare to analyze block `blockno` of `scan`.  The scan has been started
> > - * with SO_TYPE_ANALYZE option.
> > + * Prepare to analyze block returned from streaming object.  If the block 
> > returned
> > + * from streaming object is valid, true is returned; otherwise false is 
> > returned.
> > + * The scan has been started with SO_TYPE_ANALYZE option.
> >   *
> >   * This routine holds a buffer pin and lock on the heap page.  They are 
> > held
> >   * until heapam_scan_analyze_next_tuple() returns false.  That is until 
> > all the
> >   * items of the heap page are analyzed.
> >   */
> > -void
> > -heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
> > -                                                        
> > BufferAccessStrategy bstrategy)
> > +bool
> > +heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
> >  {
> >       HeapScanDesc hscan = (HeapScanDesc) scan;
> >
> > @@ -1076,11 +1076,15 @@ heapam_scan_analyze_next_block(TableScanDesc scan, 
> > BlockNumber blockno,
> >        * doing much work per tuple, the extra lock traffic is probably 
> > better
> >        * avoided.
>
> Personally I think heapam_scan_analyze_next_block() should be inlined.
> It only has a few lines. I would find it clearer inline. At the least,
> there is no reason for it (or heapam_scan_analyze_next_tuple()) to take
> a TableScanDesc instead of a HeapScanDesc.

I agree.

>
> >        */
> > -     hscan->rs_cblock = blockno;
> > -     hscan->rs_cindex = FirstOffsetNumber;
> > -     hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
> > -                                                                           
> >   blockno, RBM_NORMAL, bstrategy);
> > +     hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
> > +     if (hscan->rs_cbuf == InvalidBuffer)
> > +             return false;
> > +
> >       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
> > +
> > +     hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
> > +     hscan->rs_cindex = FirstOffsetNumber;
> > +     return true;
> >  }
>
> >  /*
> > diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
> > index 2fb39f3ede1..764520d5aa2 100644
> > --- a/src/backend/commands/analyze.c
> > +++ b/src/backend/commands/analyze.c
> > @@ -1102,6 +1102,20 @@ examine_attribute(Relation onerel, int attnum, Node 
> > *index_expr)
> >       return stats;
> >  }
> >
> > +/*
> > + * Prefetch callback function to get next block number while using
> > + * BlockSampling algorithm
> > + */
> > +static BlockNumber
> > +block_sampling_streaming_read_next(ReadStream *stream,
> > +                                                                void 
> > *user_data,
> > +                                                                void 
> > *per_buffer_data)
> > +{
> > +     BlockSamplerData *bs = user_data;
> > +
> > +     return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : 
> > InvalidBlockNumber;
>
> I don't see the point of BlockSampler_HasMore() anymore. I removed it in
> the attached and made BlockSampler_Next() return InvalidBlockNumber
> under the same conditions. Is there a reason not to do this? There
> aren't other callers. If the BlockSampler_Next() wasn't part of an API,
> we could just make it the streaming read callback, but that might be
> weird as it is now.

I agree. There is no reason to have BlockSampler_HasMore() after
streaming read API changes.

> That and my other ideas in attached. Let me know what you think.

I agree with your changes but I am not sure if others agree with all
the changes you have proposed. So, I didn't merge 0001 and your ideas
yet, instead I wrote a commit message, added some comments, changed ->
'if (bs->t >= bs->N || bs->m >= bs->n)' to 'if (K <= 0 || k <= 0)' and
attached it as 0002.

--
Regards,
Nazir Bilal Yavuz
Microsoft
From 9715cc628846697b039d3145fe83082d8756851c Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Wed, 3 Apr 2024 15:14:15 +0300
Subject: [PATCH v8 1/2] Use streaming read API in ANALYZE

ANALYZE command gets random tuples using BlockSampler algorithm. Use
streaming reads to get these tuples by using BlockSampler algorithm in
streaming read API prefetch logic.
---
 src/include/access/heapam.h              |  6 +-
 src/backend/access/heap/heapam_handler.c | 22 +++---
 src/backend/commands/analyze.c           | 85 ++++++++----------------
 3 files changed, 42 insertions(+), 71 deletions(-)

diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 2765efc4e5e..fbadb9eeea1 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -25,6 +25,7 @@
 #include "storage/bufpage.h"
 #include "storage/dsm.h"
 #include "storage/lockdefs.h"
+#include "storage/read_stream.h"
 #include "storage/shm_toc.h"
 #include "utils/relcache.h"
 #include "utils/snapshot.h"
@@ -388,9 +389,8 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 								  struct GlobalVisState *vistest);
 
 /* in heap/heapam_handler.c*/
-extern void heapam_scan_analyze_next_block(TableScanDesc scan,
-										   BlockNumber blockno,
-										   BufferAccessStrategy bstrategy);
+extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
+										   ReadStream *stream);
 extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
 										   TransactionId OldestXmin,
 										   double *liverows, double *deadrows,
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 3e7a6b5548b..a32c69cf034 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1054,16 +1054,16 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }
 
 /*
- * Prepare to analyze block `blockno` of `scan`.  The scan has been started
- * with SO_TYPE_ANALYZE option.
+ * Prepare to analyze block returned from streaming object.  If the block returned
+ * from streaming object is valid, true is returned; otherwise false is returned.
+ * The scan has been started with SO_TYPE_ANALYZE option.
  *
  * This routine holds a buffer pin and lock on the heap page.  They are held
  * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
  * items of the heap page are analyzed.
  */
-void
-heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
-							   BufferAccessStrategy bstrategy)
+bool
+heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;
 
@@ -1076,11 +1076,15 @@ heapam_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno,
 	 * doing much work per tuple, the extra lock traffic is probably better
 	 * avoided.
 	 */
-	hscan->rs_cblock = blockno;
-	hscan->rs_cindex = FirstOffsetNumber;
-	hscan->rs_cbuf = ReadBufferExtended(scan->rs_rd, MAIN_FORKNUM,
-										blockno, RBM_NORMAL, bstrategy);
+	hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
+	if (hscan->rs_cbuf == InvalidBuffer)
+		return false;
+
 	LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+	hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
+	hscan->rs_cindex = FirstOffsetNumber;
+	return true;
 }
 
 /*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 2fb39f3ede1..764520d5aa2 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1102,6 +1102,20 @@ examine_attribute(Relation onerel, int attnum, Node *index_expr)
 	return stats;
 }
 
+/*
+ * Prefetch callback function to get next block number while using
+ * BlockSampling algorithm
+ */
+static BlockNumber
+block_sampling_streaming_read_next(ReadStream *stream,
+								   void *user_data,
+								   void *per_buffer_data)
+{
+	BlockSamplerData *bs = user_data;
+
+	return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
+}
+
 /*
  * acquire_sample_rows -- acquire a random sample of rows from the heap
  *
@@ -1154,10 +1168,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	TableScanDesc scan;
 	BlockNumber nblocks;
 	BlockNumber blksdone = 0;
-#ifdef USE_PREFETCH
-	int			prefetch_maximum = 0;	/* blocks to prefetch if enabled */
-	BlockSamplerData prefetch_bs;
-#endif
+	ReadStream *stream;
 
 	Assert(targrows > 0);
 
@@ -1170,13 +1181,6 @@ acquire_sample_rows(Relation onerel, int elevel,
 	randseed = pg_prng_uint32(&pg_global_prng_state);
 	nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);
 
-#ifdef USE_PREFETCH
-	prefetch_maximum = get_tablespace_maintenance_io_concurrency(onerel->rd_rel->reltablespace);
-	/* Create another BlockSampler, using the same seed, for prefetching */
-	if (prefetch_maximum)
-		(void) BlockSampler_Init(&prefetch_bs, totalblocks, targrows, randseed);
-#endif
-
 	/* Report sampling block numbers */
 	pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
 								 nblocks);
@@ -1187,59 +1191,21 @@ acquire_sample_rows(Relation onerel, int elevel,
 	scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
 	slot = table_slot_create(onerel, NULL);
 
-#ifdef USE_PREFETCH
-
-	/*
-	 * If we are doing prefetching, then go ahead and tell the kernel about
-	 * the first set of pages we are going to want.  This also moves our
-	 * iterator out ahead of the main one being used, where we will keep it so
-	 * that we're always pre-fetching out prefetch_maximum number of blocks
-	 * ahead.
-	 */
-	if (prefetch_maximum)
-	{
-		for (int i = 0; i < prefetch_maximum; i++)
-		{
-			BlockNumber prefetch_block;
-
-			if (!BlockSampler_HasMore(&prefetch_bs))
-				break;
-
-			prefetch_block = BlockSampler_Next(&prefetch_bs);
-			PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_block);
-		}
-	}
-#endif
+	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+										vac_strategy,
+										scan->rs_rd,
+										MAIN_FORKNUM,
+										block_sampling_streaming_read_next,
+										&bs,
+										0);
 
 	/* Outer loop over blocks to sample */
-	while (BlockSampler_HasMore(&bs))
+	while (true)
 	{
-		BlockNumber targblock = BlockSampler_Next(&bs);
-#ifdef USE_PREFETCH
-		BlockNumber prefetch_targblock = InvalidBlockNumber;
-
-		/*
-		 * Make sure that every time the main BlockSampler is moved forward
-		 * that our prefetch BlockSampler also gets moved forward, so that we
-		 * always stay out ahead.
-		 */
-		if (prefetch_maximum && BlockSampler_HasMore(&prefetch_bs))
-			prefetch_targblock = BlockSampler_Next(&prefetch_bs);
-#endif
-
 		vacuum_delay_point();
 
-		heapam_scan_analyze_next_block(scan, targblock, vac_strategy);
-
-#ifdef USE_PREFETCH
-
-		/*
-		 * When pre-fetching, after we get a block, tell the kernel about the
-		 * next one we will want, if there's any left.
-		 */
-		if (prefetch_maximum && prefetch_targblock != InvalidBlockNumber)
-			PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, prefetch_targblock);
-#endif
+		if (!heapam_scan_analyze_next_block(scan, stream))
+			break;
 
 		while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
 		{
@@ -1289,6 +1255,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 		pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
 									 ++blksdone);
 	}
+	read_stream_end(stream);
 
 	ExecDropSingleTupleTableSlot(slot);
 	heap_endscan(scan);
-- 
2.43.0

From c5c006322f4fb49b4da152ac8222babcb1e24651 Mon Sep 17 00:00:00 2001
From: Nazir Bilal Yavuz <byavu...@gmail.com>
Date: Thu, 4 Apr 2024 13:30:46 +0300
Subject: [PATCH v8 2/2] Refactorings on top of using streaming read API in
 ANALYZE

This patch includes a couple of refactorings on top of using streaming
read API in ANALYZE. So, this patch should be committed after
'Use streaming read API in ANALYZE' patch is committed.

Refactorings:

- heapam_scan_analyze_next_block() is inlined, its content is moved
  under a main loop in acquire_sample_rows(). Comments are updated
  regarding this change.

- heapam_scan_analyze_next_tuple() takes HeapScanDesc instead of
  TableScanDesc now.

- BlockSampler_HasMore() is removed, BlockSampler_Next() returns an
  InvalidBlockNumber if there are no remaining blocks or no blocks to
  sample.

Author: Melanie Plageman <melanieplage...@gmail.com>
---
 src/include/access/heapam.h              |  4 +-
 src/include/utils/sampling.h             |  1 -
 src/backend/access/heap/heapam_handler.c | 66 ++++++------------------
 src/backend/commands/analyze.c           | 33 +++++++++---
 src/backend/utils/misc/sampling.c        | 10 ++--
 5 files changed, 44 insertions(+), 70 deletions(-)

diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index fbadb9eeea1..8b5fcf4a4a9 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -389,9 +389,7 @@ extern bool HeapTupleIsSurelyDead(HeapTuple htup,
 								  struct GlobalVisState *vistest);
 
 /* in heap/heapam_handler.c*/
-extern bool heapam_scan_analyze_next_block(TableScanDesc scan,
-										   ReadStream *stream);
-extern bool heapam_scan_analyze_next_tuple(TableScanDesc scan,
+extern bool heapam_scan_analyze_next_tuple(HeapScanDesc scan,
 										   TransactionId OldestXmin,
 										   double *liverows, double *deadrows,
 										   TupleTableSlot *slot);
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index be48ee52bac..fb5d6820a24 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -38,7 +38,6 @@ typedef BlockSamplerData *BlockSampler;
 
 extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
 									 int samplesize, uint32 randseed);
-extern bool BlockSampler_HasMore(BlockSampler bs);
 extern BlockNumber BlockSampler_Next(BlockSampler bs);
 
 /* Reservoir sampling methods */
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index a32c69cf034..34d99e30c21 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1054,55 +1054,19 @@ heapam_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap,
 }
 
 /*
- * Prepare to analyze block returned from streaming object.  If the block returned
- * from streaming object is valid, true is returned; otherwise false is returned.
- * The scan has been started with SO_TYPE_ANALYZE option.
- *
- * This routine holds a buffer pin and lock on the heap page.  They are held
- * until heapam_scan_analyze_next_tuple() returns false.  That is until all the
- * items of the heap page are analyzed.
- */
-bool
-heapam_scan_analyze_next_block(TableScanDesc scan, ReadStream *stream)
-{
-	HeapScanDesc hscan = (HeapScanDesc) scan;
-
-	/*
-	 * We must maintain a pin on the target page's buffer to ensure that
-	 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out from
-	 * under us.  Hence, pin the page until we are done looking at it.  We
-	 * also choose to hold sharelock on the buffer throughout --- we could
-	 * release and re-acquire sharelock for each tuple, but since we aren't
-	 * doing much work per tuple, the extra lock traffic is probably better
-	 * avoided.
-	 */
-	hscan->rs_cbuf = read_stream_next_buffer(stream, NULL);
-	if (hscan->rs_cbuf == InvalidBuffer)
-		return false;
-
-	LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-	hscan->rs_cblock = BufferGetBlockNumber(hscan->rs_cbuf);
-	hscan->rs_cindex = FirstOffsetNumber;
-	return true;
-}
-
-/*
- * Iterate over tuples in the block selected with
- * heapam_scan_analyze_next_block().  If a tuple that's suitable for sampling
- * is found, true is returned and a tuple is stored in `slot`.  When no more
- * tuples for sampling, false is returned and the pin and lock acquired by
- * heapam_scan_analyze_next_block() are released.
+ * Iterate over tuples in the block selected to analyze.  If a tuple that's
+ * suitable for sampling is found, true is returned and a tuple is stored in
+ * `slot`.  When no more tuples for sampling, false is returned and the pin
+ * and lock on the current buffer in scan are released.
  *
  * *liverows and *deadrows are incremented according to the encountered
  * tuples.
  */
 bool
-heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
+heapam_scan_analyze_next_tuple(HeapScanDesc scan, TransactionId OldestXmin,
 							   double *liverows, double *deadrows,
 							   TupleTableSlot *slot)
 {
-	HeapScanDesc hscan = (HeapScanDesc) scan;
 	Page		targpage;
 	OffsetNumber maxoffset;
 	BufferHeapTupleTableSlot *hslot;
@@ -1110,17 +1074,17 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	Assert(TTS_IS_BUFFERTUPLE(slot));
 
 	hslot = (BufferHeapTupleTableSlot *) slot;
-	targpage = BufferGetPage(hscan->rs_cbuf);
+	targpage = BufferGetPage(scan->rs_cbuf);
 	maxoffset = PageGetMaxOffsetNumber(targpage);
 
 	/* Inner loop over all tuples on the selected page */
-	for (; hscan->rs_cindex <= maxoffset; hscan->rs_cindex++)
+	for (; scan->rs_cindex <= maxoffset; scan->rs_cindex++)
 	{
 		ItemId		itemid;
 		HeapTuple	targtuple = &hslot->base.tupdata;
 		bool		sample_it = false;
 
-		itemid = PageGetItemId(targpage, hscan->rs_cindex);
+		itemid = PageGetItemId(targpage, scan->rs_cindex);
 
 		/*
 		 * We ignore unused and redirect line pointers.  DEAD line pointers
@@ -1135,14 +1099,14 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 			continue;
 		}
 
-		ItemPointerSet(&targtuple->t_self, hscan->rs_cblock, hscan->rs_cindex);
+		ItemPointerSet(&targtuple->t_self, scan->rs_cblock, scan->rs_cindex);
 
-		targtuple->t_tableOid = RelationGetRelid(scan->rs_rd);
+		targtuple->t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
 		targtuple->t_data = (HeapTupleHeader) PageGetItem(targpage, itemid);
 		targtuple->t_len = ItemIdGetLength(itemid);
 
 		switch (HeapTupleSatisfiesVacuum(targtuple, OldestXmin,
-										 hscan->rs_cbuf))
+										 scan->rs_cbuf))
 		{
 			case HEAPTUPLE_LIVE:
 				sample_it = true;
@@ -1222,8 +1186,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 
 		if (sample_it)
 		{
-			ExecStoreBufferHeapTuple(targtuple, slot, hscan->rs_cbuf);
-			hscan->rs_cindex++;
+			ExecStoreBufferHeapTuple(targtuple, slot, scan->rs_cbuf);
+			scan->rs_cindex++;
 
 			/* note that we leave the buffer locked here! */
 			return true;
@@ -1231,8 +1195,8 @@ heapam_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin,
 	}
 
 	/* Now release the lock and pin on the page */
-	UnlockReleaseBuffer(hscan->rs_cbuf);
-	hscan->rs_cbuf = InvalidBuffer;
+	UnlockReleaseBuffer(scan->rs_cbuf);
+	scan->rs_cbuf = InvalidBuffer;
 
 	/* also prevent old slot contents from having pin on page */
 	ExecClearTuple(slot);
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 764520d5aa2..ae4f13be572 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1111,9 +1111,7 @@ block_sampling_streaming_read_next(ReadStream *stream,
 								   void *user_data,
 								   void *per_buffer_data)
 {
-	BlockSamplerData *bs = user_data;
-
-	return BlockSampler_HasMore(bs) ? BlockSampler_Next(bs) : InvalidBlockNumber;
+	return BlockSampler_Next(user_data);
 }
 
 /*
@@ -1165,7 +1163,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	BlockSamplerData bs;
 	ReservoirStateData rstate;
 	TupleTableSlot *slot;
-	TableScanDesc scan;
+	HeapScanDesc scan;
 	BlockNumber nblocks;
 	BlockNumber blksdone = 0;
 	ReadStream *stream;
@@ -1188,12 +1186,12 @@ acquire_sample_rows(Relation onerel, int elevel,
 	/* Prepare for sampling rows */
 	reservoir_init_selection_state(&rstate, targrows);
 
-	scan = heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
+	scan = (HeapScanDesc) heap_beginscan(onerel, NULL, 0, NULL, NULL, SO_TYPE_ANALYZE);
 	slot = table_slot_create(onerel, NULL);
 
 	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
 										vac_strategy,
-										scan->rs_rd,
+										onerel,
 										MAIN_FORKNUM,
 										block_sampling_streaming_read_next,
 										&bs,
@@ -1204,9 +1202,28 @@ acquire_sample_rows(Relation onerel, int elevel,
 	{
 		vacuum_delay_point();
 
-		if (!heapam_scan_analyze_next_block(scan, stream))
+		/*
+		 * We must maintain a pin on the target page's buffer to ensure that
+		 * concurrent activity - e.g. HOT pruning - doesn't delete tuples out
+		 * from under us.  Hence, pin the page until we are done looking at
+		 * it.
+		 */
+		scan->rs_cbuf = read_stream_next_buffer(stream, NULL);
+
+		if (!BufferIsValid(scan->rs_cbuf))
 			break;
 
+		/*
+		 * We choose to hold sharelock on the buffer throughout --- we could
+		 * release and re-acquire sharelock for each tuple, but since we
+		 * aren't doing much work per tuple, the extra lock traffic is
+		 * probably better avoided.
+		 */
+		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+		scan->rs_cblock = BufferGetBlockNumber(scan->rs_cbuf);
+		scan->rs_cindex = FirstOffsetNumber;
+
 		while (heapam_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
 		{
 			/*
@@ -1258,7 +1275,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	read_stream_end(stream);
 
 	ExecDropSingleTupleTableSlot(slot);
-	heap_endscan(scan);
+	heap_endscan(&scan->rs_base);
 
 	/*
 	 * If we didn't find as many tuples as we wanted then we're done. No sort
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index 933db06702c..cae89e331fb 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -54,12 +54,6 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
 	return Min(bs->n, bs->N);
 }
 
-bool
-BlockSampler_HasMore(BlockSampler bs)
-{
-	return (bs->t < bs->N) && (bs->m < bs->n);
-}
-
 BlockNumber
 BlockSampler_Next(BlockSampler bs)
 {
@@ -68,7 +62,9 @@ BlockSampler_Next(BlockSampler bs)
 	double		p;				/* probability to skip block */
 	double		V;				/* random */
 
-	Assert(BlockSampler_HasMore(bs));	/* hence K > 0 and k > 0 */
+	/* no remaining blocks or no blocks to sample */
+	if (K <= 0 || k <= 0)
+		return InvalidBlockNumber;
 
 	if ((BlockNumber) k >= K)
 	{
-- 
2.43.0

Reply via email to