On Tue, Mar 7, 2023 at 2:47 PM Andres Freund <and...@anarazel.de> wrote:
>
> Hi,
>
> LGTM. The only comment I have is that a small test wouldn't hurt... Compared
> to the other things it should be fairly easy...

So, I have attached an updated patchset which adds a test for hits. Since
there is only one call site where we count hits, I think this single
test is sufficient to protect against regressions.

However, I am concerned that, while unlikely, this could be flakey.
Something could happen to force all of those blocks out of shared
buffers (even though they were just read in) before we hit them.

We could simply check if hits are greater at the end of all of the
pg_stat_io tests than at the beginning and rely on the fact that it is
highly unlikely that every single buffer access will be a miss for all
of the tests. However, is it not technically also possible to have zero
hits?

- Melanie
From cab8e0435b2a8390887f99091b2e9d336dd353c0 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Sat, 25 Feb 2023 14:36:06 -0500
Subject: [PATCH v3 2/2] Track shared buffer hits in pg_stat_io

Count new IOOP_HITs and add "hits" column to pg_stat_io.

Reviewed-by: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAAKRu_beMa9Hzih40%3DXPYqhDVz6tsgUGTrhZXRo%3Dunp%2Bszb%3DUA%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml           | 11 ++++++++
 src/backend/catalog/system_views.sql   |  1 +
 src/backend/storage/buffer/bufmgr.c    | 38 ++++++++++----------------
 src/backend/storage/buffer/localbuf.c  | 11 ++------
 src/backend/utils/activity/pgstat_io.c |  2 +-
 src/backend/utils/adt/pgstatfuncs.c    |  3 ++
 src/include/catalog/pg_proc.dat        |  6 ++--
 src/include/pgstat.h                   |  1 +
 src/include/storage/buf_internals.h    |  2 +-
 src/test/regress/expected/rules.out    |  3 +-
 src/test/regress/expected/stats.out    | 27 ++++++++++++++++--
 src/test/regress/sql/stats.sql         | 14 ++++++++--
 12 files changed, 76 insertions(+), 43 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 6249bb50d0..8b34ca60bc 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3855,6 +3855,17 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
       </entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry">
+       <para role="column_definition">
+        <structfield>hits</structfield> <type>bigint</type>
+       </para>
+       <para>
+        The number of times a desired block was found in a shared buffer.
+       </para>
+      </entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry">
        <para role="column_definition">
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 34ca0e739f..87bbbdfcb3 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1126,6 +1126,7 @@ SELECT
        b.writes,
        b.extends,
        b.op_bytes,
+       b.hits,
        b.evictions,
        b.reuses,
        b.fsyncs,
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0a05577b68..05fd3c9a2a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -472,7 +472,7 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   ForkNumber forkNum,
 							   BlockNumber blockNum,
 							   BufferAccessStrategy strategy,
-							   bool *foundPtr, IOContext *io_context);
+							   bool *foundPtr, IOContext io_context);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
 						IOObject io_object, IOContext io_context);
 static void FindAndDropRelationBuffers(RelFileLocator rlocator,
@@ -850,13 +850,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if (isLocalBuf)
 	{
 		/*
-		 * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
-		 * do not use a BufferAccessStrategy for I/O of temporary tables.
+		 * We do not use a BufferAccessStrategy for I/O of temporary tables.
 		 * However, in some cases, the "strategy" may not be NULL, so we can't
 		 * rely on IOContextForStrategy() to set the right IOContext for us.
 		 * This may happen in cases like CREATE TEMPORARY TABLE AS...
 		 */
-		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
+		io_context = IOCONTEXT_NORMAL;
+		io_object = IOOBJECT_TEMP_RELATION;
+		bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
 		if (found)
 			pgBufferUsage.local_blks_hit++;
 		else if (isExtend)
@@ -871,8 +872,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
 		 * not currently in memory.
 		 */
+		io_context = IOContextForStrategy(strategy);
+		io_object = IOOBJECT_RELATION;
 		bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
-							 strategy, &found, &io_context);
+							 strategy, &found, io_context);
 		if (found)
 			pgBufferUsage.shared_blks_hit++;
 		else if (isExtend)
@@ -892,6 +895,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			/* Just need to update stats before we exit */
 			*hit = true;
 			VacuumPageHit++;
+			pgstat_count_io_op(io_object, io_context, IOOP_HIT);
 
 			if (VacuumCostActive)
 				VacuumCostBalance += VacuumCostPageHit;
@@ -987,16 +991,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID));	/* spinlock not needed */
 
-	if (isLocalBuf)
-	{
-		bufBlock = LocalBufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_TEMP_RELATION;
-	}
-	else
-	{
-		bufBlock = BufHdrGetBlock(bufHdr);
-		io_object = IOOBJECT_RELATION;
-	}
+	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
 
 	if (isExtend)
 	{
@@ -1139,7 +1134,7 @@ static BufferDesc *
 BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
-			bool *foundPtr, IOContext *io_context)
+			bool *foundPtr, IOContext io_context)
 {
 	bool		from_ring;
 	BufferTag	newTag;			/* identity of requested block */
@@ -1193,11 +1188,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			{
 				/*
 				 * If we get here, previous attempts to read the buffer must
-				 * have failed ... but we shall bravely try again. Set
-				 * io_context since we will in fact need to count an IO
-				 * Operation.
+				 * have failed ... but we shall bravely try again.
 				 */
-				*io_context = IOContextForStrategy(strategy);
 				*foundPtr = false;
 			}
 		}
@@ -1211,8 +1203,6 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	 */
 	LWLockRelease(newPartitionLock);
 
-	*io_context = IOContextForStrategy(strategy);
-
 	/* Loop here in case we have to try another victim buffer */
 	for (;;)
 	{
@@ -1295,7 +1285,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rlocator.locator.dbOid,
 														  smgr->smgr_rlocator.locator.relNumber);
 
-				FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
+				FlushBuffer(buf, NULL, IOOBJECT_RELATION, io_context);
 				LWLockRelease(BufferDescriptorGetContentLock(buf));
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1494,7 +1484,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * we may have been forced to release the buffer due to concurrent
 		 * pinners or erroring out.
 		 */
-		pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+		pgstat_count_io_op(IOOBJECT_RELATION, io_context,
 						   from_ring ? IOOP_REUSE : IOOP_EVICT);
 	}
 
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 5325ddb663..880c9909b9 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -108,7 +108,7 @@ PrefetchLocalBuffer(SMgrRelation smgr, ForkNumber forkNum,
  */
 BufferDesc *
 LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
-				 bool *foundPtr, IOContext *io_context)
+				 bool *foundPtr)
 {
 	BufferTag	newTag;			/* identity of requested block */
 	LocalBufferLookupEnt *hresult;
@@ -128,14 +128,6 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	hresult = (LocalBufferLookupEnt *)
 		hash_search(LocalBufHash, &newTag, HASH_FIND, NULL);
 
-	/*
-	 * IO Operations on local buffers are only done in IOCONTEXT_NORMAL. Set
-	 * io_context here (instead of after a buffer hit would have returned) for
-	 * convenience since we don't have to worry about the overhead of calling
-	 * IOContextForStrategy().
-	 */
-	*io_context = IOCONTEXT_NORMAL;
-
 	if (hresult)
 	{
 		b = hresult->id;
@@ -239,6 +231,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		buf_state &= ~BM_DIRTY;
 		pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
 
+		/* Temporary table I/O does not use Buffer Access Strategies */
 		pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
 		pgBufferUsage.local_blks_written++;
 	}
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c
index af5d554610..ae8bb34f78 100644
--- a/src/backend/utils/activity/pgstat_io.c
+++ b/src/backend/utils/activity/pgstat_io.c
@@ -344,7 +344,7 @@ pgstat_tracks_io_op(BackendType bktype, IOObject io_object,
 	 * Some BackendTypes will not do certain IOOps.
 	 */
 	if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) &&
-		(io_op == IOOP_READ || io_op == IOOP_EVICT))
+		(io_op == IOOP_READ || io_op == IOOP_EVICT || io_op == IOOP_HIT))
 		return false;
 
 	if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER ||
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 1112bc2904..e2e79eca99 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1258,6 +1258,7 @@ typedef enum io_stat_col
 	IO_COL_WRITES,
 	IO_COL_EXTENDS,
 	IO_COL_CONVERSION,
+	IO_COL_HITS,
 	IO_COL_EVICTIONS,
 	IO_COL_REUSES,
 	IO_COL_FSYNCS,
@@ -1280,6 +1281,8 @@ pgstat_get_io_op_index(IOOp io_op)
 			return IO_COL_EXTENDS;
 		case IOOP_FSYNC:
 			return IO_COL_FSYNCS;
+		case IOOP_HIT:
+			return IO_COL_HITS;
 		case IOOP_READ:
 			return IO_COL_READS;
 		case IOOP_REUSE:
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 505595620e..615139c2f9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5721,9 +5721,9 @@
   proname => 'pg_stat_get_io', provolatile => 'v',
   prorows => '30', proretset => 't',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
-  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
-  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,evictions,reuses,fsyncs,stats_reset}',
+  proallargtypes => '{text,text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_object,io_context,reads,writes,extends,op_bytes,hits,evictions,reuses,fsyncs,stats_reset}',
   prosrc => 'pg_stat_get_io' },
 
 { oid => '1136', descr => 'statistics: information about WAL activity',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f43fac09ed..a7f77c17b3 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -304,6 +304,7 @@ typedef enum IOOp
 	IOOP_EVICT,
 	IOOP_EXTEND,
 	IOOP_FSYNC,
+	IOOP_HIT,
 	IOOP_READ,
 	IOOP_REUSE,
 	IOOP_WRITE,
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 0b44814740..2afb9bb309 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -419,7 +419,7 @@ extern PrefetchBufferResult PrefetchLocalBuffer(SMgrRelation smgr,
 												ForkNumber forkNum,
 												BlockNumber blockNum);
 extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
-									BlockNumber blockNum, bool *foundPtr, IOContext *io_context);
+									BlockNumber blockNum, bool *foundPtr);
 extern void MarkLocalBufferDirty(Buffer buffer);
 extern void DropRelationLocalBuffers(RelFileLocator rlocator,
 									 ForkNumber forkNum,
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index e953d1f515..c994da9f97 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1883,11 +1883,12 @@ pg_stat_io| SELECT backend_type,
     writes,
     extends,
     op_bytes,
+    hits,
     evictions,
     reuses,
     fsyncs,
     stats_reset
-   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, evictions, reuses, fsyncs, stats_reset);
+   FROM pg_stat_get_io() b(backend_type, io_object, io_context, reads, writes, extends, op_bytes, hits, evictions, reuses, fsyncs, stats_reset);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
     d.datname,
diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 186c296299..9dc66ef027 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1131,6 +1131,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
 -- Create a regular table and insert some data to generate IOCONTEXT_NORMAL
@@ -1208,6 +1209,28 @@ SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
  t
 (1 row)
 
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT COUNT(*) FROM test_io_shared;
+ count 
+-------
+   100
+(1 row)
+
+SELECT pg_stat_force_next_flush();
+ pg_stat_force_next_flush 
+--------------------------
+ 
+(1 row)
+
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+ ?column? 
+----------
+ t
+(1 row)
+
 DROP TABLE test_io_shared;
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
 -- - eviction of local buffers in order to reuse them
@@ -1342,7 +1365,7 @@ SELECT pg_stat_have_stats('io', 0, 0);
  t
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
  pg_stat_reset_shared 
@@ -1350,7 +1373,7 @@ SELECT pg_stat_reset_shared('io');
  
 (1 row)
 
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
  ?column? 
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index d7f873cfc9..b7f0aafe4f 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -541,6 +541,7 @@ SELECT pg_stat_get_subscription_stats(NULL);
 -- - writes of shared buffers to permanent storage
 -- - extends of relations using shared buffers
 -- - fsyncs done to ensure the durability of data dirtying shared buffers
+-- - shared buffer hits
 
 -- There is no test for blocks evicted from shared buffers, because we cannot
 -- be sure of the state of shared buffers at the point the test is run.
@@ -588,6 +589,15 @@ SELECT pg_stat_force_next_flush();
 SELECT sum(reads) AS io_sum_shared_after_reads
   FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation'  \gset
 SELECT :io_sum_shared_after_reads > :io_sum_shared_before_reads;
+
+SELECT sum(hits) AS io_sum_shared_before_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT COUNT(*) FROM test_io_shared;
+SELECT pg_stat_force_next_flush();
+SELECT sum(hits) AS io_sum_shared_after_hits
+  FROM pg_stat_io WHERE io_context = 'normal' AND io_object = 'relation' \gset
+SELECT :io_sum_shared_after_hits > :io_sum_shared_before_hits;
+
 DROP TABLE test_io_shared;
 
 -- Test that the follow IOCONTEXT_LOCAL IOOps are tracked in pg_stat_io:
@@ -675,10 +685,10 @@ SELECT :io_sum_bulkwrite_strategy_extends_after > :io_sum_bulkwrite_strategy_ext
 
 -- Test IO stats reset
 SELECT pg_stat_have_stats('io', 0, 0);
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_pre_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_pre_reset
   FROM pg_stat_io \gset
 SELECT pg_stat_reset_shared('io');
-SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) AS io_stats_post_reset
+SELECT sum(evictions) + sum(reuses) + sum(extends) + sum(fsyncs) + sum(reads) + sum(writes) + sum(hits) AS io_stats_post_reset
   FROM pg_stat_io \gset
 SELECT :io_stats_post_reset < :io_stats_pre_reset;
 
-- 
2.37.2

From 23bb19c6cbdbf4dcfba53a01395d55b151d5a1f3 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Sat, 25 Feb 2023 14:37:25 -0500
Subject: [PATCH v3 1/2] Reorder pgstatfuncs-local enum

---
 src/backend/utils/adt/pgstatfuncs.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index b61a12382b..1112bc2904 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1276,16 +1276,16 @@ pgstat_get_io_op_index(IOOp io_op)
 	{
 		case IOOP_EVICT:
 			return IO_COL_EVICTIONS;
+		case IOOP_EXTEND:
+			return IO_COL_EXTENDS;
+		case IOOP_FSYNC:
+			return IO_COL_FSYNCS;
 		case IOOP_READ:
 			return IO_COL_READS;
 		case IOOP_REUSE:
 			return IO_COL_REUSES;
 		case IOOP_WRITE:
 			return IO_COL_WRITES;
-		case IOOP_EXTEND:
-			return IO_COL_EXTENDS;
-		case IOOP_FSYNC:
-			return IO_COL_FSYNCS;
 	}
 
 	elog(ERROR, "unrecognized IOOp value: %d", io_op);
-- 
2.37.2

Reply via email to