Hello,
I would like to get some feedback on that task.
> pg_statio_*_tables.idx_blks_hit are highly misleading in practice
> because they fail to take account of the difference between internal
> pages and leaf pages in B-Tree indexes.
I see it is still the case, so the issue is relevant, isn't it ?
> The main challenge would be
> passing information about what page we're dealing with (internal/leaf)
> to the place actually calling pgstat_count_buffer_(read|hit). That
> happens in ReadBufferExtended, which just has no idea what page it's
> dealing with. Not sure how to do that cleanly ...
I do not immediately see the way to pass the information in a
completely clean manner.
Either
(1) ReadBufferExtended needs to know the type of an index page (leaf/internal)
or
(2) caller of ReadBufferExtended that can check the page type needs to learn
if there was a hit and call pgstat_count_buffer_(read|hit) accordingly.
In either case necessary code changes seem quite invasive to me.
I have attached a code snippet to illustrate the second idea.
Regards,
Sergey
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 20adb602a4..d8c22be949 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -31,6 +31,7 @@
#include "miscadmin.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
+#include "pgstat.h"
#include "storage/predicate.h"
#include "storage/procarray.h"
#include "utils/memdebug.h"
@@ -870,14 +871,23 @@ _bt_log_reuse_page(Relation rel, BlockNumber blkno, FullTransactionId safexid)
Buffer
_bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
- Buffer buf;
+ Buffer buf;
+ Page page;
+ BTPageOpaque opaque;
if (blkno != P_NEW)
{
+ bool hit;
/* Read an existing block of the relation */
- buf = ReadBuffer(rel, blkno);
+ buf = ReadIndexBuffer(rel, blkno, &hit);
_bt_lockbuf(rel, buf, access);
_bt_checkpage(rel, buf);
+
+ page = BufferGetPage(buf);
+ opaque = BTPageGetOpaque(page);
+ if (hit && P_ISLEAF(opaque))
+ pgstat_count_buffer_hit(rel);
+
}
else
{
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index ae13011d27..ab9522fd50 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -704,6 +704,16 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL);
}
+/*
+ * ReadIndexBuffer -- a shorthand for ReadIndexBufferExtended, for reading from main
+ * fork with RBM_NORMAL mode and default strategy.
+ */
+Buffer
+ReadIndexBuffer(Relation reln, BlockNumber blockNum, bool *hit)
+{
+ return ReadIndexBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL, hit);
+}
+
/*
* ReadBufferExtended -- returns a buffer containing the requested
* block of the requested relation. If the blknum
@@ -774,6 +784,34 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
return buf;
}
+/*
+ * Proof-of-concept.
+ *
+ * Same as ReadBufferExtended but returns the value of "hit" to improve
+ * statistics collection for indexes.
+ */
+Buffer
+ReadIndexBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
+ ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit)
+{
+ Buffer buf;
+
+ /*
+ * Reject attempts to read non-local temporary relations; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(reln))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions")));
+
+ buf = ReadBuffer_common(RelationGetSmgr(reln), reln->rd_rel->relpersistence,
+ forkNum, blockNum, mode, strategy, hit);
+
+ return buf;
+}
+
/*
* ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 58391406f6..d4483c1666 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -179,9 +179,13 @@ extern PrefetchBufferResult PrefetchBuffer(Relation reln, ForkNumber forkNum,
extern bool ReadRecentBuffer(RelFileNode rnode, ForkNumber forkNum,
BlockNumber blockNum, Buffer recent_buffer);
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+extern Buffer ReadIndexBuffer(Relation reln, BlockNumber blockNum, bool *hit);
extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy);
+extern Buffer ReadIndexBufferExtended(Relation reln, ForkNumber forkNum,
+ BlockNumber blockNum, ReadBufferMode mode,
+ BufferAccessStrategy strategy, bool *hit);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
ForkNumber forkNum, BlockNumber blockNum,
ReadBufferMode mode, BufferAccessStrategy strategy,