Hi! Thank you for your work on this issue!
Your patch required a little revision. I did this and attached the patch.
Also, I think you should add some clarification to the comments about
printing 'exact' and 'loosy' pages in show_hashagg_info function, which
you get from planstate->stats, whereas previously it was output only
from planstate. Perhaps it is enough to mention this in the comment to
the commit.
I mean this place:
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 926d70afaf..02251994c6 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3467,26 +3467,57 @@ show_hashagg_info(AggState *aggstate,
ExplainState *es)
static void
show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
+ Assert(es->analyze);
+
if (es->format != EXPLAIN_FORMAT_TEXT)
{
ExplainPropertyInteger("Exact Heap Blocks", NULL,
- planstate->exact_pages, es);
+ planstate->stats.exact_pages, es);
ExplainPropertyInteger("Lossy Heap Blocks", NULL,
- planstate->lossy_pages, es);
+ planstate->stats.lossy_pages, es);
}
else
{
- if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+ if (planstate->stats.exact_pages > 0 ||
planstate->stats.lossy_pages > 0)
{
ExplainIndentText(es);
appendStringInfoString(es->str, "Heap Blocks:");
- if (planstate->exact_pages > 0)
- appendStringInfo(es->str, " exact=%ld",
planstate->exact_pages);
- if (planstate->lossy_pages > 0)
- appendStringInfo(es->str, " lossy=%ld",
planstate->lossy_pages);
+ if (planstate->stats.exact_pages > 0)
+ appendStringInfo(es->str, " exact=%ld",
planstate->stats.exact_pages);
+ if (planstate->stats.lossy_pages > 0)
+ appendStringInfo(es->str, " lossy=%ld",
planstate->stats.lossy_pages);
appendStringInfoChar(es->str, '\n');
}
}
+
+ if (planstate->pstate != NULL)
+ {
+ for (int n = 0; n < planstate->sinstrument->num_workers; n++)
+ {
+ BitmapHeapScanInstrumentation *si =
&planstate->sinstrument->sinstrument[n];
+
+ if (si->exact_pages == 0 && si->lossy_pages == 0)
+ continue;
+
+ if (es->workers_state)
+ ExplainOpenWorker(n, es);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Heap Blocks: exact=%ld
lossy=%ld\n",
+ si->exact_pages, si->lossy_pages);
+ }
+ else
+ {
+ ExplainPropertyInteger("Exact Heap Blocks", NULL,
si->exact_pages, es);
+ ExplainPropertyInteger("Lossy Heap Blocks", NULL,
si->lossy_pages, es);
+ }
+
+ if (es->workers_state)
+ ExplainCloseWorker(n, es);
+ }
+ }
}
I suggest some code refactoring (diff.diff.no-cfbot file) that allows
you to improve your code.
--
Regards,
Alena Rybakina
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
diff --git a/src/backend/executor/nodeBitmapHeapscan.c
b/src/backend/executor/nodeBitmapHeapscan.c
index 973bf56022d..ca8d94ba09a 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -634,8 +634,9 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
*/
if (node->sinstrument != NULL && IsParallelWorker())
{
+ BitmapHeapScanInstrumentation *si;
Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
- BitmapHeapScanInstrumentation *si =
&node->sinstrument->sinstrument[ParallelWorkerNumber];
+ si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
si->exact_pages += node->stats.exact_pages;
si->lossy_pages += node->stats.lossy_pages;
}
@@ -864,7 +865,7 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
ptr = shm_toc_allocate(pcxt->toc, size);
pstate = (ParallelBitmapHeapState *) ptr;
- ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+ ptr += size;
if (node->ss.ps.instrument && pcxt->nworkers > 0)
sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
From 8dc939749a3f0cea9ba337c020b9ccd474e7c8e3 Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Mon, 8 Apr 2024 23:41:41 +0300
Subject: [PATCH] Parallel Bitmap Heap Scan reports per-worker stats.
Similarly to other nodes (e.g. hash join, sort, memoize),
Bitmap Heap Scan now reports per-worker stats in the EXPLAIN
ANALYZE output. Previously only the heap blocks stats for the
leader were reported which was incomplete in parallel scans.
Discussion: https://www.postgresql.org/message-id/flat/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com
---
src/backend/commands/explain.c | 45 +++++++++--
src/backend/executor/execParallel.c | 3 +
src/backend/executor/nodeBitmapHeapscan.c | 95 ++++++++++++++++++++---
src/include/executor/nodeBitmapHeapscan.h | 1 +
src/include/nodes/execnodes.h | 33 +++++++-
5 files changed, 159 insertions(+), 18 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 2c5d980f729..0bee60c4a90 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -3584,26 +3584,57 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
static void
show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
{
+ Assert(es->analyze);
+
if (es->format != EXPLAIN_FORMAT_TEXT)
{
ExplainPropertyInteger("Exact Heap Blocks", NULL,
- planstate->exact_pages, es);
+ planstate->stats.exact_pages, es);
ExplainPropertyInteger("Lossy Heap Blocks", NULL,
- planstate->lossy_pages, es);
+ planstate->stats.lossy_pages, es);
}
else
{
- if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+ if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0)
{
ExplainIndentText(es);
appendStringInfoString(es->str, "Heap Blocks:");
- if (planstate->exact_pages > 0)
- appendStringInfo(es->str, " exact=%ld", planstate->exact_pages);
- if (planstate->lossy_pages > 0)
- appendStringInfo(es->str, " lossy=%ld", planstate->lossy_pages);
+ if (planstate->stats.exact_pages > 0)
+ appendStringInfo(es->str, " exact=%ld", planstate->stats.exact_pages);
+ if (planstate->stats.lossy_pages > 0)
+ appendStringInfo(es->str, " lossy=%ld", planstate->stats.lossy_pages);
appendStringInfoChar(es->str, '\n');
}
}
+
+ if (planstate->pstate != NULL)
+ {
+ for (int n = 0; n < planstate->sinstrument->num_workers; n++)
+ {
+ BitmapHeapScanInstrumentation *si = &planstate->sinstrument->sinstrument[n];
+
+ if (si->exact_pages == 0 && si->lossy_pages == 0)
+ continue;
+
+ if (es->workers_state)
+ ExplainOpenWorker(n, es);
+
+ if (es->format == EXPLAIN_FORMAT_TEXT)
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Heap Blocks: exact=%ld lossy=%ld\n",
+ si->exact_pages, si->lossy_pages);
+ }
+ else
+ {
+ ExplainPropertyInteger("Exact Heap Blocks", NULL, si->exact_pages, es);
+ ExplainPropertyInteger("Lossy Heap Blocks", NULL, si->lossy_pages, es);
+ }
+
+ if (es->workers_state)
+ ExplainCloseWorker(n, es);
+ }
+ }
}
/*
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 8c53d1834e9..bfb3419efb7 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1076,6 +1076,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
case T_MemoizeState:
ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate);
break;
+ case T_BitmapHeapScanState:
+ ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate);
+ break;
default:
break;
}
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 6b48a6d8350..973bf56022d 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -236,9 +236,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
valid_block = table_scan_bitmap_next_block(scan, tbmres);
if (tbmres->ntuples >= 0)
- node->exact_pages++;
+ node->stats.exact_pages++;
else
- node->lossy_pages++;
+ node->stats.lossy_pages++;
if (!valid_block)
{
@@ -627,6 +627,19 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
TableScanDesc scanDesc;
+ /*
+ * When ending a parallel worker, copy the statistics gathered by the
+ * worker back into shared memory so that it can be picked up by the main
+ * process to report in EXPLAIN ANALYZE.
+ */
+ if (node->sinstrument != NULL && IsParallelWorker())
+ {
+ Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
+ BitmapHeapScanInstrumentation *si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
+ si->exact_pages += node->stats.exact_pages;
+ si->lossy_pages += node->stats.lossy_pages;
+ }
+
/*
* extract information from the node
*/
@@ -694,8 +707,8 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->tbmiterator = NULL;
scanstate->tbmres = NULL;
scanstate->pvmbuffer = InvalidBuffer;
- scanstate->exact_pages = 0;
- scanstate->lossy_pages = 0;
+ scanstate->stats.exact_pages = 0;
+ scanstate->stats.lossy_pages = 0;
scanstate->prefetch_iterator = NULL;
scanstate->prefetch_pages = 0;
scanstate->prefetch_target = 0;
@@ -803,7 +816,22 @@ void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
ParallelContext *pcxt)
{
- shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState));
+ Size size;
+
+ /*
+ * We store ParallelBitmapHeapState followed by
+ * SharedBitmapHeapInstrumentationInfo in the same shmem chunk.
+ */
+ size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+ /* don't need this if not instrumenting or no workers */
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ {
+ size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+ size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+ }
+
+ shm_toc_estimate_chunk(&pcxt->estimator, size);
shm_toc_estimate_keys(&pcxt->estimator, 1);
}
@@ -818,13 +846,27 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
ParallelContext *pcxt)
{
ParallelBitmapHeapState *pstate;
+ SharedBitmapHeapInstrumentation *sinstrument = NULL;
dsa_area *dsa = node->ss.ps.state->es_query_dsa;
+ char *ptr;
+ Size size;
/* If there's no DSA, there are no workers; initialize nothing. */
if (dsa == NULL)
return;
- pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
+ size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ {
+ size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+ size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+ }
+
+ ptr = shm_toc_allocate(pcxt->toc, size);
+ pstate = (ParallelBitmapHeapState *) ptr;
+ ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+ if (node->ss.ps.instrument && pcxt->nworkers > 0)
+ sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
pstate->tbmiterator = 0;
pstate->prefetch_iterator = 0;
@@ -837,8 +879,16 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
ConditionVariableInit(&pstate->cv);
+ /* ensure any unfilled slots will contain zeroes */
+ if (sinstrument)
+ {
+ sinstrument->num_workers = pcxt->nworkers;
+ memset(sinstrument->sinstrument, 0, pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
+ }
+
shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
node->pstate = pstate;
+ node->sinstrument = sinstrument;
}
/* ----------------------------------------------------------------
@@ -880,10 +930,37 @@ void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
ParallelWorkerContext *pwcxt)
{
- ParallelBitmapHeapState *pstate;
+ char *ptr;
Assert(node->ss.ps.state->es_query_dsa != NULL);
- pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
- node->pstate = pstate;
+ ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
+
+ node->pstate = (ParallelBitmapHeapState *) ptr;
+ ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+ if (node->ss.ps.instrument)
+ node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
+}
+
+/* ----------------------------------------------------------------
+ * ExecBitmapHeapRetrieveInstrumentation
+ *
+ * Transfer bitmap heap scan statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
+{
+ SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
+ Size size;
+
+ if (sinstrument == NULL)
+ return;
+
+ size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
+ + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
+
+ node->sinstrument = palloc(size);
+ memcpy(node->sinstrument, sinstrument, size);
}
diff --git a/src/include/executor/nodeBitmapHeapscan.h b/src/include/executor/nodeBitmapHeapscan.h
index ea003a9caae..446a664590a 100644
--- a/src/include/executor/nodeBitmapHeapscan.h
+++ b/src/include/executor/nodeBitmapHeapscan.h
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
ParallelContext *pcxt);
extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
ParallelWorkerContext *pwcxt);
+extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node);
#endif /* NODEBITMAPHEAPSCAN_H */
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index fa2f70b7a48..0ac3306d76a 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1744,6 +1744,12 @@ typedef struct BitmapIndexScanState
struct IndexScanDescData *biss_ScanDesc;
} BitmapIndexScanState;
+typedef struct BitmapHeapScanInstrumentation
+{
+ long lossy_pages;
+ long exact_pages;
+} BitmapHeapScanInstrumentation;
+
/* ----------------
* SharedBitmapState information
*
@@ -1787,6 +1793,20 @@ typedef struct ParallelBitmapHeapState
ConditionVariable cv;
} ParallelBitmapHeapState;
+/* ----------------
+ * Instrumentation data for a parallel bitmap heap scan.
+ *
+ * During a parallel bitmap heap scan, this lives in shared memory. At the
+ * end, each worker copies their own stats to the right slot. Finally, the
+ * leader copies the data to its local memory
+ * ----------------
+ */
+typedef struct SharedBitmapHeapInstrumentation
+{
+ int num_workers;
+ BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedBitmapHeapInstrumentation;
+
/* ----------------
* BitmapHeapScanState information
*
@@ -1815,8 +1835,6 @@ typedef struct BitmapHeapScanState
TBMIterator *tbmiterator;
TBMIterateResult *tbmres;
Buffer pvmbuffer;
- long exact_pages;
- long lossy_pages;
TBMIterator *prefetch_iterator;
int prefetch_pages;
int prefetch_target;
@@ -1825,6 +1843,17 @@ typedef struct BitmapHeapScanState
TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
+
+ /*
+ * In a parallelized bitmap heap scan, each worker sets their
+ * instrumentaton data in pstate->sinstrument at the end. The leader
+ * copies the shared-memory info back to local storage before DSM
+ * shutdown, and stores it here in 'instrument'. It remains NULL in
+ * workers and in non-parallel scans.
+ */
+ SharedBitmapHeapInstrumentation *sinstrument;
+
+ BitmapHeapScanInstrumentation stats;
} BitmapHeapScanState;
/* ----------------
--
2.34.1