This is an automated email from the ASF dual-hosted git repository.
jiaqizho pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new 2018fed2ab2 Add explain analyze detailed info of hash agg (#15917)
2018fed2ab2 is described below
commit 2018fed2ab24c67af69907e950a2990cfe4d3f90
Author: Yao Wang <[email protected]>
AuthorDate: Thu Aug 24 15:36:41 2023 +0800
Add explain analyze detailed info of hash agg (#15917)
After upgrading to GPDB7, we lost the detailed CDB executor instrumentation of
HashAgg because the code base changed substantially. This patch re-implements
the EXPLAIN ANALYZE related code for HashAgg so it again provides the critical
information needed for troubleshooting.
---
src/backend/commands/explain.c | 2 +-
src/backend/executor/nodeAgg.c | 145 +++++++++++++++++++++
src/include/executor/nodeAgg.h | 23 ++++
src/include/lib/simplehash.h | 81 ++++++++++++
src/test/regress/expected/explain_format.out | 54 +++++++-
.../regress/expected/explain_format_optimizer.out | 52 +++++++-
src/test/regress/sql/explain_format.sql | 35 ++++-
7 files changed, 388 insertions(+), 4 deletions(-)
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index bfd5f98219c..c89d088c4a2 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -4266,7 +4266,7 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
}
/* Display stats for each parallel worker */
- if (es->analyze && aggstate->shared_info != NULL)
+ if (aggstate->shared_info != NULL)
{
for (int n = 0; n < aggstate->shared_info->num_workers; n++)
{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 881d09e5375..1c49e426d2d 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -481,6 +481,8 @@ static void build_pertrans_for_aggref(AggStatePerTrans pertrans,
static void ExecEagerFreeAgg(AggState *node);
+void agg_hash_explain_extra_message(AggState *aggstate);
+
/*
* Select the current grouping set; affects current_set and
* curaggcontext.
@@ -1540,6 +1542,16 @@ build_hash_tables(AggState *aggstate)
memory);
build_hash_table(aggstate, setno, nbuckets);
+
+ /* initialize the hash table statistics */
+ perhash->num_output_groups = 0;
+ perhash->num_spill_parts = 0;
+ perhash->num_expansions = 0;
+ perhash->bucket_total = 0;
+ perhash->bucket_used = 0;
+ perhash->chain_count = 0;
+ perhash->chain_length_total = 0;
+ perhash->chain_length_max = 0;
}
aggstate->hash_ngroups_current = 0;
@@ -1966,6 +1978,7 @@ hash_agg_enter_spill_mode(AggState *aggstate)
hashagg_spill_init(aggstate, spill,
aggstate->hash_tapeinfo, 0,
perhash->aggnode->numGroups,
aggstate->hashentrysize);
+ perhash->num_spill_parts += spill->npartitions;
}
if (aggstate->ss.ps.instrument)
@@ -2019,6 +2032,12 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
}
/* update hashentrysize estimate based on contents */
+ /*
+ * Greenplum doesn't use hashentrysize in the instrumentation; it
+ * calculates hash table chain lengths to get an accurate number.
+ *
+ * See the code below that collects the hash table statistics.
+ */
if (aggstate->hash_ngroups_current > 0)
{
aggstate->hashentrysize =
@@ -2031,6 +2050,47 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
Instrumentation *instrument = aggstate->ss.ps.instrument;
instrument->workmemused = aggstate->hash_mem_peak;
+
+ /*
+ * workmemwanted is how much memory would be needed to avoid scratch
+ * I/O, i.e. to load everything into the hash table at once:
+ *
+ * 1. add meta_mem only when from_tape is false, because when we are
+ *    reading from a tape/spilled file we can reuse the existing hash
+ *    table's metadata.
+ * 2. add hashkey_mem every time.
+ * 3. don't add buffer_mem, since no spill buffers are needed when
+ *    everything fits in memory at once.
+ */
+ if (!from_tape)
+ instrument->workmemwanted += meta_mem;
+ instrument->workmemwanted += hashkey_mem;
+
+ /* Scan all perhashes and collect hash table statistics */
+ for (int setno = 0; setno < aggstate->num_hashes; setno++)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[setno];
+ tuplehash_hash *hashtab = perhash->hashtable->hashtab;
+
+ Assert(hashtab);
+
+ perhash->num_expansions += hashtab->num_expansions;
+ perhash->bucket_total += hashtab->size;
+ perhash->bucket_used += hashtab->members;
+ if (hashtab->members > 0)
+ {
+ uint32 perht_chain_length_total = 0;
+ uint32 perht_chain_count = 0;
+
+ /* collect chain length statistics for this hash table */
+ tuplehash_coll_stat(hashtab,
+                     &(perhash->chain_length_max),
+                     &perht_chain_length_total,
+                     &perht_chain_count);
+ perhash->chain_count += perht_chain_count;
+ perhash->chain_length_total += perht_chain_length_total;
+ }
+ }
}
}
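
The workmemwanted accounting in the hunk above boils down to a simple rule: charge meta_mem only on the first, non-spill pass, always charge hashkey_mem, and never charge buffer_mem. Below is a minimal standalone sketch of that rule; the helper name and parameter values are illustrative only and are not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: extra work_mem the node would want so the whole
 * input could be hashed in memory at once.
 */
static uint64_t
workmem_wanted_increment(uint64_t meta_mem, uint64_t hashkey_mem, bool from_tape)
{
	uint64_t wanted = 0;

	/* hash table metadata is reused when refilling from spill files */
	if (!from_tape)
		wanted += meta_mem;

	/* the grouping keys/entries always have to fit */
	wanted += hashkey_mem;

	/* buffer_mem is excluded: no spill buffers are needed if everything
	 * fits in memory at once */
	return wanted;
}

int
main(void)
{
	/* first pass: 16kB of metadata plus 512kB of entries -> 540672 */
	printf("%llu\n", (unsigned long long) workmem_wanted_increment(16384, 524288, false));
	/* refill pass: metadata is reused -> 524288 */
	printf("%llu\n", (unsigned long long) workmem_wanted_increment(16384, 524288, true));
	return 0;
}
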
@@ -2225,6 +2285,7 @@ lookup_hash_entries(AggState *aggstate)
hashagg_spill_init(aggstate, spill,
aggstate->hash_tapeinfo, 0,
perhash->aggnode->numGroups,
aggstate->hashentrysize);
+ perhash->num_spill_parts += spill->npartitions;
hashagg_spill_tuple(aggstate, spill, slot, hash);
pergroup[setno] = NULL;
@@ -2277,6 +2338,13 @@ ExecAgg(PlanState *pstate)
return result;
}
+ /* Save statistics into the cdbexplainbuf for EXPLAIN ANALYZE */
+ if (node->ss.ps.instrument &&
+ (node->ss.ps.instrument)->need_cdb &&
+ (node->phase->aggstrategy == AGG_HASHED ||
+ node->phase->aggstrategy == AGG_MIXED))
+ agg_hash_explain_extra_message(node);
+
return NULL;
}
@@ -2807,6 +2875,7 @@ agg_refill_hash_table(AggState *aggstate)
spill_initialized = true;
hashagg_spill_init(aggstate, &spill, tapeinfo,
batch->used_bits,
batch->input_card, aggstate->hashentrysize);
+ aggstate->perhash[aggstate->current_set].num_spill_parts += spill.npartitions;
}
/* no memory for a new group, spill */
hashagg_spill_tuple(aggstate, &spill, spillslot, hash);
@@ -2981,6 +3050,8 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
}
}
+ perhash->num_output_groups++;
+
/*
* Clear the per-output-tuple context for each group
*
@@ -5139,3 +5210,77 @@ ReuseHashTable(AggState *node)
!node->streaming &&
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams));
}
+
+/*
+ * Save statistics into the cdbexplainbuf for EXPLAIN ANALYZE
+ */
+void
+agg_hash_explain_extra_message(AggState *aggstate)
+{
+ /*
+ * See cdbexplain_depositStatsToNode(): Greenplum only saves the extra
+ * message text for the most interesting winning qExecs.
+ */
+ StringInfo hbuf = aggstate->ss.ps.cdbexplainbuf;
+ uint64 sum_num_expansions = 0;
+ uint64 sum_output_groups = 0;
+ uint64 sum_spill_parts = 0;
+ uint64 sum_chain_length_total = 0;
+ uint64 sum_chain_count = 0;
+ uint32 chain_length_max = 0;
+ uint64 sum_bucket_used = 0;
+ uint64 sum_bucket_total = 0;
+
+ Assert(hbuf);
+
+ appendStringInfo(hbuf, "hash table(s): %d", aggstate->num_hashes);
+
+ /* Scan all perhashes and collect statistics */
+ for (int setno = 0; setno < aggstate->num_hashes; setno++)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[setno];
+
+ /* spill statistics */
+ if (aggstate->hash_ever_spilled)
+ {
+ sum_output_groups += perhash->num_output_groups;
+ sum_spill_parts += perhash->num_spill_parts;
+ }
+
+ /* inner hash table statistics */
+ if (perhash->chain_count > 0)
+ {
+ sum_chain_length_total += perhash->chain_length_total;
+ sum_chain_count += perhash->chain_count;
+ if (perhash->chain_length_max > chain_length_max)
+ chain_length_max = perhash->chain_length_max;
+ sum_bucket_used += perhash->bucket_used;
+ sum_bucket_total += perhash->bucket_total;
+ sum_num_expansions += perhash->num_expansions;
+ }
+ }
+
+ if (aggstate->hash_ever_spilled)
+ {
+ appendStringInfo(hbuf,
+ "; " UINT64_FORMAT " groups total in %d batches, "
UINT64_FORMAT
+ " spill partitions; disk usage: " INT64_FORMAT "KB",
+ sum_output_groups,
+ aggstate->hash_batches_used,
+ sum_spill_parts,
+ aggstate->hash_disk_used);
+ }
+
+ if (sum_chain_count > 0)
+ {
+ appendStringInfo(hbuf,
+ "; chain length %.1f avg, %d max;"
+ " using " INT64_FORMAT " of " INT64_FORMAT "
buckets;"
+ " total " INT64_FORMAT " expansions.\n",
+ (double)sum_chain_length_total /
sum_chain_count,
+ chain_length_max,
+ sum_bucket_used,
+ sum_bucket_total,
+ sum_num_expansions);
+ }
+}
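
For readers who only care about the final EXPLAIN ANALYZE text: the second appendStringInfo above averages the accumulated chain lengths and uses the chain count both as the divisor and as the guard against dividing by zero. A compilable sketch of that math follows; the counter values are placeholders, not measurements from the patch.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* placeholder values standing in for the accumulated perhash counters */
	uint64_t sum_chain_length_total = 7742;
	uint64_t sum_chain_count = 2670;
	uint32_t chain_length_max = 9;
	uint64_t sum_bucket_used = 3368;
	uint64_t sum_bucket_total = 20480;
	uint64_t sum_num_expansions = 1;

	/* skip the chain-length part entirely for an empty hash table */
	if (sum_chain_count > 0)
		printf("chain length %.1f avg, %" PRIu32 " max; using %" PRIu64
		       " of %" PRIu64 " buckets; total %" PRIu64 " expansions.\n",
		       (double) sum_chain_length_total / sum_chain_count,
		       chain_length_max,
		       sum_bucket_used, sum_bucket_total, sum_num_expansions);
	return 0;
}
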
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 60fd9f960f8..5e4e21f250a 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -311,6 +311,29 @@ typedef struct AggStatePerHashData
AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
AttrNumber *hashGrpColIdxHash; /* indices in hash table tuples */
Agg *aggnode; /* original Agg node, for numGroups etc. */
+
+ /*
+ * Hash table statistics used for EXPLAIN ANALYZE. Note that these
+ * counters are accumulated and are not reset even when the hash table
+ * is reset.
+ */
+
+ /* number of groups/entries output by the iterator */
+ uint64 num_output_groups;
+ /* number of spilled partitions */
+ uint64 num_spill_parts;
+ /* number of hash table expansions */
+ uint32 num_expansions;
+ /* total number of buckets */
+ uint64 bucket_total;
+ /* number of used buckets */
+ uint64 bucket_used;
+ /* number of all chains */
+ uint64 chain_count;
+ /* total length of all chains */
+ uint64 chain_length_total;
+ /* max chain length */
+ uint32 chain_length_max;
} AggStatePerHashData;
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index a4a25c57f21..8b4af795544 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -126,6 +126,7 @@
#define SH_ALLOCATE SH_MAKE_NAME(allocate)
#define SH_FREE SH_MAKE_NAME(free)
#define SH_STAT SH_MAKE_NAME(stat)
+#define SH_COLL_STAT SH_MAKE_NAME(coll_stat)
/* internal helper functions (no externally visible prototypes) */
#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
@@ -169,6 +170,14 @@ typedef struct SH_TYPE
/* user defined data, useful for callbacks */
void *private_data;
+
+ /*
+ * number of times the hash table has been expanded
+ *
+ * Since the max size of the hash table is UINT32_MAX, uint32 is more
+ * than enough for the expansion count.
+ */
+ uint32 num_expansions;
} SH_TYPE;
typedef enum SH_STATUS
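
A quick sanity check on the comment above: simplehash grows by doubling, so growing from the smallest table all the way to a UINT32_MAX-sized one takes only about 31 doublings, which is why a uint32 counter is far more than enough. The throwaway check below is standalone and not simplehash code.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t size = 2;			/* smallest power-of-two table size */
	uint32_t expansions = 0;

	/* double until the table would reach the UINT32_MAX ceiling */
	while (size < UINT32_MAX)
	{
		size *= 2;
		expansions++;
	}
	printf("at most %u expansions\n", expansions);	/* prints 31 */
	return 0;
}
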
@@ -244,6 +253,12 @@ SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
/* void <prefix>_stat(<prefix>_hash *tb */
SH_SCOPE void SH_STAT(SH_TYPE * tb);
+SH_SCOPE void
+SH_COLL_STAT(SH_TYPE * tb,
+ uint32 * max_chain_length,
+ uint32 * total_chain_length,
+ uint32 * chain_count);
+
#endif /* SH_DECLARE */
@@ -450,6 +465,7 @@ SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
SH_COMPUTE_PARAMETERS(tb, size);
tb->data = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
+ tb->num_expansions = 0;
return tb;
}
@@ -468,6 +484,7 @@ SH_RESET(SH_TYPE * tb)
{
memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
tb->members = 0;
+ tb->num_expansions = 0;
}
/*
@@ -580,6 +597,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
}
}
+ (tb->num_expansions)++;
SH_FREE(tb, olddata);
}
@@ -1120,6 +1138,69 @@ SH_STAT(SH_TYPE * tb)
total_collisions, max_collisions, avg_collisions);
}
+/*
+ * Greenplum specific
+ *
+ * Collect some statistics about the state of the hashtable. Most of the code
+ * is copied from SH_STAT(), with some modifications to keep it consistent
+ * with GPDB6.
+ */
+SH_SCOPE void
+SH_COLL_STAT(SH_TYPE * tb,
+ uint32 * max_chain_length,
+ uint32 * total_chain_length,
+ uint32 * chain_count)
+{
+ *total_chain_length = 0;
+ *chain_count = 0;
+ uint32 last_dist = 0;
+
+ for (int i = 0; i < tb->size; i++)
+ {
+ uint32 hash;
+ uint32 optimal;
+ uint32 dist;
+ SH_ELEMENT_TYPE *elem;
+
+ elem = &tb->data[i];
+
+ if (elem->status != SH_STATUS_IN_USE)
+ continue;
+
+ hash = SH_ENTRY_HASH(tb, elem);
+ optimal = SH_INITIAL_BUCKET(tb, hash);
+ dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);
+
+ /*
+ * Unlike SH_STAT(), count the chain length starting from 1 rather than
+ * 0, e.g. when there is only one element in a bucket, the length is 1.
+ */
+ dist++;
+
+ /*
+ * Within the same chain, dist is always increasing. If dist < last_dist,
+ * we must have hit a new chain; account for the length of the old chain.
+ */
+ if (dist < last_dist)
+ {
+ if (last_dist > *max_chain_length)
+ *max_chain_length = last_dist;
+ *total_chain_length += last_dist;
+ (*chain_count)++;
+ }
+ last_dist = dist;
+ }
+
+ /* Count the last chain. */
+ if (last_dist != 0)
+ {
+ if (last_dist > *max_chain_length)
+ *max_chain_length = last_dist;
+ *total_chain_length += last_dist;
+ (*chain_count)++;
+ }
+}
+
#endif /* SH_DEFINE */
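
The chain accounting in SH_COLL_STAT() is easy to misread: within one run of displaced entries the distance from the optimal bucket grows by one per element, so a drop in dist signals that a new chain has started, and the previous chain's length is the last dist seen. The toy program below replays that counting rule on a hand-made sequence of distances; it illustrates the rule only and does not use the simplehash API.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/*
	 * Toy per-entry "distance from optimal bucket" values, already
	 * incremented by one as in SH_COLL_STAT (a lone entry has length 1).
	 * Here: chains of length 3 and 2, then a singleton.
	 */
	uint32_t dists[] = {1, 2, 3, 1, 2, 1};
	size_t   n = sizeof(dists) / sizeof(dists[0]);

	uint32_t max_chain_length = 0;
	uint32_t total_chain_length = 0;
	uint32_t chain_count = 0;
	uint32_t last_dist = 0;

	for (size_t i = 0; i < n; i++)
	{
		uint32_t dist = dists[i];

		/* dist only grows within a chain; a drop means a new chain started */
		if (dist < last_dist)
		{
			if (last_dist > max_chain_length)
				max_chain_length = last_dist;
			total_chain_length += last_dist;
			chain_count++;
		}
		last_dist = dist;
	}

	/* account for the final chain */
	if (last_dist != 0)
	{
		if (last_dist > max_chain_length)
			max_chain_length = last_dist;
		total_chain_length += last_dist;
		chain_count++;
	}

	/* prints: chains=3 total=6 max=3 */
	printf("chains=%u total=%u max=%u\n",
	       chain_count, total_chain_length, max_chain_length);
	return 0;
}
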
diff --git a/src/test/regress/expected/explain_format.out b/src/test/regress/expected/explain_format.out
index 1675c8a2153..c8c452cc781 100644
--- a/src/test/regress/expected/explain_format.out
+++ b/src/test/regress/expected/explain_format.out
@@ -17,6 +17,8 @@
-- s/Memory used: \d+\w?B/Memory used: ###B/
-- m/Memory Usage: \d+\w?B/
-- s/Memory Usage: \d+\w?B/Memory Usage: ###B/
+-- m/Memory wanted: \d+\w?kB/
+-- s/Memory wanted: \d+\w?kB/Memory wanted: ###kB/
-- m/Peak Memory Usage: \d+/
-- s/Peak Memory Usage: \d+/Peak Memory Usage: ###/
-- m/Buckets: \d+/
@@ -583,9 +585,59 @@ QUERY PLAN
}
]
(1 row)
--- explain_processing_on
+-- start_matchsubs
+-- m/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./
+-- s/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./Extra Text: (seg0) hash table(s): ###; ### groups total in ### batches, ### spill partitions; disk usage: ###KB; chain length ###.## avg, ### max; using ## of ### buckets; total ### expansions./
+-- m/Work_mem: \d+K bytes max, \d+K bytes wanted/
+-- s/Work_mem: \d+K bytes max, \d+K bytes wanted/Work_mem: ###K bytes max, ###K bytes wanted/
+-- end_matchsubs
+-- Greenplum hash table extra message
+CREATE TABLE test_src_tbl AS
+SELECT i % 10000 AS a, i % 10000 + 1 AS b FROM generate_series(1, 50000) i DISTRIBUTED BY (a);
+ANALYZE test_src_tbl;
+-- Enable optimizer_enable_hashagg, and set statement_mem to a small value to force spilling
+set optimizer_enable_hashagg = on;
+SET statement_mem = '1000kB';
+-- Hashagg with spilling
+CREATE TABLE test_hashagg_spill AS
+SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+EXPLAIN (analyze, costs off) SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+QUERY PLAN
+Gather Motion 3:1 (slice1; segments: 3) (actual time=8.948..21.656 rows=10000 loops=1)
+ -> HashAggregate (actual time=13.950..15.183 rows=3386 loops=1)
+ Group Key: a
+ Extra Text: (seg0) hash table(s): 1; 3368 groups total in 4 batches, 2576 spill partitions; disk usage: 1024KB; chain length 2.9 avg, 9 max; using 3368 of 20480 buckets; total 0 expansions.
+
+ -> HashAggregate (actual time=12.113..12.658 rows=3386 loops=1)
+ Group Key: a, b
+ Extra Text: (seg0) hash table(s): 1; chain length 2.3 avg, 5 max; using 3368 of 8192 buckets; total 1 expansions.
+
+ -> Seq Scan on test_src_tbl (actual time=0.015..1.748 rows=16930 loops=1)
+Planning Time: 0.167 ms
+ (slice0) Executor memory: 236K bytes.
+* (slice1) Executor memory: 657K bytes avg x 3 workers, 722K bytes max (seg0). Work_mem: 753K bytes max, 721K bytes wanted.
+Memory used: 1000kB
+Memory wanted: 1640kB
+Optimizer: Postgres query optimizer
+Execution Time: 22.346 ms
+(17 rows)
+-- Hashagg with grouping sets
+CREATE TABLE test_hashagg_groupingsets AS
+SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+-- The planner generates multiple hash tables but ORCA uses Shared Scan.
+-- flaky test
+-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+RESET optimizer_enable_hashagg;
+RESET statement_mem;
-- Cleanup
DROP TABLE boxes;
DROP TABLE apples;
DROP TABLE box_locations;
DROP TABLE jsonexplaintest;
+DROP TABLE test_src_tbl;
+DROP TABLE test_hashagg_spill;
+DROP TABLE test_hashagg_groupingsets;
diff --git a/src/test/regress/expected/explain_format_optimizer.out b/src/test/regress/expected/explain_format_optimizer.out
index 74a0d03e09d..459a466c993 100644
--- a/src/test/regress/expected/explain_format_optimizer.out
+++ b/src/test/regress/expected/explain_format_optimizer.out
@@ -17,6 +17,8 @@
-- s/Memory used: \d+\w?B/Memory used: ###B/
-- m/Memory Usage: \d+\w?B/
-- s/Memory Usage: \d+\w?B/Memory Usage: ###B/
+-- m/Memory wanted: \d+\w?kB/
+-- s/Memory wanted: \d+\w?kB/Memory wanted: ###kB/
-- m/Peak Memory Usage: \d+/
-- s/Peak Memory Usage: \d+/Peak Memory Usage: ###/
-- m/Buckets: \d+/
@@ -530,9 +532,57 @@ QUERY PLAN
}
]
(1 row)
--- explain_processing_on
+-- start_matchsubs
+-- m/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./
+-- s/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./Extra Text: (seg0) hash table(s): ###; ### groups total in ### batches, ### spill partitions; disk usage: ###KB; chain length ###.## avg, ### max; using ## of ### buckets; total ### expansions./
+-- m/Work_mem: \d+K bytes max, \d+K bytes wanted/
+-- s/Work_mem: \d+K bytes max, \d+K bytes wanted/Work_mem: ###K bytes max, ###K bytes wanted/
+-- end_matchsubs
+-- Greenplum hash table extra message
+CREATE TABLE test_src_tbl AS
+SELECT i % 10000 AS a, i % 10000 + 1 AS b FROM generate_series(1, 50000) i DISTRIBUTED BY (a);
+ANALYZE test_src_tbl;
+-- Enable optimizer_enable_hashagg, and set statement_mem to a small value to force spilling
+set optimizer_enable_hashagg = on;
+SET statement_mem = '1000kB';
+-- Hashagg with spilling
+CREATE TABLE test_hashagg_spill AS
+SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
+EXPLAIN (analyze, costs off) SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+QUERY PLAN
+Gather Motion 3:1 (slice1; segments: 3) (actual time=9.551..21.820 rows=10000 loops=1)
+ -> HashAggregate (actual time=9.028..10.280 rows=3386 loops=1)
+ Group Key: a
+ Extra Text: (seg0) hash table(s): 1; 3368 groups total in 4 batches, 2576 spill partitions; disk usage: 1024KB; chain length 2.9 avg, 9 max; using 3368 of 20480 buckets; total 0 expansions.
+
+ -> HashAggregate (actual time=7.066..7.628 rows=3386 loops=1)
+ Group Key: a, b
+ Extra Text: (seg0) hash table(s): 1; 3368 groups total in 4 batches, 1264 spill partitions; disk usage: 1024KB; chain length 2.3 avg, 5 max; using 3368 of 40960 buckets; total 1 expansions.
+
+ -> Seq Scan on test_src_tbl (actual time=0.020..1.349 rows=16930 loops=1)
+Planning Time: 5.518 ms
+ (slice0) Executor memory: 236K bytes.
+* (slice1) Executor memory: 709K bytes avg x 3 workers, 876K bytes max (seg0). Work_mem: 753K bytes max, 721K bytes wanted.
+Memory used: 1000kB
+Memory wanted: 1640kB
+Optimizer: GPORCA
+Execution Time: 22.843 ms
+(17 rows)
+-- Hashagg with grouping sets
+CREATE TABLE test_hashagg_groupingsets AS
+SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
+-- The planner generates multiple hash tables but ORCA uses Shared Scan.
+-- flaky test
+-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+RESET optimizer_enable_hashagg;
+RESET statement_mem;
-- Cleanup
DROP TABLE boxes;
DROP TABLE apples;
DROP TABLE box_locations;
DROP TABLE jsonexplaintest;
+DROP TABLE test_src_tbl;
+DROP TABLE test_hashagg_spill;
+DROP TABLE test_hashagg_groupingsets;
diff --git a/src/test/regress/sql/explain_format.sql b/src/test/regress/sql/explain_format.sql
index 602891698fd..e04cba90947 100644
--- a/src/test/regress/sql/explain_format.sql
+++ b/src/test/regress/sql/explain_format.sql
@@ -17,6 +17,8 @@
-- s/Memory used: \d+\w?B/Memory used: ###B/
-- m/Memory Usage: \d+\w?B/
-- s/Memory Usage: \d+\w?B/Memory Usage: ###B/
+-- m/Memory wanted: \d+\w?kB/
+-- s/Memory wanted: \d+\w?kB/Memory wanted: ###kB/
-- m/Peak Memory Usage: \d+/
-- s/Peak Memory Usage: \d+/Peak Memory Usage: ###/
-- m/Buckets: \d+/
@@ -110,10 +112,41 @@ EXPLAIN (FORMAT XML, COSTS OFF) SELECT * FROM generate_series(1, 10);
CREATE TABLE jsonexplaintest (i int4) PARTITION BY RANGE (i) (START(1) END(3) EVERY(1));
EXPLAIN (FORMAT JSON, COSTS OFF) SELECT * FROM jsonexplaintest WHERE i = 2;
--- explain_processing_on
+-- start_matchsubs
+-- m/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./
+-- s/Extra Text: \(seg\d+\) hash table\(s\): \d+; \d+ groups total in \d+ batches, \d+ spill partitions; disk usage: \d+KB; chain length \d+.\d+ avg, \d+ max; using \d+ of \d+ buckets; total \d+ expansions./Extra Text: (seg0) hash table(s): ###; ### groups total in ### batches, ### spill partitions; disk usage: ###KB; chain length ###.## avg, ### max; using ## of ### buckets; total ### expansions./
+-- m/Work_mem: \d+K bytes max, \d+K bytes wanted/
+-- s/Work_mem: \d+K bytes max, \d+K bytes wanted/Work_mem: ###K bytes max, ###K bytes wanted/
+-- end_matchsubs
+-- Greenplum hash table extra message
+CREATE TABLE test_src_tbl AS
+SELECT i % 10000 AS a, i % 10000 + 1 AS b FROM generate_series(1, 50000) i DISTRIBUTED BY (a);
+ANALYZE test_src_tbl;
+
+-- Enable optimizer_enable_hashagg, and set statement_mem to a small value to force spilling
+set optimizer_enable_hashagg = on;
+SET statement_mem = '1000kB';
+
+-- Hashagg with spilling
+CREATE TABLE test_hashagg_spill AS
+SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+EXPLAIN (analyze, costs off) SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
+
+-- Hashagg with grouping sets
+CREATE TABLE test_hashagg_groupingsets AS
+SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+-- The planner generates multiple hash tables but ORCA uses Shared Scan.
+-- flaky test
+-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+
+RESET optimizer_enable_hashagg;
+RESET statement_mem;
-- Cleanup
DROP TABLE boxes;
DROP TABLE apples;
DROP TABLE box_locations;
DROP TABLE jsonexplaintest;
+DROP TABLE test_src_tbl;
+DROP TABLE test_hashagg_spill;
+DROP TABLE test_hashagg_groupingsets;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]