This is an automated email from the ASF dual-hosted git repository.
maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new d3a46c69200 perf: Improve hash agg performance in large-scale data situations
d3a46c69200 is described below
commit d3a46c692006c2bdc8fe99eab0df9d5dc8152a15
Author: GongXun <[email protected]>
AuthorDate: Fri Jun 20 11:46:14 2025 +0800
perf: Improve hash agg performance in large-scale data situations
During the TPC-DS tests, we observed that CloudberryDB (CBDB)
performed worse than Greenplum (GPDB) when the query plan generated a
multi-stage aggregation (e.g., TPC-DS query 04). The deduplication
achieved by CBDB's **Streaming Partial HashAggregate** was
significantly worse, so the **Finalize HashAggregate** operator in
CBDB processed far more data than GPDB's did on the same dataset.
Example plan from CBDB:
Gather Motion 32:1 (slice1; segments: 32) (cost=0.00..19988.81 rows=1800000 width=76)
  -> Finalize HashAggregate (cost=0.00..19663.99 rows=56250 width=81)
       Group Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year
       -> Redistribute Motion 32:32 (slice2; segments: 32) (cost=0.00..19603.35 rows=56250 width=81)
            Hash Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year
            -> Streaming Partial HashAggregate (cost=0.00..19589.09 rows=56250 width=81)
                 Group Key: customer_gp.c_customer_id, customer_gp.c_first_name, customer_gp.c_last_name, customer_gp.c_preferred_cust_flag, customer_gp.c_birth_country, customer_gp.c_login, customer_gp.c_email_address, date_dim_gp.d_year
                 -> Hash Join (cost=0.00..12346.24 rows=6935137 width=95)
                      Hash Cond: (store_sales_gp.ss_customer_sk = customer_gp.c_customer_sk)
                      ...
Upon further investigation, we found that the **`NumericAggState`**
structure in CloudberryDB contains two additional fields compared to
Greenplum:
```c
int64		pInfcount;		/* count of +Inf values */
int64		nInfcount;		/* count of -Inf values */
```
These fields were introduced in PostgreSQL 14 to support +/- Inf values for
numeric types. Consequently, the size of the NumericAggState structure
increased from 128 bytes to 144 bytes.
In the Streaming Partial HashAggregate, a NumericAggState structure is
created for each grouping key to track statistics for numeric types.
This results in CBDB allocating 16 more bytes per grouping key compared
to GPDB. This additional memory allocation contributes to the observed
performance difference.
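
As a rough illustration of the per-group cost, here is a minimal,
hypothetical C sketch (only pInfcount/nInfcount are real field names
from the patch; the rest of the struct is collapsed into a padded
stand-in matching the 128-byte baseline quoted above):

```c
#include <stdio.h>
#include <stdint.h>

/* Hypothetical stand-in for NumericAggState before the PG14 fields. */
typedef struct NumericAggStateOld
{
	char		other_fields[128];	/* everything GPDB already tracked */
} NumericAggStateOld;

/* The same stand-in after adding the two PG14 infinity counters. */
typedef struct NumericAggStateNew
{
	char		other_fields[128];	/* everything GPDB already tracked */
	int64_t		pInfcount;			/* count of +Inf values */
	int64_t		nInfcount;			/* count of -Inf values */
} NumericAggStateNew;

int
main(void)
{
	/* One state is allocated per group, so the total overhead is
	 * 16 bytes multiplied by the number of groups in the hash table. */
	printf("old=%zu new=%zu extra=%zu bytes per group\n",
		   sizeof(NumericAggStateOld),
		   sizeof(NumericAggStateNew),
		   sizeof(NumericAggStateNew) - sizeof(NumericAggStateOld));
	return 0;
}
```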
To address this issue, we need to adjust aggstate->hash_mem_limit.
Inspired by PostgreSQL's handling of hash memory, we scale the limit
by the hash_mem_multiplier GUC. Following PostgreSQL 15, which raised
the default to 2.0, this patch raises CBDB's default
hash_mem_multiplier from 1.0 to 1.5 to ensure better memory
utilization during hash-based aggregation operations.
https://www.postgresql.org/message-id/flat/CAH2-Wzndc_ROk6CY-bC6p9O53q974Y0Ey4WX8jcPbuTZYM4Q3A%40mail.gmail.com
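
The mechanical core of the change is a one-line scaling of the
operator's memory budget before the spill decision (see the
hash_agg_set_limits and hash_choose_num_partitions hunks below). A
minimal standalone sketch of that logic, with simplified names and
parameters (the real function takes an AggState and Greenplum's
operator_mem, so this is an assumption-laden reduction):

```c
#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* GUC; this patch raises the default from 1.0 to 1.5. */
static double hash_mem_multiplier = 1.5;

/*
 * Simplified spill decision: scale the memory budget (in kB) by
 * hash_mem_multiplier, then check whether the estimated hash table
 * fits in memory without spilling.
 */
static bool
hash_agg_fits_in_memory(double input_groups, double hashentrysize,
						uint64_t operator_mem_kb)
{
	double		strict_memlimit = (double) operator_mem_kb;

	strict_memlimit = strict_memlimit * hash_mem_multiplier;

	/* if not expected to spill, the whole table can stay in memory */
	return input_groups * hashentrysize < strict_memlimit * 1024.0;
}

int
main(void)
{
	/* e.g. 56250 groups of 144 bytes against a 1000 kB budget */
	printf("fits=%d\n",
		   hash_agg_fits_in_memory(56250.0, 144.0, 1000));
	return 0;
}
```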
---
.../src/test/regress/expected/groupingsets.out | 2 ++
.../regress/expected/groupingsets_optimizer.out | 2 ++
.../src/test/regress/expected/rowsecurity.out | 10 ++++----
.../regress/expected/rowsecurity_optimizer.out | 10 ++++----
.../src/test/regress/sql/groupingsets.sql | 2 ++
src/backend/executor/nodeAgg.c | 27 +++++++++++++++++++++-
src/backend/utils/init/globals.c | 2 +-
src/backend/utils/misc/guc.c | 2 +-
src/test/regress/expected/cte_prune.out | 18 +++++++--------
src/test/regress/expected/explain_format.out | 2 ++
.../regress/expected/explain_format_optimizer.out | 2 ++
src/test/regress/expected/groupingsets.out | 2 ++
.../regress/expected/groupingsets_optimizer.out | 2 ++
src/test/regress/sql/explain_format.sql | 2 ++
src/test/regress/sql/groupingsets.sql | 2 ++
.../singlenode_regress/expected/groupingsets.out | 9 ++++----
.../expected/workfile/hashagg_spill.out | 2 ++
.../sql/workfile/hashagg_spill.sql | 2 ++
18 files changed, 73 insertions(+), 27 deletions(-)
diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets.out b/contrib/pax_storage/src/test/regress/expected/groupingsets.out
index 5dd8bb8b53e..25a26d506f8 100644
--- a/contrib/pax_storage/src/test/regress/expected/groupingsets.out
+++ b/contrib/pax_storage/src/test/regress/expected/groupingsets.out
@@ -1784,6 +1784,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
(2 rows)
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -1863,6 +1864,7 @@ explain (costs off)
Optimizer: Postgres query optimizer
(15 rows)
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
index 9923ea610d2..b2ac9ce9ddc 100644
--- a/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/groupingsets_optimizer.out
@@ -1847,6 +1847,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
(2 rows)
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -2017,6 +2018,7 @@ explain (costs off)
Optimizer: Pivotal Optimizer (GPORCA)
(50 rows)
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/contrib/pax_storage/src/test/regress/expected/rowsecurity.out b/contrib/pax_storage/src/test/regress/expected/rowsecurity.out
index 4cd81f77e1e..f1a68caafaf 100644
--- a/contrib/pax_storage/src/test/regress/expected/rowsecurity.out
+++ b/contrib/pax_storage/src/test/regress/expected/rowsecurity.out
@@ -1878,15 +1878,15 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2;
-> Update on t2 t2_1
-> Explicit Redistribute Motion 3:3 (slice2; segments: 3)
-> Hash Join
- Hash Cond: (t2_2.b = t2_1.b)
+ Hash Cond: (t2_1.b = t2_2.b)
-> Redistribute Motion 3:3 (slice3; segments: 3)
- Hash Key: t2_2.b
- -> Seq Scan on t2 t2_2
+ Hash Key: t2_1.b
+ -> Seq Scan on t2 t2_1
Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b))
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
- Hash Key: t2_1.b
- -> Seq Scan on t2 t2_1
+ Hash Key: t2_2.b
+ -> Seq Scan on t2 t2_2
Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b))
Optimizer: Postgres query optimizer
(15 rows)
diff --git a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out
index da828356739..c26777d8711 100644
--- a/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out
+++ b/contrib/pax_storage/src/test/regress/expected/rowsecurity_optimizer.out
@@ -1878,15 +1878,15 @@ AND f_leak(t2_1.b) AND f_leak(t2_2.b) RETURNING *, t2_1, t2_2;
-> Update on t2 t2_1
-> Explicit Redistribute Motion 3:3 (slice2; segments: 3)
-> Hash Join
- Hash Cond: (t2_2.b = t2_1.b)
+ Hash Cond: (t2_1.b = t2_2.b)
-> Redistribute Motion 3:3 (slice3; segments: 3)
- Hash Key: t2_2.b
- -> Seq Scan on t2 t2_2
+ Hash Key: t2_1.b
+ -> Seq Scan on t2 t2_1
Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b))
-> Hash
-> Redistribute Motion 3:3 (slice4; segments: 3)
- Hash Key: t2_1.b
- -> Seq Scan on t2 t2_1
+ Hash Key: t2_2.b
+ -> Seq Scan on t2 t2_2
Filter: ((a = 3) AND ((a % 2) = 1) AND f_leak(b))
Optimizer: Postgres query optimizer
(15 rows)
diff --git a/contrib/pax_storage/src/test/regress/sql/groupingsets.sql b/contrib/pax_storage/src/test/regress/sql/groupingsets.sql
index 835a3cff623..294a52c2036 100644
--- a/contrib/pax_storage/src/test/regress/sql/groupingsets.sql
+++ b/contrib/pax_storage/src/test/regress/sql/groupingsets.sql
@@ -518,6 +518,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -541,6 +542,7 @@ explain (costs off)
count(*)
from tenk1 group by grouping sets
(unique1,twothousand,thousand,hundred,ten,four,two);
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 1c49e426d2d..7513d11102f 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -268,6 +268,7 @@
#include "utils/dynahash.h"
#include "utils/expandeddatum.h"
#include "utils/faultinjector.h"
+#include "utils/guc.h"
#include "utils/logtape.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -1870,6 +1871,8 @@ hash_agg_set_limits(AggState *aggstate, double hashentrysize, double input_group
strict_memlimit = operator_mem;
}
+ strict_memlimit = strict_memlimit * hash_mem_multiplier;
+
/* if not expected to spill, use all of work_mem */
if (input_groups * hashentrysize < strict_memlimit * 1024L)
{
@@ -2092,6 +2095,26 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
}
}
}
+
+#ifdef USE_ASSERT_CHECKING
+ if (unlikely(DEBUG1 >= log_min_messages) && aggstate->streaming)
+ {
+ for (int setno = 0; setno < aggstate->num_hashes; setno++)
+ {
+ AggStatePerHash perhash = &aggstate->perhash[setno];
+ if (perhash->chain_count > 0)
+ {
+ elog(DEBUG1, "hash_agg_update_metrics streaming: setno=%d, chain_length_total=%.1f, chain_length_max=%d, bucket_used=%lu, bucket_total=%lu, num_expansions=%u",
+ setno,
+ (double) perhash->chain_length_total / perhash->chain_count,
+ perhash->chain_length_max,
+ perhash->bucket_used,
+ perhash->bucket_total,
+ perhash->num_expansions);
+ }
+ }
+ }
+#endif
}
/*
@@ -2148,6 +2171,8 @@ hash_choose_num_partitions(AggState *aggstate, double input_groups, double hashe
strict_memlimit = operator_mem;
}
+ strict_memlimit = strict_memlimit * hash_mem_multiplier;
+
/*
* Avoid creating so many partitions that the memory requirements of the
* open partition files are greater than 1/4 of hash_mem.
@@ -2276,7 +2301,7 @@ lookup_hash_entries(AggState *aggstate)
initialize_hash_entry(aggstate, hashtable, entry);
pergroup[setno] = entry->additional;
}
- else
+ else if (!aggstate->streaming)
{
HashAggSpill *spill = &aggstate->hash_spills[setno];
TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple;
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index b187da7e849..aad67dbb2f5 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -145,7 +145,7 @@ int max_statement_mem = 2048000;
* do not enforce per-query memory limit
*/
int gp_vmem_limit_per_query = 0;
-double hash_mem_multiplier = 1.0;
+double hash_mem_multiplier = 1.5;
int maintenance_work_mem = 65536;
int max_parallel_maintenance_workers = 2;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index fae7f8a0e5c..d8cdf027df5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3828,7 +3828,7 @@ static struct config_real ConfigureNamesReal[] =
GUC_EXPLAIN
},
&hash_mem_multiplier,
- 1.0, 1.0, 1000.0,
+ 1.5, 1.0, 1000.0,
NULL, NULL, NULL
},
diff --git a/src/test/regress/expected/cte_prune.out b/src/test/regress/expected/cte_prune.out
index 1b54f443487..a76bc064ce2 100644
--- a/src/test/regress/expected/cte_prune.out
+++ b/src/test/regress/expected/cte_prune.out
@@ -1230,14 +1230,14 @@ LIMIT 10;
-> HashAggregate (cost=102580688686565152.00..102596190789768272.00 rows=247107000000000 width=48)
Output: cup.c, cup.d, cup.e, t.d, t.b
Group Key: cup.c, cup.d, cup.e, t.d, t.b
- Planned Partitions: 256
+ Planned Partitions: 512
-> Redistribute Motion 3:3 (slice3; segments: 3) (cost=93196181903903024.00..102459992361252656.00 rows=741321000000000 width=48)
Output: cup.c, cup.d, cup.e, t.d, t.b
Hash Key: cup.c, cup.d, cup.e, t.d, t.b
-> HashAggregate (cost=93196181903903024.00..102445165941252656.00 rows=741321000000000 width=48)
Output: cup.c, cup.d, cup.e, t.d, t.b
Group Key: cup.c, cup.d, cup.e, t.d, t.b
- Planned Partitions: 256
+ Planned Partitions: 512
-> Nested Loop (cost=67221234716.02..7598699218584692.00 rows=525742695955889984 width=48)
Output: cup.c, cup.d, cup.e, t.d, t.b
-> Broadcast Motion 3:3 (slice4; segments: 3) (cost=679.75..228161.25 rows=7413210 width=8)
@@ -1443,20 +1443,20 @@ select * from
where country.percentage = countrylanguage.percentage order by countrylanguage.COUNTRY,country.language LIMIT 40;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- Limit (cost=1051.23..1051.63 rows=28 width=202)
+ Limit (cost=1051.23..1051.62 rows=28 width=202)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage
- -> Gather Motion 3:1 (slice1; segments: 3) (cost=1051.23..1051.63 rows=28 width=202)
+ -> Gather Motion 3:1 (slice1; segments: 3) (cost=1051.23..1051.62 rows=28 width=202)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage
Merge Key: country.country, countrylanguage_2.language
- -> Limit (cost=1051.23..1051.26 rows=9 width=202)
+ -> Limit (cost=1051.23..1051.25 rows=9 width=202)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage
- -> Sort (cost=1051.23..1051.26 rows=9 width=202)
+ -> Sort (cost=1051.23..1051.25 rows=9 width=202)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage
Sort Key: country.country, countrylanguage_2.language
-> Hash Join (cost=846.99..1051.08 rows=9 width=202)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage, country.code, country.country, country.language, country.isofficial, country.percentage
Hash Cond: (country.percentage = countrylanguage_2.percentage)
- -> Hash Join (cost=436.98..637.91 rows=245 width=85)
+ -> Hash Join (cost=436.98..637.90 rows=245 width=85)
Output: country.code, country.country, country.language, country.isofficial, country.percentage
Hash Cond: (countrylanguage.countrycode = country.code)
-> Seq Scan on cte_prune.countrylanguage (cost=0.00..165.33 rows=13133 width=16)
@@ -1490,9 +1490,9 @@ where country.percentage = countrylanguage.percentage order by countrylanguage.C
-> Seq Scan on cte_prune.country country_1 (cost=0.00..77.33 rows=3 width=52)
Output: country_1.code, country_1.name, country_1.capital
Filter: (country_1.continent = 'Europe'::text)
- -> Hash (cost=409.67..409.67 rows=28 width=117)
+ -> Hash (cost=409.66..409.66 rows=28 width=117)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage
- -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=227.29..409.67 rows=28 width=117)
+ -> Broadcast Motion 3:3 (slice5; segments: 3) (cost=227.29..409.66 rows=28 width=117)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage
-> Hash Join (cost=227.29..409.20 rows=9 width=117)
Output: country_2.code, country_2.name, city_1.name, countrylanguage_2.language, countrylanguage_2.isofficial, countrylanguage_2.percentage
diff --git a/src/test/regress/expected/explain_format.out b/src/test/regress/expected/explain_format.out
index c8c452cc781..a57a1fd55fc 100644
--- a/src/test/regress/expected/explain_format.out
+++ b/src/test/regress/expected/explain_format.out
@@ -599,6 +599,7 @@ ANALYZE test_src_tbl;
set optimizer_enable_hashagg = on;
SET statement_mem = '1000kB';
-- Hashagg with spilling
+set hash_mem_multiplier = 1;
CREATE TABLE test_hashagg_spill AS
SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'a' as the Greenplum Database data distribution key for this table.
@@ -631,6 +632,7 @@ HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sur
-- The planner generates multiple hash tables but ORCA uses Shared Scan.
-- flaky test
-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+reset hash_mem_multiplier;
RESET optimizer_enable_hashagg;
RESET statement_mem;
-- Cleanup
diff --git a/src/test/regress/expected/explain_format_optimizer.out b/src/test/regress/expected/explain_format_optimizer.out
index 459a466c993..f778c3f2048 100644
--- a/src/test/regress/expected/explain_format_optimizer.out
+++ b/src/test/regress/expected/explain_format_optimizer.out
@@ -546,6 +546,7 @@ ANALYZE test_src_tbl;
set optimizer_enable_hashagg = on;
SET statement_mem = '1000kB';
-- Hashagg with spilling
+set hash_mem_multiplier = 1;
CREATE TABLE test_hashagg_spill AS
SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entry.
@@ -576,6 +577,7 @@ NOTICE: Table doesn't have 'DISTRIBUTED BY' clause. Creating a NULL policy entr
-- The planner generates multiple hash tables but ORCA uses Shared Scan.
-- flaky test
-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+reset hash_mem_multiplier;
RESET optimizer_enable_hashagg;
RESET statement_mem;
-- Cleanup
diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out
index ae84ced2484..f7eefa2c8eb 100644
--- a/src/test/regress/expected/groupingsets.out
+++ b/src/test/regress/expected/groupingsets.out
@@ -1861,6 +1861,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
(2 rows)
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -1940,6 +1941,7 @@ explain (costs off)
Optimizer: Postgres query optimizer
(15 rows)
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out
index d59a83ae9cf..39fad5a1c8a 100644
--- a/src/test/regress/expected/groupingsets_optimizer.out
+++ b/src/test/regress/expected/groupingsets_optimizer.out
@@ -1924,6 +1924,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
(2 rows)
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -2094,6 +2095,7 @@ explain (costs off)
Optimizer: Pivotal Optimizer (GPORCA)
(50 rows)
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/src/test/regress/sql/explain_format.sql b/src/test/regress/sql/explain_format.sql
index e04cba90947..229fcdfe6fe 100644
--- a/src/test/regress/sql/explain_format.sql
+++ b/src/test/regress/sql/explain_format.sql
@@ -128,6 +128,7 @@ set optimizer_enable_hashagg = on;
SET statement_mem = '1000kB';
-- Hashagg with spilling
+set hash_mem_multiplier = 1;
CREATE TABLE test_hashagg_spill AS
SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
EXPLAIN (analyze, costs off) SELECT a, COUNT(DISTINCT b) AS b FROM test_src_tbl GROUP BY a;
@@ -139,6 +140,7 @@ SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
-- flaky test
-- EXPLAIN (analyze, costs off) SELECT a, avg(b) AS b FROM test_src_tbl GROUP BY grouping sets ((a), (b));
+reset hash_mem_multiplier;
RESET optimizer_enable_hashagg;
RESET statement_mem;
diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql
index 054a299b56f..6907b5f1f55 100644
--- a/src/test/regress/sql/groupingsets.sql
+++ b/src/test/regress/sql/groupingsets.sql
@@ -544,6 +544,7 @@ select array(select row(v.a,s1.*) from (select two,four, count(*) from onek grou
-- test the knapsack
+set hash_mem_multiplier = 1;
set enable_indexscan = false;
set work_mem = '64kB';
explain (costs off)
@@ -567,6 +568,7 @@ explain (costs off)
count(*)
from tenk1 group by grouping sets
(unique1,twothousand,thousand,hundred,ten,four,two);
+reset hash_mem_multiplier;
-- check collation-sensitive matching between grouping expressions
-- (similar to a check for aggregates, but there are additional code
-- paths for GROUPING, so check again here)
diff --git a/src/test/singlenode_regress/expected/groupingsets.out b/src/test/singlenode_regress/expected/groupingsets.out
index 36872ea7a29..83d6890b95d 100644
--- a/src/test/singlenode_regress/expected/groupingsets.out
+++ b/src/test/singlenode_regress/expected/groupingsets.out
@@ -2069,14 +2069,13 @@ select g100, g10, sum(g::numeric), count(*), max(g::text)
from gs_data_1 group by cube (g1000, g100,g10);
QUERY PLAN
-------------------------------------
- GroupAggregate
+ MixedAggregate
+ Hash Key: g100
+ Hash Key: g100, g10
Group Key: g1000, g100, g10
Group Key: g1000, g100
Group Key: g1000
Group Key: ()
- Sort Key: g100, g10
- Group Key: g100, g10
- Group Key: g100
Sort Key: g10, g1000
Group Key: g10, g1000
Group Key: g10
@@ -2084,7 +2083,7 @@ from gs_data_1 group by cube (g1000, g100,g10);
Sort Key: g1000, g100, g10
-> Seq Scan on gs_data_1
Optimizer: Postgres query optimizer
-(15 rows)
+(14 rows)
create table gs_hash_1 as
select g100, g10, sum(g::numeric), count(*), max(g::text)
diff --git a/src/test/singlenode_regress/expected/workfile/hashagg_spill.out b/src/test/singlenode_regress/expected/workfile/hashagg_spill.out
index 4a61fc97090..cc98d3a785a 100644
--- a/src/test/singlenode_regress/expected/workfile/hashagg_spill.out
+++ b/src/test/singlenode_regress/expected/workfile/hashagg_spill.out
@@ -112,6 +112,7 @@ reset optimizer_force_multistage_agg;
CREATE TABLE hashagg_spill(col1 numeric, col2 int);
INSERT INTO hashagg_spill SELECT id, 1 FROM generate_series(1,20000) id;
ANALYZE hashagg_spill;
+set hash_mem_multiplier = 1;
SET statement_mem='1000kB';
SET gp_workfile_compression = OFF;
select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;');
@@ -131,6 +132,7 @@ select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELE
CREATE TABLE spill_temptblspace (a numeric);
SET temp_tablespaces=pg_default;
INSERT INTO spill_temptblspace SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;
+reset hash_mem_multiplier;
RESET temp_tablespaces;
RESET statement_mem;
RESET gp_workfile_compression;
diff --git a/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql b/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql
index 6f22b57a99a..7cbcd11b8ec 100644
--- a/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql
+++ b/src/test/singlenode_regress/sql/workfile/hashagg_spill.sql
@@ -85,6 +85,7 @@ reset optimizer_force_multistage_agg;
CREATE TABLE hashagg_spill(col1 numeric, col2 int);
INSERT INTO hashagg_spill SELECT id, 1 FROM generate_series(1,20000) id;
ANALYZE hashagg_spill;
+set hash_mem_multiplier = 1;
SET statement_mem='1000kB';
SET gp_workfile_compression = OFF;
select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;');
@@ -95,6 +96,7 @@ select * from hashagg_spill.is_workfile_created('explain (analyze, verbose) SELE
CREATE TABLE spill_temptblspace (a numeric);
SET temp_tablespaces=pg_default;
INSERT INTO spill_temptblspace SELECT avg(col2) col2 FROM hashagg_spill GROUP BY col1 HAVING(sum(col1)) < 0;
+reset hash_mem_multiplier;
RESET temp_tablespaces;
RESET statement_mem;
RESET gp_workfile_compression;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]