This is an automated email from the ASF dual-hosted git repository. avamingli pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit e520e30e60b3eb695a740d849221fa970c094178 Author: Zhang Mingli <[email protected]> AuthorDate: Tue Oct 28 10:24:53 2025 +0800 Add GUC gp_use_streaming_hashagg Switch GUC to check results and avoid a lot of plan diffs in pax test cases. Authored-by: Zhang Mingli [email protected] --- contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/aggregates.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/eagerfree.sql | 3 +++ .../pax_storage/src/test/regress/sql/gp_aggregates_costs.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/gporca.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/indexjoin.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/limit_gp.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/olap_plans.sql | 3 +++ .../pax_storage/src/test/regress/sql/partition_aggregate.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/partition_join.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/select_distinct.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/shared_scan.sql | 3 +++ contrib/pax_storage/src/test/regress/sql/window.sql | 3 +++ src/backend/cdb/cdbgroupingpaths.c | 2 +- src/backend/utils/misc/guc_gp.c | 11 +++++++++++ src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 1 + 20 files changed, 62 insertions(+), 1 deletion(-) diff --git a/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql b/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql index 3378f8b64e6..0d1e45ae992 100644 --- a/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql +++ b/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql @@ -1,5 +1,8 @@ -- disable ORCA SET optimizer TO off; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- Test case group 1: basic 
functions CREATE TABLE agg_pushdown_parent ( diff --git a/contrib/pax_storage/src/test/regress/sql/aggregates.sql b/contrib/pax_storage/src/test/regress/sql/aggregates.sql index 158223e2633..136c70fb384 100644 --- a/contrib/pax_storage/src/test/regress/sql/aggregates.sql +++ b/contrib/pax_storage/src/test/regress/sql/aggregates.sql @@ -1,6 +1,9 @@ -- -- AGGREGATES -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- start_ignore -- end_ignore diff --git a/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql b/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql index 92d90ae72d9..8bc9d97c513 100644 --- a/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql +++ b/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql @@ -1,5 +1,8 @@ create schema bfv_aggregate; set search_path=bfv_aggregate; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- -- Window function with outer references in PARTITION BY/ORDER BY clause diff --git a/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql b/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql index d9e7b562a0a..783c6cf77f6 100644 --- a/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql +++ b/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql @@ -1,5 +1,8 @@ -- turn off autostats so we don't have to worry about the logging of the autostat queries set gp_autostats_mode = None; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- create needed tables (in a transaction, for speed) begin; diff --git a/contrib/pax_storage/src/test/regress/sql/eagerfree.sql b/contrib/pax_storage/src/test/regress/sql/eagerfree.sql index 81500e4575b..e2b79d19f9a 100644 --- a/contrib/pax_storage/src/test/regress/sql/eagerfree.sql +++ b/contrib/pax_storage/src/test/regress/sql/eagerfree.sql @@ -1,5 +1,8 @@ create schema eagerfree; set search_path=eagerfree; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore create 
table smallt (i int, t text, d date) distributed by (i); insert into smallt select i%10, 'text ' || (i%15), '2011-01-01'::date + ((i%20) || ' days')::interval diff --git a/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql b/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql index c1337de6083..8c3263a58bf 100644 --- a/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql +++ b/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql @@ -5,6 +5,9 @@ insert into cost_agg_t2 select i, random() * 99999, i % 300000 from generate_ser analyze cost_agg_t1; analyze cost_agg_t2; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- -- Test planner's decisions on aggregates when only little memory is available. -- diff --git a/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql b/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql index 36000de8af8..7b880c4e62c 100644 --- a/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql +++ b/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql @@ -10,6 +10,9 @@ insert into hashagg_test values (1,1,'1/1/2006','hi',2); insert into hashagg_test values (1,1,'1/2/2006','hi',3); insert into hashagg_test values (1,1,'1/3/2006','hi',4); +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- this will get the wrong answer (right number of rows, wrong aggregates) set enable_seqscan=off; select grp,sum(v) from hashagg_test where id1 = 1 and id2 = 1 and day between '1/1/2006' and '1/31/2006' group by grp order by sum(v) desc; diff --git a/contrib/pax_storage/src/test/regress/sql/gporca.sql b/contrib/pax_storage/src/test/regress/sql/gporca.sql index 573c1726dfd..b19afd60d7b 100644 --- a/contrib/pax_storage/src/test/regress/sql/gporca.sql +++ b/contrib/pax_storage/src/test/regress/sql/gporca.sql @@ -1,6 +1,9 @@ -- -- ORCA tests -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- show version SELECT count(*) from gp_opt_version(); 
diff --git a/contrib/pax_storage/src/test/regress/sql/indexjoin.sql b/contrib/pax_storage/src/test/regress/sql/indexjoin.sql index b99b561e42c..59290287e3b 100644 --- a/contrib/pax_storage/src/test/regress/sql/indexjoin.sql +++ b/contrib/pax_storage/src/test/regress/sql/indexjoin.sql @@ -4,6 +4,9 @@ CREATE TABLE my_tt_agg_small ( trade_price numeric, trade_volume bigint ) DISTRIBUTED BY (symbol); +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore CREATE TABLE my_tq_agg_small ( diff --git a/contrib/pax_storage/src/test/regress/sql/limit_gp.sql b/contrib/pax_storage/src/test/regress/sql/limit_gp.sql index 88c934c0779..0a1b4e8fd58 100644 --- a/contrib/pax_storage/src/test/regress/sql/limit_gp.sql +++ b/contrib/pax_storage/src/test/regress/sql/limit_gp.sql @@ -2,6 +2,9 @@ -- Check for MPP-19310 and MPP-19857 where mksort produces wrong result -- on OPT build, and fails assertion on debug build if a "LIMIT" query -- spills to disk. +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore CREATE TABLE mksort_limit_test_table(dkey INT, jkey INT, rval REAL, tval TEXT default repeat('abcdefghijklmnopqrstuvwxyz', 300)) DISTRIBUTED BY (dkey); INSERT INTO mksort_limit_test_table VALUES(generate_series(1, 10000), generate_series(10001, 20000), sqrt(generate_series(10001, 20000))); diff --git a/contrib/pax_storage/src/test/regress/sql/olap_plans.sql b/contrib/pax_storage/src/test/regress/sql/olap_plans.sql index c4242a34565..5b7ed221f18 100644 --- a/contrib/pax_storage/src/test/regress/sql/olap_plans.sql +++ b/contrib/pax_storage/src/test/regress/sql/olap_plans.sql @@ -2,6 +2,9 @@ -- Test the planner's ability to produce different kinds of plans to implement -- grouping and aggregation. 
-- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore drop table if exists olap_test; drop table if exists olap_test_single; diff --git a/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql b/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql index 0e1ea0eec80..d93f5cffc2c 100644 --- a/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql +++ b/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql @@ -5,6 +5,9 @@ -- Note: to ensure plan stability, it's a good idea to make the partitions of -- any one partitioned table in this test all have different numbers of rows. -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- Disable ORCA since it does support partition-wise aggregates set optimizer to off; diff --git a/contrib/pax_storage/src/test/regress/sql/partition_join.sql b/contrib/pax_storage/src/test/regress/sql/partition_join.sql index 1d188d1476f..311384e85b2 100644 --- a/contrib/pax_storage/src/test/regress/sql/partition_join.sql +++ b/contrib/pax_storage/src/test/regress/sql/partition_join.sql @@ -2,6 +2,9 @@ -- PARTITION_JOIN -- Test partitionwise join between partitioned tables -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- Disable ORCA since it does support partition-wise joins set optimizer to off; diff --git a/contrib/pax_storage/src/test/regress/sql/select_distinct.sql b/contrib/pax_storage/src/test/regress/sql/select_distinct.sql index 27b63e699a0..35facaa9ca3 100644 --- a/contrib/pax_storage/src/test/regress/sql/select_distinct.sql +++ b/contrib/pax_storage/src/test/regress/sql/select_distinct.sql @@ -1,6 +1,9 @@ -- -- SELECT_DISTINCT -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- -- awk '{print $3;}' onek.data | sort -n | uniq diff --git a/contrib/pax_storage/src/test/regress/sql/shared_scan.sql b/contrib/pax_storage/src/test/regress/sql/shared_scan.sql index 7234cef6e4a..d37eca9cdce 100644 --- 
a/contrib/pax_storage/src/test/regress/sql/shared_scan.sql +++ b/contrib/pax_storage/src/test/regress/sql/shared_scan.sql @@ -2,6 +2,9 @@ -- Queries that lead to hanging (not dead lock) when we don't handle synchronization properly in shared scan -- Queries that lead to wrong result when we don't finish executing the subtree below the shared scan being squelched. -- +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore CREATE SCHEMA shared_scan; diff --git a/contrib/pax_storage/src/test/regress/sql/window.sql b/contrib/pax_storage/src/test/regress/sql/window.sql index 6fd72f478e4..3ef1bc824c6 100644 --- a/contrib/pax_storage/src/test/regress/sql/window.sql +++ b/contrib/pax_storage/src/test/regress/sql/window.sql @@ -2,6 +2,9 @@ -- wrong result for some core case. Turn it on to run the existing tests -- and minimize the difference from upstream. set enable_incremental_sort=on; +-- start_ignore +set gp_use_streaming_hashagg = off; +-- end_ignore -- -- WINDOW FUNCTIONS diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index 4fff89b19dc..be2e0ad496a 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -1264,7 +1264,7 @@ add_first_stage_hash_agg_path(PlannerInfo *root, ctx->partial_grouping_target, AGG_HASHED, ctx->hasAggs ? 
AGGSPLIT_INITIAL_SERIAL : AGGSPLIT_SIMPLE, - true, /* streaming */ + gp_use_streaming_hashagg, /* streaming */ ctx->groupClause, NIL, ctx->agg_partial_costs, diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index d2fa30ea762..891044157b9 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -153,6 +153,7 @@ bool enable_parallel_semi_join = true; bool enable_parallel_dedup_semi_join = true; bool enable_parallel_dedup_semi_reverse_join = true; bool parallel_query_use_streaming_hashagg = false; +bool gp_use_streaming_hashagg = true; int gp_appendonly_insert_files = 0; int gp_appendonly_insert_files_tuples_range = 0; int gp_random_insert_segments = 0; @@ -1898,6 +1899,16 @@ struct config_bool ConfigureNamesBool_gp[] = false, NULL, NULL }, + { + {"gp_use_streaming_hashagg", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Use streaming hash agg in the first phase for multi-phase aggregations."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &gp_use_streaming_hashagg, + true, NULL, NULL + }, + { {"gp_force_random_redistribution", PGC_USERSET, CUSTOM_OPTIONS, gettext_noop("Force redistribution of insert for randomly-distributed."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 4b62b663f0b..aa34138a4b5 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -294,6 +294,7 @@ extern bool enable_parallel_semi_join; extern bool enable_parallel_dedup_semi_join; extern bool enable_parallel_dedup_semi_reverse_join; extern bool parallel_query_use_streaming_hashagg; +extern bool gp_use_streaming_hashagg; extern int gp_appendonly_insert_files; extern int gp_appendonly_insert_files_tuples_range; extern int gp_random_insert_segments; diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index 4c956e14fbf..cba11770a81 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -289,6 +289,7 @@ 
"gp_subtrans_warn_limit", "gp_vmem_idle_resource_timeout", "gp_use_legacy_hashops", + "gp_use_streaming_hashagg", "gp_vmem_limit_per_query", "gp_vmem_protect_limit", "gp_vmem_protect_segworker_cache_limit", --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
