(cloudberry) 03/05: Add GUC gp_use_streaming_hashagg

avamingli Thu, 30 Oct 2025 04:49:39 -0700

This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit e520e30e60b3eb695a740d849221fa970c094178
Author: Zhang Mingli <[email protected]>
AuthorDate: Tue Oct 28 10:24:53 2025 +0800

    Add GUC gp_use_streaming_hashagg
    
    Switch GUC to check results and avoid a lot of plan diffs
    in pax test cases.
    
    Authored-by: Zhang Mingli [email protected]
---
 contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql     |  3 +++
 contrib/pax_storage/src/test/regress/sql/aggregates.sql       |  3 +++
 contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql    |  3 +++
 contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql  |  3 +++
 contrib/pax_storage/src/test/regress/sql/eagerfree.sql        |  3 +++
 .../pax_storage/src/test/regress/sql/gp_aggregates_costs.sql  |  3 +++
 contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql       |  3 +++
 contrib/pax_storage/src/test/regress/sql/gporca.sql           |  3 +++
 contrib/pax_storage/src/test/regress/sql/indexjoin.sql        |  3 +++
 contrib/pax_storage/src/test/regress/sql/limit_gp.sql         |  3 +++
 contrib/pax_storage/src/test/regress/sql/olap_plans.sql       |  3 +++
 .../pax_storage/src/test/regress/sql/partition_aggregate.sql  |  3 +++
 contrib/pax_storage/src/test/regress/sql/partition_join.sql   |  3 +++
 contrib/pax_storage/src/test/regress/sql/select_distinct.sql  |  3 +++
 contrib/pax_storage/src/test/regress/sql/shared_scan.sql      |  3 +++
 contrib/pax_storage/src/test/regress/sql/window.sql           |  3 +++
 src/backend/cdb/cdbgroupingpaths.c                            |  2 +-
 src/backend/utils/misc/guc_gp.c                               | 11 +++++++++++
 src/include/utils/guc.h                                       |  1 +
 src/include/utils/unsync_guc_name.h                           |  1 +
 20 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql 
b/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql
index 3378f8b64e6..0d1e45ae992 100644
--- a/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql
+++ b/contrib/pax_storage/src/test/regress/sql/agg_pushdown.sql
@@ -1,5 +1,8 @@
 -- disable ORCA
 SET optimizer TO off;
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- Test case group 1: basic functions
 CREATE TABLE agg_pushdown_parent (
diff --git a/contrib/pax_storage/src/test/regress/sql/aggregates.sql 
b/contrib/pax_storage/src/test/regress/sql/aggregates.sql
index 158223e2633..136c70fb384 100644
--- a/contrib/pax_storage/src/test/regress/sql/aggregates.sql
+++ b/contrib/pax_storage/src/test/regress/sql/aggregates.sql
@@ -1,6 +1,9 @@
 --
 -- AGGREGATES
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- start_ignore
 -- end_ignore
diff --git a/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql 
b/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql
index 92d90ae72d9..8bc9d97c513 100644
--- a/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql
+++ b/contrib/pax_storage/src/test/regress/sql/bfv_aggregate.sql
@@ -1,5 +1,8 @@
 create schema bfv_aggregate;
 set search_path=bfv_aggregate;
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 --
 -- Window function with outer references in PARTITION BY/ORDER BY clause
diff --git a/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql 
b/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql
index d9e7b562a0a..783c6cf77f6 100644
--- a/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql
+++ b/contrib/pax_storage/src/test/regress/sql/direct_dispatch.sql
@@ -1,5 +1,8 @@
 -- turn off autostats so we don't have to worry about the logging of the 
autostat queries
 set gp_autostats_mode = None;
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- create needed tables (in a transaction, for speed)
 begin;
diff --git a/contrib/pax_storage/src/test/regress/sql/eagerfree.sql 
b/contrib/pax_storage/src/test/regress/sql/eagerfree.sql
index 81500e4575b..e2b79d19f9a 100644
--- a/contrib/pax_storage/src/test/regress/sql/eagerfree.sql
+++ b/contrib/pax_storage/src/test/regress/sql/eagerfree.sql
@@ -1,5 +1,8 @@
 create schema eagerfree;
 set search_path=eagerfree;
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 create table smallt (i int, t text, d date) distributed by (i);
 insert into smallt select i%10, 'text ' || (i%15), '2011-01-01'::date + 
((i%20) || ' days')::interval
diff --git a/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql 
b/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql
index c1337de6083..8c3263a58bf 100644
--- a/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql
+++ b/contrib/pax_storage/src/test/regress/sql/gp_aggregates_costs.sql
@@ -5,6 +5,9 @@ insert into cost_agg_t2 select i, random() * 99999, i % 300000 
from generate_ser
 analyze cost_agg_t1;
 analyze cost_agg_t2;
 
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 --
 -- Test planner's decisions on aggregates when only little memory is available.
 --
diff --git a/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql 
b/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql
index 36000de8af8..7b880c4e62c 100644
--- a/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql
+++ b/contrib/pax_storage/src/test/regress/sql/gp_hashagg.sql
@@ -10,6 +10,9 @@ insert into hashagg_test values (1,1,'1/1/2006','hi',2);
 insert into hashagg_test values (1,1,'1/2/2006','hi',3);
 insert into hashagg_test values (1,1,'1/3/2006','hi',4);
 
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 -- this will get the wrong answer (right number of rows, wrong aggregates)
 set enable_seqscan=off;
 select grp,sum(v) from hashagg_test where id1 = 1 and id2 = 1 and day between 
'1/1/2006' and '1/31/2006' group by grp order by sum(v) desc;
diff --git a/contrib/pax_storage/src/test/regress/sql/gporca.sql 
b/contrib/pax_storage/src/test/regress/sql/gporca.sql
index 573c1726dfd..b19afd60d7b 100644
--- a/contrib/pax_storage/src/test/regress/sql/gporca.sql
+++ b/contrib/pax_storage/src/test/regress/sql/gporca.sql
@@ -1,6 +1,9 @@
 --
 -- ORCA tests
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- show version
 SELECT count(*) from gp_opt_version();
diff --git a/contrib/pax_storage/src/test/regress/sql/indexjoin.sql 
b/contrib/pax_storage/src/test/regress/sql/indexjoin.sql
index b99b561e42c..59290287e3b 100644
--- a/contrib/pax_storage/src/test/regress/sql/indexjoin.sql
+++ b/contrib/pax_storage/src/test/regress/sql/indexjoin.sql
@@ -4,6 +4,9 @@ CREATE TABLE my_tt_agg_small (
     trade_price numeric,
     trade_volume bigint
 ) DISTRIBUTED BY (symbol);
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 
 CREATE TABLE my_tq_agg_small (
diff --git a/contrib/pax_storage/src/test/regress/sql/limit_gp.sql 
b/contrib/pax_storage/src/test/regress/sql/limit_gp.sql
index 88c934c0779..0a1b4e8fd58 100644
--- a/contrib/pax_storage/src/test/regress/sql/limit_gp.sql
+++ b/contrib/pax_storage/src/test/regress/sql/limit_gp.sql
@@ -2,6 +2,9 @@
 -- Check for MPP-19310 and MPP-19857 where mksort produces wrong result
 -- on OPT build, and fails assertion on debug build if a "LIMIT" query
 -- spills to disk.
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 CREATE TABLE mksort_limit_test_table(dkey INT, jkey INT, rval REAL, tval TEXT 
default repeat('abcdefghijklmnopqrstuvwxyz', 300)) DISTRIBUTED BY (dkey);
 INSERT INTO mksort_limit_test_table VALUES(generate_series(1, 10000), 
generate_series(10001, 20000), sqrt(generate_series(10001, 20000)));
diff --git a/contrib/pax_storage/src/test/regress/sql/olap_plans.sql 
b/contrib/pax_storage/src/test/regress/sql/olap_plans.sql
index c4242a34565..5b7ed221f18 100644
--- a/contrib/pax_storage/src/test/regress/sql/olap_plans.sql
+++ b/contrib/pax_storage/src/test/regress/sql/olap_plans.sql
@@ -2,6 +2,9 @@
 -- Test the planner's ability to produce different kinds of plans to implement
 -- grouping and aggregation.
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 drop table if exists olap_test;
 drop table if exists olap_test_single;
diff --git a/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql 
b/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql
index 0e1ea0eec80..d93f5cffc2c 100644
--- a/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql
+++ b/contrib/pax_storage/src/test/regress/sql/partition_aggregate.sql
@@ -5,6 +5,9 @@
 -- Note: to ensure plan stability, it's a good idea to make the partitions of
 -- any one partitioned table in this test all have different numbers of rows.
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- Disable ORCA since it does support partition-wise aggregates
 set optimizer to off;
diff --git a/contrib/pax_storage/src/test/regress/sql/partition_join.sql 
b/contrib/pax_storage/src/test/regress/sql/partition_join.sql
index 1d188d1476f..311384e85b2 100644
--- a/contrib/pax_storage/src/test/regress/sql/partition_join.sql
+++ b/contrib/pax_storage/src/test/regress/sql/partition_join.sql
@@ -2,6 +2,9 @@
 -- PARTITION_JOIN
 -- Test partitionwise join between partitioned tables
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 -- Disable ORCA since it does support partition-wise joins
 set optimizer to off;
diff --git a/contrib/pax_storage/src/test/regress/sql/select_distinct.sql 
b/contrib/pax_storage/src/test/regress/sql/select_distinct.sql
index 27b63e699a0..35facaa9ca3 100644
--- a/contrib/pax_storage/src/test/regress/sql/select_distinct.sql
+++ b/contrib/pax_storage/src/test/regress/sql/select_distinct.sql
@@ -1,6 +1,9 @@
 --
 -- SELECT_DISTINCT
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 --
 -- awk '{print $3;}' onek.data | sort -n | uniq
diff --git a/contrib/pax_storage/src/test/regress/sql/shared_scan.sql 
b/contrib/pax_storage/src/test/regress/sql/shared_scan.sql
index 7234cef6e4a..d37eca9cdce 100644
--- a/contrib/pax_storage/src/test/regress/sql/shared_scan.sql
+++ b/contrib/pax_storage/src/test/regress/sql/shared_scan.sql
@@ -2,6 +2,9 @@
 -- Queries that lead to hanging (not dead lock) when we don't handle 
synchronization properly in shared scan
 -- Queries that lead to wrong result when we don't finish executing the 
subtree below the shared scan being squelched.
 --
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 CREATE SCHEMA shared_scan;
 
diff --git a/contrib/pax_storage/src/test/regress/sql/window.sql 
b/contrib/pax_storage/src/test/regress/sql/window.sql
index 6fd72f478e4..3ef1bc824c6 100644
--- a/contrib/pax_storage/src/test/regress/sql/window.sql
+++ b/contrib/pax_storage/src/test/regress/sql/window.sql
@@ -2,6 +2,9 @@
 -- wrong result for some core case. Turn it on to run the existing tests
 -- and minimize the difference from upstream.
 set enable_incremental_sort=on;
+-- start_ignore
+set gp_use_streaming_hashagg = off;
+-- end_ignore
 
 --
 -- WINDOW FUNCTIONS
diff --git a/src/backend/cdb/cdbgroupingpaths.c 
b/src/backend/cdb/cdbgroupingpaths.c
index 4fff89b19dc..be2e0ad496a 100644
--- a/src/backend/cdb/cdbgroupingpaths.c
+++ b/src/backend/cdb/cdbgroupingpaths.c
@@ -1264,7 +1264,7 @@ add_first_stage_hash_agg_path(PlannerInfo *root,
                                                                                
  ctx->partial_grouping_target,
                                                                                
  AGG_HASHED,
                                                                                
  ctx->hasAggs ? AGGSPLIT_INITIAL_SERIAL : AGGSPLIT_SIMPLE,
-                                                                               
  true, /* streaming */
+                                                                               
  gp_use_streaming_hashagg, /* streaming */
                                                                                
  ctx->groupClause,
                                                                                
  NIL,
                                                                                
  ctx->agg_partial_costs,
diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c
index d2fa30ea762..891044157b9 100644
--- a/src/backend/utils/misc/guc_gp.c
+++ b/src/backend/utils/misc/guc_gp.c
@@ -153,6 +153,7 @@ bool                enable_parallel_semi_join = true;
 bool           enable_parallel_dedup_semi_join = true;
 bool           enable_parallel_dedup_semi_reverse_join = true;
 bool           parallel_query_use_streaming_hashagg = false;
+bool           gp_use_streaming_hashagg = true;
 int                    gp_appendonly_insert_files = 0;
 int                    gp_appendonly_insert_files_tuples_range = 0;
 int                    gp_random_insert_segments = 0;
@@ -1898,6 +1899,16 @@ struct config_bool ConfigureNamesBool_gp[] =
                false, NULL, NULL
        },
 
+       {
+               {"gp_use_streaming_hashagg", PGC_USERSET, QUERY_TUNING_METHOD,
+                       gettext_noop("Use streaming hash agg in the first phase 
for multi-phase aggregations."),
+                       NULL,
+                       GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE
+               },
+               &gp_use_streaming_hashagg,
+               true, NULL, NULL
+       },
+
        {
                {"gp_force_random_redistribution", PGC_USERSET, CUSTOM_OPTIONS,
                        gettext_noop("Force redistribution of insert for 
randomly-distributed."),
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 4b62b663f0b..aa34138a4b5 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -294,6 +294,7 @@ extern bool enable_parallel_semi_join;
 extern bool enable_parallel_dedup_semi_join;
 extern bool enable_parallel_dedup_semi_reverse_join;
 extern bool    parallel_query_use_streaming_hashagg;
+extern bool gp_use_streaming_hashagg;
 extern int  gp_appendonly_insert_files;
 extern int  gp_appendonly_insert_files_tuples_range;
 extern int  gp_random_insert_segments;
diff --git a/src/include/utils/unsync_guc_name.h 
b/src/include/utils/unsync_guc_name.h
index 4c956e14fbf..cba11770a81 100644
--- a/src/include/utils/unsync_guc_name.h
+++ b/src/include/utils/unsync_guc_name.h
@@ -289,6 +289,7 @@
                "gp_subtrans_warn_limit",
                "gp_vmem_idle_resource_timeout",
                "gp_use_legacy_hashops",
+               "gp_use_streaming_hashagg",
                "gp_vmem_limit_per_query",
                "gp_vmem_protect_limit",
                "gp_vmem_protect_segworker_cache_limit",


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 03/05: Add GUC gp_use_streaming_hashagg

Reply via email to