This is an automated email from the ASF dual-hosted git repository.
reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new f5dcd91b847 Check MergeAppend node in share input mutator (#1204)
f5dcd91b847 is described below
commit f5dcd91b84757f4afcfe274abb38128ad72191a7
Author: reshke <[email protected]>
AuthorDate: Wed Jul 2 18:31:42 2025 +0500
Check MergeAppend node in share input mutator (#1204)
* Check MergeAppend node in share input mutator
Make a recursive call in the plan walker for the MergeAppend plan type.
When the planner decides to merge two sorted sub-plans, one
of which has a Share Input Scan node, the executor fails to execute the plan
because of wrongly aligned internal structures.
It turns out that the proper recursive call was forgotten
in the shareinput tree walker.
---
src/backend/cdb/cdbmutate.c | 8 ++++
src/test/regress/expected/with_clause.out | 50 ++++++++++++++++++++++
.../regress/expected/with_clause_optimizer.out | 50 ++++++++++++++++++++++
src/test/regress/sql/with_clause.sql | 29 +++++++++++++
4 files changed, 137 insertions(+)
diff --git a/src/backend/cdb/cdbmutate.c b/src/backend/cdb/cdbmutate.c
index 9672ce25b59..1b5820587ef 100644
--- a/src/backend/cdb/cdbmutate.c
+++ b/src/backend/cdb/cdbmutate.c
@@ -403,6 +403,14 @@ shareinput_walker(SHAREINPUT_MUTATOR f, Node *node,
PlannerInfo *root)
foreach(cell, app->appendplans)
shareinput_walker(f, (Node *) lfirst(cell),
root);
}
+ else if (IsA(node, MergeAppend))
+ {
+ ListCell *cell;
+ MergeAppend *mapp = (MergeAppend *) node;
+
+ foreach(cell, mapp->mergeplans)
+ shareinput_walker(f, (Node *) lfirst(cell),
root);
+ }
/* GPDB_14_MERGE_FIXME: double check on following logics. */
// else if (IsA(node, ModifyTable))
// {
diff --git a/src/test/regress/expected/with_clause.out
b/src/test/regress/expected/with_clause.out
index 2e574f1a146..2c3aff9951f 100644
--- a/src/test/regress/expected/with_clause.out
+++ b/src/test/regress/expected/with_clause.out
@@ -2355,3 +2355,53 @@ UNION ALL
c | 1
(7 rows)
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a'
as the Apache Cloudberry data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+ANALYZE foo_issue_1204_test;
+-- Assert that the plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0) (slice1)
+ -> Gather Motion 3:1 (slice2; segments: 3)
+ -> Merge Append
+ Sort Key: share0_ref1.a
+ -> GroupAggregate
+ Group Key: share0_ref1.a, share0_ref1.b, share0_ref1.c
+ -> Sort
+ Sort Key: share0_ref1.a, share0_ref1.b,
share0_ref1.c
+ -> Shared Scan (share slice:id 2:0)
+ -> Seq Scan on foo_issue_1204_test
+ -> GroupAggregate
+ Group Key: foo_issue_1204_test_1.a,
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+ -> Sort
+ Sort Key: foo_issue_1204_test_1.a,
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+ -> Seq Scan on foo_issue_1204_test
foo_issue_1204_test_1
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+-- Check execution is ok
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+ exists
+--------
+ t
+(1 row)
+
+DROP TABLE foo_issue_1204_test;
diff --git a/src/test/regress/expected/with_clause_optimizer.out
b/src/test/regress/expected/with_clause_optimizer.out
index 3fbea887ef7..b95ef3ac3a6 100644
--- a/src/test/regress/expected/with_clause_optimizer.out
+++ b/src/test/regress/expected/with_clause_optimizer.out
@@ -2364,3 +2364,53 @@ UNION ALL
c | 1
(7 rows)
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a'
as the Apache Cloudberry data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+ANALYZE foo_issue_1204_test;
+-- Assert that the plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------
+ Result
+ InitPlan 1 (returns $0) (slice1)
+ -> Gather Motion 3:1 (slice2; segments: 3)
+ -> Merge Append
+ Sort Key: share0_ref1.a
+ -> GroupAggregate
+ Group Key: share0_ref1.a, share0_ref1.b, share0_ref1.c
+ -> Sort
+ Sort Key: share0_ref1.a, share0_ref1.b,
share0_ref1.c
+ -> Shared Scan (share slice:id 2:0)
+ -> Seq Scan on foo_issue_1204_test
+ -> GroupAggregate
+ Group Key: foo_issue_1204_test_1.a,
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+ -> Sort
+ Sort Key: foo_issue_1204_test_1.a,
foo_issue_1204_test_1.b, foo_issue_1204_test_1.c
+ -> Seq Scan on foo_issue_1204_test
foo_issue_1204_test_1
+ Optimizer: Postgres query optimizer
+(17 rows)
+
+-- Check execution is ok
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+ exists
+--------
+ t
+(1 row)
+
+DROP TABLE foo_issue_1204_test;
diff --git a/src/test/regress/sql/with_clause.sql
b/src/test/regress/sql/with_clause.sql
index 0ef209415ab..149edfe92af 100644
--- a/src/test/regress/sql/with_clause.sql
+++ b/src/test/regress/sql/with_clause.sql
@@ -462,3 +462,32 @@ UNION ALL
SELECT 'sleep', 1 where pg_sleep(1) is not null
UNION ALL
SELECT 'c', j FROM cte;
+
+
+-- Test issue from PR#1204
+-- Executor used to fail to execute this query
+
+CREATE TABLE foo_issue_1204_test (a INT, b INT, c INT, d INT, e INT, f INT);
+INSERT INTO foo_issue_1204_test SELECT generate_series(1,10000);
+
+ANALYZE foo_issue_1204_test;
+
+-- Assert that the plan uses Merge Append strategy, and has Share Input Scan node.
+-- Also we are not actually interested in output so discard it using SELECT
EXISTS() hack
+EXPLAIN (COSTS OFF, TIMING OFF, BUFFERS OFF)
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+
+-- Check execution is ok
+SELECT EXISTS(
+ with inp as MATERIALIZED (select * from foo_issue_1204_test ) select
a,b,c, count(distinct d), count(distinct e), count(distinct f) from inp group
by 1,2,3
+ UNION ALL
+ select a,b,c, count(distinct d), count(distinct e), count(distinct f)
from foo_issue_1204_test group by 1,2,3
+ ORDER BY 1
+);
+
+DROP TABLE foo_issue_1204_test;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]