This is an automated email from the ASF dual-hosted git repository. avamingli pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 8b01eafef7c7f4c9f3fdb6420bfe163e337c2210 Author: Zhang Mingli <avamin...@gmail.com> AuthorDate: Mon Aug 4 23:50:34 2025 +0800 Correct parallel window function in CASE WHEN. Previously, we attempted to disable window functions inside CASE WHEN expressions due to concerns about unstable parallel results. However, this was a misunderstanding. All expressions from the subquery are Var columns, not the original expressions. This issue was uncovered when we fixed the subquery row count estimation, causing the cost to change in the upper plan. EXPLAIN(COSTS OFF) SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( SELECT *, CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, CASE WHEN AVG(salary) OVER (PARTITION BY depname) < salary THEN 200 END AS depadj FROM empsalary )s; QUERY PLAN -------------------------------------------------------------------------- WindowAgg -> WindowAgg Order By: s.empno -> Gather Motion 6:1 (slice1; segments: 6) Merge Key: s.empno -> Sort Sort Key: s.empno -> Subquery Scan on s -> WindowAgg Partition By: empsalary.depname -> Sort Sort Key: empsalary.depname -> Redistribute Motion 6:6 (slice2; segments: 6) Hash Key: empsalary.depname Hash Module: 3 -> Parallel Seq Scan on empsalary Optimizer: Postgres query optimizer (17 rows) Authored-by: Zhang Mingli avamin...@gmail.com --- src/backend/optimizer/plan/planner.c | 63 --------------------------- src/test/regress/expected/window_parallel.out | 15 ++++--- src/test/regress/sql/misc_jiras.sql | 2 + src/test/regress/sql/window_parallel.sql | 2 +- 4 files changed, 12 insertions(+), 70 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 9728f4f34ef..22ec8b43266 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -300,12 +300,6 @@ static split_rollup_data *make_new_rollups_for_hash_grouping_set(PlannerInfo *ro Path *path, grouping_sets_data *gd); -static bool -contain_case_expr(Node *clause); - -static bool -contain_case_expr_walker(Node *node, void *context); - static void create_partial_window_path(PlannerInfo *root, RelOptInfo *window_rel, Path *path, @@ -9139,25 +9133,6 @@ make_new_rollups_for_hash_grouping_set(PlannerInfo *root, return srd; } -static bool -contain_case_expr(Node *clause) -{ - return contain_case_expr_walker(clause, NULL); -} - -static bool -contain_case_expr_walker(Node *node, void *context) -{ - if (node == NULL) - return false; - - if (IsA(node, CaseExpr)) - return true; - - return expression_tree_walker(node, contain_case_expr_walker, - context); -} - /* * Parallel processing of window functions. * @@ -9176,47 +9151,9 @@ create_partial_window_path(PlannerInfo *root, { PathTarget *window_target; ListCell *l; - Bitmapset *sgrefs; window_target = input_target; - sgrefs = NULL; - - foreach(l, activeWindows) - { - WindowClause *wc = lfirst_node(WindowClause, l); - ListCell *lc2; - - foreach(lc2, wc->partitionClause) - { - SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); - - sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); - } - foreach(lc2, wc->orderClause) - { - SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); - - sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); - } - } - - int x = -1; - while ((x = bms_next_member(sgrefs, x)) >= 0) - { - Index sgref = get_pathtarget_sortgroupref(input_target, x); - if (sgref != 0) - { - ListCell *lc; - foreach(lc, input_target->exprs) - { - Expr *expr = (Expr *) lfirst(lc); - if (contain_case_expr((Node*)expr)) - return; - } - } - } - foreach(l, activeWindows) { WindowClause *wc = lfirst_node(WindowClause, l); diff --git a/src/test/regress/expected/window_parallel.out b/src/test/regress/expected/window_parallel.out index bd4b9161c1b..c9e226b7355 100644 --- a/src/test/regress/expected/window_parallel.out +++ b/src/test/regress/expected/window_parallel.out @@ -498,7 +498,7 @@ SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION B (10 rows) -- w8 --- strict aggs +-- window agg in CASE WHEN clause set enable_parallel = off; EXPLAIN(COSTS OFF) SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( @@ -556,12 +556,12 @@ SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), AVG(salary) OVER (PARTITION BY depname) < salary THEN 200 END AS depadj FROM empsalary )s; - QUERY PLAN -------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------ WindowAgg -> WindowAgg Order By: s.empno - -> Gather Motion 3:1 (slice1; segments: 3) + -> Gather Motion 6:1 (slice1; segments: 6) Merge Key: s.empno -> Sort Sort Key: s.empno @@ -570,9 +570,12 @@ SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), Partition By: empsalary.depname -> Sort Sort Key: empsalary.depname - -> Seq Scan on empsalary + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: empsalary.depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary Optimizer: Postgres query optimizer -(14 rows) +(17 rows) SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( SELECT *, diff --git a/src/test/regress/sql/misc_jiras.sql b/src/test/regress/sql/misc_jiras.sql index 651b26344d5..760f2325d3e 100644 --- a/src/test/regress/sql/misc_jiras.sql +++ b/src/test/regress/sql/misc_jiras.sql @@ -21,6 +21,7 @@ set statement_mem to '1024kB'; set extra_float_digits=0; -- the last decimal digits are somewhat random +set enable_parallel = off; -- Inject fault at 'winagg_after_spool_tuples' to show that the tuplestore spills -- to disk. SELECT gp_inject_fault('winagg_after_spool_tuples', 'skip', dbid) @@ -42,6 +43,7 @@ SELECT gp_inject_fault('winagg_after_spool_tuples', 'reset', dbid) FROM gp_segment_configuration WHERE role='p' AND content>=0; reset statement_mem; +reset enable_parallel; -- non-ASCII multibyte character should show up correctly in error messages. select '溋' || (B'1'); diff --git a/src/test/regress/sql/window_parallel.sql b/src/test/regress/sql/window_parallel.sql index e8136750b2a..9ba4f1b9fc4 100644 --- a/src/test/regress/sql/window_parallel.sql +++ b/src/test/regress/sql/window_parallel.sql @@ -120,7 +120,7 @@ SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION B SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); -- w8 --- strict aggs +-- window agg in CASE WHEN clause set enable_parallel = off; EXPLAIN(COSTS OFF) SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org