This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 31f6eea7af48d80ee934e7c4d9c2296ed3dbed90 Author: Alexander Kondakov <[email protected]> AuthorDate: Mon Aug 14 11:41:32 2023 +0300 Fix ORCA invalid processing of nested SubLinks referenced in GROUP BY clause. The ORCA optimizer could fallback to the legacy optimizer due to incorrect processing of nested SubLinks (SubLink contains one more SubLink or rtable subquery inside) during query normalization. The erroneous behaviour took place when a query had a nested SubLink in its targetList and this SubLink was in GROUP BY clause itself (see the test section). During query normalization for queries like in the tests the RunGroupingColMutator function is called. This function processed nested SubLinks incorrectly. It has Query code branch, which is supposed to be executed for various subqueries of the original query. However, for the nested SubLink case, when mutating the subquery of the SubLink (at second query level), the m_current_query_level field was not increased respectively, because MutateQueryOrExpressionTree, which was responsible for calling the RunGroupingColMutator on the lower subquery, did not know anything about the context. Therefore, for the query like in the tests, wrong value of the m_current_query_level led to an unnecessary modification of the Var's varlevelsup value. And when Var's varlevelsup value had been unnecessary modified, it started referencing different query level. This could lead to the fallback or invalid plan construction at further planning stages. This patch fixes erroneous behaviour of RunGroupingColMutator by replacing the call of MutateQueryOrExpressionTree with the call of RunGroupingColMutator in order to properly modify m_current_query_level when mutating lower level RTEs. --- src/backend/gpopt/translate/CQueryMutators.cpp | 11 +++--- src/test/regress/expected/subselect.out | 43 +++++++++++++++++++++ src/test/regress/expected/subselect_optimizer.out | 47 +++++++++++++++++++++++ src/test/regress/sql/subselect.sql | 19 +++++++++ 4 files changed, 115 insertions(+), 5 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index 59311bbad6..6557918665 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -446,11 +446,12 @@ CQueryMutators::RunGroupingColMutator(Node *node, GPOS_ASSERT(IsA(old_sublink->subselect, Query)); - new_sublink->subselect = gpdb::MutateQueryOrExpressionTree( - old_sublink->subselect, - (MutatorWalkerFn) CQueryMutators::RunGroupingColMutator, context, - 0 // flags -- mutate into cte-lists - ); + // One need to call the Query mutator for subselect and take into + // account that SubLink can be multi-level. Therefore, the + // context->m_current_query_level must be modified properly + // while diving into such nested SubLink. + new_sublink->subselect = + RunGroupingColMutator(old_sublink->subselect, context); context->m_current_query_level--; diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 7be004172b..31bf220bd2 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -2023,4 +2023,47 @@ group by i, j; 2 | 2 (1 row) +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + QUERY PLAN +------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + Output: t.j, 1, ((SubPlan 1)) + -> HashAggregate + Output: t.j, 1, ((SubPlan 1)) + Group Key: t.j, ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: t.j, ((SubPlan 1)) + Hash Key: t.j, ((SubPlan 1)) + -> HashAggregate + Output: t.j, ((SubPlan 1)) + Group Key: t.j, (SubPlan 1) + -> Seq Scan on public.t + Output: t.j, (SubPlan 1) + SubPlan 1 + -> Result + Output: t.j + Optimizer: Postgres-based planner +(17 rows) + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + j | c | q1 +---+---+---- + 2 | 1 | 2 +(1 row) + drop table t; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index 38d440d121..322e14951c 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -2103,4 +2103,51 @@ group by i, j; 2 | 2 (1 row) +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + QUERY PLAN +------------------------------------------------------------------------ + Result + Output: j, 1, ((SubPlan 1)) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: j, ((SubPlan 1)) + -> GroupAggregate + Output: j, ((SubPlan 1)) + Group Key: t.j, ((SubPlan 1)) + -> Sort + Output: j, ((SubPlan 1)) + Sort Key: t.j, ((SubPlan 1)) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Output: j, ((SubPlan 1)) + Hash Key: j, ((SubPlan 1)) + -> Seq Scan on public.t + Output: j, (SubPlan 1) + SubPlan 1 + -> Result + Output: t.j + -> Result + Output: true + Optimizer: GPORCA +(21 rows) + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + j | c | q1 +---+---+---- + 2 | 1 | 2 +(1 row) + drop table t; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index 4ee4f8f8f9..e2cccf1a95 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -1006,4 +1006,23 @@ select j, from t group by i, j; +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is +-- not grouping column (1 as c), is added to query targetList to make ORCA +-- perform query normalization. During normalization ORCA modifies the vars of +-- the grouping elements of targetList in order to produce a new Query tree. +-- The modification of vars inside nested part of SubLinks should be handled +-- correctly. ORCA shouldn't fall back due to missing variable entry as a result +-- of incorrect query normalization. +explain (verbose, costs off) +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + +select j, 1 as c, +(select j from (select j) q2) q1 +from t +group by j, q1; + drop table t; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
