This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 31f6eea7af48d80ee934e7c4d9c2296ed3dbed90
Author: Alexander Kondakov <[email protected]>
AuthorDate: Mon Aug 14 11:41:32 2023 +0300

    Fix ORCA invalid processing of nested SubLinks referenced in GROUP BY 
clause.
    
    The ORCA optimizer could fallback to the legacy optimizer due to incorrect
    processing of nested SubLinks (SubLink contains one more SubLink or rtable
    subquery inside) during query normalization.
    
    The erroneous behaviour took place when a query had a nested SubLink in its
    targetList and this SubLink was in GROUP BY clause itself (see the test
    section).
    
    During query normalization for queries like in the tests the
    RunGroupingColMutator function is called. This function processed nested
    SubLinks incorrectly. It has Query code branch, which is supposed to be
    executed for various subqueries of the original query. However, for the
    nested SubLink case, when mutating the subquery of the SubLink (at second 
query
    level), the m_current_query_level field was not increased respectively,
    because MutateQueryOrExpressionTree, which was responsible for calling the
    RunGroupingColMutator on the lower subquery, did not know anything about the
    context. Therefore, for the query like in the tests, wrong value of the
    m_current_query_level led to an unnecessary modification of the Var's
    varlevelsup value. And when Var's varlevelsup value had been unnecessary
    modified, it started referencing different query level. This could lead to 
the
    fallback or invalid plan construction at further planning stages.
    
    This patch fixes erroneous behaviour of RunGroupingColMutator by replacing 
the
    call of MutateQueryOrExpressionTree with the call of RunGroupingColMutator 
in
    order to properly modify m_current_query_level when mutating lower level 
RTEs.
---
 src/backend/gpopt/translate/CQueryMutators.cpp    | 11 +++---
 src/test/regress/expected/subselect.out           | 43 +++++++++++++++++++++
 src/test/regress/expected/subselect_optimizer.out | 47 +++++++++++++++++++++++
 src/test/regress/sql/subselect.sql                | 19 +++++++++
 4 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp 
b/src/backend/gpopt/translate/CQueryMutators.cpp
index 59311bbad6..6557918665 100644
--- a/src/backend/gpopt/translate/CQueryMutators.cpp
+++ b/src/backend/gpopt/translate/CQueryMutators.cpp
@@ -446,11 +446,12 @@ CQueryMutators::RunGroupingColMutator(Node *node,
 
                GPOS_ASSERT(IsA(old_sublink->subselect, Query));
 
-               new_sublink->subselect = gpdb::MutateQueryOrExpressionTree(
-                       old_sublink->subselect,
-                       (MutatorWalkerFn) 
CQueryMutators::RunGroupingColMutator, context,
-                       0  // flags -- mutate into cte-lists
-               );
+               // One need to call the Query mutator for subselect and take 
into
+               // account that SubLink can be multi-level. Therefore, the
+               // context->m_current_query_level must be modified properly
+               // while diving into such nested SubLink.
+               new_sublink->subselect =
+                       RunGroupingColMutator(old_sublink->subselect, context);
 
                context->m_current_query_level--;
 
diff --git a/src/test/regress/expected/subselect.out 
b/src/test/regress/expected/subselect.out
index 7be004172b..31bf220bd2 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2023,4 +2023,47 @@ group by i, j;
  2 | 2
 (1 row)
 
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is
+-- not grouping column (1 as c), is added to query targetList to make ORCA
+-- perform query normalization. During normalization ORCA modifies the vars of
+-- the grouping elements of targetList in order to produce a new Query tree.
+-- The modification of vars inside nested part of SubLinks should be handled
+-- correctly. ORCA shouldn't fall back due to missing variable entry as a 
result
+-- of incorrect query normalization.
+explain (verbose, costs off)
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: t.j, 1, ((SubPlan 1))
+   ->  HashAggregate
+         Output: t.j, 1, ((SubPlan 1))
+         Group Key: t.j, ((SubPlan 1))
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Output: t.j, ((SubPlan 1))
+               Hash Key: t.j, ((SubPlan 1))
+               ->  HashAggregate
+                     Output: t.j, ((SubPlan 1))
+                     Group Key: t.j, (SubPlan 1)
+                     ->  Seq Scan on public.t
+                           Output: t.j, (SubPlan 1)
+                           SubPlan 1
+                             ->  Result
+                                   Output: t.j
+ Optimizer: Postgres-based planner
+(17 rows)
+
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+ j | c | q1 
+---+---+----
+ 2 | 1 |  2
+(1 row)
+
 drop table t;
diff --git a/src/test/regress/expected/subselect_optimizer.out 
b/src/test/regress/expected/subselect_optimizer.out
index 38d440d121..322e14951c 100644
--- a/src/test/regress/expected/subselect_optimizer.out
+++ b/src/test/regress/expected/subselect_optimizer.out
@@ -2103,4 +2103,51 @@ group by i, j;
  2 | 2
 (1 row)
 
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is
+-- not grouping column (1 as c), is added to query targetList to make ORCA
+-- perform query normalization. During normalization ORCA modifies the vars of
+-- the grouping elements of targetList in order to produce a new Query tree.
+-- The modification of vars inside nested part of SubLinks should be handled
+-- correctly. ORCA shouldn't fall back due to missing variable entry as a 
result
+-- of incorrect query normalization.
+explain (verbose, costs off)
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+                               QUERY PLAN                               
+------------------------------------------------------------------------
+ Result
+   Output: j, 1, ((SubPlan 1))
+   ->  Gather Motion 3:1  (slice1; segments: 3)
+         Output: j, ((SubPlan 1))
+         ->  GroupAggregate
+               Output: j, ((SubPlan 1))
+               Group Key: t.j, ((SubPlan 1))
+               ->  Sort
+                     Output: j, ((SubPlan 1))
+                     Sort Key: t.j, ((SubPlan 1))
+                     ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                           Output: j, ((SubPlan 1))
+                           Hash Key: j, ((SubPlan 1))
+                           ->  Seq Scan on public.t
+                                 Output: j, (SubPlan 1)
+                                 SubPlan 1
+                                   ->  Result
+                                         Output: t.j
+                                         ->  Result
+                                               Output: true
+ Optimizer: GPORCA
+(21 rows)
+
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+ j | c | q1 
+---+---+----
+ 2 | 1 |  2
+(1 row)
+
 drop table t;
diff --git a/src/test/regress/sql/subselect.sql 
b/src/test/regress/sql/subselect.sql
index 4ee4f8f8f9..e2cccf1a95 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1006,4 +1006,23 @@ select j,
 from t
 group by i, j;
 
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink when this SubLink is inside the GROUP BY clause. Attribute, which is
+-- not grouping column (1 as c), is added to query targetList to make ORCA
+-- perform query normalization. During normalization ORCA modifies the vars of
+-- the grouping elements of targetList in order to produce a new Query tree.
+-- The modification of vars inside nested part of SubLinks should be handled
+-- correctly. ORCA shouldn't fall back due to missing variable entry as a 
result
+-- of incorrect query normalization.
+explain (verbose, costs off)
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+
+select j, 1 as c,
+(select j from (select j) q2) q1
+from t
+group by j, q1;
+
 drop table t;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to