(cloudberry) 02/31: Fix ORCA invalid processing of nested SubLinks with GROUP BY attributes

maxyang Mon, 17 Feb 2025 21:39:59 -0800

This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit 9f2fe29578058910742eeb42f097d76fd35e0b0e
Author: Alexander Kondakov <[email protected]>
AuthorDate: Mon Aug 14 10:53:10 2023 +0300

    Fix ORCA invalid processing of nested SubLinks with GROUP BY attributes
    
    The ORCA optimizer could produce an invalid query plan due to incorrect
    processing of nested SubLinks (SubLink contains one more SubLink or rtable
    subquery inside) during query normalization.
    
    The incorrect behaviour took place when a query had a nested
    SubLink in its targetList, and SubLink lower levels contained attributes 
that
    were referenced in the query's GROUP BY clause. ORCA could construct for
    such queries an invalid plan, according to which some wrong and unexpected
    attributes got into the nested part of SubLink's plan (the example is 
provided
    in tests section). And the presence of those attributes could lead to 
incorrect
    query execution or even segmentation fault.
    
    Inside the NormalizeGroupByProjList function the 
context.m_lower_table_tlist,
    which contains grouping attributes from target_list_copy, is created. And 
the
    entries of target_list_copy, that are not grouping attributes, are mutated
    inside the RunExtractAggregatesMutator in order to make their inner 
expressions
    reference the corresponding entries from context.m_lower_table_tlist.
    The invalid plan could be produced because of incorrect mapping between 
Var's
    varattno (in target_list_copy) and some TargetEntry's resno
    (in context.m_lower_table_tlist). This mapping is established by
    RunExtractAggregatesMutator, where for the Var code branch the following
    assignment takes place: var->varattno = found_tle->resno, where found_tle 
is the
    corresponding entry from context.m_lower_table_tlist. However, the 
behaviour of
    this function was erroneous for the case of nested SubLinks because it
    mutated only the upper level of nested SubLink structure, and other 
subqueries
    remained unmodified because RunExtractAggregatesMutator did not have a 
branch
    for the Query, which was required for futher diving into lower subqueries. 
And
    because inner Vars hadn't been mutated, they started reference wrong 
attributes.
    
    This patch modifies RunExtractAggregatesMutator function by adding the Query
    branch to make the mutator correctly step into nested SubLink queries. And 
the
    call of MutateQueryOrExpressionTree is replaced with the call of mutator 
itself
    in order to correctly modify m_current_query_level when stepping into lower
    subqueries.
---
 src/backend/gpopt/translate/CQueryMutators.cpp    | 38 +++++++++++++++---
 src/test/regress/expected/bfv_olap.out            |  1 -
 src/test/regress/expected/bfv_olap_optimizer.out  |  3 --
 src/test/regress/expected/subselect.out           | 43 ++++++++++++++++++++
 src/test/regress/expected/subselect_optimizer.out | 48 +++++++++++++++++++++++
 src/test/regress/sql/bfv_olap.sql                 |  1 -
 src/test/regress/sql/subselect.sql                | 26 ++++++++++++
 7 files changed, 150 insertions(+), 10 deletions(-)

diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp 
b/src/backend/gpopt/translate/CQueryMutators.cpp
index 31c3c19405..59311bbad6 100644
--- a/src/backend/gpopt/translate/CQueryMutators.cpp
+++ b/src/backend/gpopt/translate/CQueryMutators.cpp
@@ -809,17 +809,45 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node,
 
                GPOS_ASSERT(IsA(old_sublink->subselect, Query));
 
-               new_sublink->subselect = gpdb::MutateQueryOrExpressionTree(
-                       old_sublink->subselect,
-                       (MutatorWalkerFn) RunExtractAggregatesMutator, (void *) 
context,
-                       0  // mutate into cte-lists
-               );
+               // One need to call the Query mutator for subselect and take 
into
+               // account that SubLink can be multi-level. Therefore, the
+               // context->m_current_query_level must be modified properly
+               // while diving into such nested SubLink.
+               new_sublink->subselect =
+                       RunExtractAggregatesMutator(old_sublink->subselect, 
context);
 
                context->m_current_query_level--;
 
                return (Node *) new_sublink;
        }
 
+       if (IsA(node, Query))
+       {
+               // Mutate Query tree and ignore rtable subqueries in order to 
modify
+               // m_current_query_level properly when mutating them below.
+               Query *query = gpdb::MutateQueryTree(
+                       (Query *) node, (MutatorWalkerFn) 
RunExtractAggregatesMutator,
+                       context, QTW_IGNORE_RT_SUBQUERIES);
+
+               ListCell *lc;
+               ForEach(lc, query->rtable)
+               {
+                       RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+
+                       if (RTE_SUBQUERY == rte->rtekind)
+                       {
+                               Query *subquery = rte->subquery;
+                               context->m_current_query_level++;
+                               rte->subquery = (Query *) 
RunExtractAggregatesMutator(
+                                       (Node *) subquery, context);
+                               context->m_current_query_level--;
+                               gpdb::GPDBFree(subquery);
+                       }
+               }
+
+               return (Node *) query;
+       }
+
        return gpdb::MutateExpressionTree(
                node, (MutatorWalkerFn) RunExtractAggregatesMutator, context);
 }
diff --git a/src/test/regress/expected/bfv_olap.out 
b/src/test/regress/expected/bfv_olap.out
index eadb52b004..f913e2c03d 100644
--- a/src/test/regress/expected/bfv_olap.out
+++ b/src/test/regress/expected/bfv_olap.out
@@ -764,7 +764,6 @@ select * from (select sum(a.salary) over(), count(*)
  2100 |     1
 (2 rows)
 
--- this query currently falls back, needs to be fixed
 select (select rn from (select row_number() over () as rn, name
                         from t1_github_issue_10143
                         where code = a.code
diff --git a/src/test/regress/expected/bfv_olap_optimizer.out 
b/src/test/regress/expected/bfv_olap_optimizer.out
index 16de252c48..78f8628003 100644
--- a/src/test/regress/expected/bfv_olap_optimizer.out
+++ b/src/test/regress/expected/bfv_olap_optimizer.out
@@ -824,7 +824,6 @@ select * from (select sum(a.salary) over(), count(*)
  2100 |     1
 (2 rows)
 
--- this query currently falls back, needs to be fixed
 select (select rn from (select row_number() over () as rn, name
                         from t1_github_issue_10143
                         where code = a.code
@@ -833,8 +832,6 @@ select (select rn from (select row_number() over () as rn, 
name
 ,sum(sum(a.salary)) over()
 from t2_github_issue_10143 a
 group by a.code;
-INFO:  GPORCA failed to produce a plan, falling back to Postgres-based planner
-DETAIL:  Query-to-DXL Translation: No variable entry found due to incorrect 
normalization of query
  dongnm | sum  
 --------+------
       1 | 2100
diff --git a/src/test/regress/expected/subselect.out 
b/src/test/regress/expected/subselect.out
index add31f7eeb..7be004172b 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -1981,3 +1981,46 @@ select * from x for update;
 (5 rows)
 
 set gp_cte_sharing to off;
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink, which contains attributes referenced in query's GROUP BY clause.
+-- Due to presence of non-grouping columns in targetList, ORCA performs query
+-- normalization, during which ORCA establishes a correspondence between vars
+-- from targetlist entries to grouping attributes. And this process should
+-- correctly handle nested structures. The inner part of SubPlan in the test
+-- should contain only t.j.
+-- start_ignore
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+-- end_ignore
+create table t (i int, j int) distributed by (i);
+insert into t values (1, 2);
+explain (verbose, costs off)
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: t.j, ((SubPlan 1)), t.i
+   ->  HashAggregate
+         Output: t.j, (SubPlan 1), t.i
+         Group Key: t.i, t.j
+         ->  Seq Scan on public.t
+               Output: t.i, t.j
+         SubPlan 1
+           ->  Result
+                 Output: t.j
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+ j | j 
+---+---
+ 2 | 2
+(1 row)
+
+drop table t;
diff --git a/src/test/regress/expected/subselect_optimizer.out 
b/src/test/regress/expected/subselect_optimizer.out
index b60d15f74d..38d440d121 100644
--- a/src/test/regress/expected/subselect_optimizer.out
+++ b/src/test/regress/expected/subselect_optimizer.out
@@ -2056,3 +2056,51 @@ select * from x for update;
 (6 rows)
 
 set gp_cte_sharing to off;
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink, which contains attributes referenced in query's GROUP BY clause.
+-- Due to presence of non-grouping columns in targetList, ORCA performs query
+-- normalization, during which ORCA establishes a correspondence between vars
+-- from targetlist entries to grouping attributes. And this process should
+-- correctly handle nested structures. The inner part of SubPlan in the test
+-- should contain only t.j.
+-- start_ignore
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+-- end_ignore
+create table t (i int, j int) distributed by (i);
+insert into t values (1, 2);
+explain (verbose, costs off)
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: j, ((SubPlan 1))
+   ->  GroupAggregate
+         Output: j, (SubPlan 1)
+         Group Key: t.i, t.j
+         ->  Sort
+               Output: i, j
+               Sort Key: t.i, t.j
+               ->  Seq Scan on public.t
+                     Output: i, j
+         SubPlan 1
+           ->  Result
+                 Output: t.j
+                 ->  Result
+                       Output: true
+ Optimizer: GPORCA
+(16 rows)
+
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+ j | j 
+---+---
+ 2 | 2
+(1 row)
+
+drop table t;
diff --git a/src/test/regress/sql/bfv_olap.sql 
b/src/test/regress/sql/bfv_olap.sql
index c1a7a7759e..2b5b1d67fa 100644
--- a/src/test/regress/sql/bfv_olap.sql
+++ b/src/test/regress/sql/bfv_olap.sql
@@ -449,7 +449,6 @@ select * from (select sum(a.salary) over(), count(*)
                from t2_github_issue_10143 a
                group by a.salary) T;
 
--- this query currently falls back, needs to be fixed
 select (select rn from (select row_number() over () as rn, name
                         from t1_github_issue_10143
                         where code = a.code
diff --git a/src/test/regress/sql/subselect.sql 
b/src/test/regress/sql/subselect.sql
index 414f93beaf..4ee4f8f8f9 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -981,3 +981,29 @@ with x as (select * from subselect_tbl)
 select * from x for update;
 
 set gp_cte_sharing to off;
+
+-- Ensure that both planners produce valid plans for the query with the nested
+-- SubLink, which contains attributes referenced in query's GROUP BY clause.
+-- Due to presence of non-grouping columns in targetList, ORCA performs query
+-- normalization, during which ORCA establishes a correspondence between vars
+-- from targetlist entries to grouping attributes. And this process should
+-- correctly handle nested structures. The inner part of SubPlan in the test
+-- should contain only t.j.
+-- start_ignore
+drop table if exists t;
+-- end_ignore
+create table t (i int, j int) distributed by (i);
+insert into t values (1, 2);
+
+explain (verbose, costs off)
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+
+select j,
+(select j from (select j) q2)
+from t
+group by i, j;
+
+drop table t;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 02/31: Fix ORCA invalid processing of nested SubLinks with GROUP BY attributes

Reply via email to