This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit c10ea53b69913e39f5b976e7239febb6e25d0795 Author: Alexander Kondakov <[email protected]> AuthorDate: Tue Aug 29 12:27:27 2023 +0300 Fix ORCA invalid processing of nested SubLinks under aggregates. The ORCA optimizer could produce an invalid query plan and fallback to the legacy optimizer due to incorrect processing of nested SubLinks (SubLink contains one more SubLink or rtable subquery inside) during query normalization. The issue could arise when query had a AggRef inside a Sublink and that AggRef contained one more SubLink inside (see the example of the query in the tests section). While mutating the SubLink inside RunExtractAggregatesMutator, the Aggref branch is executed for SubLink's query targetList, which mutates args of the AggRef. And if inside this AggRef arg some Var had varlevelsup value greater than the m_agg_levels_up, i.e the Var referenced a relation that is higher than the AggRef, the Var was not mutated at all. The varlevelsup field was not modified and because of that, when AggRef was pulled up to the zero query level (by appending it to the context.m_lower_table_tlist), the Var, whose varlevelsup was higher than AggRef's, and which remained unchanged, started referencing the wrong relation. This could lead to the fallback or invalid plan construction when processing attributes at further planning stages. This patch makes the RunExtractAggregatesMutator modify the varlevelsup of the Var relatively the AggRef level in order to preserve proper query level. --- src/backend/gpopt/translate/CQueryMutators.cpp | 38 ++++++++++++++-------- src/test/regress/expected/aggregates_optimizer.out | 2 -- src/test/regress/expected/subselect.out | 31 ++++++++++++++++++ src/test/regress/expected/subselect_optimizer.out | 33 +++++++++++++++++++ src/test/regress/sql/subselect.sql | 9 +++++ 5 files changed, 98 insertions(+), 15 deletions(-) diff --git a/src/backend/gpopt/translate/CQueryMutators.cpp b/src/backend/gpopt/translate/CQueryMutators.cpp index 6557918665..7a77934adb 100644 --- a/src/backend/gpopt/translate/CQueryMutators.cpp +++ b/src/backend/gpopt/translate/CQueryMutators.cpp @@ -698,22 +698,34 @@ CQueryMutators::RunExtractAggregatesMutator(Node *node, // Handle other top-level outer references in the project element. if (var->varlevelsup == context->m_current_query_level) { - if (var->varlevelsup == context->m_agg_levels_up) + if (var->varlevelsup >= context->m_agg_levels_up) { - // If Var references the top level query inside an Aggref that also - // references top level query, the Aggref is moved to the derived query - // (see comments in Aggref if-case above). Thus, these Var references - // are brought up to the top-query level. + // If Var references the top level query (varlevelsup = m_current_query_level) + // inside an Aggref that also references top level query, the Aggref is moved + // to the derived query (see comments in Aggref if-case above). + // And, therefore, if we are mutating such Vars inside the Aggref, we must + // change their varlevelsup field in order to preserve correct reference level. + // i.e these Vars are pulled up as the part of the Aggref by the m_agg_levels_up. // e.g: - // explain select (select sum(foo.a) from jazz) from foo group by a, b; + // select (select max((select foo.a))) from foo; // is transformed into - // select (select fnew.sum_t from jazz) - // from (select foo.a, foo.b, sum(foo.a) sum_t - // from foo group by foo.a, foo.b) fnew; - // - // Note the foo.a var which is in sum() in a subquery must now become a - // var referencing the current query level. - var->varlevelsup = 0; + // select (select fnew.max_t) + // from (select max((select foo.a)) max_t from foo) fnew; + // Here the foo.a inside max referenced top level RTE foo at + // varlevelsup = 2 inside the Aggref at agglevelsup 1. Then the + // Aggref is brought up to the top-query-level of fnew and foo.a + // inside Aggref is bumped up by original Aggref's level. + // We may visualize that logic with the following diagram: + // Query <------┐ <--------------------┐ + // | | + // | m_agg_levels_up = 1 | + // | | + // Aggref --┘ | varlevelsup = 2 + // | + // | + // | + // Var -------------------------┘ + var->varlevelsup -= context->m_agg_levels_up; return (Node *) var; } diff --git a/src/test/regress/expected/aggregates_optimizer.out b/src/test/regress/expected/aggregates_optimizer.out index 4b735570df..944c8cbf57 100644 --- a/src/test/regress/expected/aggregates_optimizer.out +++ b/src/test/regress/expected/aggregates_optimizer.out @@ -659,8 +659,6 @@ LINE 4: where sum(distinct a.four + b.four) = b.four)... select (select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1))) from tenk1 o; -INFO: GPORCA failed to produce a plan, falling back to Postgres-based planner -DETAIL: Query-to-DXL Translation: No variable entry found due to incorrect normalization of query max ------ 9999 diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 31bf220bd2..ea96cc3ca7 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -2066,4 +2066,35 @@ group by j, q1; 2 | 1 | 2 (1 row) +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + QUERY PLAN +------------------------------------------------ + Finalize Aggregate + Output: (SubPlan 2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL max((SubPlan 1))) + -> Partial Aggregate + Output: PARTIAL max((SubPlan 1)) + -> Seq Scan on public.t + Output: t.i, t.j + SubPlan 1 + -> Result + Output: t.i + SubPlan 2 + -> Result + Output: max((SubPlan 1)) + Optimizer: Postgres-based planner +(15 rows) + +select (select max((select t.i))) from t; + max +----- + 1 +(1 row) + drop table t; diff --git a/src/test/regress/expected/subselect_optimizer.out b/src/test/regress/expected/subselect_optimizer.out index 322e14951c..da6a4c95c5 100644 --- a/src/test/regress/expected/subselect_optimizer.out +++ b/src/test/regress/expected/subselect_optimizer.out @@ -2150,4 +2150,37 @@ group by j, q1; 2 | 1 | 2 (1 row) +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + QUERY PLAN +------------------------------------------------ + Finalize Aggregate + Output: (SubPlan 2) + -> Gather Motion 3:1 (slice1; segments: 3) + Output: (PARTIAL max((SubPlan 1))) + -> Partial Aggregate + Output: PARTIAL max((SubPlan 1)) + -> Seq Scan on public.t + Output: i + SubPlan 1 + -> Result + Output: t.i + SubPlan 2 + -> Result + Output: max((SubPlan 1)) + -> Result + Output: true + Optimizer: GPORCA +(17 rows) + +select (select max((select t.i))) from t; + max +----- + 1 +(1 row) + drop table t; diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index e2cccf1a95..02c11c1aac 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -1025,4 +1025,13 @@ select j, 1 as c, from t group by j, q1; +-- Ensure that both planners produce valid plans for the query with the nested +-- SubLink, and this SubLink is under aggregation. ORCA shouldn't fall back due +-- to missing variable entry as a result of incorrect query normalization. ORCA +-- should correctly process args of the aggregation during normalization. +explain (verbose, costs off) +select (select max((select t.i))) from t; + +select (select max((select t.i))) from t; + drop table t; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
