This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 17b210d7117b29e3fb80387d7648dcc2f5d49d0f Author: wangxiaoran <[email protected]> AuthorDate: Thu Nov 21 15:57:40 2024 +0800 [ORCA] optimize eliminate self comparison 'PexprEliminateSelfComparison' only uses the 'pcrsNotNull' from the topmost expression to filter the nullable columns. This can lead the PexprEliminateSelfComparison cannot apply to the subquery properly. create table t1(a int not null, b int not null); create table t2(like t1); create table t3(like t1); select * from t2 left join (select t2.a , t2.b from t1, t2 where t1.a < t1.a) as t on t2. a = t.a; the plan for it from orca is Gather Motion 3:1 (slice1; segments: 3) -> Hash Left Join Hash Cond: (t2.a = t2_1.a) -> Seq Scan on t2 -> Hash -> Nested Loop Join Filter: true -> Seq Scan on t2 t2_1 -> Materialize -> Broadcast Motion 3:3 (slice2; segments: 3) -> Seq Scan on t1 Filter: (a < a) the self comparison in subquery is not eliminated. This commit is to optimize it by fetching 'pcrsNotNull' from the current logical expression and apply them to its child scalar expression. --- .../libgpopt/src/operators/CExpressionPreprocessor.cpp | 7 ++++++- src/test/regress/expected/bfv_joins.out | 17 +++++++++++++++++ src/test/regress/expected/bfv_joins_optimizer.out | 15 +++++++++++++++ src/test/regress/sql/bfv_joins.sql | 4 ++++ 4 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp index fe8c1454ea..2b9e09f1c8 100644 --- a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp @@ -77,12 +77,18 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp, GPOS_CHECK_STACK_SIZE; GPOS_ASSERT(nullptr != mp); GPOS_ASSERT(nullptr != pexpr); + COperator *pop = pexpr->Pop(); if (CUtils::FScalarCmp(pexpr)) { return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr, pcrsNotNull); } + // Use current expr rather then the root to get not null columns + else if (pop->FLogical()) + { + pcrsNotNull = pexpr->DeriveNotNullColumns(); + } // recursively process children const ULONG arity = pexpr->Arity(); @@ -94,7 +100,6 @@ CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp, pdrgpexprChildren->Append(pexprChild); } - COperator *pop = pexpr->Pop(); pop->AddRef(); return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren); diff --git a/src/test/regress/expected/bfv_joins.out b/src/test/regress/expected/bfv_joins.out index 6d82f60dc3..ac777d7c37 100644 --- a/src/test/regress/expected/bfv_joins.out +++ b/src/test/regress/expected/bfv_joins.out @@ -931,6 +931,23 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t 1 (3 rows) +-- Test for eliminating self check condition in subquery +explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a + < t3.a) AS t ON t1.a = t.b; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=639.89..44124.32 rows=2022803 width=16) + -> Hash Left Join (cost=639.89..17153.61 rows=674268 width=16) + Hash Cond: (t1.a = t3.b) + -> Seq Scan on t1 (cost=0.00..293.67 rows=25967 width=12) + -> Hash (cost=531.69..531.69 rows=8656 width=4) + -> Redistribute Motion 3:3 (slice2; segments: 3) (cost=0.00..531.69 rows=8656 width=4) + Hash Key: t3.b + -> Seq Scan on t3 (cost=0.00..358.58 rows=8656 width=4) + Filter: (a < a) + Optimizer: Postgres query optimizer +(10 rows) + -- Test for unexpected NLJ qual -- explain select 1 as mrs_t1 where 1 <= ALL (select x from z); diff --git a/src/test/regress/expected/bfv_joins_optimizer.out b/src/test/regress/expected/bfv_joins_optimizer.out index 9a337e1273..b3395f77e1 100644 --- a/src/test/regress/expected/bfv_joins_optimizer.out +++ b/src/test/regress/expected/bfv_joins_optimizer.out @@ -947,6 +947,21 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t 1 (3 rows) +-- Test for eliminating self check condition in subquery +explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a + < t3.a) AS t ON t1.a = t.b; + QUERY PLAN +------------------------------------------------------------------------------------- + Hash Left Join (cost=0.00..431.00 rows=2 width=16) + Hash Cond: (a = (NULL::integer)) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on t1 (cost=0.00..431.00 rows=1 width=12) + -> Hash (cost=0.00..0.00 rows=0 width=4) + -> Result (cost=0.00..0.00 rows=0 width=4) + One-Time Filter: false + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + -- Test for unexpected NLJ qual -- explain select 1 as mrs_t1 where 1 <= ALL (select x from z); diff --git a/src/test/regress/sql/bfv_joins.sql b/src/test/regress/sql/bfv_joins.sql index 74aee8eff0..176e5f1ff5 100644 --- a/src/test/regress/sql/bfv_joins.sql +++ b/src/test/regress/sql/bfv_joins.sql @@ -112,6 +112,10 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL; SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL; +-- Test for eliminating self check condition in subquery +explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a + < t3.a) AS t ON t1.a = t.b; + -- Test for unexpected NLJ qual -- explain select 1 as mrs_t1 where 1 <= ALL (select x from z); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
