This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 17b210d7117b29e3fb80387d7648dcc2f5d49d0f
Author: wangxiaoran <[email protected]>
AuthorDate: Thu Nov 21 15:57:40 2024 +0800

    [ORCA] optimize eliminate self comparison
    
    'PexprEliminateSelfComparison' only uses the 'pcrsNotNull'
    from the topmost expression to filter the nullable columns.
    This can lead the PexprEliminateSelfComparison cannot apply
    to the subquery properly.
    
    create table t1(a int not null, b int not null);
    create table t2(like t1);
    create table t3(like t1);
    select * from t2  left join (select  t2.a , t2.b  from t1, t2 where t1.a
            < t1.a) as t on t2. a = t.a;
    
    the plan for it from orca is
     Gather Motion 3:1  (slice1; segments: 3)
       ->  Hash Left Join
             Hash Cond: (t2.a = t2_1.a)
             ->  Seq Scan on t2
             ->  Hash
                   ->  Nested Loop
                         Join Filter: true
                         ->  Seq Scan on t2 t2_1
                         ->  Materialize
                               ->  Broadcast Motion 3:3  (slice2; segments: 3)
                                     ->  Seq Scan on t1
                                           Filter: (a < a)
    the self comparison in subquery is not eliminated.
    
    This commit is to optimize it by fetching 'pcrsNotNull' from
    the current logical expression and apply them to its child
    scalar expression.
---
 .../libgpopt/src/operators/CExpressionPreprocessor.cpp  |  7 ++++++-
 src/test/regress/expected/bfv_joins.out                 | 17 +++++++++++++++++
 src/test/regress/expected/bfv_joins_optimizer.out       | 15 +++++++++++++++
 src/test/regress/sql/bfv_joins.sql                      |  4 ++++
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git 
a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp 
b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
index fe8c1454ea..2b9e09f1c8 100644
--- a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
@@ -77,12 +77,18 @@ 
CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
        GPOS_CHECK_STACK_SIZE;
        GPOS_ASSERT(nullptr != mp);
        GPOS_ASSERT(nullptr != pexpr);
+       COperator *pop = pexpr->Pop();
 
        if (CUtils::FScalarCmp(pexpr))
        {
                return CPredicateUtils::PexprEliminateSelfComparison(mp, pexpr,
                                                                                
                                         pcrsNotNull);
        }
+       // Use current expr rather then the root to get not null columns
+       else if (pop->FLogical())
+       {
+               pcrsNotNull = pexpr->DeriveNotNullColumns();
+       }
 
        // recursively process children
        const ULONG arity = pexpr->Arity();
@@ -94,7 +100,6 @@ 
CExpressionPreprocessor::PexprEliminateSelfComparison(CMemoryPool *mp,
                pdrgpexprChildren->Append(pexprChild);
        }
 
-       COperator *pop = pexpr->Pop();
        pop->AddRef();
 
        return GPOS_NEW(mp) CExpression(mp, pop, pdrgpexprChildren);
diff --git a/src/test/regress/expected/bfv_joins.out 
b/src/test/regress/expected/bfv_joins.out
index 6d82f60dc3..ac777d7c37 100644
--- a/src/test/regress/expected/bfv_joins.out
+++ b/src/test/regress/expected/bfv_joins.out
@@ -931,6 +931,23 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, 
t3.a+t3.b as cc from t3)t ON t
  1
 (3 rows)
 
+-- Test for eliminating self check condition in subquery
+explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a
+               < t3.a) AS t ON t1.a = t.b;
+                                               QUERY PLAN                      
                          
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=639.89..44124.32 rows=2022803 
width=16)
+   ->  Hash Left Join  (cost=639.89..17153.61 rows=674268 width=16)
+         Hash Cond: (t1.a = t3.b)
+         ->  Seq Scan on t1  (cost=0.00..293.67 rows=25967 width=12)
+         ->  Hash  (cost=531.69..531.69 rows=8656 width=4)
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)  
(cost=0.00..531.69 rows=8656 width=4)
+                     Hash Key: t3.b
+                     ->  Seq Scan on t3  (cost=0.00..358.58 rows=8656 width=4)
+                           Filter: (a < a)
+ Optimizer: Postgres query optimizer
+(10 rows)
+
 -- Test for unexpected NLJ qual
 --
 explain select 1 as mrs_t1 where 1 <= ALL (select x from z);
diff --git a/src/test/regress/expected/bfv_joins_optimizer.out 
b/src/test/regress/expected/bfv_joins_optimizer.out
index 9a337e1273..b3395f77e1 100644
--- a/src/test/regress/expected/bfv_joins_optimizer.out
+++ b/src/test/regress/expected/bfv_joins_optimizer.out
@@ -947,6 +947,21 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, 
t3.a+t3.b as cc from t3)t ON t
  1
 (3 rows)
 
+-- Test for eliminating self check condition in subquery
+explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a
+               < t3.a) AS t ON t1.a = t.b;
+                                     QUERY PLAN                                
      
+-------------------------------------------------------------------------------------
+ Hash Left Join  (cost=0.00..431.00 rows=2 width=16)
+   Hash Cond: (a = (NULL::integer))
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 
width=12)
+         ->  Seq Scan on t1  (cost=0.00..431.00 rows=1 width=12)
+   ->  Hash  (cost=0.00..0.00 rows=0 width=4)
+         ->  Result  (cost=0.00..0.00 rows=0 width=4)
+               One-Time Filter: false
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
 -- Test for unexpected NLJ qual
 --
 explain select 1 as mrs_t1 where 1 <= ALL (select x from z);
diff --git a/src/test/regress/sql/bfv_joins.sql 
b/src/test/regress/sql/bfv_joins.sql
index 74aee8eff0..176e5f1ff5 100644
--- a/src/test/regress/sql/bfv_joins.sql
+++ b/src/test/regress/sql/bfv_joins.sql
@@ -112,6 +112,10 @@ SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, 
t3.a+t3.b as cc from t3)t ON t
 explain SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from 
t3)t ON t.b > t.a WHERE t.cc = t.cc IS NULL;
 SELECT t1.c FROM t1 LEFT OUTER JOIN (select t3.*, t3.a+t3.b as cc from t3)t ON 
t.b > t.a WHERE t.cc = t.cc IS NULL;
 
+-- Test for eliminating self check condition in subquery
+explain SELECT * FROM t1 LEFT JOIN (select t3.b from t3 where t3.a
+               < t3.a) AS t ON t1.a = t.b;
+
 -- Test for unexpected NLJ qual
 --
 explain select 1 as mrs_t1 where 1 <= ALL (select x from z);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to