This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit f2b0b5c18cee89b29617a4787871ca25b7f4164e
Author: Pan Wang <[email protected]>
AuthorDate: Tue Nov 14 12:41:59 2023 +0800

    Fix LeftJoinPruning pruns essential left join (#16690)
    
    ORCA performs the left join pruning at the preprocessing stage. For 
performing
    the pruning operation ORCA creates a set of columns which are output by the
    child operators. In the current implementation to generate the output 
columns
    for the child operators following steps are performed
    1.) Derive output columns of the logical parent operator
    2.) If scalar operator is present
            2a.) Derive used columns of the scalar operator
            2b.) Create a set combining columns from step 1 and 2a
    But for some operators like UNION where no scalar child is present ORCA was
    creating a empty set of output columns for the child operators which was
    leading to wrong pruning of left joins. This PR fixes that issue by 
modifying
    the logic for computing output columns.
    Authored-by: tongshu <[email protected]>
---
 .../src/operators/CLeftJoinPruningPreprocessor.cpp |  2 +-
 src/test/regress/expected/join_gp.out              | 31 ++++++++++++++++++++++
 src/test/regress/expected/join_gp_optimizer.out    | 31 ++++++++++++++++++++++
 src/test/regress/sql/join_gp.sql                   | 10 +++++++
 4 files changed, 73 insertions(+), 1 deletion(-)

diff --git 
a/src/backend/gporca/libgpopt/src/operators/CLeftJoinPruningPreprocessor.cpp 
b/src/backend/gporca/libgpopt/src/operators/CLeftJoinPruningPreprocessor.cpp
index 0577b60167..8668b0f833 100644
--- a/src/backend/gporca/libgpopt/src/operators/CLeftJoinPruningPreprocessor.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CLeftJoinPruningPreprocessor.cpp
@@ -417,6 +417,7 @@ CLeftJoinPruningPreprocessor::ComputeOutputColumns(
        }
 
        // Computing output columns of the child tree
+       childs_output_columns->Include(output_columns);
        ULONG arity = pexpr->Arity();
        for (ULONG ul = 0; ul < arity; ul++)
        {
@@ -425,7 +426,6 @@ CLeftJoinPruningPreprocessor::ComputeOutputColumns(
                {
                        CColRefSet *derived_used_columns_scalar =
                                pexpr_child->DeriveUsedColumns();
-                       childs_output_columns->Include(output_columns);
                        
childs_output_columns->Include(derived_used_columns_scalar);
                }
        }
diff --git a/src/test/regress/expected/join_gp.out 
b/src/test/regress/expected/join_gp.out
index f7e2b50ecf..06cf340762 100644
--- a/src/test/regress/expected/join_gp.out
+++ b/src/test/regress/expected/join_gp.out
@@ -2078,6 +2078,37 @@ explain select fooJoinPruning.* from fooJoinPruning left 
join barJoinPruning on
 
 drop table fooJoinPruning;
 drop table barJoinPruning;
+--
+-- Cases where join under union
+--
+create table foo(a int primary key, b int);
+create table bar(a int primary key, b int);
+explain  select foo.a, bar.b from foo left join bar on foo.a = bar.a
+ union
+   select foo.a, bar.b from foo join bar on foo.a = bar.a;
+                                              QUERY PLAN                       
                        
+-------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=5022.18..7892.18 rows=172200 
width=8)
+   ->  HashAggregate  (cost=5022.18..5596.18 rows=57400 width=8)
+         Group Key: foo.a, bar.b
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)  
(cost=679.75..4735.18 rows=57400 width=8)
+               Hash Key: foo.a, bar.b
+               ->  Append  (cost=679.75..3587.18 rows=57400 width=8)
+                     ->  Hash Left Join  (cost=679.75..1363.09 rows=28700 
width=8)
+                           Hash Cond: (foo.a = bar.a)
+                           ->  Seq Scan on foo  (cost=0.00..321.00 rows=28700 
width=4)
+                           ->  Hash  (cost=321.00..321.00 rows=28700 width=8)
+                                 ->  Seq Scan on bar  (cost=0.00..321.00 
rows=28700 width=8)
+                     ->  Hash Join  (cost=679.75..1363.09 rows=28700 width=8)
+                           Hash Cond: (foo_1.a = bar_1.a)
+                           ->  Seq Scan on foo foo_1  (cost=0.00..321.00 
rows=28700 width=4)
+                           ->  Hash  (cost=321.00..321.00 rows=28700 width=8)
+                                 ->  Seq Scan on bar bar_1  (cost=0.00..321.00 
rows=28700 width=8)
+ Optimizer: Postgres-based planner
+(17 rows)
+
+drop table foo;
+drop table bar;
 -----------------------------------------------------------------
 -- Test cases on Dynamic Partition Elimination(DPE) for Right Joins
 -----------------------------------------------------------------
diff --git a/src/test/regress/expected/join_gp_optimizer.out 
b/src/test/regress/expected/join_gp_optimizer.out
index 7995211f97..454aeec8b0 100644
--- a/src/test/regress/expected/join_gp_optimizer.out
+++ b/src/test/regress/expected/join_gp_optimizer.out
@@ -2069,6 +2069,37 @@ explain select fooJoinPruning.* from fooJoinPruning left 
join barJoinPruning on
 
 drop table fooJoinPruning;
 drop table barJoinPruning;
+--
+-- Cases where join under union
+--
+create table foo(a int primary key, b int);
+create table bar(a int primary key, b int);
+explain  select foo.a, bar.b from foo left join bar on foo.a = bar.a
+ union
+   select foo.a, bar.b from foo join bar on foo.a = bar.a;
+                                               QUERY PLAN                      
                         
+--------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..874.00 rows=1 width=8)
+   ->  GroupAggregate  (cost=0.00..874.00 rows=1 width=8)
+         Group Key: foo.a, bar.b
+         ->  Sort  (cost=0.00..874.00 rows=1 width=8)
+               Sort Key: foo.a, bar.b
+               ->  Append  (cost=0.00..874.00 rows=1 width=8)
+                     ->  Nested Loop Left Join  (cost=0.00..437.00 rows=1 
width=8)
+                           Join Filter: true
+                           ->  Seq Scan on foo  (cost=0.00..431.00 rows=1 
width=4)
+                           ->  Index Scan using bar_pkey on bar  
(cost=0.00..6.00 rows=1 width=4)
+                                 Index Cond: (a = foo.a)
+                     ->  Nested Loop  (cost=0.00..437.00 rows=1 width=8)
+                           Join Filter: true
+                           ->  Seq Scan on foo foo_1  (cost=0.00..431.00 
rows=1 width=4)
+                           ->  Index Scan using bar_pkey on bar bar_1  
(cost=0.00..6.00 rows=1 width=4)
+                                 Index Cond: (a = foo_1.a)
+ Optimizer: GPORCA
+(17 rows)
+
+drop table foo;
+drop table bar;
 -----------------------------------------------------------------
 -- Test cases on Dynamic Partition Elimination(DPE) for Right Joins
 -----------------------------------------------------------------
diff --git a/src/test/regress/sql/join_gp.sql b/src/test/regress/sql/join_gp.sql
index bf278e8f3f..85a6517578 100644
--- a/src/test/regress/sql/join_gp.sql
+++ b/src/test/regress/sql/join_gp.sql
@@ -857,6 +857,16 @@ explain select fooJoinPruning.* from fooJoinPruning left 
join barJoinPruning on
 drop table fooJoinPruning;
 drop table barJoinPruning;
 
+--
+-- Cases where join under union
+--
+create table foo(a int primary key, b int);
+create table bar(a int primary key, b int);
+explain  select foo.a, bar.b from foo left join bar on foo.a = bar.a
+ union
+   select foo.a, bar.b from foo join bar on foo.a = bar.a;
+drop table foo;
+drop table bar;
 -----------------------------------------------------------------
 -- Test cases on Dynamic Partition Elimination(DPE) for Right Joins
 -----------------------------------------------------------------


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to