This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 8d4d067aa923c2e1b1944b8e60d28621f109575b
Author: NISHANT SHARMA <[email protected]>
AuthorDate: Mon Dec 4 12:27:31 2023 +0530

    Stop Addition Of 'gp_segment_id,ctid' As keys For Replicated Tables (#16767)
    
    ORCA generated incorrect plan while doing "Left Semi Join with replicated 
outer table" .
    Duplicated rows were observed in the output. In the plan, combination of
    'ctid, gp_segment_id' was used as a 'key' for the replicated outer table.
    
    Since for a replicated table, same data is present across segments, this key
    does not help in uniquely identifying any tuple. Thus, this combination 
should not
    be added as a key. Addition is prevented by not including it, while 
retreiving key
    sets during 'Relcache to DXL' translation.
    
    Co-authored-by: nishant sharma <[email protected]>
---
 .../gpopt/translate/CTranslatorRelcacheToDXL.cpp   |  21 +-
 .../dxl/minidump/LeftSemiJoinWithRepOuterTab.mdp   | 482 +++++++++++++++++++++
 src/backend/gporca/server/CMakeLists.txt           |   2 +-
 .../gpopt/translate/CTranslatorRelcacheToDXL.h     |   8 +-
 src/test/regress/expected/join_gp.out              | 351 +++++++++++++++
 src/test/regress/expected/join_gp_optimizer.out    | 372 ++++++++++++++++
 src/test/regress/sql/join_gp.sql                   | 107 +++++
 7 files changed, 1330 insertions(+), 13 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
index 4cfad96d42..1184d5756d 100644
--- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
@@ -643,7 +643,7 @@ CTranslatorRelcacheToDXL::RetrieveRel(CMemoryPool *mp, 
CMDAccessor *md_accessor,
        // get key sets
        BOOL should_add_default_keys = 
RelHasSystemColumns(rel->rd_rel->relkind);
        keyset_array = RetrieveRelKeysets(mp, oid, should_add_default_keys,
-                                                                         
is_partitioned, attno_mapping);
+                                                                         
is_partitioned, attno_mapping, dist);
 
        // collect all check constraints
        check_constraint_mdids = RetrieveRelCheckConstraints(mp, oid);
@@ -2673,13 +2673,15 @@ 
CTranslatorRelcacheToDXL::ConstructAttnoMapping(CMemoryPool *mp,
 //
 //     @doc:
 //             Get key sets for relation
+//             For a relation, 'key sets' contains all 'Unique keys'
+//             defined as unique constraints in the catalog table.
+//             Conditionally, a combination of {segid, ctid} is also added.
 //
 //---------------------------------------------------------------------------
 ULongPtr2dArray *
-CTranslatorRelcacheToDXL::RetrieveRelKeysets(CMemoryPool *mp, OID oid,
-                                                                               
         BOOL should_add_default_keys,
-                                                                               
         BOOL is_partitioned,
-                                                                               
         ULONG *attno_mapping)
+CTranslatorRelcacheToDXL::RetrieveRelKeysets(
+       CMemoryPool *mp, OID oid, BOOL should_add_default_keys, BOOL 
is_partitioned,
+       ULONG *attno_mapping, IMDRelation::Ereldistrpolicy rel_distr_policy)
 {
        ULongPtr2dArray *key_sets = GPOS_NEW(mp) ULongPtr2dArray(mp);
 
@@ -2704,9 +2706,12 @@ CTranslatorRelcacheToDXL::RetrieveRelKeysets(CMemoryPool 
*mp, OID oid,
                key_sets->Append(key_set);
        }
 
-       // add {segid, ctid} as a key
-
-       if (should_add_default_keys)
+       // 1. add {segid, ctid} as a key
+       // 2. Skip addition of {segid, ctid} as a key for replicated table,
+       // as same data is present across segments thus seg_id,
+       // will not help in defining a unique tuple.
+       if (should_add_default_keys &&
+               IMDRelation::EreldistrReplicated != rel_distr_policy)
        {
                ULongPtrArray *key_set = GPOS_NEW(mp) ULongPtrArray(mp);
                if (is_partitioned)
diff --git 
a/src/backend/gporca/data/dxl/minidump/LeftSemiJoinWithRepOuterTab.mdp 
b/src/backend/gporca/data/dxl/minidump/LeftSemiJoinWithRepOuterTab.mdp
new file mode 100644
index 0000000000..b5554c16b3
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/LeftSemiJoinWithRepOuterTab.mdp
@@ -0,0 +1,482 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/";>
+  <dxl:Comment><![CDATA[
+         Objective :
+            Test if ORCA produces correct plan for Left Semi Join with 
replicated outer table.
+         Setup :
+            create table repli_t1 (a int) distributed replicated;
+            insert into repli_t1 values(1);
+            analyze repli_t1;
+
+            create table dist_t1 (a int , b int) distributed by (a);
+            insert into dist_t1 select i, 1 from generate_series(1, 5) i;
+            analyze dist_t1;
+
+         QUERY
+            Outer - replicated, Inner - distributed table
+            explain select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+
+         Explain Plan :
+                                                        QUERY PLAN
+            
---------------------------------------------------------------------------------------------------
+             Result  (cost=0.00..1324033.30 rows=1 width=4)
+               Filter: (SubPlan 1)
+               ->  Gather Motion 1:1  (slice1; segments: 1)  
(cost=0.00..431.00 rows=1 width=4)
+                     ->  Seq Scan on repli_t1  (cost=0.00..431.00 rows=3 
width=4)
+               SubPlan 1
+                 ->  Result  (cost=0.00..431.00 rows=4 width=4)
+                       Filter: (repli_t1.a >= dist_t1.b)
+                       ->  Materialize  (cost=0.00..431.00 rows=10 width=8)
+                             ->  Gather Motion 3:1  (slice2; segments: 3)  
(cost=0.00..431.00 rows=10 width=8)
+                                   ->  Seq Scan on dist_t1  (cost=0.00..431.00 
rows=4 width=8)
+             Optimizer: GPORCA
+            (11 rows)
+
+         Select Output :
+            select * from repli_t1 where exists ( select 1 from dist_t1 where 
repli_t1.a >= dist_t1.b);
+             a
+            ---
+             1
+            (1 row)
+
+  ]]>
+  </dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" 
DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" 
MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" 
LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint JoinArityForAssociativityCommutativity="18" 
ArrayExpansionThreshold="20" JoinOrderDynamicProgThreshold="10" 
BroadcastThreshold="100000" EnforceConstraintsOnDML="false" 
PushGroupByBelowSetopThreshold="10" XformBindThreshold="0" SkewFactor="0"/>
+      <dxl:TraceFlags 
Value="101013,102001,102002,102003,102043,102074,102120,102144,102162,102163,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:RelationStatistics Mdid="2.87730.1.0" Name="dist_t1" 
Rows="5.000000" RelPages="3" RelAllVisible="0" EmptyRelation="false"/>
+      <dxl:Relation Mdid="6.87730.1.0" Name="dist_t1" IsTemporary="false" 
StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="8,2">
+        <dxl:Columns>
+          <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" 
ColWidth="4"/>
+          <dxl:Column Name="b" Attno="2" Mdid="0.23.1.0" Nullable="true" 
ColWidth="4"/>
+          <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" 
ColWidth="6"/>
+          <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" 
Nullable="false" ColWidth="4"/>
+          <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" 
Nullable="false" ColWidth="4"/>
+        </dxl:Columns>
+        <dxl:IndexInfoList/>
+        <dxl:CheckConstraints/>
+        <dxl:DistrOpfamilies>
+          <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        </dxl:DistrOpfamilies>
+      </dxl:Relation>
+      <dxl:ColumnStatistics Mdid="1.87730.1.0.1" Name="b" Width="4.000000" 
NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" 
ColStatsMissing="false">
+        <dxl:StatsBucket Frequency="1.000000" DistinctValues="1.000000">
+          <dxl:LowerBound Closed="true" TypeMdid="0.23.1.0" Value="1"/>
+          <dxl:UpperBound Closed="true" TypeMdid="0.23.1.0" Value="1"/>
+        </dxl:StatsBucket>
+      </dxl:ColumnStatistics>
+      <dxl:ColumnStatistics Mdid="1.87730.1.0.0" Name="a" Width="4.000000" 
NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" 
ColStatsMissing="false">
+        <dxl:StatsBucket Frequency="0.250000" DistinctValues="1.250000">
+          <dxl:LowerBound Closed="true" TypeMdid="0.23.1.0" Value="1"/>
+          <dxl:UpperBound Closed="false" TypeMdid="0.23.1.0" Value="2"/>
+        </dxl:StatsBucket>
+        <dxl:StatsBucket Frequency="0.250000" DistinctValues="1.250000">
+          <dxl:LowerBound Closed="true" TypeMdid="0.23.1.0" Value="2"/>
+          <dxl:UpperBound Closed="false" TypeMdid="0.23.1.0" Value="3"/>
+        </dxl:StatsBucket>
+        <dxl:StatsBucket Frequency="0.250000" DistinctValues="1.250000">
+          <dxl:LowerBound Closed="true" TypeMdid="0.23.1.0" Value="3"/>
+          <dxl:UpperBound Closed="false" TypeMdid="0.23.1.0" Value="4"/>
+        </dxl:StatsBucket>
+        <dxl:StatsBucket Frequency="0.250000" DistinctValues="1.250000">
+          <dxl:LowerBound Closed="true" TypeMdid="0.23.1.0" Value="4"/>
+          <dxl:UpperBound Closed="true" TypeMdid="0.23.1.0" Value="5"/>
+        </dxl:StatsBucket>
+      </dxl:ColumnStatistics>
+      <dxl:GPDBScalarOp Mdid="0.525.1.0" Name="&gt;=" ComparisonType="GEq" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.150.1.0"/>
+        <dxl:Commutator Mdid="0.523.1.0"/>
+        <dxl:InverseOp Mdid="0.97.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:PartOpfamily Mdid="0.424.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1976.1.0"/>
+        <dxl:EqualityOp Mdid="0.410.1.0"/>
+        <dxl:InequalityOp Mdid="0.411.1.0"/>
+        <dxl:LessThanOp Mdid="0.412.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.414.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.413.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1016.1.0"/>
+        <dxl:MinAgg Mdid="0.2131.1.0"/>
+        <dxl:MaxAgg Mdid="0.2115.1.0"/>
+        <dxl:AvgAgg Mdid="0.2100.1.0"/>
+        <dxl:SumAgg Mdid="0.2107.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1976.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1990.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1989.1.0"/>
+        <dxl:EqualityOp Mdid="0.607.1.0"/>
+        <dxl:InequalityOp Mdid="0.608.1.0"/>
+        <dxl:LessThanOp Mdid="0.609.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.611.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.610.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/>
+        <dxl:ComparisonOp Mdid="0.356.1.0"/>
+        <dxl:ArrayType Mdid="0.1028.1.0"/>
+        <dxl:MinAgg Mdid="0.2134.1.0"/>
+        <dxl:MaxAgg Mdid="0.2118.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false">
+        <dxl:DistrOpfamily Mdid="0.2227.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/>
+        <dxl:PartOpfamily Mdid="0.2789.1.0"/>
+        <dxl:EqualityOp Mdid="0.387.1.0"/>
+        <dxl:InequalityOp Mdid="0.402.1.0"/>
+        <dxl:LessThanOp Mdid="0.2799.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.2800.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/>
+        <dxl:ComparisonOp Mdid="0.2794.1.0"/>
+        <dxl:ArrayType Mdid="0.1010.1.0"/>
+        <dxl:MinAgg Mdid="0.2798.1.0"/>
+        <dxl:MaxAgg Mdid="0.2797.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:RelationStatistics Mdid="2.87727.1.0" Name="repli_t1" 
Rows="1.000000" RelPages="1" RelAllVisible="0" EmptyRelation="false"/>
+      <dxl:GPDBScalarOp Mdid="0.413.1.0" Name="&gt;" ComparisonType="GT" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.470.1.0"/>
+        <dxl:Commutator Mdid="0.412.1.0"/>
+        <dxl:InverseOp Mdid="0.414.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2226.1.0"/>
+        <dxl:EqualityOp Mdid="0.385.1.0"/>
+        <dxl:InequalityOp Mdid="0.0.0.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1012.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2225.1.0"/>
+        <dxl:EqualityOp Mdid="0.352.1.0"/>
+        <dxl:InequalityOp Mdid="0.3315.1.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1011.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Relation Mdid="6.87727.1.0" Name="repli_t1" IsTemporary="false" 
StorageType="Heap" DistributionPolicy="Replicated">
+        <dxl:Columns>
+          <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" 
ColWidth="4"/>
+          <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" 
ColWidth="6"/>
+          <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4"/>
+          <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" 
Nullable="false" ColWidth="4"/>
+          <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" 
Nullable="false" ColWidth="4"/>
+        </dxl:Columns>
+        <dxl:IndexInfoList/>
+        <dxl:CheckConstraints/>
+      </dxl:Relation>
+      <dxl:RelationExtendedStatistics Mdid="10.87730.1.0" Name="dist_t1"/>
+      <dxl:ColumnStatistics Mdid="1.87727.1.0.0" Name="a" Width="4.000000" 
NullFreq="0.000000" NdvRemain="1.000000" FreqRemain="1.000000" 
ColStatsMissing="false"/>
+      <dxl:GPDBAgg Mdid="0.2803.1.0" Name="count" IsRepSafe="true" 
IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+      <dxl:RelationExtendedStatistics Mdid="10.87727.1.0" Name="repli_t1"/>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList/>
+      <dxl:LogicalSelect>
+        <dxl:SubqueryExists>
+          <dxl:LogicalProject>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="18" Alias="?column?">
+                <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:LogicalSelect>
+              <dxl:Comparison ComparisonOperator="&gt;=" 
OperatorMdid="0.525.1.0">
+                <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+                <dxl:Ident ColId="10" ColName="b" TypeMdid="0.23.1.0"/>
+              </dxl:Comparison>
+              <dxl:LogicalGet>
+                <dxl:TableDescriptor Mdid="6.87730.1.0" TableName="dist_t1" 
LockMode="1" AclMode="2">
+                  <dxl:Columns>
+                    <dxl:Column ColId="9" Attno="1" ColName="a" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="10" Attno="2" ColName="b" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="11" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                    <dxl:Column ColId="12" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="13" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="14" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="15" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="16" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+                    <dxl:Column ColId="17" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                  </dxl:Columns>
+                </dxl:TableDescriptor>
+              </dxl:LogicalGet>
+            </dxl:LogicalSelect>
+          </dxl:LogicalProject>
+        </dxl:SubqueryExists>
+        <dxl:LogicalGet>
+          <dxl:TableDescriptor Mdid="6.87727.1.0" TableName="repli_t1" 
LockMode="1" AclMode="2">
+            <dxl:Columns>
+              <dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.23.1.0" 
ColWidth="4"/>
+              <dxl:Column ColId="2" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+              <dxl:Column ColId="3" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+              <dxl:Column ColId="4" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+              <dxl:Column ColId="5" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+              <dxl:Column ColId="6" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+              <dxl:Column ColId="7" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+              <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+            </dxl:Columns>
+          </dxl:TableDescriptor>
+        </dxl:LogicalGet>
+      </dxl:LogicalSelect>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="12">
+      <dxl:Result>
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="1324032.699156" Rows="1.000000" 
Width="4"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="0" Alias="a">
+            <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:Filter>
+          <dxl:SubPlan TypeMdid="0.16.1.0" SubPlanType="ExistsSubPlan">
+            <dxl:TestExpr/>
+            <dxl:ParamList>
+              <dxl:Param ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
+            </dxl:ParamList>
+            <dxl:Result>
+              <dxl:Properties>
+                <dxl:Cost StartupCost="0" TotalCost="431.000584" 
Rows="1.666667" Width="4"/>
+              </dxl:Properties>
+              <dxl:ProjList>
+                <dxl:ProjElem ColId="8" Alias="a">
+                  <dxl:Ident ColId="8" ColName="a" TypeMdid="0.23.1.0"/>
+                </dxl:ProjElem>
+              </dxl:ProjList>
+              <dxl:Filter/>
+              <dxl:OneTimeFilter/>
+              <dxl:Result>
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="431.000584" 
Rows="1.666667" Width="4"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="17" Alias="?column?">
+                    <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                  </dxl:ProjElem>
+                  <dxl:ProjElem ColId="8" Alias="a">
+                    <dxl:Ident ColId="8" ColName="a" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:Filter/>
+                <dxl:OneTimeFilter/>
+                <dxl:Result>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="431.000578" 
Rows="1.666667" Width="4"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="8" Alias="a">
+                      <dxl:Ident ColId="8" ColName="a" TypeMdid="0.23.1.0"/>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter>
+                    <dxl:Comparison ComparisonOperator="&gt;=" 
OperatorMdid="0.525.1.0">
+                      <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
+                      <dxl:Ident ColId="9" ColName="b" TypeMdid="0.23.1.0"/>
+                    </dxl:Comparison>
+                  </dxl:Filter>
+                  <dxl:OneTimeFilter/>
+                  <dxl:Materialize Eager="true">
+                    <dxl:Properties>
+                      <dxl:Cost StartupCost="0" TotalCost="431.000249" 
Rows="5.000000" Width="8"/>
+                    </dxl:Properties>
+                    <dxl:ProjList>
+                      <dxl:ProjElem ColId="8" Alias="a">
+                        <dxl:Ident ColId="8" ColName="a" TypeMdid="0.23.1.0"/>
+                      </dxl:ProjElem>
+                      <dxl:ProjElem ColId="9" Alias="b">
+                        <dxl:Ident ColId="9" ColName="b" TypeMdid="0.23.1.0"/>
+                      </dxl:ProjElem>
+                    </dxl:ProjList>
+                    <dxl:Filter/>
+                    <dxl:GatherMotion InputSegments="0,1,2" 
OutputSegments="-1">
+                      <dxl:Properties>
+                        <dxl:Cost StartupCost="0" TotalCost="431.000209" 
Rows="5.000000" Width="8"/>
+                      </dxl:Properties>
+                      <dxl:ProjList>
+                        <dxl:ProjElem ColId="8" Alias="a">
+                          <dxl:Ident ColId="8" ColName="a" 
TypeMdid="0.23.1.0"/>
+                        </dxl:ProjElem>
+                        <dxl:ProjElem ColId="9" Alias="b">
+                          <dxl:Ident ColId="9" ColName="b" 
TypeMdid="0.23.1.0"/>
+                        </dxl:ProjElem>
+                      </dxl:ProjList>
+                      <dxl:Filter/>
+                      <dxl:SortingColumnList/>
+                      <dxl:TableScan>
+                        <dxl:Properties>
+                          <dxl:Cost StartupCost="0" TotalCost="431.000035" 
Rows="5.000000" Width="8"/>
+                        </dxl:Properties>
+                        <dxl:ProjList>
+                          <dxl:ProjElem ColId="8" Alias="a">
+                            <dxl:Ident ColId="8" ColName="a" 
TypeMdid="0.23.1.0"/>
+                          </dxl:ProjElem>
+                          <dxl:ProjElem ColId="9" Alias="b">
+                            <dxl:Ident ColId="9" ColName="b" 
TypeMdid="0.23.1.0"/>
+                          </dxl:ProjElem>
+                        </dxl:ProjList>
+                        <dxl:Filter/>
+                        <dxl:TableDescriptor Mdid="6.87730.1.0" 
TableName="dist_t1" LockMode="1" AclMode="2">
+                          <dxl:Columns>
+                            <dxl:Column ColId="8" Attno="1" ColName="a" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="9" Attno="2" ColName="b" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="10" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                            <dxl:Column ColId="11" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="12" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="13" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="14" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="15" Attno="-6" 
ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/>
+                            <dxl:Column ColId="16" Attno="-7" 
ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/>
+                          </dxl:Columns>
+                        </dxl:TableDescriptor>
+                      </dxl:TableScan>
+                    </dxl:GatherMotion>
+                  </dxl:Materialize>
+                </dxl:Result>
+              </dxl:Result>
+            </dxl:Result>
+          </dxl:SubPlan>
+        </dxl:Filter>
+        <dxl:OneTimeFilter/>
+        <dxl:GatherMotion InputSegments="0" OutputSegments="-1">
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="431.000071" Rows="1.000000" 
Width="4"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="0" Alias="a">
+              <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:Filter/>
+          <dxl:SortingColumnList/>
+          <dxl:TableScan>
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="431.000019" Rows="3.000000" 
Width="4"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="0" Alias="a">
+                <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:TableDescriptor Mdid="6.87727.1.0" TableName="repli_t1" 
LockMode="1" AclMode="2">
+              <dxl:Columns>
+                <dxl:Column ColId="0" Attno="1" ColName="a" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                <dxl:Column ColId="1" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                <dxl:Column ColId="2" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                <dxl:Column ColId="3" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                <dxl:Column ColId="4" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                <dxl:Column ColId="5" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                <dxl:Column ColId="6" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+                <dxl:Column ColId="7" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+              </dxl:Columns>
+            </dxl:TableDescriptor>
+          </dxl:TableScan>
+        </dxl:GatherMotion>
+      </dxl:Result>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git a/src/backend/gporca/server/CMakeLists.txt 
b/src/backend/gporca/server/CMakeLists.txt
index 91344a9032..e283b67eb3 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -59,7 +59,7 @@ NestedInSubqWithPrjListOuterRefNoInnerRef  InEqualityJoin 
Correlated-SemiJoin
 CorrelatedSemiJoin-True CorrelatedIN-LeftSemiJoin-True 
CorrelatedIN-LeftSemiNotIn-True
 InSubqWithPrjListOuterRefNoInnerRefColumnPlusConstIn 
CorrelatedIN-LeftSemiJoin-Limit
 CorrelatedLeftSemiNLJoinWithLimit PushFilterToSemiJoinLeftChild 
SubqOuterReferenceInClause
-SemiJoinDPE InSubqWithPrjListReturnSet;
+SemiJoinDPE InSubqWithPrjListReturnSet LeftSemiJoinWithRepOuterTab;
 
 CAntiSemiJoinTest:
 AntiSemiJoin2Select-1 AntiSemiJoin2Select-2 NOT-IN-NotNullBoth 
NOT-IN-NullInner NOT-IN-NullOuter
diff --git a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h 
b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
index 412d4c5a0b..ae5dbf7699 100644
--- a/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorRelcacheToDXL.h
@@ -208,10 +208,10 @@ private:
                                                                                
 CharPtrArray **part_types);
 
        // get keysets for relation
-       static ULongPtr2dArray *RetrieveRelKeysets(CMemoryPool *mp, OID oid,
-                                                                               
           BOOL should_add_default_keys,
-                                                                               
           BOOL is_partitioned,
-                                                                               
           ULONG *attno_mapping);
+       static ULongPtr2dArray *RetrieveRelKeysets(
+               CMemoryPool *mp, OID oid, BOOL should_add_default_keys,
+               BOOL is_partitioned, ULONG *attno_mapping,
+               IMDRelation::Ereldistrpolicy rel_distr_policy);
 
        // storage type for a relation
        static IMDRelation::Erelstoragetype RetrieveRelStorageType(Relation 
rel);
diff --git a/src/test/regress/expected/join_gp.out 
b/src/test/regress/expected/join_gp.out
index 06cf340762..d03be4be93 100644
--- a/src/test/regress/expected/join_gp.out
+++ b/src/test/regress/expected/join_gp.out
@@ -2799,3 +2799,354 @@ drop table random_dis_char;
 set optimizer_enable_hashjoin to on;
 reset enable_hashjoin;
 reset enable_nestloop;
+-----------------------------------------------------------------
+-- Test cases to check if ORCA generates correct result
+-- for "Left Semi Join with replicated outer table"
+-----------------------------------------------------------------
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+create table repli_t1 (a int) distributed replicated;
+insert into repli_t1 values(1);
+analyze repli_t1;
+create table dist_t1 (a int , b int) distributed by (a);
+insert into dist_t1 select i, 1 from generate_series(1, 5) i;
+analyze dist_t1;
+-- No explicitly defined primary key for replicated table
+---------------------------------------------------------
+-- Outer - replicated, Inner - distributed table
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: (RowIdExpr)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: (RowIdExpr)
+               ->  Nested Loop
+                     Join Filter: (repli_t1.a >= dist_t1.b)
+                     ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                           ->  Seq Scan on repli_t1
+                     ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+explain (costs off) select * from (select t1.a as aVal1, t2.a as aVal2 from 
repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (t1.a = t2.a)
+         Join Filter: (t2.a >= t4.b)
+         ->  HashAggregate
+               Group Key: (RowIdExpr)
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Hash Key: (RowIdExpr)
+                     ->  Nested Loop
+                           Join Filter: (t1.a >= t4.b)
+                           ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                                 ->  Seq Scan on repli_t1 t1
+                           ->  Seq Scan on dist_t1 t4
+         ->  Hash
+               ->  Seq Scan on repli_t1 t2
+ Optimizer: Postgres-based planner
+(16 rows)
+
+select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , 
repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+ aval1 | aval2 
+-------+-------
+     1 |     1
+(1 row)
+
+create index idx_repl_t1_a ON repli_t1 using btree(a);
+analyze repli_t1;
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: (RowIdExpr)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: (RowIdExpr)
+               ->  Nested Loop
+                     Join Filter: (repli_t1.a >= dist_t1.b)
+                     ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                           ->  Seq Scan on repli_t1
+                     ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+drop index idx_repl_t1_a;
+-- Outer - distributed, Inner - replicated table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
repli_t1  where repli_t1.a >= dist_t1.b);
+                   QUERY PLAN                   
+------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Nested Loop Semi Join
+         Join Filter: (repli_t1.a >= dist_t1.b)
+         ->  Seq Scan on dist_t1
+         ->  Seq Scan on repli_t1
+ Optimizer: Postgres-based planner
+(6 rows)
+
+select * from dist_t1 where exists ( select 1 from repli_t1  where repli_t1.a 
>= dist_t1.b);
+ a | b 
+---+---
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 1 | 1
+ 5 | 1
+(5 rows)
+
+-- Both replicated table
+explain (costs off) select * from repli_t1 as t1  where exists ( select 1 from 
repli_t1 as t2  where t1.a >= t2.a);
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Nested Loop Semi Join
+         Join Filter: (t1.a >= t2.a)
+         ->  Seq Scan on repli_t1 t1
+         ->  Seq Scan on repli_t1 t2
+ Optimizer: Postgres-based planner
+(6 rows)
+
+select * from repli_t1 as t1  where exists ( select 1 from repli_t1 as t2  
where t1.a >= t2.a);
+ a 
+---
+ 1
+(1 row)
+
+-- Outer - Universal, Inner - distributed table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from dist_t1 where univ_t >= dist_t1.b);
+                        QUERY PLAN                         
+-----------------------------------------------------------
+ HashAggregate
+   Group Key: (RowIdExpr)
+   ->  Nested Loop
+         Join Filter: (univ_t.univ_t >= dist_t1.b)
+         ->  Gather Motion 3:1  (slice1; segments: 3)
+               ->  Seq Scan on dist_t1
+         ->  Materialize
+               ->  Function Scan on generate_series univ_t
+ Optimizer: Postgres-based planner
+(9 rows)
+
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
dist_t1 where univ_t >= dist_t1.b);
+ univ_t 
+--------
+      3
+      5
+      4
+      2
+      1
+(5 rows)
+
+-- Outer - distributed, Inner - universal table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= dist_t1.b);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Nested Loop Semi Join
+         Join Filter: (univ_t.univ_t >= dist_t1.b)
+         ->  Seq Scan on dist_t1
+         ->  Function Scan on generate_series univ_t
+ Optimizer: Postgres-based planner
+(6 rows)
+
+select * from dist_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= dist_t1.b);
+ a | b 
+---+---
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 1 | 1
+ 5 | 1
+(5 rows)
+
+-- Outer - replicated, Inner - universal table
+explain (costs off)select * from repli_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= repli_t1.a);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Nested Loop Semi Join
+         Join Filter: (univ_t.univ_t >= repli_t1.a)
+         ->  Seq Scan on repli_t1
+         ->  Function Scan on generate_series univ_t
+ Optimizer: Postgres-based planner
+(6 rows)
+
+select * from repli_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= repli_t1.a);
+ a 
+---
+ 1
+(1 row)
+
+-- Outer - universal, Inner - replicated table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from repli_t1  where univ_t >= repli_t1.a);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Nested Loop Semi Join
+         Join Filter: (univ_t.univ_t >= repli_t1.a)
+         ->  Function Scan on generate_series univ_t
+         ->  Materialize
+               ->  Seq Scan on repli_t1
+ Optimizer: Postgres-based planner
+(7 rows)
+
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
repli_t1  where univ_t >= repli_t1.a);
+ univ_t 
+--------
+      1
+      2
+      3
+      4
+      5
+(5 rows)
+
+-- Explicitly defined primary key for replicated table
+---------------------------------------------------------
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;
+-- Outer - replicated, Inner - distributed table
+create table repli_t1_pk (a int, PRIMARY KEY(a)) distributed replicated;
+insert into repli_t1_pk values(1);
+analyze repli_t1_pk;
+create table repli_t2_pk (a int, CONSTRAINT key1_t2 PRIMARY KEY(a) ) 
distributed replicated;
+insert into repli_t2_pk values(1);
+analyze repli_t2_pk;
+create table repli_t3_pk (a int,b int, c int, d int, CONSTRAINT key1_t3 UNIQUE 
(c,d)) distributed replicated;
+insert into repli_t3_pk values(1,2,3,4);
+analyze repli_t3_pk;
+create table repli_t4_pk (a int,b int, c int, d int,  CONSTRAINT key1_t4 
PRIMARY KEY(a) , CONSTRAINT key2_t4 UNIQUE (c,d)) distributed replicated;
+insert into repli_t4_pk values(1,2,3,4);
+analyze repli_t4_pk;
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: (RowIdExpr)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: (RowIdExpr)
+               ->  Nested Loop
+                     Join Filter: (repli_t1_pk.a >= dist_t1.b)
+                     ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                           ->  Seq Scan on repli_t1_pk
+                     ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+create index idx_repli_t1_pk_a ON repli_t1_pk using btree(a);
+analyze repli_t1_pk;
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: (RowIdExpr)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: (RowIdExpr)
+               ->  Nested Loop
+                     Join Filter: (repli_t1_pk.a >= dist_t1.b)
+                     ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                           ->  Seq Scan on repli_t1_pk
+                     ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+drop index idx_repli_t1_pk_a;
+explain (costs off) select * from repli_t2_pk where exists ( select 1 from 
dist_t1 where repli_t2_pk.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: (RowIdExpr)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: (RowIdExpr)
+               ->  Nested Loop
+                     Join Filter: (repli_t2_pk.a >= dist_t1.b)
+                     ->  Broadcast Motion 1:3  (slice3; segments: 1)
+                           ->  Seq Scan on repli_t2_pk
+                     ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(11 rows)
+
+select * from repli_t2_pk where exists ( select 1 from dist_t1 where 
repli_t2_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+explain (costs off) select * from repli_t3_pk where exists ( select 1 from 
dist_t1 where repli_t3_pk.a >= dist_t1.b);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Nested Loop Semi Join
+   Join Filter: (repli_t3_pk.a >= dist_t1.b)
+   ->  Gather Motion 1:1  (slice1; segments: 1)
+         ->  Seq Scan on repli_t3_pk
+   ->  Materialize
+         ->  Gather Motion 3:1  (slice2; segments: 3)
+               ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(8 rows)
+
+select * from repli_t3_pk where exists ( select 1 from dist_t1 where 
repli_t3_pk.a >= dist_t1.b);
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 3 | 4
+(1 row)
+
+explain (costs off) select * from repli_t4_pk where exists ( select 1 from 
dist_t1 where repli_t4_pk.a >= dist_t1.b);
+                      QUERY PLAN                      
+------------------------------------------------------
+ Nested Loop Semi Join
+   Join Filter: (repli_t4_pk.a >= dist_t1.b)
+   ->  Gather Motion 1:1  (slice1; segments: 1)
+         ->  Seq Scan on repli_t4_pk
+   ->  Materialize
+         ->  Gather Motion 3:1  (slice2; segments: 3)
+               ->  Seq Scan on dist_t1
+ Optimizer: Postgres-based planner
+(8 rows)
+
+select * from repli_t4_pk where exists ( select 1 from dist_t1 where 
repli_t4_pk.a >= dist_t1.b);
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 3 | 4
+(1 row)
+
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;
diff --git a/src/test/regress/expected/join_gp_optimizer.out 
b/src/test/regress/expected/join_gp_optimizer.out
index 454aeec8b0..efc8f8af6c 100644
--- a/src/test/regress/expected/join_gp_optimizer.out
+++ b/src/test/regress/expected/join_gp_optimizer.out
@@ -2688,3 +2688,375 @@ drop table random_dis_char;
 set optimizer_enable_hashjoin to on;
 reset enable_hashjoin;
 reset enable_nestloop;
+-----------------------------------------------------------------
+-- Test cases to check if ORCA generates correct result
+-- for "Left Semi Join with replicated outer table"
+-----------------------------------------------------------------
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+create table repli_t1 (a int) distributed replicated;
+insert into repli_t1 values(1);
+analyze repli_t1;
+create table dist_t1 (a int , b int) distributed by (a);
+insert into dist_t1 select i, 1 from generate_series(1, 5) i;
+analyze dist_t1;
+-- No explicitly defined primary key for replicated table
+---------------------------------------------------------
+-- Outer - replicated, Inner - distributed table
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Result
+   Filter: (SubPlan 1)
+   ->  Gather Motion 1:1  (slice1; segments: 1)
+         ->  Seq Scan on repli_t1
+   SubPlan 1
+     ->  Result
+           Filter: (repli_t1.a >= dist_t1.b)
+           ->  Materialize
+                 ->  Gather Motion 3:1  (slice2; segments: 3)
+                       ->  Seq Scan on dist_t1
+ Optimizer: GPORCA
+(11 rows)
+
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+explain (costs off) select * from (select t1.a as aVal1, t2.a as aVal2 from 
repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (repli_t1.a = repli_t1_1.a)
+   ->  Result
+         Filter: (SubPlan 1)
+         ->  Gather Motion 1:1  (slice1; segments: 1)
+               ->  Seq Scan on repli_t1
+         SubPlan 1
+           ->  Result
+                 Filter: (repli_t1.a >= dist_t1.b)
+                 ->  Materialize
+                       ->  Gather Motion 3:1  (slice2; segments: 3)
+                             ->  Seq Scan on dist_t1
+   ->  Hash
+         ->  Gather Motion 1:1  (slice3; segments: 1)
+               ->  Seq Scan on repli_t1 repli_t1_1
+ Optimizer: GPORCA
+(16 rows)
+
+select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , 
repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+ aval1 | aval2 
+-------+-------
+     1 |     1
+(1 row)
+
+create index idx_repl_t1_a ON repli_t1 using btree(a);
+analyze repli_t1;
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Result
+   Filter: (SubPlan 1)
+   ->  Gather Motion 1:1  (slice1; segments: 1)
+         ->  Seq Scan on repli_t1
+   SubPlan 1
+     ->  Result
+           Filter: (repli_t1.a >= dist_t1.b)
+           ->  Materialize
+                 ->  Gather Motion 3:1  (slice2; segments: 3)
+                       ->  Seq Scan on dist_t1
+ Optimizer: GPORCA
+(11 rows)
+
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+drop index idx_repl_t1_a;
+-- Outer - distributed, Inner - replicated table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
repli_t1  where repli_t1.a >= dist_t1.b);
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on dist_t1
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on repli_t1
+                 Filter: (a >= dist_t1.b)
+ Optimizer: GPORCA
+(7 rows)
+
+select * from dist_t1 where exists ( select 1 from repli_t1  where repli_t1.a 
>= dist_t1.b);
+ a | b 
+---+---
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 1 | 1
+ 5 | 1
+(5 rows)
+
+-- Both replicated table
+explain (costs off) select * from repli_t1 as t1  where exists ( select 1 from 
repli_t1 as t2  where t1.a >= t2.a);
+                  QUERY PLAN                   
+-----------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Seq Scan on repli_t1
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Seq Scan on repli_t1 repli_t1_1
+                 Filter: (repli_t1.a >= a)
+ Optimizer: GPORCA
+(7 rows)
+
+select * from repli_t1 as t1  where exists ( select 1 from repli_t1 as t2  
where t1.a >= t2.a);
+ a 
+---
+ 1
+(1 row)
+
+-- Outer - Universal, Inner - distributed table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from dist_t1 where univ_t >= dist_t1.b);
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Result
+   Filter: (SubPlan 1)
+   ->  Function Scan on generate_series
+   SubPlan 1
+     ->  Result
+           Filter: (generate_series.generate_series >= dist_t1.b)
+           ->  Materialize
+                 ->  Gather Motion 3:1  (slice1; segments: 3)
+                       ->  Seq Scan on dist_t1
+ Optimizer: GPORCA
+(10 rows)
+
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
dist_t1 where univ_t >= dist_t1.b);
+ univ_t 
+--------
+      1
+      2
+      3
+      4
+      5
+(5 rows)
+
+-- Outer - distributed, Inner - universal table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Nested Loop Semi Join
+         Join Filter: (generate_series.generate_series >= dist_t1.b)
+         ->  Seq Scan on dist_t1
+         ->  Materialize
+               ->  Function Scan on generate_series
+ Optimizer: GPORCA
+(7 rows)
+
+select * from dist_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= dist_t1.b);
+ a | b 
+---+---
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 1 | 1
+ 5 | 1
+(5 rows)
+
+-- Outer - replicated, Inner - universal table
+explain (costs off)select * from repli_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= repli_t1.a);
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Seq Scan on repli_t1
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  Result
+                 Filter: (generate_series.generate_series >= repli_t1.a)
+                 ->  Function Scan on generate_series
+ Optimizer: GPORCA
+(8 rows)
+
+select * from repli_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= repli_t1.a);
+ a 
+---
+ 1
+(1 row)
+
+-- Outer - universal, Inner - replicated table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from repli_t1  where univ_t >= repli_t1.a);
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Result
+   Filter: (SubPlan 1)
+   ->  Function Scan on generate_series
+   SubPlan 1
+     ->  Result
+           Filter: (generate_series.generate_series >= repli_t1.a)
+           ->  Materialize
+                 ->  Gather Motion 1:1  (slice1; segments: 1)
+                       ->  Seq Scan on repli_t1
+ Optimizer: GPORCA
+(10 rows)
+
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
repli_t1  where univ_t >= repli_t1.a);
+ univ_t 
+--------
+      1
+      2
+      3
+      4
+      5
+(5 rows)
+
+-- Explicitly defined primary key for replicated table
+---------------------------------------------------------
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;
+-- Outer - replicated, Inner - distributed table
+create table repli_t1_pk (a int, PRIMARY KEY(a)) distributed replicated;
+insert into repli_t1_pk values(1);
+analyze repli_t1_pk;
+create table repli_t2_pk (a int, CONSTRAINT key1_t2 PRIMARY KEY(a) ) 
distributed replicated;
+insert into repli_t2_pk values(1);
+analyze repli_t2_pk;
+create table repli_t3_pk (a int,b int, c int, d int, CONSTRAINT key1_t3 UNIQUE 
(c,d)) distributed replicated;
+insert into repli_t3_pk values(1,2,3,4);
+analyze repli_t3_pk;
+create table repli_t4_pk (a int,b int, c int, d int,  CONSTRAINT key1_t4 
PRIMARY KEY(a) , CONSTRAINT key2_t4 UNIQUE (c,d)) distributed replicated;
+insert into repli_t4_pk values(1,2,3,4);
+analyze repli_t4_pk;
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  GroupAggregate
+         Group Key: repli_t1_pk.a
+         ->  Sort
+               Sort Key: repli_t1_pk.a
+               ->  Nested Loop
+                     Join Filter: true
+                     ->  Broadcast Motion 3:1  (slice2; segments: 3)
+                           ->  Seq Scan on dist_t1
+                     ->  Index Scan using repli_t1_pk_pkey on repli_t1_pk
+                           Index Cond: (a >= dist_t1.b)
+ Optimizer: GPORCA
+(12 rows)
+
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+create index idx_repli_t1_pk_a ON repli_t1_pk using btree(a);
+analyze repli_t1_pk;
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  GroupAggregate
+         Group Key: repli_t1_pk.a
+         ->  Sort
+               Sort Key: repli_t1_pk.a
+               ->  Nested Loop
+                     Join Filter: true
+                     ->  Broadcast Motion 3:1  (slice2; segments: 3)
+                           ->  Seq Scan on dist_t1
+                     ->  Index Scan using repli_t1_pk_pkey on repli_t1_pk
+                           Index Cond: (a >= dist_t1.b)
+ Optimizer: GPORCA
+(12 rows)
+
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+drop index idx_repli_t1_pk_a;
+explain (costs off) select * from repli_t2_pk where exists ( select 1 from 
dist_t1 where repli_t2_pk.a >= dist_t1.b);
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  GroupAggregate
+         Group Key: repli_t2_pk.a
+         ->  Sort
+               Sort Key: repli_t2_pk.a
+               ->  Nested Loop
+                     Join Filter: true
+                     ->  Broadcast Motion 3:1  (slice2; segments: 3)
+                           ->  Seq Scan on dist_t1
+                     ->  Index Scan using key1_t2 on repli_t2_pk
+                           Index Cond: (a >= dist_t1.b)
+ Optimizer: GPORCA
+(12 rows)
+
+select * from repli_t2_pk where exists ( select 1 from dist_t1 where 
repli_t2_pk.a >= dist_t1.b);
+ a 
+---
+ 1
+(1 row)
+
+explain (costs off) select * from repli_t3_pk where exists ( select 1 from 
dist_t1 where repli_t3_pk.a >= dist_t1.b);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  GroupAggregate
+         Group Key: repli_t3_pk.a, repli_t3_pk.b, repli_t3_pk.c, repli_t3_pk.d
+         ->  Sort
+               Sort Key: repli_t3_pk.c, repli_t3_pk.d
+               ->  Redistribute Motion 3:3  (slice2; segments: 3)
+                     Hash Key: repli_t3_pk.c, repli_t3_pk.d
+                     ->  Nested Loop
+                           Join Filter: (repli_t3_pk.a >= dist_t1.b)
+                           ->  Seq Scan on repli_t3_pk
+                           ->  Materialize
+                                 ->  Seq Scan on dist_t1
+ Optimizer: GPORCA
+(13 rows)
+
+select * from repli_t3_pk where exists ( select 1 from dist_t1 where 
repli_t3_pk.a >= dist_t1.b);
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 3 | 4
+(1 row)
+
+explain (costs off) select * from repli_t4_pk where exists ( select 1 from 
dist_t1 where repli_t4_pk.a >= dist_t1.b);
+                                  QUERY PLAN                                   
+-------------------------------------------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  GroupAggregate
+         Group Key: repli_t4_pk.a, repli_t4_pk.b, repli_t4_pk.c, repli_t4_pk.d
+         ->  Sort
+               Sort Key: repli_t4_pk.a
+               ->  Nested Loop
+                     Join Filter: true
+                     ->  Broadcast Motion 3:1  (slice2; segments: 3)
+                           ->  Seq Scan on dist_t1
+                     ->  Index Scan using key1_t4 on repli_t4_pk
+                           Index Cond: (a >= dist_t1.b)
+ Optimizer: GPORCA
+(12 rows)
+
+select * from repli_t4_pk where exists ( select 1 from dist_t1 where 
repli_t4_pk.a >= dist_t1.b);
+ a | b | c | d 
+---+---+---+---
+ 1 | 2 | 3 | 4
+(1 row)
+
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;
diff --git a/src/test/regress/sql/join_gp.sql b/src/test/regress/sql/join_gp.sql
index 85a6517578..b2824cc9d8 100644
--- a/src/test/regress/sql/join_gp.sql
+++ b/src/test/regress/sql/join_gp.sql
@@ -1035,3 +1035,110 @@ drop table random_dis_char;
 set optimizer_enable_hashjoin to on;
 reset enable_hashjoin;
 reset enable_nestloop;
+
+-----------------------------------------------------------------
+-- Test cases to check if ORCA generates correct result
+-- for "Left Semi Join with replicated outer table"
+-----------------------------------------------------------------
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+
+create table repli_t1 (a int) distributed replicated;
+insert into repli_t1 values(1);
+analyze repli_t1;
+
+create table dist_t1 (a int , b int) distributed by (a);
+insert into dist_t1 select i, 1 from generate_series(1, 5) i;
+analyze dist_t1;
+
+-- No explicitly defined primary key for replicated table
+---------------------------------------------------------
+
+-- Outer - replicated, Inner - distributed table
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+
+explain (costs off) select * from (select t1.a as aVal1, t2.a as aVal2 from 
repli_t1 as t1 , repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+select * from (select t1.a as aVal1, t2.a as aVal2 from repli_t1 as t1 , 
repli_t1 as t2 where t1.a = t2.a) as t3
+where exists (select 1 from dist_t1 as t4 where t3.aVal1 >= t4.b);
+
+create index idx_repl_t1_a ON repli_t1 using btree(a);
+analyze repli_t1;
+explain (costs off) select * from repli_t1 where exists ( select 1 from 
dist_t1 where repli_t1.a >= dist_t1.b);
+select * from repli_t1 where exists ( select 1 from dist_t1 where repli_t1.a 
>= dist_t1.b);
+drop index idx_repl_t1_a;
+
+-- Outer - distributed, Inner - replicated table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
repli_t1  where repli_t1.a >= dist_t1.b);
+select * from dist_t1 where exists ( select 1 from repli_t1  where repli_t1.a 
>= dist_t1.b);
+
+-- Both replicated table
+explain (costs off) select * from repli_t1 as t1  where exists ( select 1 from 
repli_t1 as t2  where t1.a >= t2.a);
+select * from repli_t1 as t1  where exists ( select 1 from repli_t1 as t2  
where t1.a >= t2.a);
+
+-- Outer - Universal, Inner - distributed table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from dist_t1 where univ_t >= dist_t1.b);
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
dist_t1 where univ_t >= dist_t1.b);
+
+-- Outer - distributed, Inner - universal table
+explain (costs off) select * from dist_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= dist_t1.b);
+select * from dist_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= dist_t1.b);
+
+-- Outer - replicated, Inner - universal table
+explain (costs off)select * from repli_t1 where exists ( select 1 from 
generate_series(1, 5) univ_t where univ_t >= repli_t1.a);
+select * from repli_t1 where exists ( select 1 from generate_series(1, 5) 
univ_t where univ_t >= repli_t1.a);
+
+-- Outer - universal, Inner - replicated table
+explain (costs off) select * from generate_series(1, 5) univ_t where exists ( 
select 1 from repli_t1  where univ_t >= repli_t1.a);
+select * from generate_series(1, 5) univ_t where exists ( select 1 from 
repli_t1  where univ_t >= repli_t1.a);
+
+-- Explicitly defined primary key for replicated table
+---------------------------------------------------------
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;
+
+-- Outer - replicated, Inner - distributed table
+
+create table repli_t1_pk (a int, PRIMARY KEY(a)) distributed replicated;
+insert into repli_t1_pk values(1);
+analyze repli_t1_pk;
+
+create table repli_t2_pk (a int, CONSTRAINT key1_t2 PRIMARY KEY(a) ) 
distributed replicated;
+insert into repli_t2_pk values(1);
+analyze repli_t2_pk;
+
+create table repli_t3_pk (a int,b int, c int, d int, CONSTRAINT key1_t3 UNIQUE 
(c,d)) distributed replicated;
+insert into repli_t3_pk values(1,2,3,4);
+analyze repli_t3_pk;
+
+create table repli_t4_pk (a int,b int, c int, d int,  CONSTRAINT key1_t4 
PRIMARY KEY(a) , CONSTRAINT key2_t4 UNIQUE (c,d)) distributed replicated;
+insert into repli_t4_pk values(1,2,3,4);
+analyze repli_t4_pk;
+
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+
+create index idx_repli_t1_pk_a ON repli_t1_pk using btree(a);
+analyze repli_t1_pk;
+explain (costs off) select * from repli_t1_pk where exists ( select 1 from 
dist_t1 where repli_t1_pk.a >= dist_t1.b);
+select * from repli_t1_pk where exists ( select 1 from dist_t1 where 
repli_t1_pk.a >= dist_t1.b);
+drop index idx_repli_t1_pk_a;
+
+explain (costs off) select * from repli_t2_pk where exists ( select 1 from 
dist_t1 where repli_t2_pk.a >= dist_t1.b);
+select * from repli_t2_pk where exists ( select 1 from dist_t1 where 
repli_t2_pk.a >= dist_t1.b);
+
+explain (costs off) select * from repli_t3_pk where exists ( select 1 from 
dist_t1 where repli_t3_pk.a >= dist_t1.b);
+select * from repli_t3_pk where exists ( select 1 from dist_t1 where 
repli_t3_pk.a >= dist_t1.b);
+
+explain (costs off) select * from repli_t4_pk where exists ( select 1 from 
dist_t1 where repli_t4_pk.a >= dist_t1.b);
+select * from repli_t4_pk where exists ( select 1 from dist_t1 where 
repli_t4_pk.a >= dist_t1.b);
+
+drop table if exists repli_t1;
+drop table if exists dist_t1;
+drop table if exists repli_t1_pk;
+drop table if exists repli_t2_pk;
+drop table if exists repli_t3_pk;
+drop table if exists repli_t4_pk;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to