This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 03eca3430f26a7b5b53fbb9eb3f918137d32f9c7
Author: Chandan Kunal <[email protected]>
AuthorDate: Wed Sep 6 14:18:18 2023 +0530

    Correcting derived distribution spec for CPhysicalJoin. (#16342)
    
    Problem: Wrong results are generated for a subquery in the projection list
    on replicated tables.
    
    Analysis: To derive the distribution for any join operator,
    CPhysicalJoin::PdsDerive() is invoked. To derive the distribution, it checks
    the DistributionSpec of the outer and inner children. When the outer child's
    DistributionSpec is replicated and the inner child's is universal, we return
    universal as the derived distribution. As a result, no "Gather Motion" is
    created, and because the data is not present on the coordinator, the query
    returns no rows.
---
 .../dxl/minidump/JoinOnReplicatedUniversal.mdp     | 298 +++++++++++++++++++++
 .../data/dxl/minidump/SubqueryOuterRefTVF.mdp      |   4 +-
 .../libgpopt/src/operators/CPhysicalJoin.cpp       |   8 +-
 src/backend/gporca/server/CMakeLists.txt           |   3 +-
 src/test/regress/expected/rpt.out                  |  37 +++
 src/test/regress/expected/rpt_optimizer.out        |  40 +++
 src/test/regress/sql/rpt.sql                       |   9 +
 7 files changed, 393 insertions(+), 6 deletions(-)

diff --git a/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp 
b/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp
new file mode 100644
index 0000000000..b2d8c3df42
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/JoinOnReplicatedUniversal.mdp
@@ -0,0 +1,298 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/">
+<dxl:Comment><![CDATA[
+          This mdp tests that replicated is derived as the join distribution when
+          the outer child is replicated and the inner child is universal. As a
+          result, a Gather Motion node is added.
+          create table t(i int, j int) distributed replicated;
+          insert into t values (1, 2);
+          explain select j, (select j) from t;
+          ]]>
+</dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" 
DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" 
MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" 
LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint JoinArityForAssociativityCommutativity="18" 
ArrayExpansionThreshold="20" JoinOrderDynamicProgThreshold="10" 
BroadcastThreshold="100000" EnforceConstraintsOnDML="false" 
PushGroupByBelowSetopThreshold="10" XformBindThreshold="0" SkewFactor="0"/>
+      <dxl:TraceFlags 
Value="101013,102001,102002,102003,102043,102074,102120,102144,102162,102163,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:RelationExtendedStatistics Mdid="10.22366.1.0" Name="t"/>
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:PartOpfamily Mdid="0.424.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1976.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1990.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1989.1.0"/>
+        <dxl:EqualityOp Mdid="0.607.1.0"/>
+        <dxl:InequalityOp Mdid="0.608.1.0"/>
+        <dxl:LessThanOp Mdid="0.609.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.611.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.610.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/>
+        <dxl:ComparisonOp Mdid="0.356.1.0"/>
+        <dxl:ArrayType Mdid="0.1028.1.0"/>
+        <dxl:MinAgg Mdid="0.2134.1.0"/>
+        <dxl:MaxAgg Mdid="0.2118.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false">
+        <dxl:DistrOpfamily Mdid="0.2227.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/>
+        <dxl:PartOpfamily Mdid="0.2789.1.0"/>
+        <dxl:EqualityOp Mdid="0.387.1.0"/>
+        <dxl:InequalityOp Mdid="0.402.1.0"/>
+        <dxl:LessThanOp Mdid="0.2799.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.2800.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/>
+        <dxl:ComparisonOp Mdid="0.2794.1.0"/>
+        <dxl:ArrayType Mdid="0.1010.1.0"/>
+        <dxl:MinAgg Mdid="0.2798.1.0"/>
+        <dxl:MaxAgg Mdid="0.2797.1.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2226.1.0"/>
+        <dxl:EqualityOp Mdid="0.385.1.0"/>
+        <dxl:InequalityOp Mdid="0.0.0.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1012.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="false" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2225.1.0"/>
+        <dxl:EqualityOp Mdid="0.352.1.0"/>
+        <dxl:InequalityOp Mdid="0.3315.1.0"/>
+        <dxl:LessThanOp Mdid="0.0.0.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanOp Mdid="0.0.0.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/>
+        <dxl:ComparisonOp Mdid="0.0.0.0"/>
+        <dxl:ArrayType Mdid="0.1011.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:ColumnStatistics Mdid="1.22366.1.0.1" Name="j" Width="4.000000" 
NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" 
ColStatsMissing="true"/>
+      <dxl:RelationStatistics Mdid="2.22366.1.0" Name="t" Rows="0.000000" 
RelPages="0" RelAllVisible="0" EmptyRelation="true"/>
+      <dxl:Relation Mdid="6.22366.1.0" Name="t" IsTemporary="false" 
StorageType="Heap" DistributionPolicy="Replicated" Keys="8,2">
+        <dxl:Columns>
+          <dxl:Column Name="i" Attno="1" Mdid="0.23.1.0" Nullable="true" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="j" Attno="2" Mdid="0.23.1.0" Nullable="true" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" 
ColWidth="6">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" 
ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" 
Nullable="false" ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+          <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" 
Nullable="false" ColWidth="4">
+            <dxl:DefaultValue/>
+          </dxl:Column>
+        </dxl:Columns>
+        <dxl:IndexInfoList/>
+        <dxl:CheckConstraints/>
+      </dxl:Relation>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="2" ColName="j" TypeMdid="0.23.1.0"/>
+        <dxl:Ident ColId="12" ColName="j" TypeMdid="0.23.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList/>
+      <dxl:LogicalProject>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="12" Alias="j">
+            <dxl:ScalarSubquery ColId="11">
+              <dxl:LogicalProject>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="11" Alias="j">
+                    <dxl:Ident ColId="2" ColName="j" TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:LogicalConstTable>
+                  <dxl:Columns>
+                    <dxl:Column ColId="10" Attno="1" ColName="" 
TypeMdid="0.16.1.0"/>
+                  </dxl:Columns>
+                  <dxl:ConstTuple>
+                    <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                  </dxl:ConstTuple>
+                </dxl:LogicalConstTable>
+              </dxl:LogicalProject>
+            </dxl:ScalarSubquery>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:LogicalGet>
+          <dxl:TableDescriptor Mdid="6.22366.1.0" TableName="t" LockMode="1" 
AclMode="2">
+            <dxl:Columns>
+              <dxl:Column ColId="1" Attno="1" ColName="i" TypeMdid="0.23.1.0" 
ColWidth="4"/>
+              <dxl:Column ColId="2" Attno="2" ColName="j" TypeMdid="0.23.1.0" 
ColWidth="4"/>
+              <dxl:Column ColId="3" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+              <dxl:Column ColId="4" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+              <dxl:Column ColId="5" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+              <dxl:Column ColId="6" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+              <dxl:Column ColId="7" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+              <dxl:Column ColId="8" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+              <dxl:Column ColId="9" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+            </dxl:Columns>
+          </dxl:TableDescriptor>
+        </dxl:LogicalGet>
+      </dxl:LogicalProject>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="6">
+      <dxl:GatherMotion InputSegments="0" OutputSegments="-1">
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="882724.329545" Rows="1.000000" 
Width="8"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="1" Alias="j">
+            <dxl:Ident ColId="1" ColName="j" TypeMdid="0.23.1.0"/>
+          </dxl:ProjElem>
+          <dxl:ProjElem ColId="11" Alias="j">
+            <dxl:Ident ColId="11" ColName="j" TypeMdid="0.23.1.0"/>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:Filter/>
+        <dxl:SortingColumnList/>
+        <dxl:Result>
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="882724.329455" 
Rows="3.000000" Width="8"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="1" Alias="j">
+              <dxl:Ident ColId="1" ColName="j" TypeMdid="0.23.1.0"/>
+            </dxl:ProjElem>
+            <dxl:ProjElem ColId="11" Alias="j">
+              <dxl:SubPlan TypeMdid="0.23.1.0" SubPlanType="ScalarSubPlan">
+                <dxl:TestExpr/>
+                <dxl:ParamList>
+                  <dxl:Param ColId="1" ColName="j" TypeMdid="0.23.1.0"/>
+                </dxl:ParamList>
+                <dxl:Result>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="0.000005" 
Rows="1.000000" Width="4"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="10" Alias="j">
+                      <dxl:Ident ColId="1" ColName="j" TypeMdid="0.23.1.0"/>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter/>
+                  <dxl:OneTimeFilter/>
+                  <dxl:Result>
+                    <dxl:Properties>
+                      <dxl:Cost StartupCost="0" TotalCost="0.000001" 
Rows="1.000000" Width="1"/>
+                    </dxl:Properties>
+                    <dxl:ProjList>
+                      <dxl:ProjElem ColId="9" Alias="">
+                        <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                      </dxl:ProjElem>
+                    </dxl:ProjList>
+                    <dxl:Filter/>
+                    <dxl:OneTimeFilter/>
+                  </dxl:Result>
+                </dxl:Result>
+              </dxl:SubPlan>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:Filter/>
+          <dxl:OneTimeFilter/>
+          <dxl:TableScan>
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="882724.329447" 
Rows="3000.000000" Width="8"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="1" Alias="j">
+                <dxl:Ident ColId="1" ColName="j" TypeMdid="0.23.1.0"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:TableDescriptor Mdid="6.22366.1.0" TableName="t" LockMode="1" 
AclMode="2">
+              <dxl:Columns>
+                <dxl:Column ColId="1" Attno="2" ColName="j" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+                <dxl:Column ColId="2" Attno="-1" ColName="ctid" 
TypeMdid="0.27.1.0" ColWidth="6"/>
+                <dxl:Column ColId="3" Attno="-2" ColName="xmin" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                <dxl:Column ColId="4" Attno="-3" ColName="cmin" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                <dxl:Column ColId="5" Attno="-4" ColName="xmax" 
TypeMdid="0.28.1.0" ColWidth="4"/>
+                <dxl:Column ColId="6" Attno="-5" ColName="cmax" 
TypeMdid="0.29.1.0" ColWidth="4"/>
+                <dxl:Column ColId="7" Attno="-6" ColName="tableoid" 
TypeMdid="0.26.1.0" ColWidth="4"/>
+                <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" 
TypeMdid="0.23.1.0" ColWidth="4"/>
+              </dxl:Columns>
+            </dxl:TableDescriptor>
+          </dxl:TableScan>
+        </dxl:Result>
+      </dxl:GatherMotion>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git a/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp 
b/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp
index 6c67ca1cb4..964554d7ae 100644
--- a/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp
+++ b/src/backend/gporca/data/dxl/minidump/SubqueryOuterRefTVF.mdp
@@ -264,7 +264,7 @@
                 </dxl:ParamList>
                 <dxl:Aggregate AggregationStrategy="Plain" StreamSafe="false">
                   <dxl:Properties>
-                    <dxl:Cost StartupCost="0" TotalCost="1324057.288099" 
Rows="1.000000" Width="8"/>
+                    <dxl:Cost StartupCost="0" TotalCost="1324057.288099" 
Rows="3.000000" Width="8"/>
                   </dxl:Properties>
                   <dxl:GroupingColumns/>
                   <dxl:ProjList>
@@ -280,7 +280,7 @@
                   <dxl:Filter/>
                   <dxl:NestedLoopJoin JoinType="Inner" 
IndexNestedLoopJoin="false" OuterRefAsParam="false">
                     <dxl:Properties>
-                      <dxl:Cost StartupCost="0" TotalCost="1324057.288099" 
Rows="2000.000000" Width="1"/>
+                      <dxl:Cost StartupCost="0" TotalCost="1324057.288099" 
Rows="6000.000000" Width="1"/>
                     </dxl:Properties>
                     <dxl:ProjList/>
                     <dxl:Filter/>
diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp 
b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp
index abcb44365f..8ad219964c 100644
--- a/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalJoin.cpp
@@ -419,11 +419,13 @@ CPhysicalJoin::PdsDerive(CMemoryPool *mp, 
CExpressionHandle &exprhdl) const
 
        CDistributionSpec *pds;
 
-       if (CDistributionSpec::EdtStrictReplicated == pdsOuter->Edt() ||
+       if ((CDistributionSpec::EdtStrictReplicated == pdsOuter->Edt() ||
                CDistributionSpec::EdtTaintedReplicated == pdsOuter->Edt() ||
-               CDistributionSpec::EdtUniversal == pdsOuter->Edt())
+               CDistributionSpec::EdtUniversal == pdsOuter->Edt()) &&
+               CDistributionSpec::EdtUniversal != pdsInner->Edt())
        {
-               // if outer is replicated/universal, return inner distribution
+               // if outer is replicated/universal and inner is not universal
+               // then return inner distribution
                pds = pdsInner;
        }
        else
diff --git a/src/backend/gporca/server/CMakeLists.txt 
b/src/backend/gporca/server/CMakeLists.txt
index 8e4acdeba6..b5c4e8a7ca 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -353,7 +353,8 @@ ReplicatedJoinRandomDistributedTable 
ReplicatedLOJHashDistributedTable
 ReplicatedLOJRandomDistributedTable ReplicatedLOJReplicated
 ReplicatedNLJReplicated ReplicatedTableAggregate ReplicatedTableCTE
 ReplicatedTableGroupBy ReplicatedJoinPartitionedTable
-ReplicatedTableInClause ReplicatedTableSequenceInsert;
+ReplicatedTableInClause ReplicatedTableSequenceInsert
+JoinOnReplicatedUniversal;
 
 CTaintedReplicatedTest:
 InsertNonSingleton NonSingleton TaintedReplicatedAgg 
TaintedReplicatedWindowAgg TaintedReplicatedLimit TaintedReplicatedFilter
diff --git a/src/test/regress/expected/rpt.out 
b/src/test/regress/expected/rpt.out
index 4ba1103233..1b3bfd826e 100644
--- a/src/test/regress/expected/rpt.out
+++ b/src/test/regress/expected/rpt.out
@@ -1312,6 +1312,43 @@ explain (costs off) select * from rep_tab;
 
 reset optimizer_trace_fallback;
 reset optimizer_enable_replicated_table;
+-- Ensure plan with Gather Motion node is generated.
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+create table t (i int, j int) distributed replicated;
+insert into t values (1, 2);
+explain (costs off) select j, (select j) AS "Correlated Field" from t;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Seq Scan on t
+         SubPlan 1
+           ->  Result
+ Optimizer: Postgres query optimizer
+(5 rows)
+
+select j, (select j) AS "Correlated Field" from t;
+ j | Correlated Field 
+---+------------------
+ 2 |                2
+(1 row)
+
+explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   InitPlan 1 (returns $0)  (slice2)
+     ->  Result
+   ->  Seq Scan on t
+ Optimizer: Postgres query optimizer
+(5 rows)
+
+select j, (select 5) AS "Uncorrelated Field" from t;
+ j | Uncorrelated Field 
+---+--------------------
+ 2 |                  5
+(1 row)
+
 -- start_ignore
 drop schema rpt cascade;
 NOTICE:  drop cascades to 7 other objects
diff --git a/src/test/regress/expected/rpt_optimizer.out 
b/src/test/regress/expected/rpt_optimizer.out
index a51562c4b4..f8a79cd3e0 100644
--- a/src/test/regress/expected/rpt_optimizer.out
+++ b/src/test/regress/expected/rpt_optimizer.out
@@ -1307,6 +1307,46 @@ DETAIL:  Falling back to Postgres-based planner because 
GPORCA does not support
 
 reset optimizer_trace_fallback;
 reset optimizer_enable_replicated_table;
+-- Ensure plan with Gather Motion node is generated.
+drop table if exists t;
+NOTICE:  table "t" does not exist, skipping
+create table t (i int, j int) distributed replicated;
+insert into t values (1, 2);
+explain (costs off) select j, (select j) AS "Correlated Field" from t;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Seq Scan on t
+         SubPlan 1
+           ->  Result
+                 ->  Result
+ Optimizer: Pivotal Optimizer (GPORCA)
+(6 rows)
+
+select j, (select j) AS "Correlated Field" from t;
+ j | Correlated Field 
+---+------------------
+ 2 |                2
+(1 row)
+
+explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 1:1  (slice1; segments: 1)
+   ->  Nested Loop Left Join
+         Join Filter: true
+         ->  Seq Scan on t
+         ->  Materialize
+               ->  Result
+ Optimizer: Pivotal Optimizer (GPORCA)
+(7 rows)
+
+select j, (select 5) AS "Uncorrelated Field" from t;
+ j | Uncorrelated Field 
+---+--------------------
+ 2 |                  5
+(1 row)
+
 -- start_ignore
 drop schema rpt cascade;
 NOTICE:  drop cascades to 7 other objects
diff --git a/src/test/regress/sql/rpt.sql b/src/test/regress/sql/rpt.sql
index f412bb1b01..4d85d11931 100644
--- a/src/test/regress/sql/rpt.sql
+++ b/src/test/regress/sql/rpt.sql
@@ -555,6 +555,15 @@ explain (costs off) select * from rep_tab;
 reset optimizer_trace_fallback;
 reset optimizer_enable_replicated_table;
 
+-- Ensure plan with Gather Motion node is generated.
+drop table if exists t;
+create table t (i int, j int) distributed replicated;
+insert into t values (1, 2);
+explain (costs off) select j, (select j) AS "Correlated Field" from t;
+select j, (select j) AS "Correlated Field" from t;
+explain (costs off) select j, (select 5) AS "Uncorrelated Field" from t;
+select j, (select 5) AS "Uncorrelated Field" from t;
+
 -- start_ignore
 drop schema rpt cascade;
 -- end_ignore


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to