This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit a8ac5b3d74d6758bd3008a4478a08e5a4ae538ce
Author: bhari <[email protected]>
AuthorDate: Mon Aug 7 18:33:01 2023 +0530

    Do not convert IN query to Exists for set returning function (#16049)
    
    Fixes Issue: #15644
    
    Issue description:
    The preprocess step (26) rewrites IN query to EXIST query with a predicate.
    Conditions for conversion:
    * The query is a nested query with an outer IN clause on an inner SELECT 
FROM query and
    * Projection list from inner clause refers to outer query table columns and
    * Projection list from inner clause does not refer to any inner query table 
columns
    Example:
    Query: SELECT * FROM foo WHERE foo.a IN (SELECT foo.a+1 FROM bar);
    Converted query: SELECT * FROM foo WHERE foo.a=foo.a+1 AND EXISTS (SELECT * 
FROM bar);
    
    This conversion is not valid if the inner sub query is a set returning 
function.
    For example the query in the issue
    Query: SELECT a IN (SELECT generate_series(1,a)) FROM foo;
    when converted using the preprocessing step will result in
    Converted query: SELECT a=generate_series(1,a) FROM foo.
    This converted query is invalid because a single value of column a
    can't be compared to set returned from generate_series(1,a) with = operator.
    
    FIX:
    To fix the issue, an extra check has been added on inner subquery.
    This check tests if the algebrized expression of inner (CScalarSubqueryAny/ 
IN) subquery is of the hierarchy:
    CLogicalProject->Set returning CScalarProjectList
    The conversion is not done if the the inner subquery has a set returning 
function.
    
    * Added one new test case in gp_tsrf.sql and corrected the expected output 
from one previous test case.
    * Added a new mdp test case
    
    Issue can be seen in both 6X and 7X.
---
 .../dxl/minidump/InSubqWithPrjListReturnSet.mdp    | 306 +++++++++++++++++++++
 .../src/operators/CExpressionPreprocessor.cpp      |  11 +-
 src/backend/gporca/server/CMakeLists.txt           |   2 +-
 src/test/regress/expected/gp_tsrf.out              |  76 ++++-
 src/test/regress/expected/gp_tsrf_optimizer.out    |  85 +++++-
 src/test/regress/sql/gp_tsrf.sql                   |  21 +-
 6 files changed, 491 insertions(+), 10 deletions(-)

diff --git 
a/src/backend/gporca/data/dxl/minidump/InSubqWithPrjListReturnSet.mdp 
b/src/backend/gporca/data/dxl/minidump/InSubqWithPrjListReturnSet.mdp
new file mode 100644
index 0000000000..b2583ddb3b
--- /dev/null
+++ b/src/backend/gporca/data/dxl/minidump/InSubqWithPrjListReturnSet.mdp
@@ -0,0 +1,306 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/";>
+<dxl:Comment><![CDATA[
+Objective: The preprocess that rewrites a nested query from IN subquery to 
EXIST subquery with a predicate, if
+  * Query is nested with an outer IN clause
+  * Inner subquery is a SELECT FROM query and
+  * Projection list from inner clause refers to outer query table columns and
+  * Projection list from inner clause does not refer to any inner query table 
columns
+
+It should not happen if inner sub query returns set,
+because a single value and a set can't be compared with "=" operator.
+
+SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT generate_series(1, 
3) AS a) AS s;
+              QUERY PLAN
+---------------------------------------
+Result
+  ->  ProjectSet
+        ->  Result
+  SubPlan 1
+    ->  Result
+          ->  ProjectSet
+                ->  Result
+]]>
+</dxl:Comment>
+  <dxl:Thread Id="0">
+    <dxl:OptimizerConfig>
+      <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/>
+      <dxl:StatisticsConfig DampingFactorFilter="0.750000" 
DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" 
MaxStatsBuckets="100"/>
+      <dxl:CTEConfig CTEInliningCutoff="0"/>
+      <dxl:WindowOids RowNumber="3100" Rank="3101"/>
+      <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3">
+        <dxl:CostParams>
+          <dxl:CostParam Name="NLJFactor" Value="1024.000000" 
LowerBound="1023.500000" UpperBound="1024.500000"/>
+        </dxl:CostParams>
+      </dxl:CostModelConfig>
+      <dxl:Hint JoinArityForAssociativityCommutativity="18" 
ArrayExpansionThreshold="20" JoinOrderDynamicProgThreshold="10" 
BroadcastThreshold="100000" EnforceConstraintsOnDML="false" 
PushGroupByBelowSetopThreshold="10" XformBindThreshold="0" SkewFactor="0"/>
+      <dxl:TraceFlags 
Value="101013,102001,102002,102003,102043,102074,102120,102144,102162,102163,103001,103003,103014,103015,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/>
+    </dxl:OptimizerConfig>
+    <dxl:Metadata SystemIds="0.GPDB">
+      <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.2222.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/>
+        <dxl:PartOpfamily Mdid="0.424.1.0"/>
+        <dxl:EqualityOp Mdid="0.91.1.0"/>
+        <dxl:InequalityOp Mdid="0.85.1.0"/>
+        <dxl:LessThanOp Mdid="0.58.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.59.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/>
+        <dxl:ComparisonOp Mdid="0.1693.1.0"/>
+        <dxl:ArrayType Mdid="0.1000.1.0"/>
+        <dxl:MinAgg Mdid="0.0.0.0"/>
+        <dxl:MaxAgg Mdid="0.0.0.0"/>
+        <dxl:AvgAgg Mdid="0.0.0.0"/>
+        <dxl:SumAgg Mdid="0.0.0.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1976.1.0"/>
+        <dxl:EqualityOp Mdid="0.410.1.0"/>
+        <dxl:InequalityOp Mdid="0.411.1.0"/>
+        <dxl:LessThanOp Mdid="0.412.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.414.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.413.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1016.1.0"/>
+        <dxl:MinAgg Mdid="0.2131.1.0"/>
+        <dxl:MaxAgg Mdid="0.2115.1.0"/>
+        <dxl:AvgAgg Mdid="0.2100.1.0"/>
+        <dxl:SumAgg Mdid="0.2107.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" 
IsHashable="true" IsMergeJoinable="true" IsComposite="false" 
IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true">
+        <dxl:DistrOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/>
+        <dxl:PartOpfamily Mdid="0.1976.1.0"/>
+        <dxl:EqualityOp Mdid="0.96.1.0"/>
+        <dxl:InequalityOp Mdid="0.518.1.0"/>
+        <dxl:LessThanOp Mdid="0.97.1.0"/>
+        <dxl:LessThanEqualsOp Mdid="0.523.1.0"/>
+        <dxl:GreaterThanOp Mdid="0.521.1.0"/>
+        <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/>
+        <dxl:ComparisonOp Mdid="0.351.1.0"/>
+        <dxl:ArrayType Mdid="0.1007.1.0"/>
+        <dxl:MinAgg Mdid="0.2132.1.0"/>
+        <dxl:MaxAgg Mdid="0.2116.1.0"/>
+        <dxl:AvgAgg Mdid="0.2101.1.0"/>
+        <dxl:SumAgg Mdid="0.2108.1.0"/>
+        <dxl:CountAgg Mdid="0.2147.1.0"/>
+      </dxl:Type>
+      <dxl:GPDBScalarOp Mdid="0.410.1.0" Name="=" ComparisonType="Eq" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.467.1.0"/>
+        <dxl:Commutator Mdid="0.410.1.0"/>
+        <dxl:InverseOp Mdid="0.411.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBScalarOp Mdid="0.413.1.0" Name="&gt;" ComparisonType="GT" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.20.1.0"/>
+        <dxl:RightType Mdid="0.20.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.470.1.0"/>
+        <dxl:Commutator Mdid="0.412.1.0"/>
+        <dxl:InverseOp Mdid="0.414.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBFunc Mdid="0.1067.1.0" Name="generate_series" ReturnsSet="true" 
Stability="Immutable" IsStrict="true" IsNDVPreserving="false" 
IsAllowedForPS="false">
+        <dxl:ResultType Mdid="0.23.1.0"/>
+      </dxl:GPDBFunc>
+      <dxl:GPDBAgg Mdid="0.2108.1.0" Name="sum" IsRepSafe="true" 
IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+      <dxl:GPDBFunc Mdid="0.65.1.0" Name="int4eq" ReturnsSet="false" 
Stability="Immutable" IsStrict="true" IsNDVPreserving="false" 
IsAllowedForPS="false">
+        <dxl:ResultType Mdid="0.16.1.0"/>
+      </dxl:GPDBFunc>
+      <dxl:MDCast Mdid="3.23.1.0;23.1.0" Name="int4" BinaryCoercible="true" 
SourceTypeId="0.23.1.0" DestinationTypeId="0.23.1.0" CastFuncId="0.0.0.0" 
CoercePathType="0"/>
+      <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" 
ReturnsNullOnNullInput="true" IsNDVPreserving="false">
+        <dxl:LeftType Mdid="0.23.1.0"/>
+        <dxl:RightType Mdid="0.23.1.0"/>
+        <dxl:ResultType Mdid="0.16.1.0"/>
+        <dxl:OpFunc Mdid="0.65.1.0"/>
+        <dxl:Commutator Mdid="0.96.1.0"/>
+        <dxl:InverseOp Mdid="0.518.1.0"/>
+        <dxl:HashOpfamily Mdid="0.1977.1.0"/>
+        <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/>
+        <dxl:Opfamilies>
+          <dxl:Opfamily Mdid="0.1976.1.0"/>
+          <dxl:Opfamily Mdid="0.1977.1.0"/>
+          <dxl:Opfamily Mdid="0.4054.1.0"/>
+          <dxl:Opfamily Mdid="0.7100.1.0"/>
+          <dxl:Opfamily Mdid="0.10009.1.0"/>
+        </dxl:Opfamilies>
+      </dxl:GPDBScalarOp>
+      <dxl:GPDBAgg Mdid="0.2147.1.0" Name="count" IsRepSafe="true" 
IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+      <dxl:GPDBAgg Mdid="0.2803.1.0" Name="count" IsRepSafe="true" 
IsSplittable="true" HashAggCapable="true">
+        <dxl:ResultType Mdid="0.20.1.0"/>
+        <dxl:IntermediateResultType Mdid="0.20.1.0"/>
+      </dxl:GPDBAgg>
+    </dxl:Metadata>
+    <dxl:Query>
+      <dxl:OutputColumns>
+        <dxl:Ident ColId="5" ColName="x" TypeMdid="0.16.1.0"/>
+      </dxl:OutputColumns>
+      <dxl:CTEList/>
+      <dxl:LogicalProject>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="5" Alias="x">
+            <dxl:SubqueryAny OperatorName="=" OperatorMdid="0.96.1.0" 
ColId="4">
+              <dxl:Ident ColId="2" ColName="a" TypeMdid="0.23.1.0"/>
+              <dxl:LogicalProject>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="4" Alias="generate_series">
+                    <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" 
TypeMdid="0.23.1.0" FuncVariadic="false">
+                      <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                      <dxl:Ident ColId="2" ColName="a" TypeMdid="0.23.1.0"/>
+                    </dxl:FuncExpr>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:LogicalConstTable>
+                  <dxl:Columns>
+                    <dxl:Column ColId="3" Attno="1" ColName="" 
TypeMdid="0.16.1.0"/>
+                  </dxl:Columns>
+                  <dxl:ConstTuple>
+                    <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+                  </dxl:ConstTuple>
+                </dxl:LogicalConstTable>
+              </dxl:LogicalProject>
+            </dxl:SubqueryAny>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:LogicalProject>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="2" Alias="a">
+              <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" 
TypeMdid="0.23.1.0" FuncVariadic="false">
+                <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                <dxl:ConstValue TypeMdid="0.23.1.0" Value="3"/>
+              </dxl:FuncExpr>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:LogicalConstTable>
+            <dxl:Columns>
+              <dxl:Column ColId="1" Attno="1" ColName="" TypeMdid="0.16.1.0"/>
+            </dxl:Columns>
+            <dxl:ConstTuple>
+              <dxl:Datum TypeMdid="0.16.1.0" Value="true"/>
+            </dxl:ConstTuple>
+          </dxl:LogicalConstTable>
+        </dxl:LogicalProject>
+      </dxl:LogicalProject>
+    </dxl:Query>
+    <dxl:Plan Id="0" SpaceSize="66">
+      <dxl:Result>
+        <dxl:Properties>
+          <dxl:Cost StartupCost="0" TotalCost="441357.573926" Rows="1.000000" 
Width="1"/>
+        </dxl:Properties>
+        <dxl:ProjList>
+          <dxl:ProjElem ColId="4" Alias="x">
+            <dxl:SubPlan TypeMdid="0.16.1.0" SubPlanType="AnySubPlan">
+              <dxl:TestExpr>
+                <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0">
+                  <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+                  <dxl:Ident ColId="3" ColName="generate_series" 
TypeMdid="0.23.1.0"/>
+                </dxl:Comparison>
+              </dxl:TestExpr>
+              <dxl:ParamList>
+                <dxl:Param ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+              </dxl:ParamList>
+              <dxl:Result>
+                <dxl:Properties>
+                  <dxl:Cost StartupCost="0" TotalCost="0.000106" 
Rows="1.000000" Width="5"/>
+                </dxl:Properties>
+                <dxl:ProjList>
+                  <dxl:ProjElem ColId="3" Alias="generate_series">
+                    <dxl:Ident ColId="3" ColName="generate_series" 
TypeMdid="0.23.1.0"/>
+                  </dxl:ProjElem>
+                </dxl:ProjList>
+                <dxl:Filter/>
+                <dxl:OneTimeFilter/>
+                <dxl:Result>
+                  <dxl:Properties>
+                    <dxl:Cost StartupCost="0" TotalCost="0.000106" 
Rows="1.000000" Width="5"/>
+                  </dxl:Properties>
+                  <dxl:ProjList>
+                    <dxl:ProjElem ColId="9" Alias="ColRef_0009">
+                      <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                    </dxl:ProjElem>
+                    <dxl:ProjElem ColId="3" Alias="generate_series">
+                      <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" 
TypeMdid="0.23.1.0" FuncVariadic="false">
+                        <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                        <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/>
+                      </dxl:FuncExpr>
+                    </dxl:ProjElem>
+                  </dxl:ProjList>
+                  <dxl:Filter/>
+                  <dxl:OneTimeFilter/>
+                  <dxl:Result>
+                    <dxl:Properties>
+                      <dxl:Cost StartupCost="0" TotalCost="0.000001" 
Rows="1.000000" Width="1"/>
+                    </dxl:Properties>
+                    <dxl:ProjList>
+                      <dxl:ProjElem ColId="2" Alias="">
+                        <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+                      </dxl:ProjElem>
+                    </dxl:ProjList>
+                    <dxl:Filter/>
+                    <dxl:OneTimeFilter/>
+                  </dxl:Result>
+                </dxl:Result>
+              </dxl:Result>
+            </dxl:SubPlan>
+          </dxl:ProjElem>
+        </dxl:ProjList>
+        <dxl:Filter/>
+        <dxl:OneTimeFilter/>
+        <dxl:Result>
+          <dxl:Properties>
+            <dxl:Cost StartupCost="0" TotalCost="0.000105" Rows="1.000000" 
Width="4"/>
+          </dxl:Properties>
+          <dxl:ProjList>
+            <dxl:ProjElem ColId="1" Alias="a">
+              <dxl:FuncExpr FuncId="0.1067.1.0" FuncRetSet="true" 
TypeMdid="0.23.1.0" FuncVariadic="false">
+                <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
+                <dxl:ConstValue TypeMdid="0.23.1.0" Value="3"/>
+              </dxl:FuncExpr>
+            </dxl:ProjElem>
+          </dxl:ProjList>
+          <dxl:Filter/>
+          <dxl:OneTimeFilter/>
+          <dxl:Result>
+            <dxl:Properties>
+              <dxl:Cost StartupCost="0" TotalCost="0.000001" Rows="1.000000" 
Width="1"/>
+            </dxl:Properties>
+            <dxl:ProjList>
+              <dxl:ProjElem ColId="0" Alias="">
+                <dxl:ConstValue TypeMdid="0.16.1.0" Value="true"/>
+              </dxl:ProjElem>
+            </dxl:ProjList>
+            <dxl:Filter/>
+            <dxl:OneTimeFilter/>
+          </dxl:Result>
+        </dxl:Result>
+      </dxl:Result>
+    </dxl:Plan>
+  </dxl:Thread>
+</dxl:DXLMessage>
diff --git 
a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp 
b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
index f87f22d3a6..def2eb6395 100644
--- a/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CExpressionPreprocessor.cpp
@@ -2779,12 +2779,20 @@ 
CExpressionPreprocessor::PexprExistWithPredFromINSubq(CMemoryPool *mp,
                // it does not include any column from the relational child.
                if (COperator::EopLogicalProject == 
pexprLogicalProject->Pop()->Eopid())
                {
-                       // bail out if subquery has an inner reference or does 
not have any outer reference
+                       // bail out if subquery has an inner reference or
+                       // does not have any outer reference
                        if (!CUtils::HasOuterRefs(pexprLogicalProject) ||
                                
CUtils::FInnerRefInProjectList(pexprLogicalProject))
                        {
                                return pexprNew;
                        }
+                       // also bail out if the project list returns set
+                       CExpression *pexprProjectList = 
(*pexprLogicalProject)[1];
+                       if 
(pexprProjectList->DeriveSetReturningFunctionColumns()->Size() >
+                               0)
+                       {
+                               return pexprNew;
+                       }
                }
                else
                {
@@ -2805,7 +2813,6 @@ 
CExpressionPreprocessor::PexprExistWithPredFromINSubq(CMemoryPool *mp,
                {
                        pexprNew->Release();
                        pexprNew = pexprNewConverted;
-                       ;
                }
        }
 
diff --git a/src/backend/gporca/server/CMakeLists.txt 
b/src/backend/gporca/server/CMakeLists.txt
index 6fe0e3e86d..6bb79426c5 100644
--- a/src/backend/gporca/server/CMakeLists.txt
+++ b/src/backend/gporca/server/CMakeLists.txt
@@ -59,7 +59,7 @@ NestedInSubqWithPrjListOuterRefNoInnerRef  InEqualityJoin 
Correlated-SemiJoin
 CorrelatedSemiJoin-True CorrelatedIN-LeftSemiJoin-True 
CorrelatedIN-LeftSemiNotIn-True
 InSubqWithPrjListOuterRefNoInnerRefColumnPlusConstIn 
CorrelatedIN-LeftSemiJoin-Limit
 CorrelatedLeftSemiNLJoinWithLimit PushFilterToSemiJoinLeftChild 
SubqOuterReferenceInClause
-SemiJoinDPE;
+SemiJoinDPE InSubqWithPrjListReturnSet;
 
 CAntiSemiJoinTest:
 AntiSemiJoin2Select-1 AntiSemiJoin2Select-2 NOT-IN-NotNullBoth 
NOT-IN-NullInner NOT-IN-NullOuter
diff --git a/src/test/regress/expected/gp_tsrf.out 
b/src/test/regress/expected/gp_tsrf.out
index 089481b1ea..5fb8041f72 100644
--- a/src/test/regress/expected/gp_tsrf.out
+++ b/src/test/regress/expected/gp_tsrf.out
@@ -251,7 +251,26 @@ select generate_series(1,a+1),b+generate_series(1,4),c 
from test_srf;
 (8 rows)
 
 drop table test_srf;
--- Below query giving incorrect output with ORCA.Works fine on planner.Github 
Issue #15644
+-- Test that the preprocessor step where
+-- IN subquery is converted to EXIST subquery with a predicate,
+-- is not happening if inner sub query is SRF
+-- Fixed as part of github issue #15644
+explain verbose SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT 
generate_series(1, 3) AS a) AS s;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Subquery Scan on s  (cost=0.00..11.35 rows=3 width=1)
+   Output: (SubPlan 1)
+   ->  ProjectSet  (cost=0.00..0.03 rows=3 width=4)
+         Output: generate_series(1, 3)
+         ->  Result  (cost=0.00..0.01 rows=1 width=0)
+   SubPlan 1
+     ->  ProjectSet  (cost=0.00..5.02 rows=1000 width=4)
+           Output: generate_series(1, s.a)
+           ->  Result  (cost=0.00..0.01 rows=1 width=0)
+ Optimizer: Postgres query optimizer
+ Settings: optimizer = 'off'
+(11 rows)
+
 SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT generate_series(1, 
3) AS a) AS s;
  x 
 ---
@@ -260,3 +279,58 @@ SELECT a IN (SELECT generate_series(1,a)) AS x FROM 
(SELECT generate_series(1, 3
  t
 (3 rows)
 
+SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+ a 
+---
+ 1
+ 2
+ 3
+(3 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Values Scan on "*VALUES*"
+   Output: "*VALUES*".column1
+   Filter: (SubPlan 1)
+   SubPlan 1
+     ->  ProjectSet
+           Output: generate_series(1, "*VALUES*".column1)
+           ->  Result
+ Optimizer: Postgres query optimizer
+ Settings: optimizer = 'off'
+(9 rows)
+
+CREATE TABLE t_outer (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_outer SELECT i, i+1 FROM generate_series(1,3) as i;  
+CREATE TABLE t_inner (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_inner SELECT i, i+1 FROM generate_series(1,3) as i;
+SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, t_outer.b) 
FROM t_inner);
+ a | b 
+---+---
+ 2 | 3
+ 3 | 4
+ 1 | 2
+(3 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, 
t_outer.b)  FROM t_inner);
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: t_outer.a, t_outer.b
+   ->  Seq Scan on public.t_outer
+         Output: t_outer.a, t_outer.b
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  ProjectSet
+                 Output: generate_series(1, t_outer.b)
+                 ->  Materialize
+                       ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                             ->  Seq Scan on public.t_inner
+ Optimizer: Postgres query optimizer
+ Settings: optimizer = 'off'
+(13 rows)
+
+DROP TABLE t_outer, t_inner;
diff --git a/src/test/regress/expected/gp_tsrf_optimizer.out 
b/src/test/regress/expected/gp_tsrf_optimizer.out
index 122ae7690f..4a3e164742 100644
--- a/src/test/regress/expected/gp_tsrf_optimizer.out
+++ b/src/test/regress/expected/gp_tsrf_optimizer.out
@@ -249,15 +249,90 @@ select generate_series(1,a+1),b+generate_series(1,4),c 
from test_srf;
 (8 rows)
 
 drop table test_srf;
--- Below query giving incorrect output with ORCA.Works fine on planner.Github 
Issue #15644
+-- Test that the preprocessor step where
+-- IN subquery is converted to EXIST subquery with a predicate,
+-- is not happening if inner sub query is SRF
+-- Fixed as part of github issue #15644
+explain verbose SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT 
generate_series(1, 3) AS a) AS s;
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
+ Result  (cost=0.00..441357.57 rows=1 width=1)
+   Output: (SubPlan 1)
+   ->  ProjectSet  (cost=0.00..0.00 rows=1 width=4)
+         Output: generate_series(1, 3)
+         ->  Result  (cost=0.00..0.00 rows=1 width=1)
+               Output: true
+   SubPlan 1
+     ->  Result  (cost=0.00..0.00 rows=1 width=5)
+           Output: (generate_series(1, (generate_series(1, 3))))
+           ->  ProjectSet  (cost=0.00..0.00 rows=1 width=5)
+                 Output: true, generate_series(1, (generate_series(1, 3)))
+                 ->  Result  (cost=0.00..0.00 rows=1 width=1)
+                       Output: true
+ Optimizer: Pivotal Optimizer (GPORCA)
+(14 rows)
+
 SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT generate_series(1, 
3) AS a) AS s;
  x 
 ---
  t
- f
  t
- f
- f
  t
-(6 rows)
+(3 rows)
+
+SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+ a 
+---
+ 1
+ 2
+ 3
+(3 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+                       QUERY PLAN                       
+--------------------------------------------------------
+ Result
+   Output: column1
+   Filter: (SubPlan 1)
+   ->  Values Scan on "Values"
+         Output: column1
+   SubPlan 1
+     ->  ProjectSet
+           Output: generate_series(1, "Values".column1)
+           ->  Result
+                 Output: true
+ Optimizer: Pivotal Optimizer (GPORCA)
+(11 rows)
+
+CREATE TABLE t_outer (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_outer SELECT i, i+1 FROM generate_series(1,3) as i;  
+CREATE TABLE t_inner (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_inner SELECT i, i+1 FROM generate_series(1,3) as i;
+SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, t_outer.b) 
FROM t_inner);
+ a | b 
+---+---
+ 2 | 3
+ 3 | 4
+ 1 | 2
+(3 rows)
+
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, 
t_outer.b)  FROM t_inner);
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Output: t_outer.a, t_outer.b
+   ->  Seq Scan on public.t_outer
+         Output: t_outer.a, t_outer.b
+         Filter: (SubPlan 1)
+         SubPlan 1
+           ->  ProjectSet
+                 Output: generate_series(1, t_outer.b)
+                 ->  Materialize
+                       ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                             ->  Seq Scan on public.t_inner
+ Optimizer: Pivotal Optimizer (GPORCA)
+(12 rows)
 
+DROP TABLE t_outer, t_inner;
diff --git a/src/test/regress/sql/gp_tsrf.sql b/src/test/regress/sql/gp_tsrf.sql
index a3732e5a48..515729912e 100644
--- a/src/test/regress/sql/gp_tsrf.sql
+++ b/src/test/regress/sql/gp_tsrf.sql
@@ -41,5 +41,24 @@ explain verbose select 
generate_series(1,a+1),b+generate_series(1,4),c from test
 select generate_series(1,a+1),b+generate_series(1,4),c from test_srf;
 drop table test_srf;
 
--- Below query giving incorrect output with ORCA.Works fine on planner.Github 
Issue #15644
+-- Test that the preprocessor step where
+-- IN subquery is converted to EXIST subquery with a predicate,
+-- is not happening if inner sub query is SRF
+-- Fixed as part of github issue #15644
+
+explain verbose SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT 
generate_series(1, 3) AS a) AS s;
 SELECT a IN (SELECT generate_series(1,a)) AS x FROM (SELECT generate_series(1, 
3) AS a) AS s;
+
+SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT a FROM (values(1),(2),(3)) as t(a) where a IN (SELECT 
generate_series(1,a));
+
+CREATE TABLE t_outer (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_outer SELECT i, i+1 FROM generate_series(1,3) as i;  
+CREATE TABLE t_inner (a int, b int) DISTRIBUTED BY (a);
+INSERT INTO t_inner SELECT i, i+1 FROM generate_series(1,3) as i;
+
+SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, t_outer.b) 
FROM t_inner);
+EXPLAIN (VERBOSE, COSTS OFF)
+  SELECT * FROM t_outer WHERE t_outer.b IN (SELECT generate_series(1, 
t_outer.b)  FROM t_inner);
+DROP TABLE t_outer, t_inner;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to