This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 5a720a1e7c6a0465f833bf7b32090add2126bb83 Author: Chris Hajas <[email protected]> AuthorDate: Thu Sep 8 16:18:18 2022 -0700 Allow certain functions to be safely executed on replicated slices in Orca (#13873) Previously, Orca disallowed all aggregate functions from being executed on replicated slices. This meant that the results were broadcasted or gathered on a single segment to ensure consistency and correct results. This is necessary because some functions such as array_agg and custom user-created functions are sensitive to the order of data. This can cause wrong results in some cases. However, many functions, especially commonly used ones such as sum, avg, count, min, and max, are not sensitive to the order of data and can be safely executed. We now make an exception for these common cases, currently the above agg functions on ints and count(*). See https://github.com/greenplum-db/gpdb/pull/10978 for previous discussion. --- .../data/dxl/minidump/ReplicatedTableGroupBy.mdp | 115 +++---- .../data/dxl/minidump/ReplicatedTableInClause.mdp | 127 ++----- .../minidump/ReplicatedTableWithAggNoMotion.mdp | 370 +++++++++++++++++++++ .../libgpopt/include/gpopt/base/CDrvdPropScalar.h | 8 + .../gporca/libgpopt/include/gpopt/base/CUtils.h | 5 + .../libgpopt/include/gpopt/operators/CExpression.h | 1 + .../include/gpopt/operators/CExpressionHandle.h | 1 + .../include/gpopt/operators/CScalarProjectList.h | 3 + .../gporca/libgpopt/src/base/CDrvdPropScalar.cpp | 27 ++ .../gporca/libgpopt/src/operators/CExpression.cpp | 7 + .../libgpopt/src/operators/CExpressionHandle.cpp | 14 + .../src/operators/COrderedAggPreprocessor.cpp | 5 +- .../gporca/libgpopt/src/operators/CPhysicalAgg.cpp | 9 + .../libgpopt/src/operators/CScalarProjectList.cpp | 49 +++ .../include/naucrates/md/CMDTypeOidGPDB.h | 4 +- src/backend/gporca/server/CMakeLists.txt | 2 +- src/test/regress/expected/rpt_optimizer.out | 23 +- 17 files changed, 574 insertions(+), 196 deletions(-) diff --git a/src/backend/gporca/data/dxl/minidump/ReplicatedTableGroupBy.mdp b/src/backend/gporca/data/dxl/minidump/ReplicatedTableGroupBy.mdp index b433f9c8aa..e620dcac08 100644 --- a/src/backend/gporca/data/dxl/minidump/ReplicatedTableGroupBy.mdp +++ b/src/backend/gporca/data/dxl/minidump/ReplicatedTableGroupBy.mdp @@ -1001,10 +1001,10 @@ </dxl:LogicalGet> </dxl:LogicalGroupBy> </dxl:Query> - <dxl:Plan Id="0" SpaceSize="8"> - <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> + <dxl:Plan Id="0" SpaceSize="10"> + <dxl:GatherMotion InputSegments="0" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.028243" Rows="99.999999" Width="12"/> + <dxl:Cost StartupCost="0" TotalCost="431.042609" Rows="99.999999" Width="12"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="1" Alias="b"> @@ -1018,7 +1018,7 @@ <dxl:SortingColumnList/> <dxl:Aggregate AggregationStrategy="Hashed" StreamSafe="false"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.023771" Rows="99.999999" Width="12"/> + <dxl:Cost StartupCost="0" TotalCost="431.029193" Rows="299.999997" Width="12"/> </dxl:Properties> <dxl:GroupingColumns> <dxl:GroupingColumn ColId="1"/> @@ -1030,7 +1030,7 @@ <dxl:ProjElem ColId="9" Alias="count"> <dxl:AggFunc AggMdid="0.2147.1.0" AggDistinct="false" AggStage="Final" AggKind="n" AggArgTypes=""> <dxl:ValuesList ParamType="aggargs"> - <dxl:Ident ColId="10" ColName="ColRef_0010" TypeMdid="0.20.1.0"/> + <dxl:Ident ColId="10" ColName="ColRef_0010" TypeMdid="0.20.1.0"/> </dxl:ValuesList> <dxl:ValuesList ParamType="aggdirectargs"/> <dxl:ValuesList ParamType="aggorder"/> @@ -1039,92 +1039,57 @@ </dxl:ProjElem> </dxl:ProjList> <dxl:Filter/> - <dxl:RedistributeMotion InputSegments="0" OutputSegments="0,1,2"> + <dxl:Aggregate AggregationStrategy="Hashed" StreamSafe="true"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.019502" Rows="99.999999" Width="12"/> + <dxl:Cost StartupCost="0" TotalCost="431.016386" Rows="299.999997" Width="12"/> </dxl:Properties> + <dxl:GroupingColumns> + <dxl:GroupingColumn ColId="1"/> + </dxl:GroupingColumns> <dxl:ProjList> + <dxl:ProjElem ColId="10" Alias="ColRef_0010"> + <dxl:AggFunc AggMdid="0.2147.1.0" AggDistinct="false" AggStage="Partial" AggKind="n" AggArgTypes=""> + <dxl:ValuesList ParamType="aggargs"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ValuesList> + <dxl:ValuesList ParamType="aggdirectargs"/> + <dxl:ValuesList ParamType="aggorder"/> + <dxl:ValuesList ParamType="aggdistinct"/> + </dxl:AggFunc> + </dxl:ProjElem> <dxl:ProjElem ColId="1" Alias="b"> <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> </dxl:ProjElem> - <dxl:ProjElem ColId="10" Alias="ColRef_0010"> - <dxl:Ident ColId="10" ColName="ColRef_0010" TypeMdid="0.20.1.0"/> - </dxl:ProjElem> </dxl:ProjList> <dxl:Filter/> - <dxl:SortingColumnList/> - <dxl:HashExprList> - <dxl:HashExpr> - <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> - </dxl:HashExpr> - </dxl:HashExprList> - <dxl:Result> + <dxl:TableScan> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.016386" Rows="299.999997" Width="12"/> + <dxl:Cost StartupCost="0" TotalCost="431.002090" Rows="300.000000" Width="8"/> </dxl:Properties> <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> <dxl:ProjElem ColId="1" Alias="b"> <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> </dxl:ProjElem> - <dxl:ProjElem ColId="10" Alias="ColRef_0010"> - <dxl:Ident ColId="10" ColName="ColRef_0010" TypeMdid="0.20.1.0"/> - </dxl:ProjElem> </dxl:ProjList> <dxl:Filter/> - <dxl:OneTimeFilter/> - <dxl:Aggregate AggregationStrategy="Hashed" StreamSafe="true"> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.016386" Rows="299.999997" Width="12"/> - </dxl:Properties> - <dxl:GroupingColumns> - <dxl:GroupingColumn ColId="1"/> - </dxl:GroupingColumns> - <dxl:ProjList> - <dxl:ProjElem ColId="10" Alias="ColRef_0010"> - <dxl:AggFunc AggMdid="0.2147.1.0" AggDistinct="false" AggStage="Partial" AggKind="n" AggArgTypes=""> - <dxl:ValuesList ParamType="aggargs"> - <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ValuesList> - <dxl:ValuesList ParamType="aggdirectargs"/> - <dxl:ValuesList ParamType="aggorder"/> - <dxl:ValuesList ParamType="aggdistinct"/> - </dxl:AggFunc> - </dxl:ProjElem> - <dxl:ProjElem ColId="1" Alias="b"> - <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:TableScan> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.002090" Rows="300.000000" Width="8"/> - </dxl:Properties> - <dxl:ProjList> - <dxl:ProjElem ColId="0" Alias="a"> - <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - <dxl:ProjElem ColId="1" Alias="b"> - <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:TableDescriptor Mdid="0.16421.1.0" TableName="r1"> - <dxl:Columns> - <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> - <dxl:Column ColId="1" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> - <dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> - <dxl:Column ColId="3" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> - <dxl:Column ColId="4" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> - <dxl:Column ColId="5" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> - <dxl:Column ColId="6" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> - <dxl:Column ColId="7" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> - <dxl:Column ColId="8" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> - </dxl:Columns> - </dxl:TableDescriptor> - </dxl:TableScan> - </dxl:Aggregate> - </dxl:Result> - </dxl:RedistributeMotion> + <dxl:TableDescriptor Mdid="0.16421.1.0" TableName="r1"> + <dxl:Columns> + <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="1" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="3" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:TableScan> + </dxl:Aggregate> </dxl:Aggregate> </dxl:GatherMotion> </dxl:Plan> diff --git a/src/backend/gporca/data/dxl/minidump/ReplicatedTableInClause.mdp b/src/backend/gporca/data/dxl/minidump/ReplicatedTableInClause.mdp index afb250525a..60dab4c14d 100644 --- a/src/backend/gporca/data/dxl/minidump/ReplicatedTableInClause.mdp +++ b/src/backend/gporca/data/dxl/minidump/ReplicatedTableInClause.mdp @@ -1105,10 +1105,10 @@ </dxl:LogicalGet> </dxl:LogicalSelect> </dxl:Query> - <dxl:Plan Id="0" SpaceSize="60"> + <dxl:Plan Id="0" SpaceSize="84"> <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="862.014467" Rows="9.090900" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="862.014686" Rows="9.090900" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -1120,9 +1120,9 @@ </dxl:ProjList> <dxl:Filter/> <dxl:SortingColumnList/> - <dxl:HashJoin JoinType="Inner"> + <dxl:HashJoin JoinType="In"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="862.014196" Rows="9.090900" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="862.014415" Rows="9.090900" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -1167,111 +1167,34 @@ </dxl:Columns> </dxl:TableDescriptor> </dxl:TableScan> - <dxl:Aggregate AggregationStrategy="Sorted" StreamSafe="false"> + <dxl:TableScan> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.006384" Rows="9.090900" Width="4"/> + <dxl:Cost StartupCost="0" TotalCost="431.005448" Rows="27.272700" Width="4"/> </dxl:Properties> - <dxl:GroupingColumns> - <dxl:GroupingColumn ColId="9"/> - </dxl:GroupingColumns> <dxl:ProjList> <dxl:ProjElem ColId="9" Alias="a"> <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> </dxl:ProjElem> </dxl:ProjList> - <dxl:Filter/> - <dxl:Sort SortDiscardDuplicates="false"> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.006365" Rows="9.090900" Width="4"/> - </dxl:Properties> - <dxl:ProjList> - <dxl:ProjElem ColId="9" Alias="a"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:SortingColumnList> - <dxl:SortingColumn ColId="9" SortOperatorMdid="0.97.1.0" SortOperatorName="<" SortNullsFirst="false"/> - </dxl:SortingColumnList> - <dxl:LimitCount/> - <dxl:LimitOffset/> - <dxl:RedistributeMotion InputSegments="0" OutputSegments="0,1,2"> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.006255" Rows="9.090900" Width="4"/> - </dxl:Properties> - <dxl:ProjList> - <dxl:ProjElem ColId="9" Alias="a"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:SortingColumnList/> - <dxl:HashExprList> - <dxl:HashExpr> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:HashExpr> - </dxl:HashExprList> - <dxl:Aggregate AggregationStrategy="Sorted" StreamSafe="false"> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.006161" Rows="27.272700" Width="4"/> - </dxl:Properties> - <dxl:GroupingColumns> - <dxl:GroupingColumn ColId="9"/> - </dxl:GroupingColumns> - <dxl:ProjList> - <dxl:ProjElem ColId="9" Alias="a"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:Sort SortDiscardDuplicates="false"> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.006104" Rows="27.272700" Width="4"/> - </dxl:Properties> - <dxl:ProjList> - <dxl:ProjElem ColId="9" Alias="a"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter/> - <dxl:SortingColumnList> - <dxl:SortingColumn ColId="9" SortOperatorMdid="0.97.1.0" SortOperatorName="<" SortNullsFirst="false"/> - </dxl:SortingColumnList> - <dxl:LimitCount/> - <dxl:LimitOffset/> - <dxl:TableScan> - <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="431.005448" Rows="27.272700" Width="4"/> - </dxl:Properties> - <dxl:ProjList> - <dxl:ProjElem ColId="9" Alias="a"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - </dxl:ProjElem> - </dxl:ProjList> - <dxl:Filter> - <dxl:Comparison ComparisonOperator="<" OperatorMdid="0.97.1.0"> - <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> - <dxl:ConstValue TypeMdid="0.23.1.0" Value="10"/> - </dxl:Comparison> - </dxl:Filter> - <dxl:TableDescriptor Mdid="0.16421.1.0" TableName="r1"> - <dxl:Columns> - <dxl:Column ColId="9" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> - <dxl:Column ColId="11" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> - <dxl:Column ColId="12" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> - <dxl:Column ColId="13" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> - <dxl:Column ColId="14" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> - <dxl:Column ColId="15" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> - <dxl:Column ColId="16" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> - <dxl:Column ColId="17" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> - </dxl:Columns> - </dxl:TableDescriptor> - </dxl:TableScan> - </dxl:Sort> - </dxl:Aggregate> - </dxl:RedistributeMotion> - </dxl:Sort> - </dxl:Aggregate> + <dxl:Filter> + <dxl:Comparison ComparisonOperator="<" OperatorMdid="0.97.1.0"> + <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="10"/> + </dxl:Comparison> + </dxl:Filter> + <dxl:TableDescriptor Mdid="0.16421.1.0" TableName="r1"> + <dxl:Columns> + <dxl:Column ColId="9" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="11" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="12" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="13" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="14" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="15" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="16" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="17" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:TableScan> </dxl:HashJoin> </dxl:GatherMotion> </dxl:Plan> diff --git a/src/backend/gporca/data/dxl/minidump/ReplicatedTableWithAggNoMotion.mdp b/src/backend/gporca/data/dxl/minidump/ReplicatedTableWithAggNoMotion.mdp new file mode 100644 index 0000000000..7d05ee29c5 --- /dev/null +++ b/src/backend/gporca/data/dxl/minidump/ReplicatedTableWithAggNoMotion.mdp @@ -0,0 +1,370 @@ +<?xml version="1.0" encoding="UTF-8"?> +<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/"> + <dxl:Comment><![CDATA[ + + CREATE TABLE foo (a int) DISTRIBUTED REPLICATED; + CREATE TABLE bar (c int) DISTRIBUTED REPLICATED; + EXPLAIN INSERT INTO foo(a) SELECT sum(c) FROM bar; + + Objective: Since sum is safe to execute on replicated slices, we don't need a motion + + + test=# EXPLAIN INSERT INTO foo(a) SELECT sum(c) FROM bar; + QUERY PLAN + ----------------------------------------------------------------- + Insert on foo (cost=0.00..431.03 rows=1 width=4) + -> Aggregate (cost=0.00..431.00 rows=3 width=8) + -> Seq Scan on bar (cost=0.00..431.00 rows=3 width=4) + Optimizer: Pivotal Optimizer (GPORCA) + (4 rows) + + ]]> + </dxl:Comment> + <dxl:Thread Id="0"> + <dxl:OptimizerConfig> + <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/> + <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/> + <dxl:CTEConfig CTEInliningCutoff="0"/> + <dxl:WindowOids RowNumber="3100" Rank="3101"/> + <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3"> + <dxl:CostParams> + <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/> + </dxl:CostParams> + </dxl:CostModelConfig> + <dxl:Hint MinNumOfPartsToRequireSortOnInsert="2147483647" JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0"/> + <dxl:TraceFlags Value="102001,102002,102003,102043,102074,102120,102144,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,105000,106000"/> + </dxl:OptimizerConfig> + <dxl:Metadata SystemIds="0.GPDB"> + <dxl:RelationStatistics Mdid="2.222467.1.0" Name="bar" Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/> + <dxl:Relation Mdid="0.222467.1.0" Name="bar" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Replicated" Keys="7,1" NumberLeafPartitions="0"> + <dxl:Columns> + <dxl:Column Name="c" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList/> + <dxl:Triggers/> + <dxl:CheckConstraints/> + </dxl:Relation> + <dxl:Relation Mdid="0.222464.1.0" Name="foo" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Replicated" Keys="7,1" NumberLeafPartitions="0"> + <dxl:Columns> + <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList/> + <dxl:Triggers/> + <dxl:CheckConstraints/> + </dxl:Relation> + <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2222.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/> + <dxl:EqualityOp Mdid="0.91.1.0"/> + <dxl:InequalityOp Mdid="0.85.1.0"/> + <dxl:LessThanOp Mdid="0.58.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/> + <dxl:GreaterThanOp Mdid="0.59.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/> + <dxl:ComparisonOp Mdid="0.1693.1.0"/> + <dxl:ArrayType Mdid="0.1000.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.20.1.0" Name="Int8" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="8" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/> + <dxl:EqualityOp Mdid="0.410.1.0"/> + <dxl:InequalityOp Mdid="0.411.1.0"/> + <dxl:LessThanOp Mdid="0.412.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.414.1.0"/> + <dxl:GreaterThanOp Mdid="0.413.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.415.1.0"/> + <dxl:ComparisonOp Mdid="0.351.1.0"/> + <dxl:ArrayType Mdid="0.1016.1.0"/> + <dxl:MinAgg Mdid="0.2131.1.0"/> + <dxl:MaxAgg Mdid="0.2115.1.0"/> + <dxl:AvgAgg Mdid="0.2100.1.0"/> + <dxl:SumAgg Mdid="0.2107.1.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/> + <dxl:EqualityOp Mdid="0.96.1.0"/> + <dxl:InequalityOp Mdid="0.518.1.0"/> + <dxl:LessThanOp Mdid="0.97.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.523.1.0"/> + <dxl:GreaterThanOp Mdid="0.521.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/> + <dxl:ComparisonOp Mdid="0.351.1.0"/> + <dxl:ArrayType Mdid="0.1007.1.0"/> + <dxl:MinAgg Mdid="0.2132.1.0"/> + <dxl:MaxAgg Mdid="0.2116.1.0"/> + <dxl:AvgAgg Mdid="0.2101.1.0"/> + <dxl:SumAgg Mdid="0.2108.1.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1990.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/> + <dxl:EqualityOp Mdid="0.607.1.0"/> + <dxl:InequalityOp Mdid="0.608.1.0"/> + <dxl:LessThanOp Mdid="0.609.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.611.1.0"/> + <dxl:GreaterThanOp Mdid="0.610.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/> + <dxl:ComparisonOp Mdid="0.356.1.0"/> + <dxl:ArrayType Mdid="0.1028.1.0"/> + <dxl:MinAgg Mdid="0.2134.1.0"/> + <dxl:MaxAgg Mdid="0.2118.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false"> + <dxl:DistrOpfamily Mdid="0.2227.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/> + <dxl:EqualityOp Mdid="0.387.1.0"/> + <dxl:InequalityOp Mdid="0.402.1.0"/> + <dxl:LessThanOp Mdid="0.2799.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/> + <dxl:GreaterThanOp Mdid="0.2800.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/> + <dxl:ComparisonOp Mdid="0.2794.1.0"/> + <dxl:ArrayType Mdid="0.1010.1.0"/> + <dxl:MinAgg Mdid="0.2798.1.0"/> + <dxl:MaxAgg Mdid="0.2797.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2226.1.0"/> + <dxl:EqualityOp Mdid="0.385.1.0"/> + <dxl:InequalityOp Mdid="0.0.0.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1012.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2225.1.0"/> + <dxl:EqualityOp Mdid="0.352.1.0"/> + <dxl:InequalityOp Mdid="0.3315.1.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1011.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:ColumnStatistics Mdid="1.222467.1.0.0" Name="c" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:GPDBAgg Mdid="0.2108.1.0" Name="sum" IsSplittable="true" HashAggCapable="true"> + <dxl:ResultType Mdid="0.20.1.0"/> + <dxl:IntermediateResultType Mdid="0.20.1.0"/> + </dxl:GPDBAgg> + <dxl:GPDBFunc Mdid="0.480.1.0" Name="int4" ReturnsSet="false" Stability="Immutable" DataAccess="NoSQL" IsStrict="true" IsNDVPreserving="false" IsAllowedForPS="true"> + <dxl:ResultType Mdid="0.23.1.0"/> + </dxl:GPDBFunc> + </dxl:Metadata> + <dxl:Query> + <dxl:OutputColumns> + <dxl:Ident ColId="10" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:OutputColumns> + <dxl:CTEList/> + <dxl:LogicalInsert InsertColumns="10"> + <dxl:TableDescriptor Mdid="0.222464.1.0" TableName="foo" LockMode="3"> + <dxl:Columns> + <dxl:Column ColId="11" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="12" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="13" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="14" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="15" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="16" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="17" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="18" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + <dxl:LogicalProject> + <dxl:ProjList> + <dxl:ProjElem ColId="10" Alias="a"> + <dxl:FuncExpr FuncId="0.480.1.0" FuncRetSet="false" TypeMdid="0.23.1.0"> + <dxl:Ident ColId="9" ColName="sum" TypeMdid="0.20.1.0"/> + </dxl:FuncExpr> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:LogicalGroupBy> + <dxl:GroupingColumns/> + <dxl:ProjList> + <dxl:ProjElem ColId="9" Alias="sum"> + <dxl:AggFunc AggMdid="0.2108.1.0" AggDistinct="false" AggStage="Normal" AggKind="n" AggArgTypes="23"> + <dxl:ValuesList ParamType="aggargs"> + <dxl:Ident ColId="1" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ValuesList> + <dxl:ValuesList ParamType="aggdirectargs"/> + <dxl:ValuesList ParamType="aggorder"/> + <dxl:ValuesList ParamType="aggdistinct"/> + </dxl:AggFunc> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:LogicalGet> + <dxl:TableDescriptor Mdid="0.222467.1.0" TableName="bar" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="1" Attno="1" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="3" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:LogicalGet> + </dxl:LogicalGroupBy> + </dxl:LogicalProject> + </dxl:LogicalInsert> + </dxl:Query> + <dxl:Plan Id="0" SpaceSize="5"> + <dxl:DMLInsert Columns="9" ActionCol="11" OidCol="0" CtidCol="0" SegmentIdCol="0" InputSorted="false"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.031385" Rows="3.000000" Width="4"/> + </dxl:Properties> + <dxl:DirectDispatchInfo/> + <dxl:ProjList> + <dxl:ProjElem ColId="9" Alias="a"> + <dxl:Ident ColId="9" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:TableDescriptor Mdid="0.222464.1.0" TableName="foo" LockMode="3"> + <dxl:Columns> + <dxl:Column ColId="12" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="13" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="14" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="15" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="16" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="17" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="18" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="19" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + <dxl:Result> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000135" Rows="3.000000" Width="8"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="9" Alias="a"> + <dxl:FuncExpr FuncId="0.480.1.0" FuncRetSet="false" TypeMdid="0.23.1.0"> + <dxl:Ident ColId="8" ColName="sum" TypeMdid="0.20.1.0"/> + </dxl:FuncExpr> + </dxl:ProjElem> + <dxl:ProjElem ColId="11" Alias="ColRef_0011"> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:OneTimeFilter/> + <dxl:Aggregate AggregationStrategy="Plain" StreamSafe="false"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000027" Rows="3.000000" Width="8"/> + </dxl:Properties> + <dxl:GroupingColumns/> + <dxl:ProjList> + <dxl:ProjElem ColId="8" Alias="sum"> + <dxl:AggFunc AggMdid="0.2108.1.0" AggDistinct="false" AggStage="Normal" AggKind="n" AggArgTypes="23"> + <dxl:ValuesList ParamType="aggargs"> + <dxl:Ident ColId="0" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ValuesList> + <dxl:ValuesList ParamType="aggdirectargs"/> + <dxl:ValuesList ParamType="aggorder"/> + <dxl:ValuesList ParamType="aggdistinct"/> + </dxl:AggFunc> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:TableScan> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000019" Rows="3.000000" Width="4"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="c"> + <dxl:Ident ColId="0" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:TableDescriptor Mdid="0.222467.1.0" TableName="bar" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="0" Attno="1" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="1" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="2" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="3" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:TableScan> + </dxl:Aggregate> + </dxl:Result> + </dxl:DMLInsert> + </dxl:Plan> + </dxl:Thread> +</dxl:DXLMessage> diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CDrvdPropScalar.h b/src/backend/gporca/libgpopt/include/gpopt/base/CDrvdPropScalar.h index a9a93513d5..395b3f794f 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/base/CDrvdPropScalar.h +++ b/src/backend/gporca/libgpopt/include/gpopt/base/CDrvdPropScalar.h @@ -55,6 +55,7 @@ class CDrvdPropScalar : public CDrvdProp EdptFHasMultipleDistinctAggs, EdptFHasScalarArrayCmp, EdptFHasScalarFuncProject, + EdptFContainsOnlyReplicationSafeAggFuncs, EdptSentinel }; @@ -99,6 +100,9 @@ private: // does expression contain ScalarArrayCmp generated for "scalar op ANY/ALL (array)" construct BOOL m_fHasScalarArrayCmp; + // does expression contain only replication safe agg funcs + BOOL m_fContainsOnlyReplicationSafeAggFuncs; + // Have all the properties been derived? // // NOTE1: This is set ONLY when Derive() is called. If all the properties @@ -133,6 +137,8 @@ protected: BOOL DeriveHasMultipleDistinctAggs(CExpressionHandle &); + BOOL DeriveContainsOnlyReplicationSafeAggFuncs(CExpressionHandle &); + BOOL DeriveHasScalarArrayCmp(CExpressionHandle &); ULONG DeriveTotalOrderedAggs(CExpressionHandle &); @@ -196,6 +202,8 @@ public: BOOL HasScalarArrayCmp() const; + BOOL ContainsOnlyReplicationSafeAggFuncs() const; + // short hand for conversion static CDrvdPropScalar *GetDrvdScalarProps(CDrvdProp *pdp); diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h b/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h index b8a54ca63e..94cc391f6d 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h +++ b/src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h @@ -359,6 +359,11 @@ public: // check if the aggregate is local or global static BOOL FHasGlobalAggFunc(const CExpression *pexprProjList); + // check if given project list has only aggregate functions + // that can be safely executed on replicated slices + static BOOL FContainsOnlyReplicationSafeAggFuncs( + const CExpression *pexprProjList); + // generate a bool expression static CExpression *PexprScalarConstBool(CMemoryPool *mp, BOOL value, BOOL is_null = false); diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpression.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpression.h index e88aaf9577..a1888eab5f 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpression.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpression.h @@ -311,6 +311,7 @@ public: BOOL DeriveHasMultipleDistinctAggs(); BOOL DeriveHasScalarArrayCmp(); BOOL DeriveHasScalarFuncProject(); + BOOL DeriveContainsOnlyReplicationSafeAggFuncs(); ULONG DeriveTotalOrderedAggs(); }; // class CExpression diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h index af3a05a4f6..9046469077 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CExpressionHandle.h @@ -338,6 +338,7 @@ public: BOOL DeriveHasMultipleDistinctAggs(ULONG child_index) const; BOOL DeriveHasScalarArrayCmp(ULONG child_index) const; BOOL DeriveHasScalarFuncProject(ULONG child_index) const; + BOOL DeriveContainsOnlyReplicationSafeAggFuncs(ULONG child_index) const; }; // class CExpressionHandle diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CScalarProjectList.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CScalarProjectList.h index 67e6b872a7..37de2efa57 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CScalarProjectList.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CScalarProjectList.h @@ -99,6 +99,9 @@ public: // check if a project list has a scalar func static BOOL FHasScalarFunc(CExpressionHandle &exprhdl); + // check if a project list has only replication safe agg funcs + static BOOL FContainsOnlyReplicationSafeAggFuncs( + CExpressionHandle &exprhdl); }; // class CScalarProjectList } // namespace gpopt diff --git a/src/backend/gporca/libgpopt/src/base/CDrvdPropScalar.cpp b/src/backend/gporca/libgpopt/src/base/CDrvdPropScalar.cpp index 3114c59fd2..9ef03f3889 100644 --- a/src/backend/gporca/libgpopt/src/base/CDrvdPropScalar.cpp +++ b/src/backend/gporca/libgpopt/src/base/CDrvdPropScalar.cpp @@ -105,6 +105,8 @@ CDrvdPropScalar::Derive(CMemoryPool *, CExpressionHandle &exprhdl, DeriveHasScalarArrayCmp(exprhdl); + DeriveContainsOnlyReplicationSafeAggFuncs(exprhdl); + m_is_complete = true; } @@ -429,6 +431,31 @@ CDrvdPropScalar::DeriveTotalOrderedAggs(CExpressionHandle &exprhdl) return m_ulOrderedAggs; } +BOOL +CDrvdPropScalar::ContainsOnlyReplicationSafeAggFuncs() const +{ + GPOS_RTL_ASSERT(IsComplete()); + return m_fContainsOnlyReplicationSafeAggFuncs; +} + +BOOL +CDrvdPropScalar::DeriveContainsOnlyReplicationSafeAggFuncs( + CExpressionHandle &exprhdl) +{ + if (!m_is_prop_derived->ExchangeSet( + EdptFContainsOnlyReplicationSafeAggFuncs)) + { + if (COperator::EopScalarProjectList == exprhdl.Pop()->Eopid()) + { + m_fContainsOnlyReplicationSafeAggFuncs = + CScalarProjectList::FContainsOnlyReplicationSafeAggFuncs( + exprhdl); + } + } + return m_fContainsOnlyReplicationSafeAggFuncs; +} + + //--------------------------------------------------------------------------- // @function: // CDrvdPropScalar::OsPrint diff --git a/src/backend/gporca/libgpopt/src/operators/CExpression.cpp b/src/backend/gporca/libgpopt/src/operators/CExpression.cpp index a8e534bc88..8de663ffa7 100644 --- a/src/backend/gporca/libgpopt/src/operators/CExpression.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CExpression.cpp @@ -1548,4 +1548,11 @@ CExpression::DeriveTotalOrderedAggs() exprhdl.Attach(this); return m_pdpscalar->DeriveTotalOrderedAggs(exprhdl); } +BOOL +CExpression::DeriveContainsOnlyReplicationSafeAggFuncs() +{ + CExpressionHandle exprhdl(m_mp); + exprhdl.Attach(this); + return m_pdpscalar->DeriveContainsOnlyReplicationSafeAggFuncs(exprhdl); +} // EOF diff --git a/src/backend/gporca/libgpopt/src/operators/CExpressionHandle.cpp b/src/backend/gporca/libgpopt/src/operators/CExpressionHandle.cpp index 13f2e9283a..f28d38249d 100644 --- a/src/backend/gporca/libgpopt/src/operators/CExpressionHandle.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CExpressionHandle.cpp @@ -2126,4 +2126,18 @@ CExpressionHandle::DeriveHasScalarFuncProject(ULONG child_index) const return GetDrvdScalarProps(child_index)->HasScalarFuncProject(); } + +BOOL +CExpressionHandle::DeriveContainsOnlyReplicationSafeAggFuncs( + ULONG child_index) const +{ + if (nullptr != Pexpr()) + { + return (*Pexpr())[child_index] + ->DeriveContainsOnlyReplicationSafeAggFuncs(); + } + + return GetDrvdScalarProps(child_index) + ->ContainsOnlyReplicationSafeAggFuncs(); +} // EOF diff --git a/src/backend/gporca/libgpopt/src/operators/COrderedAggPreprocessor.cpp b/src/backend/gporca/libgpopt/src/operators/COrderedAggPreprocessor.cpp index c078a8d6f5..f135944cdf 100644 --- a/src/backend/gporca/libgpopt/src/operators/COrderedAggPreprocessor.cpp +++ b/src/backend/gporca/libgpopt/src/operators/COrderedAggPreprocessor.cpp @@ -285,8 +285,11 @@ COrderedAggPreprocessor::SplitPrjList( mp, (cast_func->Mdname().GetMDName())->GetBuffer()); mdid_func->AddRef(); cast_func->GetResultTypeMdid()->AddRef(); + // MERGE_FIXME: use the COERCE_EXPLICIT_CAST + // Is it necessary to keep `m_func_format` in `CScalarFunc`? CScalarFunc *popCastScalarFunc = GPOS_NEW(mp) CScalarFunc( - mp, mdid_func, cast_func->GetResultTypeMdid(), -1, pstrFunc); + mp, mdid_func, cast_func->GetResultTypeMdid(), -1, pstrFunc, + 1); CExpression *pexprCastScalarIdent = GPOS_NEW(mp) CExpression(mp, popCastScalarFunc, pexprScalarIdentSum); CExpressionArray *colref_array1 = GPOS_NEW(mp) CExpressionArray(mp); diff --git a/src/backend/gporca/libgpopt/src/operators/CPhysicalAgg.cpp b/src/backend/gporca/libgpopt/src/operators/CPhysicalAgg.cpp index c20d68cc42..b1ff9fdee9 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPhysicalAgg.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPhysicalAgg.cpp @@ -483,6 +483,15 @@ CPhysicalAgg::PdsDerive(CMemoryPool *mp, CExpressionHandle &exprhdl) const } else if (CDistributionSpec::EdtStrictReplicated == pds->Edt()) { + // Aggregate functions that are not sensitive to the order of data (eg: sum, avg, min, max, count) + // can be executed safely on replicated slices and do not need to be broadcasted/gathered, allowing + // for more performant plans in some cases + if (exprhdl.DeriveContainsOnlyReplicationSafeAggFuncs(1)) + { + return GPOS_NEW(mp) CDistributionSpecReplicated( + CDistributionSpec::EdtStrictReplicated); + } + // Aggregate functions which are not trivial and which are sensitive to // the order of their input cannot guarantee replicated data. If the child // was replicated, we can no longer guarantee that property. Therefore diff --git a/src/backend/gporca/libgpopt/src/operators/CScalarProjectList.cpp b/src/backend/gporca/libgpopt/src/operators/CScalarProjectList.cpp index 40e768e018..92cf701cf7 100644 --- a/src/backend/gporca/libgpopt/src/operators/CScalarProjectList.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CScalarProjectList.cpp @@ -19,6 +19,7 @@ #include "gpopt/operators/CExpressionHandle.h" #include "gpopt/operators/CScalarWindowFunc.h" #include "gpopt/xforms/CXformUtils.h" +#include "naucrates/md/CMDTypeInt4GPDB.h" using namespace gpopt; @@ -225,4 +226,52 @@ CScalarProjectList::FHasScalarFunc(CExpressionHandle &exprhdl) } +//--------------------------------------------------------------------------- +// @function: +// CScalarProjectList::FContainsOnlyReplicationSafeAggFuncs +// +// @doc: +// Check if given project list contains only replication safe agg funcs, +// which allows it to be executed safely on replicated slices. +// +//--------------------------------------------------------------------------- +BOOL +CScalarProjectList::FContainsOnlyReplicationSafeAggFuncs( + CExpressionHandle &exprhdl) +{ + // We make do with an inexact representative expression returned by exprhdl.PexprScalarRep(), + // knowing that at this time, aggregate functions are accurately contained in it. What's not + // exact are subqueries. This is better than just returning 0 for project lists with subqueries. + CExpression *pexprPrjList = exprhdl.PexprScalarRep(); + + GPOS_ASSERT(nullptr != pexprPrjList); + GPOS_ASSERT(COperator::EopScalarProjectList == + pexprPrjList->Pop()->Eopid()); + + const ULONG arity = pexprPrjList->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprPrjEl = (*pexprPrjList)[ul]; + CExpression *pexprAggFunc = (*pexprPrjEl)[0]; + if (EopScalarAggFunc != pexprAggFunc->Pop()->Eopid()) + { + continue; + } + CScalarAggFunc *popScAggFunc = + CScalarAggFunc::PopConvert(pexprAggFunc->Pop()); + OID safe_oid = CMDIdGPDB::CastMdid(popScAggFunc->MDId())->Oid(); + + // We use an allow-list approach here. While there are other functions that can be + // safely replicated, users could create custom agg funcs that could lead to wrong results + if (!(safe_oid == GPDB_INT4_AGG_MIN || safe_oid == GPDB_INT4_AGG_MAX || + safe_oid == GPDB_INT4_AGG_AVG || safe_oid == GPDB_INT4_AGG_SUM || + safe_oid == GPDB_INT4_AGG_COUNT || safe_oid == GPDB_COUNT_STAR)) + { + return false; + } + } + + return true; +} + // EOF diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDTypeOidGPDB.h b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDTypeOidGPDB.h index 17419325f0..5af87fc31c 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/md/CMDTypeOidGPDB.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/md/CMDTypeOidGPDB.h @@ -32,8 +32,8 @@ #define GPDB_OID_COMP_OP OID(356) #define GPDB_OID_HASH_OP OID(0) -#define GPDB_OID_AGG_MIN OID(2118) -#define GPDB_OID_AGG_MAX OID(2134) +#define GPDB_OID_AGG_MIN OID(2134) +#define GPDB_OID_AGG_MAX OID(2118) #define GPDB_OID_AGG_AVG OID(0) #define GPDB_OID_AGG_SUM OID(0) #define GPDB_OID_AGG_COUNT OID(2147) diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index 799bec8da1..2a90aeb108 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -331,7 +331,7 @@ ReplicatedTableInClause ReplicatedTableSequenceInsert; CTaintedReplicatedTest: InsertNonSingleton NonSingleton TaintedReplicatedAgg TaintedReplicatedWindowAgg TaintedReplicatedLimit TaintedReplicatedFilter -InsertReplicatedIntoSerialHashDistributedTable TaintedReplicatedTablesCTE; +InsertReplicatedIntoSerialHashDistributedTable TaintedReplicatedTablesCTE ReplicatedTableWithAggNoMotion; CDqaTest: NonSplittableAgg DqaHavingMax DqaMax DqaMin DqaSubqueryMax DqaNoRedistribute; diff --git a/src/test/regress/expected/rpt_optimizer.out b/src/test/regress/expected/rpt_optimizer.out index d64f039a41..fdd9c51844 100644 --- a/src/test/regress/expected/rpt_optimizer.out +++ b/src/test/regress/expected/rpt_optimizer.out @@ -1125,23 +1125,16 @@ analyze rand_tab; -- -- join derives EdtHashed explain select c from rep_tab where c in (select distinct c from rep_tab); - QUERY PLAN -------------------------------------------------------------------------------------------------------- - Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..862.00 rows=2 width=4) - -> Hash Semi Join (cost=0.00..862.00 rows=1 width=4) + QUERY PLAN +------------------------------------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) (cost=0.00..862.00 rows=2 width=4) + -> Hash Semi Join (cost=0.00..862.00 rows=6 width=4) Hash Cond: (rep_tab.c = rep_tab_1.c) - -> Result (cost=0.00..431.00 rows=1 width=4) - -> Seq Scan on rep_tab (cost=0.00..431.00 rows=2 width=4) - -> Hash (cost=431.00..431.00 rows=1 width=4) - -> Redistribute Motion 1:3 (slice2; segments: 1) (cost=0.00..431.00 rows=1 width=4) - Hash Key: rep_tab_1.c - -> GroupAggregate (cost=0.00..431.00 rows=6 width=4) - Group Key: rep_tab_1.c - -> Sort (cost=0.00..431.00 rows=6 width=4) - Sort Key: rep_tab_1.c - -> Seq Scan on rep_tab rep_tab_1 (cost=0.00..431.00 rows=6 width=4) + -> Seq Scan on rep_tab (cost=0.00..431.00 rows=6 width=4) + -> Hash (cost=431.00..431.00 rows=6 width=4) + -> Seq Scan on rep_tab rep_tab_1 (cost=0.00..431.00 rows=6 width=4) Optimizer: Pivotal Optimizer (GPORCA) -(14 rows) +(7 rows) select c from rep_tab where c in (select distinct c from rep_tab); c --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
