This is an automated email from the ASF dual-hosted git repository. jiaqizho pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 7a8da0708b50dd9ac6bc2a61551e3cdfb8682720 Author: gpopt <103469259+gp...@users.noreply.github.com> AuthorDate: Wed Dec 21 13:42:27 2022 -0800 Add a GUC to discard redistribute hashjoin for Orca (#14642) Add a GUC `optimizer_discard_redistribute_hashjoin` to control redistribute hashjoin. If any one of HashJoin's children operators is RedistributeMotion, this plan alternative will be given MAX cost, hence it will not be picked by optimizer. Default value is `OFF`. --- src/backend/gpopt/config/CConfigParamMapping.cpp | 6 + .../dxl/minidump/DiscardRedistributeHashJoin.mdp | 423 +++++++++++++++++++++ .../gporca/libgpdbcost/src/CCostModelGPDB.cpp | 13 + .../include/naucrates/traceflags/traceflags.h | 3 + src/backend/gporca/server/CMakeLists.txt | 2 +- src/backend/utils/misc/guc_gp.c | 11 + src/include/utils/guc.h | 1 + src/include/utils/unsync_guc_name.h | 2 +- 8 files changed, 459 insertions(+), 2 deletions(-) diff --git a/src/backend/gpopt/config/CConfigParamMapping.cpp b/src/backend/gpopt/config/CConfigParamMapping.cpp index 48061c9e56..f0364f29b5 100644 --- a/src/backend/gpopt/config/CConfigParamMapping.cpp +++ b/src/backend/gpopt/config/CConfigParamMapping.cpp @@ -211,6 +211,12 @@ CConfigParamMapping::SConfigMappingElem CConfigParamMapping::m_elements[] = { GPOS_WSZ_LIT( "Enable plan alternatives where NLJ's outer child is replicated")}, + {EopttraceDiscardRedistributeHashJoin, + &optimizer_discard_redistribute_hashjoin, + false, // m_negate_param + GPOS_WSZ_LIT( + "Discard plan alternatives where hash join has a redistribute motion child")}, + {EopttraceMotionHazardHandling, &optimizer_enable_streaming_material, false, // m_fNegate GPOS_WSZ_LIT( diff --git a/src/backend/gporca/data/dxl/minidump/DiscardRedistributeHashJoin.mdp b/src/backend/gporca/data/dxl/minidump/DiscardRedistributeHashJoin.mdp new file mode 100644 index 0000000000..f32a8809c7 --- /dev/null +++ b/src/backend/gporca/data/dxl/minidump/DiscardRedistributeHashJoin.mdp @@ -0,0 +1,423 @@ +<?xml version="1.0" encoding="UTF-8"?> +<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/"> + <dxl:Comment><![CDATA[ + Make sure that optimizer doesn't pick the redistribute hashjoin plan + when the GUC `optimizer_discard_redistribute_hashjoin` is turned on. + + create table foo(a int, b int, c int); + create table bar(a int, b int, c int); + set optimizer_discard_redistribute_hashjoin=on; + explain select * from foo, bar where foo.b=bar.b; + QUERY PLAN + ------------------------------------------------------------------------------------------- + Hash Join (cost=0.00..862.00 rows=1 width=24) + Hash Cond: (foo.b = bar.b) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on foo (cost=0.00..431.00 rows=1 width=12) + -> Hash (cost=431.00..431.00 rows=1 width=12) + -> Gather Motion 3:1 (slice2; segments: 3) (cost=0.00..431.00 rows=1 width=12) + -> Seq Scan on bar (cost=0.00..431.00 rows=1 width=12) + Optimizer: Pivotal Optimizer (GPORCA) + ]]> + </dxl:Comment> + <dxl:Thread Id="0"> + <dxl:OptimizerConfig> + <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/> + <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/> + <dxl:CTEConfig CTEInliningCutoff="0"/> + <dxl:WindowOids RowNumber="3100" Rank="3101"/> + <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3"> + <dxl:CostParams> + <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/> + </dxl:CostParams> + </dxl:CostModelConfig> + <dxl:Hint JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="100" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0"/> + <dxl:TraceFlags Value="102074,102120,102146,102155,102156,103001,103014,103022,103027,103029,103038,103041,103044,104002,104003,104004,104005,105000,106000"/> + </dxl:OptimizerConfig> + <dxl:Metadata SystemIds="0.GPDB"> + <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2222.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/> + <dxl:EqualityOp Mdid="0.91.1.0"/> + <dxl:InequalityOp Mdid="0.85.1.0"/> + <dxl:LessThanOp Mdid="0.58.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/> + <dxl:GreaterThanOp Mdid="0.59.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/> + <dxl:ComparisonOp Mdid="0.1693.1.0"/> + <dxl:ArrayType Mdid="0.1000.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/> + <dxl:EqualityOp Mdid="0.96.1.0"/> + <dxl:InequalityOp Mdid="0.518.1.0"/> + <dxl:LessThanOp Mdid="0.97.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.523.1.0"/> + <dxl:GreaterThanOp Mdid="0.521.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/> + <dxl:ComparisonOp Mdid="0.351.1.0"/> + <dxl:ArrayType Mdid="0.1007.1.0"/> + <dxl:MinAgg Mdid="0.2132.1.0"/> + <dxl:MaxAgg Mdid="0.2116.1.0"/> + <dxl:AvgAgg Mdid="0.2101.1.0"/> + <dxl:SumAgg Mdid="0.2108.1.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1990.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/> + <dxl:EqualityOp Mdid="0.607.1.0"/> + <dxl:InequalityOp Mdid="0.608.1.0"/> + <dxl:LessThanOp Mdid="0.609.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.611.1.0"/> + <dxl:GreaterThanOp Mdid="0.610.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/> + <dxl:ComparisonOp Mdid="0.356.1.0"/> + <dxl:ArrayType Mdid="0.1028.1.0"/> + <dxl:MinAgg Mdid="0.2134.1.0"/> + <dxl:MaxAgg Mdid="0.2118.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false"> + <dxl:DistrOpfamily Mdid="0.7077.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/> + <dxl:EqualityOp Mdid="0.387.1.0"/> + <dxl:InequalityOp Mdid="0.402.1.0"/> + <dxl:LessThanOp Mdid="0.2799.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/> + <dxl:GreaterThanOp Mdid="0.2800.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/> + <dxl:ComparisonOp Mdid="0.2794.1.0"/> + <dxl:ArrayType Mdid="0.1010.1.0"/> + <dxl:MinAgg Mdid="0.2798.1.0"/> + <dxl:MaxAgg Mdid="0.2797.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2226.1.0"/> + <dxl:EqualityOp Mdid="0.385.1.0"/> + <dxl:InequalityOp Mdid="0.0.0.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1012.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2225.1.0"/> + <dxl:EqualityOp Mdid="0.352.1.0"/> + <dxl:InequalityOp Mdid="0.0.0.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1011.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:RelationStatistics Mdid="2.337819.1.0" Name="foo" Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/> + <dxl:Relation Mdid="6.337819.1.0" Name="foo" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="9,3" NumberLeafPartitions="0"> + <dxl:Columns> + <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="b" Attno="2" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="c" Attno="3" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-3" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-4" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-5" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-6" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-7" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-8" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList/> + <dxl:CheckConstraints/> + <dxl:DistrOpfamilies> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + </dxl:DistrOpfamilies> + </dxl:Relation> + <dxl:RelationStatistics Mdid="2.337822.1.0" Name="bar" Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/> + <dxl:Relation Mdid="6.337822.1.0" Name="bar" IsTemporary="false" HasOids="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="9,3" NumberLeafPartitions="0"> + <dxl:Columns> + <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="b" Attno="2" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="c" Attno="3" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-3" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-4" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-5" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-6" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-7" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-8" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList/> + <dxl:CheckConstraints/> + <dxl:DistrOpfamilies> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + </dxl:DistrOpfamilies> + </dxl:Relation> + <dxl:MDCast Mdid="3.23.1.0;23.1.0" Name="int4" BinaryCoercible="true" SourceTypeId="0.23.1.0" DestinationTypeId="0.23.1.0" CastFuncId="0.0.0.0" CoercePathType="0"/> + <dxl:ColumnStatistics Mdid="1.337822.1.0.1" Name="b" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:ColumnStatistics Mdid="1.337822.1.0.0" Name="a" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false"> + <dxl:LeftType Mdid="0.23.1.0"/> + <dxl:RightType Mdid="0.23.1.0"/> + <dxl:ResultType Mdid="0.16.1.0"/> + <dxl:OpFunc Mdid="0.65.1.0"/> + <dxl:Commutator Mdid="0.96.1.0"/> + <dxl:InverseOp Mdid="0.518.1.0"/> + <dxl:HashOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/> + <dxl:Opfamilies> + <dxl:Opfamily Mdid="0.1976.1.0"/> + <dxl:Opfamily Mdid="0.1977.1.0"/> + <dxl:Opfamily Mdid="0.7027.1.0"/> + <dxl:Opfamily Mdid="0.7100.1.0"/> + </dxl:Opfamilies> + </dxl:GPDBScalarOp> + <dxl:ColumnStatistics Mdid="1.337819.1.0.1" Name="b" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:ColumnStatistics Mdid="1.337819.1.0.0" Name="a" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + </dxl:Metadata> + <dxl:Query> + <dxl:OutputColumns> + <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="3" ColName="c" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="11" ColName="a" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="12" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="13" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:OutputColumns> + <dxl:CTEList/> + <dxl:LogicalJoin JoinType="Inner"> + <dxl:LogicalGet> + <dxl:TableDescriptor Mdid="6.337819.1.0" TableName="foo"> + <dxl:Columns> + <dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="3" Attno="3" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="5" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="9" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="10" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:LogicalGet> + <dxl:LogicalGet> + <dxl:TableDescriptor Mdid="6.337822.1.0" TableName="bar"> + <dxl:Columns> + <dxl:Column ColId="11" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="12" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="13" Attno="3" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="14" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="15" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="16" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="17" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="18" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="19" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="20" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:LogicalGet> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="12" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:Comparison> + </dxl:LogicalJoin> + </dxl:Query> + <dxl:Plan Id="0" SpaceSize="12"> + <dxl:HashJoin JoinType="Inner"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="862.000757" Rows="1.000000" Width="24"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="2" Alias="c"> + <dxl:Ident ColId="2" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="10" Alias="a"> + <dxl:Ident ColId="10" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="11" Alias="b"> + <dxl:Ident ColId="11" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="12" Alias="c"> + <dxl:Ident ColId="12" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:JoinFilter/> + <dxl:HashCondList> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="11" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:Comparison> + </dxl:HashCondList> + <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000060" Rows="1.000000" Width="12"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="2" Alias="c"> + <dxl:Ident ColId="2" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:SortingColumnList/> + <dxl:TableScan> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000008" Rows="1.000000" Width="12"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="2" Alias="c"> + <dxl:Ident ColId="2" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:TableDescriptor Mdid="6.337819.1.0" TableName="foo"> + <dxl:Columns> + <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="1" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="3" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="3" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="4" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="9" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:TableScan> + </dxl:GatherMotion> + <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000060" Rows="1.000000" Width="12"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="10" Alias="a"> + <dxl:Ident ColId="10" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="11" Alias="b"> + <dxl:Ident ColId="11" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="12" Alias="c"> + <dxl:Ident ColId="12" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:SortingColumnList/> + <dxl:TableScan> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="431.000008" Rows="1.000000" Width="12"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="10" Alias="a"> + <dxl:Ident ColId="10" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="11" Alias="b"> + <dxl:Ident ColId="11" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="12" Alias="c"> + <dxl:Ident ColId="12" ColName="c" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:TableDescriptor Mdid="6.337822.1.0" TableName="bar"> + <dxl:Columns> + <dxl:Column ColId="10" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="11" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="12" Attno="3" ColName="c" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="13" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="14" Attno="-3" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="15" Attno="-4" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="16" Attno="-5" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="17" Attno="-6" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="18" Attno="-7" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="19" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:TableScan> + </dxl:GatherMotion> + </dxl:HashJoin> + </dxl:Plan> + </dxl:Thread> +</dxl:DXLMessage> diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp index cd98eb078e..fbd818b009 100644 --- a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp +++ b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp @@ -991,6 +991,19 @@ CCostModelGPDB::CostHashJoin(CMemoryPool *mp, CExpressionHandle &exprhdl, CDouble skew_ratio = 1; ULONG arity = exprhdl.Arity(); + if (GPOS_FTRACE(EopttraceDiscardRedistributeHashJoin)) + { + for (ULONG ul = 0; ul < arity - 1; ul++) + { + COperator *popChild = exprhdl.Pop(ul); + if (nullptr != popChild && + COperator::EopPhysicalMotionHashDistribute == popChild->Eopid()) + { + return CCost(GPOS_FP_ABS_MAX); + } + } + } + // Hashjoin with skewed HashRedistribute below them are expensive // find out if there is a skewed redistribute child of this HashJoin. if (!GPOS_FTRACE(EopttracePenalizeSkewedHashJoin)) diff --git a/src/backend/gporca/libnaucrates/include/naucrates/traceflags/traceflags.h b/src/backend/gporca/libnaucrates/include/naucrates/traceflags/traceflags.h index d1bc807e8d..f8c8ccc57c 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/traceflags/traceflags.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/traceflags/traceflags.h @@ -218,6 +218,9 @@ enum EOptTraceFlag // Enable use the distribution key in DQA EopttraceEnableUseDistributionInDQA = 103042, + // Discard HashJoin with RedistributeMotion nodes + EopttraceDiscardRedistributeHashJoin = 103044, + /////////////////////////////////////////////////////// ///////////////////// statistics flags //////////////// ////////////////////////////////////////////////////// diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index f8f43d10f5..c46c505697 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -309,7 +309,7 @@ JOIN-NonRedistributableCol DQA-NonRedistributableCol MotionHazard-NoMaterializeHashAggUnderResult; CInvalidCostContextTest: -SpoolShouldInvalidateUnresolvedDynamicScans; +SpoolShouldInvalidateUnresolvedDynamicScans DiscardRedistributeHashJoin; CRandomDataInsertionTest: AddRedistributeBeforeInsert-1 AddRedistributeBeforeInsert-2 AddRedistributeBeforeInsert-3 diff --git a/src/backend/utils/misc/guc_gp.c b/src/backend/utils/misc/guc_gp.c index f816a4b9b2..f48d3d388e 100644 --- a/src/backend/utils/misc/guc_gp.c +++ b/src/backend/utils/misc/guc_gp.c @@ -328,6 +328,7 @@ bool optimizer_enable_multiple_distinct_aggs; bool optimizer_enable_direct_dispatch; bool optimizer_enable_hashjoin_redistribute_broadcast_children; bool optimizer_enable_broadcast_nestloop_outer_child; +bool optimizer_discard_redistribute_hashjoin; bool optimizer_enable_streaming_material; bool optimizer_enable_gather_on_segment_for_dml; bool optimizer_enable_assert_maxonerow; @@ -2447,6 +2448,16 @@ struct config_bool ConfigureNamesBool_gp[] = true, NULL, NULL, NULL }, + { + {"optimizer_discard_redistribute_hashjoin", PGC_USERSET, DEVELOPER_OPTIONS, + gettext_noop("Discard hash join with redistribute motion in the optimizer."), + NULL, + GUC_NOT_IN_SAMPLE + }, + &optimizer_discard_redistribute_hashjoin, + false, + NULL, NULL, NULL + }, { {"optimizer_expand_fulljoin", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Enables the optimizer's support of expanding full outer joins using union all."), diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index bf5a84fdb5..ed1c51bc5b 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -507,6 +507,7 @@ extern bool optimizer_enable_outerjoin_rewrite; extern bool optimizer_enable_multiple_distinct_aggs; extern bool optimizer_enable_hashjoin_redistribute_broadcast_children; extern bool optimizer_enable_broadcast_nestloop_outer_child; +extern bool optimizer_discard_redistribute_hashjoin; extern bool optimizer_enable_streaming_material; extern bool optimizer_enable_gather_on_segment_for_dml; extern bool optimizer_enable_assert_maxonerow; diff --git a/src/include/utils/unsync_guc_name.h b/src/include/utils/unsync_guc_name.h index a66559f0ab..5df84dedc2 100644 --- a/src/include/utils/unsync_guc_name.h +++ b/src/include/utils/unsync_guc_name.h @@ -403,7 +403,7 @@ "optimizer_enable_hashagg", "optimizer_enable_hashjoin", "optimizer_enable_hashjoin_redistribute_broadcast_children", - "optimizer_enable_nljoin", + "optimizer_discard_redistribute_hashjoin", "optimizer_enable_indexjoin", "optimizer_enable_indexonlyscan", "optimizer_enable_indexscan", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@cloudberry.apache.org For additional commands, e-mail: commits-h...@cloudberry.apache.org