This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit d0d363e4e5a4293f8689d491f83df2bd99b80e21 Author: hari krishna <[email protected]> AuthorDate: Fri Feb 17 16:06:59 2023 +0530 Support HashIndexes in Orca Currently ORCA is not using hash indexes while generating plan. This PR allows to generate plan with hashindexes in ORCA. - Push down the hashindex type to dxl - Use hash indexes at CXformSelect2BitmapBoolOp, ExfSelect2IndexGet, ExfSelect2DynamicIndexGet, ExfSelect2DynamicBitmapBoolOp to generate plane - Generate a plan with indexscan by using hash indexes when bitmapscan was turned off as like planner. --- .../gpopt/translate/CTranslatorRelcacheToDXL.cpp | 4 + .../gporca/data/dxl/minidump/Hash-BitmapScan.mdp | 290 +++++++++++++++++++ .../gporca/data/dxl/minidump/Hash-IndexScan.mdp | 280 +++++++++++++++++++ .../gporca/libgpopt/src/operators/CLogical.cpp | 5 +- .../libgpopt/src/operators/CPredicateUtils.cpp | 4 + .../gporca/libgpopt/src/xforms/CXformUtils.cpp | 6 +- .../include/naucrates/dxl/xml/dxltokens.h | 2 +- .../libnaucrates/include/naucrates/md/IMDIndex.h | 1 + .../gporca/libnaucrates/src/md/CMDIndexGPDB.cpp | 3 +- .../gporca/libnaucrates/src/md/IMDIndex.cpp | 2 + .../src/operators/CDXLOperatorFactory.cpp | 5 + .../gporca/libnaucrates/src/xml/dxltokens.cpp | 1 + src/backend/gporca/server/CMakeLists.txt | 4 +- src/test/regress/expected/bfv_index.out | 311 +++++++++++++++++++++ src/test/regress/expected/bfv_index_optimizer.out | 299 ++++++++++++++++++++ .../regress/expected/create_index_optimizer.out | 10 +- src/test/regress/expected/hash_index.out | 2 + src/test/regress/sql/bfv_index.sql | 117 ++++++++ src/test/regress/sql/hash_index.sql | 3 + 19 files changed, 1335 insertions(+), 14 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp index 9c47c2d78b..efa7ab65e9 100644 --- a/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp @@ -957,6 +957,9 @@ CTranslatorRelcacheToDXL::RetrieveIndex(CMemoryPool *mp, case BTREE_AM_OID: index_type = IMDIndex::EmdindBtree; break; + case HASH_AM_OID: + index_type = IMDIndex::EmdindHash; + break; case BITMAP_AM_OID: index_type = IMDIndex::EmdindBitmap; break; @@ -2725,6 +2728,7 @@ CTranslatorRelcacheToDXL::IsIndexSupported(Relation index_rel) gpdb::HeapAttIsNull(tup, Anum_pg_index_indpred) && index_rel->rd_index->indisvalid && (BTREE_AM_OID == index_rel->rd_rel->relam || + HASH_AM_OID == index_rel->rd_rel->relam || BITMAP_AM_OID == index_rel->rd_rel->relam || GIST_AM_OID == index_rel->rd_rel->relam || GIN_AM_OID == index_rel->rd_rel->relam || diff --git a/src/backend/gporca/data/dxl/minidump/Hash-BitmapScan.mdp b/src/backend/gporca/data/dxl/minidump/Hash-BitmapScan.mdp new file mode 100644 index 0000000000..8237fec240 --- /dev/null +++ b/src/backend/gporca/data/dxl/minidump/Hash-BitmapScan.mdp @@ -0,0 +1,290 @@ +<?xml version="1.0" encoding="UTF-8"?> +<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/"> + <dxl:Comment><![CDATA[ + Objective: Generate plan using hash indexes in ORCA by using bitmap operators. + + create table foo(a int, b int); + create index idx1 on foo using hash(b); + set optimizer_enable_indexscan to off; + explain select * from foo where b=2; + + QUERY PLAN + ------------------------------------------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..391.30 rows=1 width=8) + -> Bitmap Heap Scan on foo (cost=0.00..391.30 rows=1 width=8) + Recheck Cond: (b = 2) + -> Bitmap Index Scan on idx1 (cost=0.00..0.00 rows=0 width=0) + Index Cond: (b = 2) + Optimizer: Pivotal Optimizer (GPORCA) + (6 rows) + ]]> + </dxl:Comment> + <dxl:Thread Id="0"> + <dxl:OptimizerConfig> + <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/> + <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/> + <dxl:CTEConfig CTEInliningCutoff="0"/> + <dxl:WindowOids RowNumber="3100" Rank="3101"/> + <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3"> + <dxl:CostParams> + <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/> + </dxl:CostParams> + </dxl:CostModelConfig> + <dxl:Hint JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="20" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0" SkewFactor="0"/> + <dxl:TraceFlags Value="101013,102001,102002,102003,102005,102043,102074,102120,102144,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/> + </dxl:OptimizerConfig> + <dxl:Metadata SystemIds="0.GPDB"> + <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2222.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/> + <dxl:EqualityOp Mdid="0.91.1.0"/> + <dxl:InequalityOp Mdid="0.85.1.0"/> + <dxl:LessThanOp Mdid="0.58.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/> + <dxl:GreaterThanOp Mdid="0.59.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/> + <dxl:ComparisonOp Mdid="0.1693.1.0"/> + <dxl:ArrayType Mdid="0.1000.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:RelationExtendedStatistics Mdid="10.52420.1.0" Name="foo"/> + <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/> + <dxl:EqualityOp Mdid="0.96.1.0"/> + <dxl:InequalityOp Mdid="0.518.1.0"/> + <dxl:LessThanOp Mdid="0.97.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.523.1.0"/> + <dxl:GreaterThanOp Mdid="0.521.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/> + <dxl:ComparisonOp Mdid="0.351.1.0"/> + <dxl:ArrayType Mdid="0.1007.1.0"/> + <dxl:MinAgg Mdid="0.2132.1.0"/> + <dxl:MaxAgg Mdid="0.2116.1.0"/> + <dxl:AvgAgg Mdid="0.2101.1.0"/> + <dxl:SumAgg Mdid="0.2108.1.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:ColumnStatistics Mdid="1.52420.1.0.0" Name="a" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:ColumnStatistics Mdid="1.52420.1.0.1" Name="b" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1990.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/> + <dxl:EqualityOp Mdid="0.607.1.0"/> + <dxl:InequalityOp Mdid="0.608.1.0"/> + <dxl:LessThanOp Mdid="0.609.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.611.1.0"/> + <dxl:GreaterThanOp Mdid="0.610.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/> + <dxl:ComparisonOp Mdid="0.356.1.0"/> + <dxl:ArrayType Mdid="0.1028.1.0"/> + <dxl:MinAgg Mdid="0.2134.1.0"/> + <dxl:MaxAgg Mdid="0.2118.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false"> + <dxl:DistrOpfamily Mdid="0.2227.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/> + <dxl:EqualityOp Mdid="0.387.1.0"/> + <dxl:InequalityOp Mdid="0.402.1.0"/> + <dxl:LessThanOp Mdid="0.2799.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/> + <dxl:GreaterThanOp Mdid="0.2800.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/> + <dxl:ComparisonOp Mdid="0.2794.1.0"/> + <dxl:ArrayType Mdid="0.1010.1.0"/> + <dxl:MinAgg Mdid="0.2798.1.0"/> + <dxl:MaxAgg Mdid="0.2797.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2226.1.0"/> + <dxl:EqualityOp Mdid="0.385.1.0"/> + <dxl:InequalityOp Mdid="0.0.0.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1012.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2225.1.0"/> + <dxl:EqualityOp Mdid="0.352.1.0"/> + <dxl:InequalityOp Mdid="0.3315.1.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1011.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false"> + <dxl:LeftType Mdid="0.23.1.0"/> + <dxl:RightType Mdid="0.23.1.0"/> + <dxl:ResultType Mdid="0.16.1.0"/> + <dxl:OpFunc Mdid="0.65.1.0"/> + <dxl:Commutator Mdid="0.96.1.0"/> + <dxl:InverseOp Mdid="0.518.1.0"/> + <dxl:HashOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/> + <dxl:Opfamilies> + <dxl:Opfamily Mdid="0.1976.1.0"/> + <dxl:Opfamily Mdid="0.1977.1.0"/> + <dxl:Opfamily Mdid="0.4054.1.0"/> + <dxl:Opfamily Mdid="0.7100.1.0"/> + <dxl:Opfamily Mdid="0.10009.1.0"/> + </dxl:Opfamilies> + </dxl:GPDBScalarOp> + <dxl:RelationStatistics Mdid="2.52420.1.0" Name="foo" Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/> + <dxl:Relation Mdid="6.52420.1.0" Name="foo" IsTemporary="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="8,2"> + <dxl:Columns> + <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="b" Attno="2" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList> + <dxl:IndexInfo Mdid="7.52423.1.0" IsPartial="false"/> + </dxl:IndexInfoList> + <dxl:CheckConstraints/> + <dxl:DistrOpfamilies> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + </dxl:DistrOpfamilies> + </dxl:Relation> + <dxl:Index Mdid="7.52423.1.0" Name="idx1" IsClustered="false" IndexType="Hash" IndexItemType="0.2283.1.0" KeyColumns="1" IncludedColumns="0,1,2,3,4,5,6,7,8"> + <dxl:Opfamilies> + <dxl:Opfamily Mdid="0.1977.1.0"/> + </dxl:Opfamilies> + </dxl:Index> + </dxl:Metadata> + <dxl:Query> + <dxl:OutputColumns> + <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:OutputColumns> + <dxl:CTEList/> + <dxl:LogicalSelect> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/> + </dxl:Comparison> + <dxl:LogicalGet> + <dxl:TableDescriptor Mdid="6.52420.1.0" TableName="foo" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="3" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="4" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="9" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:LogicalGet> + </dxl:LogicalSelect> + </dxl:Query> + <dxl:Plan Id="0" SpaceSize="2"> + <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="391.295563" Rows="1.000000" Width="8"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:SortingColumnList/> + <dxl:BitmapTableScan> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="391.295528" Rows="1.000000" Width="8"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:RecheckCond> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/> + </dxl:Comparison> + </dxl:RecheckCond> + <dxl:BitmapIndexProbe> + <dxl:IndexCondList> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/> + </dxl:Comparison> + </dxl:IndexCondList> + <dxl:IndexDescriptor Mdid="7.52423.1.0" IndexName="idx1"/> + </dxl:BitmapIndexProbe> + <dxl:TableDescriptor Mdid="6.52420.1.0" TableName="foo" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="1" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="3" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:BitmapTableScan> + </dxl:GatherMotion> + </dxl:Plan> + </dxl:Thread> +</dxl:DXLMessage> diff --git a/src/backend/gporca/data/dxl/minidump/Hash-IndexScan.mdp b/src/backend/gporca/data/dxl/minidump/Hash-IndexScan.mdp new file mode 100644 index 0000000000..0ca91dec8f --- /dev/null +++ b/src/backend/gporca/data/dxl/minidump/Hash-IndexScan.mdp @@ -0,0 +1,280 @@ +<?xml version="1.0" encoding="UTF-8"?> +<dxl:DXLMessage xmlns:dxl="http://greenplum.com/dxl/2010/12/"> + <dxl:Comment><![CDATA[ + Objective: Generate plan using hash indexes in ORCA by using indexscan operator. + + create table foo(a int, b int); + create index idx1 on foo using hash(b); + explain select * from foo where b=2; + + QUERY PLAN + ---------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..6.00 rows=1 width=8) + -> Index Scan using idx1 on foo (cost=0.00..6.00 rows=1 width=8) + Index Cond: (b = 2) + Optimizer: Pivotal Optimizer (GPORCA) + (4 rows) + ]]> + </dxl:Comment> + <dxl:Thread Id="0"> + <dxl:OptimizerConfig> + <dxl:EnumeratorConfig Id="0" PlanSamples="0" CostThreshold="0"/> + <dxl:StatisticsConfig DampingFactorFilter="0.750000" DampingFactorJoin="0.000000" DampingFactorGroupBy="0.750000" MaxStatsBuckets="100"/> + <dxl:CTEConfig CTEInliningCutoff="0"/> + <dxl:WindowOids RowNumber="3100" Rank="3101"/> + <dxl:CostModelConfig CostModelType="1" SegmentsForCosting="3"> + <dxl:CostParams> + <dxl:CostParam Name="NLJFactor" Value="1024.000000" LowerBound="1023.500000" UpperBound="1024.500000"/> + </dxl:CostParams> + </dxl:CostModelConfig> + <dxl:Hint JoinArityForAssociativityCommutativity="18" ArrayExpansionThreshold="20" JoinOrderDynamicProgThreshold="10" BroadcastThreshold="100000" EnforceConstraintsOnDML="false" PushGroupByBelowSetopThreshold="10" XformBindThreshold="0" SkewFactor="0"/> + <dxl:TraceFlags Value="101013,102001,102002,102003,102043,102074,102120,102144,103001,103014,103022,103026,103027,103029,103033,103038,103040,104002,104003,104004,104005,106000"/> + </dxl:OptimizerConfig> + <dxl:Metadata SystemIds="0.GPDB"> + <dxl:Type Mdid="0.16.1.0" Name="bool" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="1" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2222.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7124.1.0"/> + <dxl:EqualityOp Mdid="0.91.1.0"/> + <dxl:InequalityOp Mdid="0.85.1.0"/> + <dxl:LessThanOp Mdid="0.58.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.1694.1.0"/> + <dxl:GreaterThanOp Mdid="0.59.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.1695.1.0"/> + <dxl:ComparisonOp Mdid="0.1693.1.0"/> + <dxl:ArrayType Mdid="0.1000.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:RelationExtendedStatistics Mdid="10.52420.1.0" Name="foo"/> + <dxl:Type Mdid="0.23.1.0" Name="int4" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7100.1.0"/> + <dxl:EqualityOp Mdid="0.96.1.0"/> + <dxl:InequalityOp Mdid="0.518.1.0"/> + <dxl:LessThanOp Mdid="0.97.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.523.1.0"/> + <dxl:GreaterThanOp Mdid="0.521.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.525.1.0"/> + <dxl:ComparisonOp Mdid="0.351.1.0"/> + <dxl:ArrayType Mdid="0.1007.1.0"/> + <dxl:MinAgg Mdid="0.2132.1.0"/> + <dxl:MaxAgg Mdid="0.2116.1.0"/> + <dxl:AvgAgg Mdid="0.2101.1.0"/> + <dxl:SumAgg Mdid="0.2108.1.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:ColumnStatistics Mdid="1.52420.1.0.0" Name="a" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:ColumnStatistics Mdid="1.52420.1.0.1" Name="b" Width="4.000000" NullFreq="0.000000" NdvRemain="0.000000" FreqRemain="0.000000" ColStatsMissing="true"/> + <dxl:Type Mdid="0.26.1.0" Name="oid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.1990.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7109.1.0"/> + <dxl:EqualityOp Mdid="0.607.1.0"/> + <dxl:InequalityOp Mdid="0.608.1.0"/> + <dxl:LessThanOp Mdid="0.609.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.611.1.0"/> + <dxl:GreaterThanOp Mdid="0.610.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.612.1.0"/> + <dxl:ComparisonOp Mdid="0.356.1.0"/> + <dxl:ArrayType Mdid="0.1028.1.0"/> + <dxl:MinAgg Mdid="0.2134.1.0"/> + <dxl:MaxAgg Mdid="0.2118.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.27.1.0" Name="tid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="true" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="6" PassByValue="false"> + <dxl:DistrOpfamily Mdid="0.2227.1.0"/> + <dxl:LegacyDistrOpfamily Mdid="0.7110.1.0"/> + <dxl:EqualityOp Mdid="0.387.1.0"/> + <dxl:InequalityOp Mdid="0.402.1.0"/> + <dxl:LessThanOp Mdid="0.2799.1.0"/> + <dxl:LessThanEqualsOp Mdid="0.2801.1.0"/> + <dxl:GreaterThanOp Mdid="0.2800.1.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.2802.1.0"/> + <dxl:ComparisonOp Mdid="0.2794.1.0"/> + <dxl:ArrayType Mdid="0.1010.1.0"/> + <dxl:MinAgg Mdid="0.2798.1.0"/> + <dxl:MaxAgg Mdid="0.2797.1.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.29.1.0" Name="cid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2226.1.0"/> + <dxl:EqualityOp Mdid="0.385.1.0"/> + <dxl:InequalityOp Mdid="0.0.0.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1012.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:Type Mdid="0.28.1.0" Name="xid" IsRedistributable="true" IsHashable="true" IsMergeJoinable="false" IsComposite="false" IsTextRelated="false" IsFixedLength="true" Length="4" PassByValue="true"> + <dxl:DistrOpfamily Mdid="0.2225.1.0"/> + <dxl:EqualityOp Mdid="0.352.1.0"/> + <dxl:InequalityOp Mdid="0.3315.1.0"/> + <dxl:LessThanOp Mdid="0.0.0.0"/> + <dxl:LessThanEqualsOp Mdid="0.0.0.0"/> + <dxl:GreaterThanOp Mdid="0.0.0.0"/> + <dxl:GreaterThanEqualsOp Mdid="0.0.0.0"/> + <dxl:ComparisonOp Mdid="0.0.0.0"/> + <dxl:ArrayType Mdid="0.1011.1.0"/> + <dxl:MinAgg Mdid="0.0.0.0"/> + <dxl:MaxAgg Mdid="0.0.0.0"/> + <dxl:AvgAgg Mdid="0.0.0.0"/> + <dxl:SumAgg Mdid="0.0.0.0"/> + <dxl:CountAgg Mdid="0.2147.1.0"/> + </dxl:Type> + <dxl:GPDBScalarOp Mdid="0.96.1.0" Name="=" ComparisonType="Eq" ReturnsNullOnNullInput="true" IsNDVPreserving="false"> + <dxl:LeftType Mdid="0.23.1.0"/> + <dxl:RightType Mdid="0.23.1.0"/> + <dxl:ResultType Mdid="0.16.1.0"/> + <dxl:OpFunc Mdid="0.65.1.0"/> + <dxl:Commutator Mdid="0.96.1.0"/> + <dxl:InverseOp Mdid="0.518.1.0"/> + <dxl:HashOpfamily Mdid="0.1977.1.0"/> + <dxl:LegacyHashOpfamily Mdid="0.7100.1.0"/> + <dxl:Opfamilies> + <dxl:Opfamily Mdid="0.1976.1.0"/> + <dxl:Opfamily Mdid="0.1977.1.0"/> + <dxl:Opfamily Mdid="0.4054.1.0"/> + <dxl:Opfamily Mdid="0.7100.1.0"/> + <dxl:Opfamily Mdid="0.10009.1.0"/> + </dxl:Opfamilies> + </dxl:GPDBScalarOp> + <dxl:RelationStatistics Mdid="2.52420.1.0" Name="foo" Rows="0.000000" RelPages="0" RelAllVisible="0" EmptyRelation="true"/> + <dxl:Relation Mdid="6.52420.1.0" Name="foo" IsTemporary="false" StorageType="Heap" DistributionPolicy="Hash" DistributionColumns="0" Keys="8,2"> + <dxl:Columns> + <dxl:Column Name="a" Attno="1" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="b" Attno="2" Mdid="0.23.1.0" Nullable="true" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="ctid" Attno="-1" Mdid="0.27.1.0" Nullable="false" ColWidth="6"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmin" Attno="-2" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmin" Attno="-3" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="xmax" Attno="-4" Mdid="0.28.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="cmax" Attno="-5" Mdid="0.29.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="tableoid" Attno="-6" Mdid="0.26.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + <dxl:Column Name="gp_segment_id" Attno="-7" Mdid="0.23.1.0" Nullable="false" ColWidth="4"> + <dxl:DefaultValue/> + </dxl:Column> + </dxl:Columns> + <dxl:IndexInfoList> + <dxl:IndexInfo Mdid="7.52423.1.0" IsPartial="false"/> + </dxl:IndexInfoList> + <dxl:CheckConstraints/> + <dxl:DistrOpfamilies> + <dxl:DistrOpfamily Mdid="0.1977.1.0"/> + </dxl:DistrOpfamilies> + </dxl:Relation> + <dxl:Index Mdid="7.52423.1.0" Name="idx1" IsClustered="false" IndexType="Hash" IndexItemType="0.2283.1.0" KeyColumns="1" IncludedColumns="0,1,2,3,4,5,6,7,8"> + <dxl:Opfamilies> + <dxl:Opfamily Mdid="0.1977.1.0"/> + </dxl:Opfamilies> + </dxl:Index> + </dxl:Metadata> + <dxl:Query> + <dxl:OutputColumns> + <dxl:Ident ColId="1" ColName="a" TypeMdid="0.23.1.0"/> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:OutputColumns> + <dxl:CTEList/> + <dxl:LogicalSelect> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="2" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/> + </dxl:Comparison> + <dxl:LogicalGet> + <dxl:TableDescriptor Mdid="6.52420.1.0" TableName="foo" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="1" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="3" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="4" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="9" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:LogicalGet> + </dxl:LogicalSelect> + </dxl:Query> + <dxl:Plan Id="0" SpaceSize="3"> + <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="6.000136" Rows="1.000000" Width="8"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:SortingColumnList/> + <dxl:IndexScan IndexScanDirection="Forward"> + <dxl:Properties> + <dxl:Cost StartupCost="0" TotalCost="6.000101" Rows="1.000000" Width="8"/> + </dxl:Properties> + <dxl:ProjList> + <dxl:ProjElem ColId="0" Alias="a"> + <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + <dxl:ProjElem ColId="1" Alias="b"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + </dxl:ProjElem> + </dxl:ProjList> + <dxl:Filter/> + <dxl:IndexCondList> + <dxl:Comparison ComparisonOperator="=" OperatorMdid="0.96.1.0"> + <dxl:Ident ColId="1" ColName="b" TypeMdid="0.23.1.0"/> + <dxl:ConstValue TypeMdid="0.23.1.0" Value="2"/> + </dxl:Comparison> + </dxl:IndexCondList> + <dxl:Partitions/> + <dxl:IndexDescriptor Mdid="7.52423.1.0" IndexName="idx1"/> + <dxl:TableDescriptor Mdid="6.52420.1.0" TableName="foo" LockMode="1"> + <dxl:Columns> + <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="1" Attno="2" ColName="b" TypeMdid="0.23.1.0" ColWidth="4"/> + <dxl:Column ColId="2" Attno="-1" ColName="ctid" TypeMdid="0.27.1.0" ColWidth="6"/> + <dxl:Column ColId="3" Attno="-2" ColName="xmin" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="4" Attno="-3" ColName="cmin" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="5" Attno="-4" ColName="xmax" TypeMdid="0.28.1.0" ColWidth="4"/> + <dxl:Column ColId="6" Attno="-5" ColName="cmax" TypeMdid="0.29.1.0" ColWidth="4"/> + <dxl:Column ColId="7" Attno="-6" ColName="tableoid" TypeMdid="0.26.1.0" ColWidth="4"/> + <dxl:Column ColId="8" Attno="-7" ColName="gp_segment_id" TypeMdid="0.23.1.0" ColWidth="4"/> + </dxl:Columns> + </dxl:TableDescriptor> + </dxl:IndexScan> + </dxl:GatherMotion> + </dxl:Plan> + </dxl:Thread> +</dxl:DXLMessage> diff --git a/src/backend/gporca/libgpopt/src/operators/CLogical.cpp b/src/backend/gporca/libgpopt/src/operators/CLogical.cpp index 87236da3b7..a500cf7180 100644 --- a/src/backend/gporca/libgpopt/src/operators/CLogical.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CLogical.cpp @@ -158,10 +158,11 @@ CLogical::PosFromIndex(CMemoryPool *mp, const IMDIndex *pmdindex, COrderSpec *pos = GPOS_NEW(mp) COrderSpec(mp); - // GiST, GIN and BRIN indexes have no order, so return an empty order spec + // GiST, GIN, BRIN and Hash indexes have no order, so return an empty order spec if (pmdindex->IndexType() == IMDIndex::EmdindGist || pmdindex->IndexType() == IMDIndex::EmdindGin || - pmdindex->IndexType() == IMDIndex::EmdindBrin) + pmdindex->IndexType() == IMDIndex::EmdindBrin || + pmdindex->IndexType() == IMDIndex::EmdindHash) { return pos; } diff --git a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp index 5910691f31..555ee19a83 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp @@ -1912,6 +1912,10 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, CMDAccessor *md_accessor, pmdindex->IndexType() == IMDIndex::EmdindBrin); if (cmptype == IMDType::EcmptNEq || cmptype == IMDType::EcmptIDF || + (pmdindex->IndexType() == IMDIndex::EmdindHash && + cmptype != + IMDType:: + EcmptEq) || // Hash indexes with only comparison type equals is ok (cmptype == IMDType::EcmptOther && !gin_or_gist_or_brin) || // only GIN/GiST/BRIN indexes with a comparison type other are ok (gin_or_gist_or_brin && diff --git a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp index 64bdbbfc18..8a08c8afdb 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CXformUtils.cpp @@ -1913,8 +1913,9 @@ CXformUtils::FIndexApplicable(CMemoryPool *mp, const IMDIndex *pmdindex, BOOL possible_ao_table = pmdrel->IsAORowOrColTable() || pmdrel->RetrieveRelStorageType() == IMDRelation::ErelstorageMixedPartitioned; - // GiST can match with either Btree or Bitmap indexes + // GiST and Hash can match with either Btree or Bitmap indexes if (pmdindex->IndexType() == IMDIndex::EmdindGist || + pmdindex->IndexType() == IMDIndex::EmdindHash || // GIN and BRIN can only match with Bitmap Indexes (emdindtype == IMDIndex::EmdindBitmap && (IMDIndex::EmdindGin == pmdindex->IndexType() || @@ -2426,7 +2427,8 @@ CXformUtils::PexprBuildBtreeIndexPlan( IMdIdArray *partition_mdids = nullptr; if (ptabdesc->RetrieveRelStorageType() != IMDRelation::ErelstorageHeap && - pmdindex->IndexType() == IMDIndex::EmdindGist) + (pmdindex->IndexType() == IMDIndex::EmdindGist || + pmdindex->IndexType() == IMDIndex::EmdindHash)) { // Non-heap tables not supported for GiST return nullptr; diff --git a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h index a346a2e291..079a0da26d 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/dxl/xml/dxltokens.h @@ -516,6 +516,7 @@ enum Edxltoken EdxltokenIndexPartial, EdxltokenIndexType, EdxltokenIndexTypeBtree, + EdxltokenIndexTypeHash, EdxltokenIndexTypeBitmap, EdxltokenIndexTypeGist, EdxltokenIndexTypeGin, @@ -735,7 +736,6 @@ enum Edxltoken EdxltokenVarTypeModList, EdxltokenIndexTypeBrin, - EdxltokenSentinel }; diff --git a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDIndex.h b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDIndex.h index 749f7f4c09..eabade4e02 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/md/IMDIndex.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/md/IMDIndex.h @@ -41,6 +41,7 @@ public: enum EmdindexType { EmdindBtree, // btree + EmdindHash, // hash EmdindBitmap, // bitmap EmdindGist, // gist using btree or bitmap EmdindGin, // gin using btree or bitmap diff --git a/src/backend/gporca/libnaucrates/src/md/CMDIndexGPDB.cpp b/src/backend/gporca/libnaucrates/src/md/CMDIndexGPDB.cpp index d46b1e61b8..aa7630585a 100644 --- a/src/backend/gporca/libnaucrates/src/md/CMDIndexGPDB.cpp +++ b/src/backend/gporca/libnaucrates/src/md/CMDIndexGPDB.cpp @@ -60,7 +60,8 @@ CMDIndexGPDB::CMDIndexGPDB(CMemoryPool *mp, IMDId *mdid, CMDName *mdname, IMDIndex::EmdindBtree == index_type || IMDIndex::EmdindGist == index_type || IMDIndex::EmdindGin == index_type || - IMDIndex::EmdindBrin == index_type); + IMDIndex::EmdindBrin == index_type || + IMDIndex::EmdindHash == index_type); GPOS_ASSERT_IMP(IMDIndex::EmdindBitmap == index_type, nullptr != mdid_item_type && mdid_item_type->IsValid()); GPOS_ASSERT(nullptr != mdid_opfamilies_array); diff --git a/src/backend/gporca/libnaucrates/src/md/IMDIndex.cpp b/src/backend/gporca/libnaucrates/src/md/IMDIndex.cpp index caca03dd09..0966385752 100644 --- a/src/backend/gporca/libnaucrates/src/md/IMDIndex.cpp +++ b/src/backend/gporca/libnaucrates/src/md/IMDIndex.cpp @@ -41,6 +41,8 @@ IMDIndex::GetDXLStr(EmdindexType index_type) return CDXLTokens::GetDXLTokenStr(EdxltokenIndexTypeGin); case EmdindBrin: return CDXLTokens::GetDXLTokenStr(EdxltokenIndexTypeBrin); + case EmdindHash: + return CDXLTokens::GetDXLTokenStr(EdxltokenIndexTypeHash); default: GPOS_ASSERT(!"Unrecognized index type"); return nullptr; diff --git a/src/backend/gporca/libnaucrates/src/operators/CDXLOperatorFactory.cpp b/src/backend/gporca/libnaucrates/src/operators/CDXLOperatorFactory.cpp index f800102792..53d34950c4 100644 --- a/src/backend/gporca/libnaucrates/src/operators/CDXLOperatorFactory.cpp +++ b/src/backend/gporca/libnaucrates/src/operators/CDXLOperatorFactory.cpp @@ -3697,6 +3697,11 @@ CDXLOperatorFactory::ParseIndexType(const Attributes &attrs) { return IMDIndex::EmdindBrin; } + else if (0 == XMLString::compareString( + xml_val, CDXLTokens::XmlstrToken(EdxltokenIndexTypeHash))) + { + return IMDIndex::EmdindHash; + } GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiDXLInvalidAttributeValue, CDXLTokens::GetDXLTokenStr(EdxltokenIndexType)->GetBuffer(), diff --git a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp index 94d7c064d2..eee2f407de 100644 --- a/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp +++ b/src/backend/gporca/libnaucrates/src/xml/dxltokens.cpp @@ -582,6 +582,7 @@ CDXLTokens::Init(CMemoryPool *mp) {EdxltokenIndexTypeGist, GPOS_WSZ_LIT("Gist")}, {EdxltokenIndexTypeGin, GPOS_WSZ_LIT("Gin")}, {EdxltokenIndexTypeBrin, GPOS_WSZ_LIT("Brin")}, + {EdxltokenIndexTypeHash, GPOS_WSZ_LIT("Hash")}, {EdxltokenIndexItemType, GPOS_WSZ_LIT("IndexItemType")}, {EdxltokenOpfamily, GPOS_WSZ_LIT("Opfamily")}, diff --git a/src/backend/gporca/server/CMakeLists.txt b/src/backend/gporca/server/CMakeLists.txt index 0c9fad2f89..68db25cc50 100644 --- a/src/backend/gporca/server/CMakeLists.txt +++ b/src/backend/gporca/server/CMakeLists.txt @@ -105,13 +105,13 @@ CIndexScanTest: BTreeIndex-Against-InList BTreeIndex-Against-InListLarge BTreeIndex-Against-ScalarSubquery IndexScan-AOTable IndexScan-DroppedColumns IndexScan-BoolTrue IndexScan-BoolFalse IndexScan-Relabel IndexGet-OuterRefs LogicalIndexGetDroppedCols -IndexScan-ORPredsNonPart IndexScan-ORPredsAOPart IndexScan-AndedIn SubqInIndexPred; +IndexScan-ORPredsNonPart IndexScan-ORPredsAOPart IndexScan-AndedIn SubqInIndexPred Hash-IndexScan; CBitmapScanTest: IndexedNLJBitmap BitmapIndex-ChooseHashJoin BitmapTableScan-AO-Btree-PickOnlyHighNDV BitmapIndex-Against-InList BitmapTableScan-AO-Btree-PickIndexWithNoGap BitmapTableScan-ComplexConjDisj BitmapTableScan-ConjDisjWithOuterRefs -BRINScan-Or BitmapScan-Hetrogeneous-Partitioned; +BRINScan-Or BitmapScan-Hetrogeneous-Partitioned Hash-BitmapScan; CIndexApplyTest: IndexNLJ-IndexGet-OuterRef IndexApply_NestLoopWithNestParamTrue IndexApply1 diff --git a/src/test/regress/expected/bfv_index.out b/src/test/regress/expected/bfv_index.out index 0cc1ac86f0..75ca7f3b53 100644 --- a/src/test/regress/expected/bfv_index.out +++ b/src/test/regress/expected/bfv_index.out @@ -737,3 +737,314 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_indexscan; RESET optimizer_enable_indexonlyscan; +-- +-- Test Hash indexes +-- +CREATE TABLE hash_tbl (a int, b int) DISTRIBUTED BY(a); +INSERT INTO hash_tbl select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl; +CREATE INDEX hash_idx1 ON hash_tbl USING hash(b); +-- Now check the results by turning on indexscan +SET enable_seqscan = ON; +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; +SET optimizer_enable_tablescan =ON; +SET optimizer_enable_indexscan = ON; +SET optimizer_enable_bitmapscan = OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Index Scan using hash_idx1 on hash_tbl + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Index Scan using hash_idx1 on hash_tbl + Index Cond: (b = 3) + Filter: (a = 3) + Optimizer: Postgres query optimizer +(5 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl + Filter: ((b = 3) OR (b = 5)) + Optimizer: Postgres query optimizer +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 5 | 5 + 3 | 3 +(2 rows) + +-- Now check the results by turning on bitmapscan +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; +SET optimizer_enable_tablescan =OFF; +SET optimizer_enable_indexscan = OFF; +SET optimizer_enable_bitmapscan = ON; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(6 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(7 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 5) + Optimizer: Postgres query optimizer +(9 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 3 | 3 + 5 | 5 +(2 rows) + +DROP INDEX hash_idx1; +DROP TABLE hash_tbl; +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; +RESET optimizer_enable_tablescan; +RESET optimizer_enable_indexscan; +RESET optimizer_enable_bitmapscan; +-- Test Hash indexes with AO tables +CREATE TABLE hash_tbl_ao (a int, b int) WITH (appendonly = true) DISTRIBUTED BY(a); +INSERT INTO hash_tbl_ao select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl_ao; +CREATE INDEX hash_idx2 ON hash_tbl_ao USING hash(b); +-- get results for comparison purposes +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(6 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(7 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 5) + Optimizer: Postgres query optimizer +(9 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + a | b +---+--- + 3 | 3 + 5 | 5 +(2 rows) + +-- Now check the results by turning off seqscan/tablescan +SET enable_seqscan = OFF; +SET optimizer_enable_tablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 5) + Optimizer: Postgres query optimizer +(9 rows) + +DROP INDEX hash_idx2; +DROP TABLE hash_tbl_ao; +RESET enable_seqscan; +RESET optimizer_enable_tablescan; +-- Test hash indexes with partition table +CREATE TABLE hash_prt_tbl (a int, b int) DISTRIBUTED BY(a) PARTITION BY RANGE(a) +(PARTITION p1 START (1) END (500) INCLUSIVE, +PARTITION p2 START(501) END (1000) INCLUSIVE); +INSERT INTO hash_prt_tbl select i,i FROM generate_series(1, 1000)i; +ANALYZE hash_prt_tbl; +CREATE INDEX hash_idx3 ON hash_prt_tbl USING hash(b); +-- Now check the results by turning off dynamictablescan/seqscan +SET enable_seqscan = OFF; +SET optimizer_enable_dynamictablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3; + QUERY PLAN +----------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Index Scan using hash_prt_tbl_1_prt_p1_b_idx on hash_prt_tbl_1_prt_p1 + Index Cond: (b = 3) + -> Index Scan using hash_prt_tbl_1_prt_p2_b_idx on hash_prt_tbl_1_prt_p2 + Index Cond: (b = 3) + Optimizer: Postgres query optimizer +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 and a=3; + QUERY PLAN +----------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Index Scan using hash_prt_tbl_1_prt_p1_b_idx on hash_prt_tbl_1_prt_p1 + Index Cond: (b = 3) + Filter: (a = 3) + Optimizer: Postgres query optimizer +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Append + -> Bitmap Heap Scan on hash_prt_tbl_1_prt_p1 + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_prt_tbl_1_prt_p1_b_idx + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_prt_tbl_1_prt_p1_b_idx + Index Cond: (b = 5) + -> Bitmap Heap Scan on hash_prt_tbl_1_prt_p2 + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_prt_tbl_1_prt_p2_b_idx + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_prt_tbl_1_prt_p2_b_idx + Index Cond: (b = 5) + Optimizer: Postgres query optimizer +(17 rows) + +DROP INDEX hash_idx3; +DROP TABLE hash_prt_tbl; +RESET enable_seqscan; +RESET optimizer_enable_dynamictablescan; diff --git a/src/test/regress/expected/bfv_index_optimizer.out b/src/test/regress/expected/bfv_index_optimizer.out index e998c36e94..715da4af11 100644 --- a/src/test/regress/expected/bfv_index_optimizer.out +++ b/src/test/regress/expected/bfv_index_optimizer.out @@ -705,3 +705,302 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_indexscan; RESET optimizer_enable_indexonlyscan; +-- +-- Test Hash indexes +-- +CREATE TABLE hash_tbl (a int, b int) DISTRIBUTED BY(a); +INSERT INTO hash_tbl select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl; +CREATE INDEX hash_idx1 ON hash_tbl USING hash(b); +-- Now check the results by turning on indexscan +SET enable_seqscan = ON; +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; +SET optimizer_enable_tablescan =ON; +SET optimizer_enable_indexscan = ON; +SET optimizer_enable_bitmapscan = OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Index Scan using hash_idx1 on hash_tbl + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +---------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Index Scan using hash_idx1 on hash_tbl + Index Cond: (b = 3) + Filter: (a = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl + Filter: ((b = 3) OR (b = 5)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 5 | 5 + 3 | 3 +(2 rows) + +-- Now check the results by turning on bitmapscan +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; +SET optimizer_enable_tablescan =OFF; +SET optimizer_enable_indexscan = OFF; +SET optimizer_enable_bitmapscan = ON; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +SELECT * FROM hash_tbl WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +SELECT * FROM hash_tbl WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx1 + Index Cond: (b = 5) + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +SELECT * FROM hash_tbl WHERE b=3 or b=5; + a | b +---+--- + 5 | 5 + 3 | 3 +(2 rows) + +DROP INDEX hash_idx1; +DROP TABLE hash_tbl; +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; +RESET optimizer_enable_tablescan; +RESET optimizer_enable_indexscan; +RESET optimizer_enable_bitmapscan; +-- Test Hash indexes with AO tables +CREATE TABLE hash_tbl_ao (a int, b int) WITH (appendonly = true) DISTRIBUTED BY(a); +INSERT INTO hash_tbl_ao select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl_ao; +CREATE INDEX hash_idx2 ON hash_tbl_ao USING hash(b); +-- get results for comparison purposes +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + a | b +---+--- + 3 | 3 +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> Seq Scan on hash_tbl_ao + Filter: ((b = 3) OR (b = 5)) + Optimizer: Pivotal Optimizer (GPORCA) +(4 rows) + +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + a | b +---+--- + 5 | 5 + 3 | 3 +(2 rows) + +-- Now check the results by turning off seqscan/tablescan +SET enable_seqscan = OFF; +SET optimizer_enable_tablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; + QUERY PLAN +-------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: (b = 3) + Filter: (a = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + Optimizer: Pivotal Optimizer (GPORCA) +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + QUERY PLAN +-------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Bitmap Heap Scan on hash_tbl_ao + Recheck Cond: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 3) + -> Bitmap Index Scan on hash_idx2 + Index Cond: (b = 5) + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +DROP INDEX hash_idx2; +DROP TABLE hash_tbl_ao; +RESET enable_seqscan; +RESET optimizer_enable_tablescan; +-- Test hash indexes with partition table +CREATE TABLE hash_prt_tbl (a int, b int) DISTRIBUTED BY(a) PARTITION BY RANGE(a) +(PARTITION p1 START (1) END (500) INCLUSIVE, +PARTITION p2 START(501) END (1000) INCLUSIVE); +INSERT INTO hash_prt_tbl select i,i FROM generate_series(1, 1000)i; +ANALYZE hash_prt_tbl; +CREATE INDEX hash_idx3 ON hash_prt_tbl USING hash(b); +-- Now check the results by turning off dynamictablescan/seqscan +SET enable_seqscan = OFF; +SET optimizer_enable_dynamictablescan =OFF; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3; + QUERY PLAN +------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Index Scan on hash_idx3 on hash_prt_tbl + Index Cond: (b = 3) + Number of partitions to scan: 2 (out of 2) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 and a=3; + QUERY PLAN +------------------------------------------------------- + Gather Motion 1:1 (slice1; segments: 1) + -> Dynamic Index Scan on hash_idx3 on hash_prt_tbl + Index Cond: (b = 3) + Filter: ((b = 3) AND (a = 3)) + Number of partitions to scan: 1 (out of 2) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 or b=5; + QUERY PLAN +---------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> Dynamic Bitmap Heap Scan on hash_prt_tbl + Number of partitions to scan: 2 (out of 2) + Recheck Cond: ((b = 3) OR (b = 5)) + Filter: ((b = 3) OR (b = 5)) + -> BitmapOr + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 3) + -> Dynamic Bitmap Index Scan on hash_idx3 + Index Cond: (b = 5) + Optimizer: Pivotal Optimizer (GPORCA) +(11 rows) + +DROP INDEX hash_idx3; +DROP TABLE hash_prt_tbl; +RESET enable_seqscan; +RESET optimizer_enable_dynamictablescan; diff --git a/src/test/regress/expected/create_index_optimizer.out b/src/test/regress/expected/create_index_optimizer.out index 9b1d90f645..3ada9b15b6 100644 --- a/src/test/regress/expected/create_index_optimizer.out +++ b/src/test/regress/expected/create_index_optimizer.out @@ -1384,12 +1384,10 @@ SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; Finalize Aggregate -> Gather Motion 3:1 (slice1; segments: 3) -> Partial Aggregate - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (stringu1 = 'TVAAAA'::name) - -> Bitmap Index Scan on hash_tuplesort_idx - Index Cond: (stringu1 = 'TVAAAA'::name) - Optimizer: Postgres query optimizer -(8 rows) + -> Index Scan using hash_tuplesort_idx on tenk1 + Index Cond: (stringu1 = 'TVAAAA'::name) + Optimizer: Pivotal Optimizer (GPORCA) +(6 rows) SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; count diff --git a/src/test/regress/expected/hash_index.out b/src/test/regress/expected/hash_index.out index e47c6aaf9e..1bd7df36c1 100644 --- a/src/test/regress/expected/hash_index.out +++ b/src/test/regress/expected/hash_index.out @@ -1,3 +1,4 @@ +set optimizer_trace_fallback=on; -- -- HASH_INDEX -- grep 843938989 hash.data @@ -244,3 +245,4 @@ CREATE INDEX hash_f8_index2 ON hash_f8_heap USING hash (random float8_ops) WITH (fillfactor=101); ERROR: value 101 out of bounds for option "fillfactor" DETAIL: Valid values are between "10" and "100". +reset optimizer_trace_fallback; diff --git a/src/test/regress/sql/bfv_index.sql b/src/test/regress/sql/bfv_index.sql index 5204af2af6..52d2e0f61d 100644 --- a/src/test/regress/sql/bfv_index.sql +++ b/src/test/regress/sql/bfv_index.sql @@ -350,3 +350,120 @@ RESET enable_bitmapscan; RESET optimizer_enable_tablescan; RESET optimizer_enable_indexscan; RESET optimizer_enable_indexonlyscan; + +-- +-- Test Hash indexes +-- + +CREATE TABLE hash_tbl (a int, b int) DISTRIBUTED BY(a); +INSERT INTO hash_tbl select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl; +CREATE INDEX hash_idx1 ON hash_tbl USING hash(b); + +-- Now check the results by turning on indexscan +SET enable_seqscan = ON; +SET enable_indexscan = ON; +SET enable_bitmapscan = OFF; + +SET optimizer_enable_tablescan =ON; +SET optimizer_enable_indexscan = ON; +SET optimizer_enable_bitmapscan = OFF; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; +SELECT * FROM hash_tbl WHERE b=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; +SELECT * FROM hash_tbl WHERE b=3 and a=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; +SELECT * FROM hash_tbl WHERE b=3 or b=5; + +-- Now check the results by turning on bitmapscan +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; + +SET optimizer_enable_tablescan =OFF; +SET optimizer_enable_indexscan = OFF; +SET optimizer_enable_bitmapscan = ON; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3; +SELECT * FROM hash_tbl WHERE b=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 and a=3; +SELECT * FROM hash_tbl WHERE b=3 and a=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl WHERE b=3 or b=5; +SELECT * FROM hash_tbl WHERE b=3 or b=5; + +DROP INDEX hash_idx1; +DROP TABLE hash_tbl; + +RESET enable_seqscan; +RESET enable_indexscan; +RESET enable_bitmapscan; +RESET optimizer_enable_tablescan; +RESET optimizer_enable_indexscan; +RESET optimizer_enable_bitmapscan; + +-- Test Hash indexes with AO tables +CREATE TABLE hash_tbl_ao (a int, b int) WITH (appendonly = true) DISTRIBUTED BY(a); +INSERT INTO hash_tbl_ao select i,i FROM generate_series(1, 100)i; +ANALYZE hash_tbl_ao; +CREATE INDEX hash_idx2 ON hash_tbl_ao USING hash(b); + +-- get results for comparison purposes +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; +SELECT * FROM hash_tbl_ao WHERE b=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + +-- Now check the results by turning off seqscan/tablescan +SET enable_seqscan = OFF; +SET optimizer_enable_tablescan =OFF; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 and a=3; +EXPLAIN (COSTS OFF) +SELECT * FROM hash_tbl_ao WHERE b=3 or b=5; + +DROP INDEX hash_idx2; +DROP TABLE hash_tbl_ao; +RESET enable_seqscan; +RESET optimizer_enable_tablescan; +-- Test hash indexes with partition table + +CREATE TABLE hash_prt_tbl (a int, b int) DISTRIBUTED BY(a) PARTITION BY RANGE(a) +(PARTITION p1 START (1) END (500) INCLUSIVE, +PARTITION p2 START(501) END (1000) INCLUSIVE); +INSERT INTO hash_prt_tbl select i,i FROM generate_series(1, 1000)i; +ANALYZE hash_prt_tbl; +CREATE INDEX hash_idx3 ON hash_prt_tbl USING hash(b); + +-- Now check the results by turning off dynamictablescan/seqscan +SET enable_seqscan = OFF; +SET optimizer_enable_dynamictablescan =OFF; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 and a=3; + +EXPLAIN (COSTS OFF) +SELECT * FROM hash_prt_tbl WHERE b=3 or b=5; + +DROP INDEX hash_idx3; +DROP TABLE hash_prt_tbl; + +RESET enable_seqscan; +RESET optimizer_enable_dynamictablescan; diff --git a/src/test/regress/sql/hash_index.sql b/src/test/regress/sql/hash_index.sql index 4d1aa020a9..005384edf7 100644 --- a/src/test/regress/sql/hash_index.sql +++ b/src/test/regress/sql/hash_index.sql @@ -1,3 +1,4 @@ +set optimizer_trace_fallback=on; -- -- HASH_INDEX -- grep 843938989 hash.data @@ -202,3 +203,5 @@ CREATE INDEX hash_f8_index2 ON hash_f8_heap USING hash (random float8_ops) WITH (fillfactor=9); CREATE INDEX hash_f8_index2 ON hash_f8_heap USING hash (random float8_ops) WITH (fillfactor=101); + +reset optimizer_trace_fallback; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
