This is an automated email from the ASF dual-hosted git repository. reshke pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 45c177bcc3836128e7fb09067ae4042155591b78 Author: Ekta Khanna <[email protected]> AuthorDate: Wed Jul 19 17:29:06 2023 -0700 Enable ORCA to generate IndexScan plans with ScalarArrayOp quals For older versions of GPDB(and PG) the executor supported ScalarArrayCmp with BitmapIndexScans only. As part of the PG 92 merge (commit: 9e8da0f) support was added to IndexScans aswell but wasn't updated in ORCA. This commit adds support in ORCA to consider IndexScan plans for Btree indexes with indexquals on ScalarArrayOp. Similar to planner for multikey indexes, a scalararrayop qual will only be considered as an index qual if the predicate column is the first index key else it will be added as a filter on top. This ensures we do not assume scan results are ordered. (ref commit: 807a40c) This commit also renames the boolean flag and pass it further down in the function call. --- .../gpopt/translate/CTranslatorDXLToPlStmt.cpp | 4 +- .../dxl/minidump/BTreeIndex-Against-InList.mdp | 28 +-- .../include/gpopt/operators/CPredicateUtils.h | 28 +-- .../libgpopt/src/operators/CPredicateUtils.cpp | 77 ++++++--- src/test/regress/expected/bfv_index.out | 179 ++++++++++++++++++- src/test/regress/expected/bfv_index_optimizer.out | 190 ++++++++++++++++++++- .../regress/expected/create_index_optimizer.out | 14 +- src/test/regress/sql/bfv_index.sql | 69 +++++++- 8 files changed, 512 insertions(+), 77 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp index c910293376..a73cdcfd85 100644 --- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp +++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp @@ -1118,8 +1118,10 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions( IsA(index_cond_expr, ScalarArrayOpExpr)) && "expected OpExpr or ScalarArrayOpExpr in index qual"); + // allow Index quals with scalar array only for bitmap and btree indexes if (!is_bitmap_index_probe && IsA(index_cond_expr, ScalarArrayOpExpr) && - IMDIndex::EmdindBitmap != index->IndexType()) + !(IMDIndex::EmdindBitmap == index->IndexType() || + IMDIndex::EmdindBtree == index->IndexType())) { GPOS_RAISE( gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtConversion, diff --git a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp index 0bdb7f4af4..a10e22d3d6 100644 --- a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp +++ b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp @@ -301,10 +301,10 @@ SELECT * FROM test WHERE a in (1, 47); </dxl:LogicalGet> </dxl:LogicalSelect> </dxl:Query> - <dxl:Plan Id="0" SpaceSize="2"> + <dxl:Plan Id="0" SpaceSize="4"> <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="387.964368" Rows="2.000000" Width="4"/> + <dxl:Cost StartupCost="0" TotalCost="6.000228" Rows="2.000000" Width="4"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -313,9 +313,9 @@ SELECT * FROM test WHERE a in (1, 47); </dxl:ProjList> <dxl:Filter/> <dxl:SortingColumnList/> - <dxl:BitmapTableScan> + <dxl:IndexScan IndexScanDirection="Forward"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="387.964333" Rows="2.000000" Width="4"/> + <dxl:Cost StartupCost="0" TotalCost="6.000193" Rows="2.000000" Width="4"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -323,7 +323,7 @@ SELECT * FROM test WHERE a in (1, 47); </dxl:ProjElem> </dxl:ProjList> <dxl:Filter/> - <dxl:RecheckCond> + <dxl:IndexCondList> <dxl:ArrayComp OperatorName="=" OperatorMdid="0.96.1.0" OperatorType="Any"> <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> <dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" MultiDimensional="false"> @@ -331,19 +331,9 @@ SELECT * FROM test WHERE a in (1, 47); <dxl:ConstValue TypeMdid="0.23.1.0" Value="47"/> </dxl:Array> </dxl:ArrayComp> - </dxl:RecheckCond> - <dxl:BitmapIndexProbe> - <dxl:IndexCondList> - <dxl:ArrayComp OperatorName="=" OperatorMdid="0.96.1.0" OperatorType="Any"> - <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/> - <dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" MultiDimensional="false"> - <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/> - <dxl:ConstValue TypeMdid="0.23.1.0" Value="47"/> - </dxl:Array> - </dxl:ArrayComp> - </dxl:IndexCondList> - <dxl:IndexDescriptor Mdid="0.41692.1.0" IndexName="test_index"/> - </dxl:BitmapIndexProbe> + </dxl:IndexCondList> + <dxl:Partitions/> + <dxl:IndexDescriptor Mdid="0.41692.1.0" IndexName="test_index"/> <dxl:TableDescriptor Mdid="6.41665.1.1" TableName="test"> <dxl:Columns> <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0"/> @@ -356,7 +346,7 @@ SELECT * FROM test WHERE a in (1, 47); <dxl:Column ColId="7" Attno="-8" ColName="gp_segment_id" TypeMdid="0.23.1.0"/> </dxl:Columns> </dxl:TableDescriptor> - </dxl:BitmapTableScan> + </dxl:IndexScan> </dxl:GatherMotion> </dxl:Plan> </dxl:Thread> diff --git a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h index 49dcbaa002..7630210e8f 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h +++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h @@ -70,20 +70,16 @@ private: CColRefSetArray *pdrgpcrsEquivClasses); // helper to create index lookup comparison predicate with index key on left side - static CExpression *PexprIndexLookupKeyOnLeft(CMemoryPool *mp, - CMDAccessor *md_accessor, - CExpression *pexprScalar, - const IMDIndex *pmdindex, - CColRefArray *pdrgpcrIndex, - CColRefSet *outer_refs); + static CExpression *PexprIndexLookupKeyOnLeft( + CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar, + const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, + CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual); // helper to create index lookup comparison predicate with index key on right side - static CExpression *PexprIndexLookupKeyOnRight(CMemoryPool *mp, - CMDAccessor *md_accessor, - CExpression *pexprScalar, - const IMDIndex *pmdindex, - CColRefArray *pdrgpcrIndex, - CColRefSet *outer_refs); + static CExpression *PexprIndexLookupKeyOnRight( + CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar, + const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, + CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual); // for all columns that appear in the given expression and are members // of the given set, replace these columns with NULL constants @@ -210,6 +206,10 @@ public: // is the given expression a comparison between a scalar ident and a constant array static BOOL FCompareIdentToConstArray(CExpression *pexpr); + // is the given ScalarArrayCmp a valid index qual + static BOOL IsScalarArrayCmpValidIndexQual(CExpression *pexpr, + CColRefArray *pdrgpcrIndex); + // is the given expression an AND static BOOL FAnd(CExpression *pexpr) @@ -403,7 +403,7 @@ public: CExpressionArray *pdrgpexprResidual, CColRefSet *pcrsAcceptedOuterRefs = nullptr, // outer refs that are acceptable in an index predicate - BOOL considerBitmapAltForArrayCmp = false); + BOOL allowArrayCmpIndexQual = false); // return the inverse of given comparison expression static CExpression *PexprInverseComparison(CMemoryPool *mp, @@ -433,7 +433,7 @@ public: static CExpression *PexprIndexLookup( CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexpPred, const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, - CColRefSet *outer_refs, BOOL considerBitmapAltForArrayCmp); + CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual); // split given scalar expression into two conjunctions; without and with outer references static void SeparateOuterRefs(CMemoryPool *mp, CExpression *pexprScalar, diff --git a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp index 7c123c0b7e..7e59cf028a 100644 --- a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp +++ b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp @@ -1295,6 +1295,29 @@ CPredicateUtils::FCompareIdentToConstArray(CExpression *pexpr) return CUtils::FScalarConstArray(pexprArray); } +// is the given ScalarArrayCmp a valid index qual +BOOL +CPredicateUtils::IsScalarArrayCmpValidIndexQual(CExpression *pexpr, + CColRefArray *pdrgpcrIndex) +{ + GPOS_ASSERT(nullptr != pexpr); + CColRef *pcrFirstIndexKey = (*pdrgpcrIndex)[0]; + CExpression *pexprScalar = (*pexpr)[0]; + + // returns true if ScalarArrayCmp Ident is the first index key col id. + // A ScalarArrayCmp is a valid index clause only if it's on the first index column to + // ensure that ORCA doesn't assume its ordered and skip on adding a sort when there is + // order by clause. If it's not on the first index key column, the qual will be considered + // as a filter instead of being part of the index qual. + return ( + (CUtils::FScalarIdent(pexprScalar) && + pcrFirstIndexKey->Id() == + CScalarIdent::PopConvert(pexprScalar->Pop())->Pcr()->Id()) || + (CCastUtils::FBinaryCoercibleCastedScId(pexprScalar) && + pcrFirstIndexKey->Id() == + CScalarIdent::PopConvert((*pexprScalar)[0]->Pop())->Pcr()->Id())); +} + CExpression * CPredicateUtils::ValidatePartPruningExpr(CMemoryPool *mp, CExpression *expr, CColRef *pcrPartKey, @@ -1910,12 +1933,10 @@ CPredicateUtils::FColumnDisjunctionOfConst(CConstraintInterval *pcnstrInterval, // helper to create index lookup comparison predicate with index key on left side CExpression * -CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool *mp, - CMDAccessor *md_accessor, - CExpression *pexprScalar, - const IMDIndex *pmdindex, - CColRefArray *pdrgpcrIndex, - CColRefSet *outer_refs) +CPredicateUtils::PexprIndexLookupKeyOnLeft( + CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar, + const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, + CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual) { GPOS_ASSERT(nullptr != pexprScalar); @@ -1923,6 +1944,14 @@ CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool *mp, CExpression *pexprRight = (*pexprScalar)[1]; CColRefSet *pcrsIndex = GPOS_NEW(mp) CColRefSet(mp, pdrgpcrIndex); + if (!allowArrayCmpIndexQual && + IMDIndex::EmdindBtree == pmdindex->IndexType() && + CUtils::FScalarArrayCmp(pexprScalar) && + !IsScalarArrayCmpValidIndexQual(pexprScalar, pdrgpcrIndex)) + { + pcrsIndex->Release(); + return nullptr; + } if ((CUtils::FScalarIdent(pexprLeft) && pcrsIndex->FMember( @@ -1970,12 +1999,10 @@ CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool *mp, // helper to create index lookup comparison predicate with index key on right side CExpression * -CPredicateUtils::PexprIndexLookupKeyOnRight(CMemoryPool *mp, - CMDAccessor *md_accessor, - CExpression *pexprScalar, - const IMDIndex *pmdindex, - CColRefArray *pdrgpcrIndex, - CColRefSet *outer_refs) +CPredicateUtils::PexprIndexLookupKeyOnRight( + CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar, + const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, + CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual) { GPOS_ASSERT(nullptr != pexprScalar); @@ -1993,9 +2020,9 @@ CPredicateUtils::PexprIndexLookupKeyOnRight(CMemoryPool *mp, pexprLeft->AddRef(); CExpression *pexprCommuted = GPOS_NEW(mp) CExpression(mp, popScCmpCommute, pexprRight, pexprLeft); - CExpression *pexprIndexCond = - PexprIndexLookupKeyOnLeft(mp, md_accessor, pexprCommuted, - pmdindex, pdrgpcrIndex, outer_refs); + CExpression *pexprIndexCond = PexprIndexLookupKeyOnLeft( + mp, md_accessor, pexprCommuted, pmdindex, pdrgpcrIndex, + outer_refs, allowArrayCmpIndexQual); pexprCommuted->Release(); return pexprIndexCond; @@ -2019,7 +2046,7 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, CMDAccessor *md_accessor, const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex, CColRefSet *outer_refs, - BOOL considerBitmapAltForArrayCmp) + BOOL allowArrayCmpIndexQual) { GPOS_ASSERT(nullptr != pexprScalar); GPOS_ASSERT(nullptr != pdrgpcrIndex); @@ -2034,11 +2061,11 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, CMDAccessor *md_accessor, CScalarArrayCmp::EarrcmpAny == CScalarArrayCmp::PopConvert(pexprScalar->Pop())->Earrcmpt() && (IMDIndex::EmdindBitmap == pmdindex->IndexType() || - (considerBitmapAltForArrayCmp && - (IMDIndex::EmdindBtree == pmdindex->IndexType() || - IMDIndex::EmdindHash == pmdindex->IndexType())))) + IMDIndex::EmdindBtree == pmdindex->IndexType() || + (allowArrayCmpIndexQual && + IMDIndex::EmdindHash == pmdindex->IndexType()))) { - // array cmps are always allowed on bitmap indexes and when requested on btree/hash indexes + // array cmps are always allowed on bitmap/btree indexes and when requested on hash indexes cmptype = CUtils::ParseCmpType( CScalarArrayCmp::PopConvert(pexprScalar->Pop())->MdIdOp()); } @@ -2062,14 +2089,16 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, CMDAccessor *md_accessor, } CExpression *pexprIndexLookupKeyOnLeft = PexprIndexLookupKeyOnLeft( - mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, outer_refs); + mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, outer_refs, + allowArrayCmpIndexQual); if (nullptr != pexprIndexLookupKeyOnLeft) { return pexprIndexLookupKeyOnLeft; } CExpression *pexprIndexLookupKeyOnRight = PexprIndexLookupKeyOnRight( - mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, outer_refs); + mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, outer_refs, + allowArrayCmpIndexQual); if (nullptr != pexprIndexLookupKeyOnRight) { return pexprIndexLookupKeyOnRight; @@ -2087,7 +2116,7 @@ CPredicateUtils::ExtractIndexPredicates( CExpressionArray *pdrgpexprResidual, CColRefSet * pcrsAcceptedOuterRefs, // outer refs that are acceptable in an index predicate - BOOL considerBitmapAltForArrayCmp) + BOOL allowArrayCmpIndexQual) { const ULONG length = pdrgpexprPredicate->Size(); @@ -2144,7 +2173,7 @@ CPredicateUtils::ExtractIndexPredicates( // attempt building index lookup predicate CExpression *pexprLookupPred = PexprIndexLookup( mp, md_accessor, pexprCond, pmdindex, pdrgpcrIndex, - pcrsAcceptedOuterRefs, considerBitmapAltForArrayCmp); + pcrsAcceptedOuterRefs, allowArrayCmpIndexQual); if (nullptr != pexprLookupPred) { pexprCond->Release(); diff --git a/src/test/regress/expected/bfv_index.out b/src/test/regress/expected/bfv_index.out index adf0164108..805b623c04 100644 --- a/src/test/regress/expected/bfv_index.out +++ b/src/test/regress/expected/bfv_index.out @@ -1049,7 +1049,7 @@ DROP TABLE hash_prt_tbl; RESET enable_seqscan; RESET optimizer_enable_dynamictablescan; -- --- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY only +-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY only -- CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, hash_idx_col int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. @@ -1158,6 +1158,183 @@ SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); ----+----------------+---------------+-------------- (0 rows) +-- +-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table +-- with multikey indexes only when predicate key is the first index key +-- (similar test for non-partitioned tables in create_index) +-- +CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1)) +PARTITION BY list(key2) +(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION other); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2); +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from generate_series(1,500)i; +explain (costs off) +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pt_with_multikey_index_1_prt_p2.key1 + -> Merge Append + Sort Key: pt_with_multikey_index_1_prt_p2.key1 + -> Index Only Scan using pt_with_multikey_index_1_prt_p2_key1_key2_idx on pt_with_multikey_index_1_prt_p2 + Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[])) + -> Index Only Scan using pt_with_multikey_index_1_prt_p1_key1_key2_idx on pt_with_multikey_index_1_prt_p1 + Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[])) + -> Index Only Scan using pt_with_multikey_index_1_prt_other_key1_key2_idx on pt_with_multikey_index_1_prt_other + Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[])) + Optimizer: Postgres query optimizer +(11 rows) + +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + key1 +-------- + KEY5 + KEY5 + KEY5 + KEY55 + KEY55 + KEY55 + KEY65 + KEY65 + KEY65 +(9 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pt_with_multikey_index_1_prt_p2.key2 + -> Sort + Sort Key: pt_with_multikey_index_1_prt_p2.key2 + -> Append + -> Index Scan using pt_with_multikey_index_1_prt_p2_key1_key2_idx on pt_with_multikey_index_1_prt_p2 + Index Cond: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY ('{R,G}'::bpchar[]))) + -> Index Scan using pt_with_multikey_index_1_prt_p1_key1_key2_idx on pt_with_multikey_index_1_prt_p1 + Index Cond: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY ('{R,G}'::bpchar[]))) + Optimizer: Postgres query optimizer +(10 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | G + 55 | KEY55 | R +(2 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: pt_with_multikey_index_1_prt_p1.key1 + -> Index Scan using pt_with_multikey_index_1_prt_p1_key1_key2_idx on pt_with_multikey_index_1_prt_p1 + Index Cond: ((key1 = ANY ('{KEY55,KEY65}'::bpchar[])) AND (key2 = 'R'::bpchar)) + Optimizer: Postgres query optimizer +(5 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | R + 65 | KEY65 | R +(2 rows) + +-- +-- Enable the index only scan in append only table. +-- Note: expect ORCA to use seq scan rather than index only scan like planner, +-- because ORCA hasn't yet implemented index only scan for AO/CO tables. +-- +CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true); +CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b); +insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_ao where a < 100; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=5.37..5.38 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=5.31..5.36 rows=3 width=8) + -> Partial Aggregate (cost=5.31..5.32 rows=1 width=8) + -> Index Only Scan using bfv_index_only_ao_a_b on bfv_index_only_ao (cost=0.16..5.23 rows=33 width=0) + Index Cond: (a < 100) + Optimizer: Postgres query optimizer +(6 rows) + +select count(*) from bfv_index_only_ao where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_ao where a < 1000; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=19.87..19.88 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=19.81..19.86 rows=3 width=8) + -> Partial Aggregate (cost=19.81..19.82 rows=1 width=8) + -> Index Only Scan using bfv_index_only_ao_a_b on bfv_index_only_ao (cost=0.16..18.98 rows=333 width=0) + Index Cond: (a < 1000) + Optimizer: Postgres query optimizer +(6 rows) + +select count(*) from bfv_index_only_ao where a < 1000; + count +------- + 999 +(1 row) + +CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, orientation=column); +CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b); +insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_aocs where a < 100; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=5.37..5.38 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=5.31..5.36 rows=3 width=8) + -> Partial Aggregate (cost=5.31..5.32 rows=1 width=8) + -> Index Only Scan using bfv_index_only_aocs_a_b on bfv_index_only_aocs (cost=0.16..5.23 rows=33 width=0) + Index Cond: (a < 100) + Optimizer: Postgres query optimizer +(6 rows) + +select count(*) from bfv_index_only_aocs where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_aocs where a < 1000; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=19.87..19.88 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=19.81..19.86 rows=3 width=8) + -> Partial Aggregate (cost=19.81..19.82 rows=1 width=8) + -> Index Only Scan using bfv_index_only_aocs_a_b on bfv_index_only_aocs (cost=0.16..18.98 rows=333 width=0) + Index Cond: (a < 1000) + Optimizer: Postgres query optimizer +(6 rows) + +select count(*) from bfv_index_only_aocs where a < 1000; + count +------- + 999 +(1 row) + -- The following tests are to verify a fix that allows ORCA to -- choose the bitmap index scan alternative when the predicate -- is in the form of `value operator cast(column)`. The fix diff --git a/src/test/regress/expected/bfv_index_optimizer.out b/src/test/regress/expected/bfv_index_optimizer.out index cf3a117271..52a5c7acc5 100644 --- a/src/test/regress/expected/bfv_index_optimizer.out +++ b/src/test/regress/expected/bfv_index_optimizer.out @@ -1005,7 +1005,7 @@ DROP TABLE hash_prt_tbl; RESET enable_seqscan; RESET optimizer_enable_dynamictablescan; -- --- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY only +-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY only -- CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, hash_idx_col int); NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Database data distribution key for this table. @@ -1037,15 +1037,13 @@ SELECT * FROM bitmap_alt WHERE bitmap_idx_col IN (3, 5); EXPLAIN (COSTS OFF) SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5); - QUERY PLAN ----------------------------------------------------------------------- + QUERY PLAN +---------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) - -> Bitmap Heap Scan on bitmap_alt - Recheck Cond: (btree_idx_col = ANY ('{3,5}'::integer[])) - -> Bitmap Index Scan on bitmap_alt_idx2 - Index Cond: (btree_idx_col = ANY ('{3,5}'::integer[])) + -> Index Scan using bitmap_alt_idx2 on bitmap_alt + Index Cond: (btree_idx_col = ANY ('{3,5}'::integer[])) Optimizer: Pivotal Optimizer (GPORCA) -(6 rows) +(4 rows) SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5); id | bitmap_idx_col | btree_idx_col | hash_idx_col @@ -1119,6 +1117,182 @@ SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); ----+----------------+---------------+-------------- (0 rows) +-- +-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table +-- with multikey indexes only when predicate key is the first index key +-- (similar test for non-partitioned tables in create_index) +-- +CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1)) +PARTITION BY list(key2) +(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION other); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2); +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from generate_series(1,500)i; +explain (costs off) +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + QUERY PLAN +-------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key1 + -> Sort + Sort Key: key1 + -> Dynamic Index Scan on multikey_idx on pt_with_multikey_index + Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[])) + Number of partitions to scan: 3 (out of 3) + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + key1 +-------- + KEY5 + KEY5 + KEY5 + KEY55 + KEY55 + KEY55 + KEY65 + KEY65 + KEY65 +(9 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + QUERY PLAN +--------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key2 + -> Sort + Sort Key: key2 + -> Dynamic Index Scan on multikey_idx on pt_with_multikey_index + Index Cond: (key1 = 'KEY55'::bpchar) + Filter: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY ('{R,G}'::bpchar[]))) + Number of partitions to scan: 2 (out of 3) + Optimizer: Pivotal Optimizer (GPORCA) +(9 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | G + 55 | KEY55 | R +(2 rows) + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + QUERY PLAN +----------------------------------------------------------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: key1 + -> Sort + Sort Key: key1 + -> Dynamic Index Scan on multikey_idx on pt_with_multikey_index + Index Cond: ((key1 = ANY ('{KEY55,KEY65}'::bpchar[])) AND (key2 = 'R'::bpchar)) + Number of partitions to scan: 1 (out of 3) + Optimizer: Pivotal Optimizer (GPORCA) +(8 rows) + +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + a | key1 | key2 +----+--------+------ + 55 | KEY55 | R + 65 | KEY65 | R +(2 rows) + +-- +-- Enable the index only scan in append only table. +-- Note: expect ORCA to use seq scan rather than index only scan like planner, +-- because ORCA hasn't yet implemented index only scan for AO/CO tables. +-- +CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b); +insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_ao where a < 100; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on bfv_index_only_ao (cost=0.00..431.00 rows=1 width=1) + Filter: (a < 100) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +select count(*) from bfv_index_only_ao where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_ao where a < 1000; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on bfv_index_only_ao (cost=0.00..431.00 rows=1 width=1) + Filter: (a < 1000) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +select count(*) from bfv_index_only_ao where a < 1000; + count +------- + 999 +(1 row) + +CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, orientation=column); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b); +insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i; +explain select count(*) from bfv_index_only_aocs where a < 100; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on bfv_index_only_aocs (cost=0.00..431.00 rows=1 width=1) + Filter: (a < 100) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +select count(*) from bfv_index_only_aocs where a < 100; + count +------- + 99 +(1 row) + +explain select count(*) from bfv_index_only_aocs where a < 1000; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate (cost=0.00..431.00 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=0.00..431.00 rows=1 width=1) + -> Seq Scan on bfv_index_only_aocs (cost=0.00..431.00 rows=1 width=1) + Filter: (a < 1000) + Optimizer: Pivotal Optimizer (GPORCA) +(5 rows) + +select count(*) from bfv_index_only_aocs where a < 1000; + count +------- + 999 +(1 row) + -- The following tests are to verify a fix that allows ORCA to -- choose the bitmap index scan alternative when the predicate -- is in the form of `value operator cast(column)`. The fix diff --git a/src/test/regress/expected/create_index_optimizer.out b/src/test/regress/expected/create_index_optimizer.out index 55f42000a6..067b6f1e3f 100644 --- a/src/test/regress/expected/create_index_optimizer.out +++ b/src/test/regress/expected/create_index_optimizer.out @@ -2029,18 +2029,14 @@ explain (costs off) SELECT unique1 FROM tenk1 WHERE unique1 IN (1,42,7) ORDER BY unique1; - QUERY PLAN -------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) Merge Key: unique1 - -> Sort - Sort Key: unique1 - -> Bitmap Heap Scan on tenk1 - Recheck Cond: (unique1 = ANY ('{1,42,7}'::integer[])) - -> Bitmap Index Scan on tenk1_unique1 - Index Cond: (unique1 = ANY ('{1,42,7}'::integer[])) + -> Index Only Scan using tenk1_unique1 on tenk1 + Index Cond: (unique1 = ANY ('{1,42,7}'::integer[])) Optimizer: Pivotal Optimizer (GPORCA) -(9 rows) +(5 rows) SELECT unique1 FROM tenk1 WHERE unique1 IN (1,42,7) diff --git a/src/test/regress/sql/bfv_index.sql b/src/test/regress/sql/bfv_index.sql index bb026ce57b..13e17179ed 100644 --- a/src/test/regress/sql/bfv_index.sql +++ b/src/test/regress/sql/bfv_index.sql @@ -469,7 +469,7 @@ RESET enable_seqscan; RESET optimizer_enable_dynamictablescan; -- --- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY only +-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY only -- CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, hash_idx_col int); @@ -500,6 +500,73 @@ SELECT * FROM bitmap_alt WHERE btree_idx_col=ALL(ARRAY[3, 5]); EXPLAIN (COSTS OFF) SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]); + +-- +-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table +-- with multikey indexes only when predicate key is the first index key +-- (similar test for non-partitioned tables in create_index) +-- +CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1)) +PARTITION BY list(key2) +(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION other); + +CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2); +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from generate_series(1,500)i; +INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from generate_series(1,500)i; + +explain (costs off) +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + +SELECT key1 FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65', 'KEY5') +ORDER BY key1; + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + +SELECT * FROM pt_with_multikey_index +WHERE key1 = 'KEY55' AND key2 IN ('R', 'G') +ORDER BY key2; + +EXPLAIN (costs off) +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + +SELECT * FROM pt_with_multikey_index +WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R' +ORDER BY key1; + +-- +-- Enable the index only scan in append only table. +-- Note: expect ORCA to use seq scan rather than index only scan like planner, +-- because ORCA hasn't yet implemented index only scan for AO/CO tables. +-- +CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true); +CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b); + +insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i; + +explain select count(*) from bfv_index_only_ao where a < 100; +select count(*) from bfv_index_only_ao where a < 100; +explain select count(*) from bfv_index_only_ao where a < 1000; +select count(*) from bfv_index_only_ao where a < 1000; + +CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, orientation=column); +CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b); + +insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i; + +explain select count(*) from bfv_index_only_aocs where a < 100; +select count(*) from bfv_index_only_aocs where a < 100; +explain select count(*) from bfv_index_only_aocs where a < 1000; +select count(*) from bfv_index_only_aocs where a < 1000; + -- The following tests are to verify a fix that allows ORCA to -- choose the bitmap index scan alternative when the predicate -- is in the form of `value operator cast(column)`. The fix --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
