This is an automated email from the ASF dual-hosted git repository.

reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 45c177bcc3836128e7fb09067ae4042155591b78
Author: Ekta Khanna <[email protected]>
AuthorDate: Wed Jul 19 17:29:06 2023 -0700

    Enable ORCA to generate IndexScan plans with ScalarArrayOp quals
    
    For older versions of GPDB(and PG) the executor supported ScalarArrayCmp
    with BitmapIndexScans only. As part of the PG 92 merge (commit: 9e8da0f)
    support was added to IndexScans aswell but wasn't updated in ORCA. This
    commit adds support in ORCA to consider IndexScan plans for Btree
    indexes with indexquals on ScalarArrayOp.  Similar to planner for
    multikey indexes, a scalararrayop qual will only be considered as an
    index qual if the predicate column is the first index key else it will
    be added as a filter on top. This ensures we do not assume scan results
    are ordered. (ref commit: 807a40c) This commit also renames the boolean
    flag and pass it further down in the function call.
---
 .../gpopt/translate/CTranslatorDXLToPlStmt.cpp     |   4 +-
 .../dxl/minidump/BTreeIndex-Against-InList.mdp     |  28 +--
 .../include/gpopt/operators/CPredicateUtils.h      |  28 +--
 .../libgpopt/src/operators/CPredicateUtils.cpp     |  77 ++++++---
 src/test/regress/expected/bfv_index.out            | 179 ++++++++++++++++++-
 src/test/regress/expected/bfv_index_optimizer.out  | 190 ++++++++++++++++++++-
 .../regress/expected/create_index_optimizer.out    |  14 +-
 src/test/regress/sql/bfv_index.sql                 |  69 +++++++-
 8 files changed, 512 insertions(+), 77 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp 
b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
index c910293376..a73cdcfd85 100644
--- a/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
+++ b/src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
@@ -1118,8 +1118,10 @@ CTranslatorDXLToPlStmt::TranslateIndexConditions(
                                         IsA(index_cond_expr, 
ScalarArrayOpExpr)) &&
                                        "expected OpExpr or ScalarArrayOpExpr 
in index qual");
 
+               // allow Index quals with scalar array only for bitmap and 
btree indexes
                if (!is_bitmap_index_probe && IsA(index_cond_expr, 
ScalarArrayOpExpr) &&
-                       IMDIndex::EmdindBitmap != index->IndexType())
+                       !(IMDIndex::EmdindBitmap == index->IndexType() ||
+                         IMDIndex::EmdindBtree == index->IndexType()))
                {
                        GPOS_RAISE(
                                gpdxl::ExmaDXL, gpdxl::ExmiDXL2PlStmtConversion,
diff --git a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp 
b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp
index 0bdb7f4af4..a10e22d3d6 100644
--- a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp
+++ b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InList.mdp
@@ -301,10 +301,10 @@ SELECT * FROM test WHERE a in (1, 47);
         </dxl:LogicalGet>
       </dxl:LogicalSelect>
     </dxl:Query>
-    <dxl:Plan Id="0" SpaceSize="2">
+    <dxl:Plan Id="0" SpaceSize="4">
       <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
         <dxl:Properties>
-          <dxl:Cost StartupCost="0" TotalCost="387.964368" Rows="2.000000" 
Width="4"/>
+          <dxl:Cost StartupCost="0" TotalCost="6.000228" Rows="2.000000" 
Width="4"/>
         </dxl:Properties>
         <dxl:ProjList>
           <dxl:ProjElem ColId="0" Alias="a">
@@ -313,9 +313,9 @@ SELECT * FROM test WHERE a in (1, 47);
         </dxl:ProjList>
         <dxl:Filter/>
         <dxl:SortingColumnList/>
-        <dxl:BitmapTableScan>
+        <dxl:IndexScan IndexScanDirection="Forward">
           <dxl:Properties>
-            <dxl:Cost StartupCost="0" TotalCost="387.964333" Rows="2.000000" 
Width="4"/>
+            <dxl:Cost StartupCost="0" TotalCost="6.000193" Rows="2.000000" 
Width="4"/>
           </dxl:Properties>
           <dxl:ProjList>
             <dxl:ProjElem ColId="0" Alias="a">
@@ -323,7 +323,7 @@ SELECT * FROM test WHERE a in (1, 47);
             </dxl:ProjElem>
           </dxl:ProjList>
           <dxl:Filter/>
-          <dxl:RecheckCond>
+          <dxl:IndexCondList>
             <dxl:ArrayComp OperatorName="=" OperatorMdid="0.96.1.0" 
OperatorType="Any">
               <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
               <dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" 
MultiDimensional="false">
@@ -331,19 +331,9 @@ SELECT * FROM test WHERE a in (1, 47);
                 <dxl:ConstValue TypeMdid="0.23.1.0" Value="47"/>
               </dxl:Array>
             </dxl:ArrayComp>
-          </dxl:RecheckCond>
-          <dxl:BitmapIndexProbe>
-            <dxl:IndexCondList>
-              <dxl:ArrayComp OperatorName="=" OperatorMdid="0.96.1.0" 
OperatorType="Any">
-                <dxl:Ident ColId="0" ColName="a" TypeMdid="0.23.1.0"/>
-                <dxl:Array ArrayType="0.1007.1.0" ElementType="0.23.1.0" 
MultiDimensional="false">
-                  <dxl:ConstValue TypeMdid="0.23.1.0" Value="1"/>
-                  <dxl:ConstValue TypeMdid="0.23.1.0" Value="47"/>
-                </dxl:Array>
-              </dxl:ArrayComp>
-            </dxl:IndexCondList>
-            <dxl:IndexDescriptor Mdid="0.41692.1.0" IndexName="test_index"/>
-          </dxl:BitmapIndexProbe>
+          </dxl:IndexCondList>
+          <dxl:Partitions/>
+          <dxl:IndexDescriptor Mdid="0.41692.1.0" IndexName="test_index"/>
           <dxl:TableDescriptor Mdid="6.41665.1.1" TableName="test">
             <dxl:Columns>
               <dxl:Column ColId="0" Attno="1" ColName="a" TypeMdid="0.23.1.0"/>
@@ -356,7 +346,7 @@ SELECT * FROM test WHERE a in (1, 47);
               <dxl:Column ColId="7" Attno="-8" ColName="gp_segment_id" 
TypeMdid="0.23.1.0"/>
             </dxl:Columns>
           </dxl:TableDescriptor>
-        </dxl:BitmapTableScan>
+        </dxl:IndexScan>
       </dxl:GatherMotion>
     </dxl:Plan>
   </dxl:Thread>
diff --git 
a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h 
b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
index 49dcbaa002..7630210e8f 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/operators/CPredicateUtils.h
@@ -70,20 +70,16 @@ private:
                                                                  
CColRefSetArray *pdrgpcrsEquivClasses);
 
        // helper to create index lookup comparison predicate with index key on 
left side
-       static CExpression *PexprIndexLookupKeyOnLeft(CMemoryPool *mp,
-                                                                               
                  CMDAccessor *md_accessor,
-                                                                               
                  CExpression *pexprScalar,
-                                                                               
                  const IMDIndex *pmdindex,
-                                                                               
                  CColRefArray *pdrgpcrIndex,
-                                                                               
                  CColRefSet *outer_refs);
+       static CExpression *PexprIndexLookupKeyOnLeft(
+               CMemoryPool *mp, CMDAccessor *md_accessor, CExpression 
*pexprScalar,
+               const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex,
+               CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual);
 
        // helper to create index lookup comparison predicate with index key on 
right side
-       static CExpression *PexprIndexLookupKeyOnRight(CMemoryPool *mp,
-                                                                               
                   CMDAccessor *md_accessor,
-                                                                               
                   CExpression *pexprScalar,
-                                                                               
                   const IMDIndex *pmdindex,
-                                                                               
                   CColRefArray *pdrgpcrIndex,
-                                                                               
                   CColRefSet *outer_refs);
+       static CExpression *PexprIndexLookupKeyOnRight(
+               CMemoryPool *mp, CMDAccessor *md_accessor, CExpression 
*pexprScalar,
+               const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex,
+               CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual);
 
        // for all columns that appear in the given expression and are members
        // of the given set, replace these columns with NULL constants
@@ -210,6 +206,10 @@ public:
        // is the given expression a comparison between a scalar ident and a 
constant array
        static BOOL FCompareIdentToConstArray(CExpression *pexpr);
 
+       // is the given ScalarArrayCmp a valid index qual
+       static BOOL IsScalarArrayCmpValidIndexQual(CExpression *pexpr,
+                                                                               
           CColRefArray *pdrgpcrIndex);
+
        // is the given expression an AND
        static BOOL
        FAnd(CExpression *pexpr)
@@ -403,7 +403,7 @@ public:
                CExpressionArray *pdrgpexprResidual,
                CColRefSet *pcrsAcceptedOuterRefs =
                        nullptr,  // outer refs that are acceptable in an index 
predicate
-               BOOL considerBitmapAltForArrayCmp = false);
+               BOOL allowArrayCmpIndexQual = false);
 
        // return the inverse of given comparison expression
        static CExpression *PexprInverseComparison(CMemoryPool *mp,
@@ -433,7 +433,7 @@ public:
        static CExpression *PexprIndexLookup(
                CMemoryPool *mp, CMDAccessor *md_accessor, CExpression 
*pexpPred,
                const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex,
-               CColRefSet *outer_refs, BOOL considerBitmapAltForArrayCmp);
+               CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual);
 
        // split given scalar expression into two conjunctions; without and 
with outer references
        static void SeparateOuterRefs(CMemoryPool *mp, CExpression *pexprScalar,
diff --git a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp 
b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
index 7c123c0b7e..7e59cf028a 100644
--- a/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
+++ b/src/backend/gporca/libgpopt/src/operators/CPredicateUtils.cpp
@@ -1295,6 +1295,29 @@ CPredicateUtils::FCompareIdentToConstArray(CExpression 
*pexpr)
        return CUtils::FScalarConstArray(pexprArray);
 }
 
+// is the given ScalarArrayCmp a valid index qual
+BOOL
+CPredicateUtils::IsScalarArrayCmpValidIndexQual(CExpression *pexpr,
+                                                                               
                CColRefArray *pdrgpcrIndex)
+{
+       GPOS_ASSERT(nullptr != pexpr);
+       CColRef *pcrFirstIndexKey = (*pdrgpcrIndex)[0];
+       CExpression *pexprScalar = (*pexpr)[0];
+
+       // returns true if ScalarArrayCmp Ident is the first index key col id.
+       // A ScalarArrayCmp is a valid index clause only if it's on the first 
index column to
+       // ensure that ORCA doesn't assume its ordered and skip on adding a 
sort when there is
+       // order by clause. If it's not on the first index key column, the qual 
will be considered
+       // as a filter instead of being part of the index qual.
+       return (
+               (CUtils::FScalarIdent(pexprScalar) &&
+                pcrFirstIndexKey->Id() ==
+                        
CScalarIdent::PopConvert(pexprScalar->Pop())->Pcr()->Id()) ||
+               (CCastUtils::FBinaryCoercibleCastedScId(pexprScalar) &&
+                pcrFirstIndexKey->Id() ==
+                        
CScalarIdent::PopConvert((*pexprScalar)[0]->Pop())->Pcr()->Id()));
+}
+
 CExpression *
 CPredicateUtils::ValidatePartPruningExpr(CMemoryPool *mp, CExpression *expr,
                                                                                
 CColRef *pcrPartKey,
@@ -1910,12 +1933,10 @@ 
CPredicateUtils::FColumnDisjunctionOfConst(CConstraintInterval *pcnstrInterval,
 
 // helper to create index lookup comparison predicate with index key on left 
side
 CExpression *
-CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool *mp,
-                                                                               
   CMDAccessor *md_accessor,
-                                                                               
   CExpression *pexprScalar,
-                                                                               
   const IMDIndex *pmdindex,
-                                                                               
   CColRefArray *pdrgpcrIndex,
-                                                                               
   CColRefSet *outer_refs)
+CPredicateUtils::PexprIndexLookupKeyOnLeft(
+       CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar,
+       const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex,
+       CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual)
 {
        GPOS_ASSERT(nullptr != pexprScalar);
 
@@ -1923,6 +1944,14 @@ CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool 
*mp,
        CExpression *pexprRight = (*pexprScalar)[1];
 
        CColRefSet *pcrsIndex = GPOS_NEW(mp) CColRefSet(mp, pdrgpcrIndex);
+       if (!allowArrayCmpIndexQual &&
+               IMDIndex::EmdindBtree == pmdindex->IndexType() &&
+               CUtils::FScalarArrayCmp(pexprScalar) &&
+               !IsScalarArrayCmpValidIndexQual(pexprScalar, pdrgpcrIndex))
+       {
+               pcrsIndex->Release();
+               return nullptr;
+       }
 
        if ((CUtils::FScalarIdent(pexprLeft) &&
                 pcrsIndex->FMember(
@@ -1970,12 +1999,10 @@ CPredicateUtils::PexprIndexLookupKeyOnLeft(CMemoryPool 
*mp,
 
 // helper to create index lookup comparison predicate with index key on right 
side
 CExpression *
-CPredicateUtils::PexprIndexLookupKeyOnRight(CMemoryPool *mp,
-                                                                               
        CMDAccessor *md_accessor,
-                                                                               
        CExpression *pexprScalar,
-                                                                               
        const IMDIndex *pmdindex,
-                                                                               
        CColRefArray *pdrgpcrIndex,
-                                                                               
        CColRefSet *outer_refs)
+CPredicateUtils::PexprIndexLookupKeyOnRight(
+       CMemoryPool *mp, CMDAccessor *md_accessor, CExpression *pexprScalar,
+       const IMDIndex *pmdindex, CColRefArray *pdrgpcrIndex,
+       CColRefSet *outer_refs, BOOL allowArrayCmpIndexQual)
 {
        GPOS_ASSERT(nullptr != pexprScalar);
 
@@ -1993,9 +2020,9 @@ CPredicateUtils::PexprIndexLookupKeyOnRight(CMemoryPool 
*mp,
                        pexprLeft->AddRef();
                        CExpression *pexprCommuted = GPOS_NEW(mp)
                                CExpression(mp, popScCmpCommute, pexprRight, 
pexprLeft);
-                       CExpression *pexprIndexCond =
-                               PexprIndexLookupKeyOnLeft(mp, md_accessor, 
pexprCommuted,
-                                                                               
  pmdindex, pdrgpcrIndex, outer_refs);
+                       CExpression *pexprIndexCond = PexprIndexLookupKeyOnLeft(
+                               mp, md_accessor, pexprCommuted, pmdindex, 
pdrgpcrIndex,
+                               outer_refs, allowArrayCmpIndexQual);
                        pexprCommuted->Release();
 
                        return pexprIndexCond;
@@ -2019,7 +2046,7 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, 
CMDAccessor *md_accessor,
                                                                  const 
IMDIndex *pmdindex,
                                                                  CColRefArray 
*pdrgpcrIndex,
                                                                  CColRefSet 
*outer_refs,
-                                                                 BOOL 
considerBitmapAltForArrayCmp)
+                                                                 BOOL 
allowArrayCmpIndexQual)
 {
        GPOS_ASSERT(nullptr != pexprScalar);
        GPOS_ASSERT(nullptr != pdrgpcrIndex);
@@ -2034,11 +2061,11 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, 
CMDAccessor *md_accessor,
                         CScalarArrayCmp::EarrcmpAny ==
                                 
CScalarArrayCmp::PopConvert(pexprScalar->Pop())->Earrcmpt() &&
                         (IMDIndex::EmdindBitmap == pmdindex->IndexType() ||
-                         (considerBitmapAltForArrayCmp &&
-                          (IMDIndex::EmdindBtree == pmdindex->IndexType() ||
-                               IMDIndex::EmdindHash == 
pmdindex->IndexType()))))
+                         IMDIndex::EmdindBtree == pmdindex->IndexType() ||
+                         (allowArrayCmpIndexQual &&
+                          IMDIndex::EmdindHash == pmdindex->IndexType())))
        {
-               // array cmps are always allowed on bitmap indexes and when 
requested on btree/hash indexes
+               // array cmps are always allowed on bitmap/btree indexes and 
when requested on hash indexes
                cmptype = CUtils::ParseCmpType(
                        
CScalarArrayCmp::PopConvert(pexprScalar->Pop())->MdIdOp());
        }
@@ -2062,14 +2089,16 @@ CPredicateUtils::PexprIndexLookup(CMemoryPool *mp, 
CMDAccessor *md_accessor,
        }
 
        CExpression *pexprIndexLookupKeyOnLeft = PexprIndexLookupKeyOnLeft(
-               mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, 
outer_refs);
+               mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, 
outer_refs,
+               allowArrayCmpIndexQual);
        if (nullptr != pexprIndexLookupKeyOnLeft)
        {
                return pexprIndexLookupKeyOnLeft;
        }
 
        CExpression *pexprIndexLookupKeyOnRight = PexprIndexLookupKeyOnRight(
-               mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, 
outer_refs);
+               mp, md_accessor, pexprScalar, pmdindex, pdrgpcrIndex, 
outer_refs,
+               allowArrayCmpIndexQual);
        if (nullptr != pexprIndexLookupKeyOnRight)
        {
                return pexprIndexLookupKeyOnRight;
@@ -2087,7 +2116,7 @@ CPredicateUtils::ExtractIndexPredicates(
        CExpressionArray *pdrgpexprResidual,
        CColRefSet *
                pcrsAcceptedOuterRefs,  // outer refs that are acceptable in an 
index predicate
-       BOOL considerBitmapAltForArrayCmp)
+       BOOL allowArrayCmpIndexQual)
 {
        const ULONG length = pdrgpexprPredicate->Size();
 
@@ -2144,7 +2173,7 @@ CPredicateUtils::ExtractIndexPredicates(
                        // attempt building index lookup predicate
                        CExpression *pexprLookupPred = PexprIndexLookup(
                                mp, md_accessor, pexprCond, pmdindex, 
pdrgpcrIndex,
-                               pcrsAcceptedOuterRefs, 
considerBitmapAltForArrayCmp);
+                               pcrsAcceptedOuterRefs, allowArrayCmpIndexQual);
                        if (nullptr != pexprLookupPred)
                        {
                                pexprCond->Release();
diff --git a/src/test/regress/expected/bfv_index.out 
b/src/test/regress/expected/bfv_index.out
index adf0164108..805b623c04 100644
--- a/src/test/regress/expected/bfv_index.out
+++ b/src/test/regress/expected/bfv_index.out
@@ -1049,7 +1049,7 @@ DROP TABLE hash_prt_tbl;
 RESET enable_seqscan;
 RESET optimizer_enable_dynamictablescan;
 --
--- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY 
only
+-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY 
only
 --
 CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, 
hash_idx_col int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' 
as the Greenplum Database data distribution key for this table.
@@ -1158,6 +1158,183 @@ SELECT * FROM bitmap_alt WHERE 
hash_idx_col=ALL(ARRAY[3, 5]);
 ----+----------------+---------------+--------------
 (0 rows)
 
+--
+-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table
+-- with multikey indexes only when predicate key is the first index key
+-- (similar test for non-partitioned tables in create_index)
+--
+CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1))
+PARTITION BY list(key2)
+(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION 
other);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2);
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from 
generate_series(1,500)i;
+explain (costs off)
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+                                                        QUERY PLAN             
                                           
+--------------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: pt_with_multikey_index_1_prt_p2.key1
+   ->  Merge Append
+         Sort Key: pt_with_multikey_index_1_prt_p2.key1
+         ->  Index Only Scan using 
pt_with_multikey_index_1_prt_p2_key1_key2_idx on pt_with_multikey_index_1_prt_p2
+               Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[]))
+         ->  Index Only Scan using 
pt_with_multikey_index_1_prt_p1_key1_key2_idx on pt_with_multikey_index_1_prt_p1
+               Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[]))
+         ->  Index Only Scan using 
pt_with_multikey_index_1_prt_other_key1_key2_idx on 
pt_with_multikey_index_1_prt_other
+               Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[]))
+ Optimizer: Postgres query optimizer
+(11 rows)
+
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+  key1  
+--------
+ KEY5  
+ KEY5  
+ KEY5  
+ KEY55 
+ KEY55 
+ KEY55 
+ KEY65 
+ KEY65 
+ KEY65 
+(9 rows)
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+                                                     QUERY PLAN                
                                      
+---------------------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: pt_with_multikey_index_1_prt_p2.key2
+   ->  Sort
+         Sort Key: pt_with_multikey_index_1_prt_p2.key2
+         ->  Append
+               ->  Index Scan using 
pt_with_multikey_index_1_prt_p2_key1_key2_idx on pt_with_multikey_index_1_prt_p2
+                     Index Cond: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY 
('{R,G}'::bpchar[])))
+               ->  Index Scan using 
pt_with_multikey_index_1_prt_p1_key1_key2_idx on pt_with_multikey_index_1_prt_p1
+                     Index Cond: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY 
('{R,G}'::bpchar[])))
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+ a  |  key1  | key2 
+----+--------+------
+ 55 | KEY55  | G
+ 55 | KEY55  | R
+(2 rows)
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+                                               QUERY PLAN                      
                          
+---------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: pt_with_multikey_index_1_prt_p1.key1
+   ->  Index Scan using pt_with_multikey_index_1_prt_p1_key1_key2_idx on 
pt_with_multikey_index_1_prt_p1
+         Index Cond: ((key1 = ANY ('{KEY55,KEY65}'::bpchar[])) AND (key2 = 
'R'::bpchar))
+ Optimizer: Postgres query optimizer
+(5 rows)
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+ a  |  key1  | key2 
+----+--------+------
+ 55 | KEY55  | R
+ 65 | KEY65  | R
+(2 rows)
+
+--
+-- Enable the index only scan in append only table.
+-- Note: expect ORCA to use seq scan rather than index only scan like planner,
+-- because ORCA hasn't yet implemented index only scan for AO/CO tables.
+--
+CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true);
+CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b);
+insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i;
+explain select count(*) from bfv_index_only_ao where a < 100;
+                                                      QUERY PLAN               
                                        
+-----------------------------------------------------------------------------------------------------------------------
+ Finalize Aggregate  (cost=5.37..5.38 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=5.31..5.36 rows=3 
width=8)
+         ->  Partial Aggregate  (cost=5.31..5.32 rows=1 width=8)
+               ->  Index Only Scan using bfv_index_only_ao_a_b on 
bfv_index_only_ao  (cost=0.16..5.23 rows=33 width=0)
+                     Index Cond: (a < 100)
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+select count(*) from bfv_index_only_ao where a < 100;
+ count 
+-------
+    99
+(1 row)
+
+explain select count(*) from bfv_index_only_ao where a < 1000;
+                                                       QUERY PLAN              
                                          
+-------------------------------------------------------------------------------------------------------------------------
+ Finalize Aggregate  (cost=19.87..19.88 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=19.81..19.86 rows=3 
width=8)
+         ->  Partial Aggregate  (cost=19.81..19.82 rows=1 width=8)
+               ->  Index Only Scan using bfv_index_only_ao_a_b on 
bfv_index_only_ao  (cost=0.16..18.98 rows=333 width=0)
+                     Index Cond: (a < 1000)
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+select count(*) from bfv_index_only_ao where a < 1000;
+ count 
+-------
+   999
+(1 row)
+
+CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, 
orientation=column);
+CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b);
+insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i;
+explain select count(*) from bfv_index_only_aocs where a < 100;
+                                                        QUERY PLAN             
                                            
+---------------------------------------------------------------------------------------------------------------------------
+ Finalize Aggregate  (cost=5.37..5.38 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=5.31..5.36 rows=3 
width=8)
+         ->  Partial Aggregate  (cost=5.31..5.32 rows=1 width=8)
+               ->  Index Only Scan using bfv_index_only_aocs_a_b on 
bfv_index_only_aocs  (cost=0.16..5.23 rows=33 width=0)
+                     Index Cond: (a < 100)
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+select count(*) from bfv_index_only_aocs where a < 100;
+ count 
+-------
+    99
+(1 row)
+
+explain select count(*) from bfv_index_only_aocs where a < 1000;
+                                                         QUERY PLAN            
                                              
+-----------------------------------------------------------------------------------------------------------------------------
+ Finalize Aggregate  (cost=19.87..19.88 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=19.81..19.86 rows=3 
width=8)
+         ->  Partial Aggregate  (cost=19.81..19.82 rows=1 width=8)
+               ->  Index Only Scan using bfv_index_only_aocs_a_b on 
bfv_index_only_aocs  (cost=0.16..18.98 rows=333 width=0)
+                     Index Cond: (a < 1000)
+ Optimizer: Postgres query optimizer
+(6 rows)
+
+select count(*) from bfv_index_only_aocs where a < 1000;
+ count 
+-------
+   999
+(1 row)
+
 -- The following tests are to verify a fix that allows ORCA to
 -- choose the bitmap index scan alternative when the predicate
 -- is in the form of `value operator cast(column)`. The fix
diff --git a/src/test/regress/expected/bfv_index_optimizer.out 
b/src/test/regress/expected/bfv_index_optimizer.out
index cf3a117271..52a5c7acc5 100644
--- a/src/test/regress/expected/bfv_index_optimizer.out
+++ b/src/test/regress/expected/bfv_index_optimizer.out
@@ -1005,7 +1005,7 @@ DROP TABLE hash_prt_tbl;
 RESET enable_seqscan;
 RESET optimizer_enable_dynamictablescan;
 --
--- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY 
only
+-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY 
only
 --
 CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, 
hash_idx_col int);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' 
as the Greenplum Database data distribution key for this table.
@@ -1037,15 +1037,13 @@ SELECT * FROM bitmap_alt WHERE bitmap_idx_col IN (3, 5);
 
 EXPLAIN (COSTS OFF)
 SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5);
-                              QUERY PLAN                              
-----------------------------------------------------------------------
+                           QUERY PLAN                           
+----------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
-   ->  Bitmap Heap Scan on bitmap_alt
-         Recheck Cond: (btree_idx_col = ANY ('{3,5}'::integer[]))
-         ->  Bitmap Index Scan on bitmap_alt_idx2
-               Index Cond: (btree_idx_col = ANY ('{3,5}'::integer[]))
+   ->  Index Scan using bitmap_alt_idx2 on bitmap_alt
+         Index Cond: (btree_idx_col = ANY ('{3,5}'::integer[]))
  Optimizer: Pivotal Optimizer (GPORCA)
-(6 rows)
+(4 rows)
 
 SELECT * FROM bitmap_alt WHERE btree_idx_col IN (3, 5);
  id | bitmap_idx_col | btree_idx_col | hash_idx_col 
@@ -1119,6 +1117,182 @@ SELECT * FROM bitmap_alt WHERE 
hash_idx_col=ALL(ARRAY[3, 5]);
 ----+----------------+---------------+--------------
 (0 rows)
 
+--
+-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table
+-- with multikey indexes only when predicate key is the first index key
+-- (similar test for non-partitioned tables in create_index)
+--
+CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1))
+PARTITION BY list(key2)
+(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION 
other);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2);
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from 
generate_series(1,500)i;
+explain (costs off)
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+                                QUERY PLAN                                
+--------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: key1
+   ->  Sort
+         Sort Key: key1
+         ->  Dynamic Index Scan on multikey_idx on pt_with_multikey_index
+               Index Cond: (key1 = ANY ('{KEY55,KEY65,KEY5}'::bpchar[]))
+               Number of partitions to scan: 3 (out of 3)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+  key1  
+--------
+ KEY5  
+ KEY5  
+ KEY5  
+ KEY55 
+ KEY55 
+ KEY55 
+ KEY65 
+ KEY65 
+ KEY65 
+(9 rows)
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+                                      QUERY PLAN                               
        
+---------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: key2
+   ->  Sort
+         Sort Key: key2
+         ->  Dynamic Index Scan on multikey_idx on pt_with_multikey_index
+               Index Cond: (key1 = 'KEY55'::bpchar)
+               Filter: ((key1 = 'KEY55'::bpchar) AND (key2 = ANY 
('{R,G}'::bpchar[])))
+               Number of partitions to scan: 2 (out of 3)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(9 rows)
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+ a  |  key1  | key2 
+----+--------+------
+ 55 | KEY55  | G
+ 55 | KEY55  | R
+(2 rows)
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+                                          QUERY PLAN                           
                
+-----------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   Merge Key: key1
+   ->  Sort
+         Sort Key: key1
+         ->  Dynamic Index Scan on multikey_idx on pt_with_multikey_index
+               Index Cond: ((key1 = ANY ('{KEY55,KEY65}'::bpchar[])) AND (key2 
= 'R'::bpchar))
+               Number of partitions to scan: 1 (out of 3)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(8 rows)
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+ a  |  key1  | key2 
+----+--------+------
+ 55 | KEY55  | R
+ 65 | KEY65  | R
+(2 rows)
+
+--
+-- Enable the index only scan in append only table.
+-- Note: expect ORCA to use seq scan rather than index only scan like planner,
+-- because ORCA hasn't yet implemented index only scan for AO/CO tables.
+--
+CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b);
+insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i;
+explain select count(*) from bfv_index_only_ao where a < 100;
+                                     QUERY PLAN                                
     
+------------------------------------------------------------------------------------
+ Aggregate  (cost=0.00..431.00 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 
width=1)
+         ->  Seq Scan on bfv_index_only_ao  (cost=0.00..431.00 rows=1 width=1)
+               Filter: (a < 100)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(5 rows)
+
+select count(*) from bfv_index_only_ao where a < 100;
+ count 
+-------
+    99
+(1 row)
+
+explain select count(*) from bfv_index_only_ao where a < 1000;
+                                     QUERY PLAN                                
     
+------------------------------------------------------------------------------------
+ Aggregate  (cost=0.00..431.00 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 
width=1)
+         ->  Seq Scan on bfv_index_only_ao  (cost=0.00..431.00 rows=1 width=1)
+               Filter: (a < 1000)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(5 rows)
+
+select count(*) from bfv_index_only_ao where a < 1000;
+ count 
+-------
+   999
+(1 row)
+
+CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, 
orientation=column);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b);
+insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i;
+explain select count(*) from bfv_index_only_aocs where a < 100;
+                                     QUERY PLAN                                
     
+------------------------------------------------------------------------------------
+ Aggregate  (cost=0.00..431.00 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 
width=1)
+         ->  Seq Scan on bfv_index_only_aocs  (cost=0.00..431.00 rows=1 
width=1)
+               Filter: (a < 100)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(5 rows)
+
+select count(*) from bfv_index_only_aocs where a < 100;
+ count 
+-------
+    99
+(1 row)
+
+explain select count(*) from bfv_index_only_aocs where a < 1000;
+                                     QUERY PLAN                                
     
+------------------------------------------------------------------------------------
+ Aggregate  (cost=0.00..431.00 rows=1 width=8)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..431.00 rows=1 
width=1)
+         ->  Seq Scan on bfv_index_only_aocs  (cost=0.00..431.00 rows=1 
width=1)
+               Filter: (a < 1000)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(5 rows)
+
+select count(*) from bfv_index_only_aocs where a < 1000;
+ count 
+-------
+   999
+(1 row)
+
 -- The following tests are to verify a fix that allows ORCA to
 -- choose the bitmap index scan alternative when the predicate
 -- is in the form of `value operator cast(column)`. The fix
diff --git a/src/test/regress/expected/create_index_optimizer.out 
b/src/test/regress/expected/create_index_optimizer.out
index 55f42000a6..067b6f1e3f 100644
--- a/src/test/regress/expected/create_index_optimizer.out
+++ b/src/test/regress/expected/create_index_optimizer.out
@@ -2029,18 +2029,14 @@ explain (costs off)
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
 ORDER BY unique1;
-                               QUERY PLAN
--------------------------------------------------------------------------
+                         QUERY PLAN                          
+-------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    Merge Key: unique1
-   ->  Sort
-         Sort Key: unique1
-         ->  Bitmap Heap Scan on tenk1
-               Recheck Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
-               ->  Bitmap Index Scan on tenk1_unique1
-                     Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
+   ->  Index Only Scan using tenk1_unique1 on tenk1
+         Index Cond: (unique1 = ANY ('{1,42,7}'::integer[]))
  Optimizer: Pivotal Optimizer (GPORCA)
-(9 rows)
+(5 rows)
 
 SELECT unique1 FROM tenk1
 WHERE unique1 IN (1,42,7)
diff --git a/src/test/regress/sql/bfv_index.sql 
b/src/test/regress/sql/bfv_index.sql
index bb026ce57b..13e17179ed 100644
--- a/src/test/regress/sql/bfv_index.sql
+++ b/src/test/regress/sql/bfv_index.sql
@@ -469,7 +469,7 @@ RESET enable_seqscan;
 RESET optimizer_enable_dynamictablescan;
 
 --
--- Test ORCA generates BitmapIndexScan alternative for ScalarArrayOpExpr ANY 
only
+-- Test ORCA generates Bitmap/IndexScan alternative for ScalarArrayOpExpr ANY 
only
 --
 
 CREATE TABLE bitmap_alt (id int, bitmap_idx_col int, btree_idx_col int, 
hash_idx_col int);
@@ -500,6 +500,73 @@ SELECT * FROM bitmap_alt WHERE btree_idx_col=ALL(ARRAY[3, 
5]);
 EXPLAIN (COSTS OFF)
 SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]);
 SELECT * FROM bitmap_alt WHERE hash_idx_col=ALL(ARRAY[3, 5]);
+
+--
+-- Test ORCA considers ScalarArrayOp in indexqual for partitioned table
+-- with multikey indexes only when predicate key is the first index key
+-- (similar test for non-partitioned tables in create_index)
+--
+CREATE TABLE pt_with_multikey_index (a int, key1 char(6), key2 char(1))
+PARTITION BY list(key2)
+(PARTITION p1 VALUES ('R'), PARTITION p2 VALUES ('G'), DEFAULT PARTITION 
other);
+
+CREATE INDEX multikey_idx on pt_with_multikey_index (key1, key2);
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'R' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'G' from 
generate_series(1,500)i;
+INSERT INTO pt_with_multikey_index SELECT i, 'KEY'||i, 'B' from 
generate_series(1,500)i;
+
+explain (costs off)
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+
+SELECT key1 FROM pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65', 'KEY5')
+ORDER BY key1;
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 = 'KEY55' AND key2 IN ('R', 'G')
+ORDER BY key2;
+
+EXPLAIN (costs off)
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+
+SELECT * FROM  pt_with_multikey_index
+WHERE key1 IN ('KEY55', 'KEY65') AND key2 = 'R'
+ORDER BY key1;
+
+--
+-- Enable the index only scan in append only table.
+-- Note: expect ORCA to use seq scan rather than index only scan like planner,
+-- because ORCA hasn't yet implemented index only scan for AO/CO tables.
+--
+CREATE TABLE bfv_index_only_ao(a int, b int) WITH (appendonly =true);
+CREATE INDEX bfv_index_only_ao_a_b on bfv_index_only_ao(a) include (b);
+
+insert into bfv_index_only_ao select i,i from generate_series(1, 10000) i;
+
+explain select count(*) from bfv_index_only_ao where a < 100;
+select count(*) from bfv_index_only_ao where a < 100;
+explain select count(*) from bfv_index_only_ao where a < 1000;
+select count(*) from bfv_index_only_ao where a < 1000;
+
+CREATE TABLE bfv_index_only_aocs(a int, b int) WITH (appendonly =true, 
orientation=column);
+CREATE INDEX bfv_index_only_aocs_a_b on bfv_index_only_aocs(a) include (b);
+
+insert into bfv_index_only_aocs select i,i from generate_series(1, 10000) i;
+
+explain select count(*) from bfv_index_only_aocs where a < 100;
+select count(*) from bfv_index_only_aocs where a < 100;
+explain select count(*) from bfv_index_only_aocs where a < 1000;
+select count(*) from bfv_index_only_aocs where a < 1000;
+
 -- The following tests are to verify a fix that allows ORCA to
 -- choose the bitmap index scan alternative when the predicate
 -- is in the form of `value operator cast(column)`. The fix


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to