This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 74aac059a3facd13f19f1abc70e32832d75c4f40
Author: Soumyadeep Chakraborty <[email protected]>
AuthorDate: Wed Sep 27 18:03:32 2023 -0700

    ao/co: Consider all blocks as all-visible in ORCA
    
    We never scan the underlying append-optimized table relfile for
    performing visibility checks. It's as if all blocks are all-visible. See
    cdb_estimate_rel_size(). So consider dPartialVisFrac as 0.
    
    This will make Index Only Scans more favorable on AO and CO tables.
    
    Co-authored-by: David Kimura <[email protected]>
---
 .../minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp   |  4 ++--
 .../dxl/minidump/BTreeIndex-Against-InListLarge.mdp    |  4 ++--
 .../data/dxl/minidump/DoubleNDVCardinalityEquals.mdp   |  4 ++--
 src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp  | 18 ++++++++++++++----
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp b/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp
index cd4fa44a2a..7f924dee89 100644
--- a/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp
+++ b/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp
@@ -631,7 +631,7 @@
     <dxl:Plan Id="0" SpaceSize="3">
       <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
         <dxl:Properties>
-          <dxl:Cost StartupCost="0" TotalCost="6.005581" Rows="1.000000" Width="8"/>
+          <dxl:Cost StartupCost="0" TotalCost="6.005535" Rows="1.000000" Width="8"/>
         </dxl:Properties>
         <dxl:ProjList>
           <dxl:ProjElem ColId="0" Alias="a">
@@ -642,7 +642,7 @@
         <dxl:SortingColumnList/>
         <dxl:IndexOnlyScan IndexScanDirection="Forward">
           <dxl:Properties>
-            <dxl:Cost StartupCost="0" TotalCost="6.005546" Rows="1.000000" Width="8"/>
+            <dxl:Cost StartupCost="0" TotalCost="6.005500" Rows="1.000000" Width="8"/>
           </dxl:Properties>
           <dxl:ProjList>
             <dxl:ProjElem ColId="0" Alias="a">
diff --git a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp
index f762df8fb5..2f4654fcb5 100644
--- a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp
+++ b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp
@@ -572,7 +572,7 @@
     <dxl:Plan Id="0" SpaceSize="3">
       <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
         <dxl:Properties>
-          <dxl:Cost StartupCost="0" TotalCost="6.072012" Rows="13.000000" Width="4"/>
+          <dxl:Cost StartupCost="0" TotalCost="6.071726" Rows="13.000000" Width="4"/>
         </dxl:Properties>
         <dxl:ProjList>
           <dxl:ProjElem ColId="0" Alias="a">
@@ -583,7 +583,7 @@
         <dxl:SortingColumnList/>
         <dxl:IndexOnlyScan IndexScanDirection="Forward">
           <dxl:Properties>
-            <dxl:Cost StartupCost="0" TotalCost="6.071785" Rows="13.000000" Width="4"/>
+            <dxl:Cost StartupCost="0" TotalCost="6.071500" Rows="13.000000" Width="4"/>
           </dxl:Properties>
           <dxl:ProjList>
             <dxl:ProjElem ColId="0" Alias="a">
diff --git a/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp b/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp
index 8f48c8f420..1fcfb1c9c0 100644
--- a/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp
+++ b/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp
@@ -216,7 +216,7 @@
     <dxl:Plan Id="0" SpaceSize="3">
       <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1">
         <dxl:Properties>
-          <dxl:Cost StartupCost="0" TotalCost="15.365435" Rows="1683.942093" Width="8"/>
+          <dxl:Cost StartupCost="0" TotalCost="15.320238" Rows="1683.942093" Width="8"/>
         </dxl:Properties>
         <dxl:ProjList>
           <dxl:ProjElem ColId="0" Alias="a">
@@ -227,7 +227,7 @@
         <dxl:SortingColumnList/>
         <dxl:IndexOnlyScan IndexScanDirection="Forward">
           <dxl:Properties>
-            <dxl:Cost StartupCost="0" TotalCost="15.306879" Rows="1683.942093" Width="8"/>
+            <dxl:Cost StartupCost="0" TotalCost="15.261682" Rows="1683.942093" Width="8"/>
           </dxl:Properties>
           <dxl:ProjList>
             <dxl:ProjElem ColId="0" Alias="a">
diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
index 89fbc66a17..b3fd7da8b4 100644
--- a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
+++ b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp
@@ -1846,6 +1846,10 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED,     // mp
        const CDouble dTableWidth =
                CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width();
 
+       BOOL isAO = CPhysicalScan::PopConvert(exprhdl.Pop())
+                                       ->Ptabdesc()
+                                       ->IsAORowOrColTable();
+
        CDouble dIndexFilterCostUnit =
                pcmgpdb->GetCostModelParams()
                        ->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit)
@@ -1866,9 +1870,7 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED,      // mp
        GPOS_ASSERT(0 < dIndexScanTupCostUnit);
        GPOS_ASSERT(0 < dIndexScanTupRandomFactor);
 
-       if (CPhysicalScan::PopConvert(exprhdl.Pop())
-                       ->Ptabdesc()
-                       ->IsAORowOrColTable())
+       if (isAO)
        {
                // AO specific costs related to index-scan/index-only-scan:
                //
@@ -1953,9 +1955,17 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED,     // mp
        // approximately equal to the precent of tuples in all-visible blocks
        // compared to total blocks. It is approximate because there is no
        // guarantee that blocks are equally filled with live tuples.
+       //
+       // We never scan the underlying append-optimized table relfile for
+       // performing visibility checks. It's as if all blocks are all-visible. See
+       // cdb_estimate_rel_size(). So consider dPartialVisFrac as 0.
 
        CDouble dPartialVisFrac(1);
-       if (stats->RelPages() != 0)
+       if (isAO)
+       {
+               dPartialVisFrac = 0;
+       }
+       else if (stats->RelPages() != 0)
        {
                dPartialVisFrac =
                        1 - (CDouble(stats->RelAllVisible()) / CDouble(stats->RelPages()));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to