This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 74aac059a3facd13f19f1abc70e32832d75c4f40 Author: Soumyadeep Chakraborty <[email protected]> AuthorDate: Wed Sep 27 18:03:32 2023 -0700 ao/co: Consider all blocks as all-visible in ORCA We never scan the underlying append-optimized table relfile for performing visibility checks. It's as if all blocks are all-visible. See cdb_estimate_rel_size(). So consider dPartialVisFrac as 0. This will make Index Only Scans more favorable on AO and CO tables. Co-authored-by: David Kimura <[email protected]> --- .../minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp | 4 ++-- .../dxl/minidump/BTreeIndex-Against-InListLarge.mdp | 4 ++-- .../data/dxl/minidump/DoubleNDVCardinalityEquals.mdp | 4 ++-- src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp | 18 ++++++++++++++---- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp b/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp index cd4fa44a2a..7f924dee89 100644 --- a/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp +++ b/src/backend/gporca/data/dxl/minidump/AllowIndexOnlyScanOnAppendOnlyTable.mdp @@ -631,7 +631,7 @@ <dxl:Plan Id="0" SpaceSize="3"> <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="6.005581" Rows="1.000000" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="6.005535" Rows="1.000000" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -642,7 +642,7 @@ <dxl:SortingColumnList/> <dxl:IndexOnlyScan IndexScanDirection="Forward"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="6.005546" Rows="1.000000" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="6.005500" Rows="1.000000" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> diff --git a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp index f762df8fb5..2f4654fcb5 100644 --- a/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp +++ b/src/backend/gporca/data/dxl/minidump/BTreeIndex-Against-InListLarge.mdp @@ -572,7 +572,7 @@ <dxl:Plan Id="0" SpaceSize="3"> <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="6.072012" Rows="13.000000" Width="4"/> + <dxl:Cost StartupCost="0" TotalCost="6.071726" Rows="13.000000" Width="4"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -583,7 +583,7 @@ <dxl:SortingColumnList/> <dxl:IndexOnlyScan IndexScanDirection="Forward"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="6.071785" Rows="13.000000" Width="4"/> + <dxl:Cost StartupCost="0" TotalCost="6.071500" Rows="13.000000" Width="4"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> diff --git a/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp b/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp index 8f48c8f420..1fcfb1c9c0 100644 --- a/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp +++ b/src/backend/gporca/data/dxl/minidump/DoubleNDVCardinalityEquals.mdp @@ -216,7 +216,7 @@ <dxl:Plan Id="0" SpaceSize="3"> <dxl:GatherMotion InputSegments="0,1,2" OutputSegments="-1"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="15.365435" Rows="1683.942093" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="15.320238" Rows="1683.942093" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> @@ -227,7 +227,7 @@ <dxl:SortingColumnList/> <dxl:IndexOnlyScan IndexScanDirection="Forward"> <dxl:Properties> - <dxl:Cost StartupCost="0" TotalCost="15.306879" Rows="1683.942093" Width="8"/> + <dxl:Cost StartupCost="0" TotalCost="15.261682" Rows="1683.942093" Width="8"/> </dxl:Properties> <dxl:ProjList> <dxl:ProjElem ColId="0" Alias="a"> diff --git a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp index 89fbc66a17..b3fd7da8b4 100644 --- a/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp +++ b/src/backend/gporca/libgpdbcost/src/CCostModelGPDB.cpp @@ -1846,6 +1846,10 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED, // mp const CDouble dTableWidth = CPhysicalScan::PopConvert(pop)->PstatsBaseTable()->Width(); + BOOL isAO = CPhysicalScan::PopConvert(exprhdl.Pop()) + ->Ptabdesc() + ->IsAORowOrColTable(); + CDouble dIndexFilterCostUnit = pcmgpdb->GetCostModelParams() ->PcpLookup(CCostModelParamsGPDB::EcpIndexFilterCostUnit) @@ -1866,9 +1870,7 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED, // mp GPOS_ASSERT(0 < dIndexScanTupCostUnit); GPOS_ASSERT(0 < dIndexScanTupRandomFactor); - if (CPhysicalScan::PopConvert(exprhdl.Pop()) - ->Ptabdesc() - ->IsAORowOrColTable()) + if (isAO) { // AO specific costs related to index-scan/index-only-scan: // @@ -1953,9 +1955,17 @@ CCostModelGPDB::CostIndexOnlyScan(CMemoryPool *mp GPOS_UNUSED, // mp // approximately equal to the precent of tuples in all-visible blocks // compared to total blocks. It is approximate because there is no // guarantee that blocks are equally filled with live tuples. + // + // We never scan the underlying append-optimized table relfile for + // performing visibility checks. It's as if all blocks are all-visible. See + // cdb_estimate_rel_size(). So consider dPartialVisFrac as 0. CDouble dPartialVisFrac(1); - if (stats->RelPages() != 0) + if (isAO) + { + dPartialVisFrac = 0; + } + else if (stats->RelPages() != 0) { dPartialVisFrac = 1 - (CDouble(stats->RelAllVisible()) / CDouble(stats->RelPages())); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
