This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit fcb9d008afd5d5e5baf5c2f80a0c40ec8ae561a7 Author: Chris Hajas <[email protected]> AuthorDate: Mon Aug 15 12:36:28 2022 -0700 Fix improper copying of group statistics in Orca (#13926) In Orca, we copy group statistics to avoid costly stats re-deriving. However, we unintentionally didn't copy the relpages, relallvisible, and rebinds fields. These fields are used in costing, and in some cases the wrong rebind value caused us to improperly cost a NLJ much lower and Orca selected a non-optimal plan. --- .../gporca/libgpopt/src/mdcache/CMDAccessor.cpp | 3 +- .../include/naucrates/statistics/CStatistics.h | 3 +- .../libnaucrates/src/statistics/CStatistics.cpp | 17 +++---- .../unittest/dxl/statistics/CStatisticsTest.h | 3 ++ .../unittest/dxl/statistics/CStatisticsTest.cpp | 54 ++++++++++++++++++++++ 5 files changed, 70 insertions(+), 10 deletions(-) diff --git a/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp b/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp index 286e6baff3..7582705088 100644 --- a/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp +++ b/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp @@ -1117,7 +1117,8 @@ CMDAccessor::Pstats(CMemoryPool *mp, IMDId *rel_mdid, CColRefSet *pcrsHist, return GPOS_NEW(mp) CStatistics( mp, col_histogram_mapping, colid_width_mapping, rows, fEmptyTable, - pmdRelStats->RelPages(), pmdRelStats->RelAllVisible()); + pmdRelStats->RelPages(), pmdRelStats->RelAllVisible(), + 1.0 /* default rebinds */, 0 /* default predicates*/); } diff --git a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h index 8f0a2fc4b6..bf6be0a1b4 100644 --- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h +++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h @@ -143,7 +143,8 @@ public: CStatistics(CMemoryPool *mp, UlongToHistogramMap *col_histogram_mapping, UlongToDoubleMap *colid_width_mapping, CDouble rows, - BOOL is_empty, ULONG relpages, ULONG relallvisible); + BOOL is_empty, ULONG relpages, ULONG relallvisible, + CDouble rebinds, ULONG num_predicates); // dtor diff --git a/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp b/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp index 3860ee0ffa..ce18411196 100644 --- a/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp +++ b/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp @@ -87,7 +87,8 @@ CStatistics::CStatistics(CMemoryPool *mp, CStatistics::CStatistics(CMemoryPool *mp, UlongToHistogramMap *col_histogram_mapping, UlongToDoubleMap *colid_width_mapping, CDouble rows, - BOOL is_empty, ULONG relpages, ULONG relallvisible) + BOOL is_empty, ULONG relpages, ULONG relallvisible, + CDouble rebinds, ULONG num_predicates) : m_colid_histogram_mapping(col_histogram_mapping), m_colid_width_mapping(colid_width_mapping), m_rows(rows), @@ -95,9 +96,8 @@ CStatistics::CStatistics(CMemoryPool *mp, m_empty(is_empty), m_relpages(relpages), m_relallvisible(relallvisible), - m_num_rebinds( - 1.0), // by default, a stats object is rebound to parameters only once - m_num_predicates(0), + m_num_rebinds(rebinds), + m_num_predicates(num_predicates), m_src_upper_bound_NDVs(nullptr) { GPOS_ASSERT(nullptr != m_colid_histogram_mapping); @@ -570,9 +570,9 @@ CStatistics::ScaleStats(CMemoryPool *mp, CDouble factor) const CDouble scaled_num_rows = m_rows * factor; // create a scaled stats object - CStatistics *scaled_stats = - GPOS_NEW(mp) CStatistics(mp, histograms_new, widths_new, - scaled_num_rows, IsEmpty(), m_num_predicates); + CStatistics *scaled_stats = GPOS_NEW(mp) CStatistics( + mp, histograms_new, widths_new, scaled_num_rows, IsEmpty(), RelPages(), + RelAllVisible(), NumRebinds(), m_num_predicates); // In the output statistics object, the upper bound source cardinality of the scaled column // cannot be greater than the the upper bound source cardinality information maintained in the input @@ -605,7 +605,8 @@ CStatistics::CopyStatsWithRemap(CMemoryPool *mp, // create a copy of the stats object CStatistics *stats_copy = GPOS_NEW(mp) CStatistics( - mp, histograms_new, widths_new, m_rows, IsEmpty(), m_num_predicates); + mp, histograms_new, widths_new, m_rows, IsEmpty(), RelPages(), + RelAllVisible(), NumRebinds(), m_num_predicates); // In the output statistics object, the upper bound source cardinality of the join column // cannot be greater than the the upper bound source cardinality information maintained in the input diff --git a/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h b/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h index 45b39f9897..f2946f49f3 100644 --- a/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h +++ b/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h @@ -119,6 +119,9 @@ public: // GbAgg test when grouping on repeated columns static GPOS_RESULT EresUnittest_GbAggWithRepeatedGbCols(); + // test that stats copy methods copy all fields + static GPOS_RESULT EresUnittest_CStatisticsCopy(); + }; // class CStatisticsTest } // namespace gpnaucrates diff --git a/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp b/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp index 2ae6fc9acd..aafda5dd3b 100644 --- a/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp +++ b/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp @@ -58,6 +58,8 @@ CStatisticsTest::EresUnittest() CUnittest rgutSharedOptCtxt[] = { GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsBasic), GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_UnionAll), + GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsCopy), + // TODO, Mar 18 2013 temporarily disabling the test // GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsSelectDerivation), }; @@ -697,4 +699,56 @@ CStatisticsTest::EresUnittest_CStatisticsSelectDerivation() ); } +// test that stats copy methods copy all fields +GPOS_RESULT +CStatisticsTest::EresUnittest_CStatisticsCopy() +{ + CAutoMemoryPool amp; + CMemoryPool *mp = amp.Pmp(); + + // create another statistics structure with a single int4 column with id 10 + UlongToHistogramMap *phmulhist = GPOS_NEW(mp) UlongToHistogramMap(mp); + phmulhist->Insert(GPOS_NEW(mp) ULONG(10), PhistExampleInt4Dim(mp)); + + UlongToDoubleMap *phmuldoubleWidth = GPOS_NEW(mp) UlongToDoubleMap(mp); + phmuldoubleWidth->Insert(GPOS_NEW(mp) ULONG(10), GPOS_NEW(mp) CDouble(4.0)); + + CStatistics *pstats = GPOS_NEW(mp) CStatistics( + mp, phmulhist, phmuldoubleWidth, 100.0 /* rows */, false /* is_empty */, + ULONG(5) /* relpages */, ULONG(10) /* relallvisible */, + CDouble(100.0) /* rebinds */, ULONG(3) /* num predicates */); + + IStatistics *stats_copy = pstats->CopyStats(mp); + + UlongToColRefMap *colref_mapping = GPOS_NEW(mp) UlongToColRefMap(mp); + IStatistics *stats_copy_remap = + pstats->CopyStatsWithRemap(mp, colref_mapping, false); + + GPOS_RESULT eres = GPOS_OK; + if (pstats->Width() != stats_copy->Width() || + pstats->NumRebinds() != stats_copy->NumRebinds() || + pstats->RelPages() != stats_copy->RelPages() || + pstats->RelAllVisible() != stats_copy->RelAllVisible() || + pstats->GetNumberOfPredicates() != stats_copy->GetNumberOfPredicates()) + { + eres = GPOS_FAILED; + } + + if (pstats->Width() != stats_copy_remap->Width() || + pstats->NumRebinds() != stats_copy_remap->NumRebinds() || + pstats->RelPages() != stats_copy_remap->RelPages() || + pstats->RelAllVisible() != stats_copy_remap->RelAllVisible() || + pstats->GetNumberOfPredicates() != + stats_copy_remap->GetNumberOfPredicates()) + { + eres = GPOS_FAILED; + } + stats_copy->Release(); + stats_copy_remap->Release(); + pstats->Release(); + colref_mapping->Release(); + + return eres; +} + // EOF --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
