This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit fcb9d008afd5d5e5baf5c2f80a0c40ec8ae561a7
Author: Chris Hajas <[email protected]>
AuthorDate: Mon Aug 15 12:36:28 2022 -0700

    Fix improper copying of group statistics in Orca (#13926)
    
    In Orca, we copy group statistics to avoid costly stats re-deriving.
    However, we unintentionally didn't copy the relpages, relallvisible, and
    rebinds fields.  These fields are used in costing, and in some cases the
    wrong rebind value caused us to improperly cost a NLJ much lower and
    Orca selected a non-optimal plan.
---
 .../gporca/libgpopt/src/mdcache/CMDAccessor.cpp    |  3 +-
 .../include/naucrates/statistics/CStatistics.h     |  3 +-
 .../libnaucrates/src/statistics/CStatistics.cpp    | 17 +++----
 .../unittest/dxl/statistics/CStatisticsTest.h      |  3 ++
 .../unittest/dxl/statistics/CStatisticsTest.cpp    | 54 ++++++++++++++++++++++
 5 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp 
b/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp
index 286e6baff3..7582705088 100644
--- a/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp
+++ b/src/backend/gporca/libgpopt/src/mdcache/CMDAccessor.cpp
@@ -1117,7 +1117,8 @@ CMDAccessor::Pstats(CMemoryPool *mp, IMDId *rel_mdid, 
CColRefSet *pcrsHist,
 
        return GPOS_NEW(mp) CStatistics(
                mp, col_histogram_mapping, colid_width_mapping, rows, 
fEmptyTable,
-               pmdRelStats->RelPages(), pmdRelStats->RelAllVisible());
+               pmdRelStats->RelPages(), pmdRelStats->RelAllVisible(),
+               1.0 /* default rebinds */, 0 /* default predicates*/);
 }
 
 
diff --git 
a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h 
b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h
index 8f0a2fc4b6..bf6be0a1b4 100644
--- a/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h
+++ b/src/backend/gporca/libnaucrates/include/naucrates/statistics/CStatistics.h
@@ -143,7 +143,8 @@ public:
 
        CStatistics(CMemoryPool *mp, UlongToHistogramMap *col_histogram_mapping,
                                UlongToDoubleMap *colid_width_mapping, CDouble 
rows,
-                               BOOL is_empty, ULONG relpages, ULONG 
relallvisible);
+                               BOOL is_empty, ULONG relpages, ULONG 
relallvisible,
+                               CDouble rebinds, ULONG num_predicates);
 
 
        // dtor
diff --git a/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp 
b/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp
index 3860ee0ffa..ce18411196 100644
--- a/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp
+++ b/src/backend/gporca/libnaucrates/src/statistics/CStatistics.cpp
@@ -87,7 +87,8 @@ CStatistics::CStatistics(CMemoryPool *mp,
 CStatistics::CStatistics(CMemoryPool *mp,
                                                 UlongToHistogramMap 
*col_histogram_mapping,
                                                 UlongToDoubleMap 
*colid_width_mapping, CDouble rows,
-                                                BOOL is_empty, ULONG relpages, 
ULONG relallvisible)
+                                                BOOL is_empty, ULONG relpages, 
ULONG relallvisible,
+                                                CDouble rebinds, ULONG 
num_predicates)
        : m_colid_histogram_mapping(col_histogram_mapping),
          m_colid_width_mapping(colid_width_mapping),
          m_rows(rows),
@@ -95,9 +96,8 @@ CStatistics::CStatistics(CMemoryPool *mp,
          m_empty(is_empty),
          m_relpages(relpages),
          m_relallvisible(relallvisible),
-         m_num_rebinds(
-                 1.0),  // by default, a stats object is rebound to parameters 
only once
-         m_num_predicates(0),
+         m_num_rebinds(rebinds),
+         m_num_predicates(num_predicates),
          m_src_upper_bound_NDVs(nullptr)
 {
        GPOS_ASSERT(nullptr != m_colid_histogram_mapping);
@@ -570,9 +570,9 @@ CStatistics::ScaleStats(CMemoryPool *mp, CDouble factor) 
const
        CDouble scaled_num_rows = m_rows * factor;
 
        // create a scaled stats object
-       CStatistics *scaled_stats =
-               GPOS_NEW(mp) CStatistics(mp, histograms_new, widths_new,
-                                                                
scaled_num_rows, IsEmpty(), m_num_predicates);
+       CStatistics *scaled_stats = GPOS_NEW(mp) CStatistics(
+               mp, histograms_new, widths_new, scaled_num_rows, IsEmpty(), 
RelPages(),
+               RelAllVisible(), NumRebinds(), m_num_predicates);
 
        // In the output statistics object, the upper bound source cardinality 
of the scaled column
        // cannot be greater than the the upper bound source cardinality 
information maintained in the input
@@ -605,7 +605,8 @@ CStatistics::CopyStatsWithRemap(CMemoryPool *mp,
 
        // create a copy of the stats object
        CStatistics *stats_copy = GPOS_NEW(mp) CStatistics(
-               mp, histograms_new, widths_new, m_rows, IsEmpty(), 
m_num_predicates);
+               mp, histograms_new, widths_new, m_rows, IsEmpty(), RelPages(),
+               RelAllVisible(), NumRebinds(), m_num_predicates);
 
        // In the output statistics object, the upper bound source cardinality 
of the join column
        // cannot be greater than the the upper bound source cardinality 
information maintained in the input
diff --git 
a/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h 
b/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h
index 45b39f9897..f2946f49f3 100644
--- 
a/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h
+++ 
b/src/backend/gporca/server/include/unittest/dxl/statistics/CStatisticsTest.h
@@ -119,6 +119,9 @@ public:
        // GbAgg test when grouping on repeated columns
        static GPOS_RESULT EresUnittest_GbAggWithRepeatedGbCols();
 
+       // test that stats copy methods copy all fields
+       static GPOS_RESULT EresUnittest_CStatisticsCopy();
+
 
 };     // class CStatisticsTest
 }  // namespace gpnaucrates
diff --git 
a/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp 
b/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp
index 2ae6fc9acd..aafda5dd3b 100644
--- a/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp
+++ b/src/backend/gporca/server/src/unittest/dxl/statistics/CStatisticsTest.cpp
@@ -58,6 +58,8 @@ CStatisticsTest::EresUnittest()
        CUnittest rgutSharedOptCtxt[] = {
                
GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsBasic),
                GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_UnionAll),
+               
GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsCopy),
+
                // TODO,  Mar 18 2013 temporarily disabling the test
                // 
GPOS_UNITTEST_FUNC(CStatisticsTest::EresUnittest_CStatisticsSelectDerivation),
        };
@@ -697,4 +699,56 @@ CStatisticsTest::EresUnittest_CStatisticsSelectDerivation()
        );
 }
 
+// test that stats copy methods copy all fields
+GPOS_RESULT
+CStatisticsTest::EresUnittest_CStatisticsCopy()
+{
+       CAutoMemoryPool amp;
+       CMemoryPool *mp = amp.Pmp();
+
+       // create another statistics structure with a single int4 column with 
id 10
+       UlongToHistogramMap *phmulhist = GPOS_NEW(mp) UlongToHistogramMap(mp);
+       phmulhist->Insert(GPOS_NEW(mp) ULONG(10), PhistExampleInt4Dim(mp));
+
+       UlongToDoubleMap *phmuldoubleWidth = GPOS_NEW(mp) UlongToDoubleMap(mp);
+       phmuldoubleWidth->Insert(GPOS_NEW(mp) ULONG(10), GPOS_NEW(mp) 
CDouble(4.0));
+
+       CStatistics *pstats = GPOS_NEW(mp) CStatistics(
+               mp, phmulhist, phmuldoubleWidth, 100.0 /* rows */, false /* 
is_empty */,
+               ULONG(5) /* relpages */, ULONG(10) /* relallvisible */,
+               CDouble(100.0) /* rebinds */, ULONG(3) /* num predicates */);
+
+       IStatistics *stats_copy = pstats->CopyStats(mp);
+
+       UlongToColRefMap *colref_mapping = GPOS_NEW(mp) UlongToColRefMap(mp);
+       IStatistics *stats_copy_remap =
+               pstats->CopyStatsWithRemap(mp, colref_mapping, false);
+
+       GPOS_RESULT eres = GPOS_OK;
+       if (pstats->Width() != stats_copy->Width() ||
+               pstats->NumRebinds() != stats_copy->NumRebinds() ||
+               pstats->RelPages() != stats_copy->RelPages() ||
+               pstats->RelAllVisible() != stats_copy->RelAllVisible() ||
+               pstats->GetNumberOfPredicates() != 
stats_copy->GetNumberOfPredicates())
+       {
+               eres = GPOS_FAILED;
+       }
+
+       if (pstats->Width() != stats_copy_remap->Width() ||
+               pstats->NumRebinds() != stats_copy_remap->NumRebinds() ||
+               pstats->RelPages() != stats_copy_remap->RelPages() ||
+               pstats->RelAllVisible() != stats_copy_remap->RelAllVisible() ||
+               pstats->GetNumberOfPredicates() !=
+                       stats_copy_remap->GetNumberOfPredicates())
+       {
+               eres = GPOS_FAILED;
+       }
+       stats_copy->Release();
+       stats_copy_remap->Release();
+       pstats->Release();
+       colref_mapping->Release();
+
+       return eres;
+}
+
 // EOF


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to