This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 0600f616ec6ae3cb6f1cf64eadc87583e95137cc
Author: David Kimura <[email protected]>
AuthorDate: Thu Sep 8 23:54:48 2022 +0000

    [ORCA] Resolve merge FIXMEs in CPartitionPropagationSpec
    
    Prior to this commit, partition propogation spec stored a partition info
    list in an array that needed to stay sorted. It needed to stay sorted in
    order to compare for equality against another partition propogation spec
    where order of the stored partition info list is relevant.
    
    By using an array implementation we pay a cost of O(N log N X N) to
    insert data that is sorted on each insert and at best O(log N)) to find
    using binary search. By contrast a hash map implementation costs O(N) to
    build and O(1) to find.
    
    This commit store partition info list in a hash map.
---
 .../include/gpopt/base/CPartitionPropagationSpec.h | 15 +++-
 .../src/base/CPartitionPropagationSpec.cpp         | 86 +++++++++++-----------
 2 files changed, 55 insertions(+), 46 deletions(-)

diff --git 
a/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h 
b/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h
index 918eb94d08..e8c167bdde 100644
--- a/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h
+++ b/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h
@@ -86,11 +86,18 @@ private:
                static INT CmpFunc(const void *val1, const void *val2);
        };
 
-       using SPartPropSpecInfoArray =
-               CDynamicPtrArray<SPartPropSpecInfo, CleanupRelease>;
-
        // partition required/derived info, sorted by scanid
-       SPartPropSpecInfoArray *m_part_prop_spec_infos = nullptr;
+       using UlongToSPartPropSpecInfoMap =
+               CHashMap<ULONG, SPartPropSpecInfo, gpos::HashValue<ULONG>,
+                                gpos::Equals<ULONG>, CleanupDelete<ULONG>,
+                                CleanupRelease<SPartPropSpecInfo>>;
+
+       using UlongToSPartPropSpecInfoMapIter =
+               CHashMapIter<ULONG, SPartPropSpecInfo, gpos::HashValue<ULONG>,
+                                        gpos::Equals<ULONG>, 
CleanupDelete<ULONG>,
+                                        CleanupRelease<SPartPropSpecInfo>>;
+
+       UlongToSPartPropSpecInfoMap *m_part_prop_spec_infos = nullptr;
 
        // Present scanids (for easy lookup)
        CBitSet *m_scan_ids = nullptr;
diff --git a/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp 
b/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp
index c2778b43b3..fb0d2deb91 100644
--- a/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp
+++ b/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp
@@ -56,7 +56,7 @@ CPartitionPropagationSpec::SPartPropSpecInfo::CmpFunc(const 
void *val1,
 
 CPartitionPropagationSpec::CPartitionPropagationSpec(CMemoryPool *mp)
 {
-       m_part_prop_spec_infos = GPOS_NEW(mp) SPartPropSpecInfoArray(mp);
+       m_part_prop_spec_infos = GPOS_NEW(mp) UlongToSPartPropSpecInfoMap(mp);
        m_scan_ids = GPOS_NEW(mp) CBitSet(mp);
 }
 
@@ -84,26 +84,24 @@ CPartitionPropagationSpec::Equals(const 
CPartitionPropagationSpec *pps) const
 
        GPOS_ASSERT(m_part_prop_spec_infos != nullptr &&
                                pps->m_part_prop_spec_infos != nullptr);
-       
GPOS_ASSERT(m_part_prop_spec_infos->IsSorted(SPartPropSpecInfo::CmpFunc));
-       GPOS_ASSERT(
-               
pps->m_part_prop_spec_infos->IsSorted(SPartPropSpecInfo::CmpFunc));
 
        if (m_part_prop_spec_infos->Size() != 
pps->m_part_prop_spec_infos->Size())
        {
                return false;
        }
 
-       ULONG size = m_part_prop_spec_infos->Size();
-       for (ULONG ul = 0; ul < size; ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos);
+       UlongToSPartPropSpecInfoMapIter 
hmulpi_other(pps->m_part_prop_spec_infos);
+       while (hmulpi.Advance() && hmulpi_other.Advance())
        {
-               if (!(*m_part_prop_spec_infos)[ul]->Equals(
-                               (*pps->m_part_prop_spec_infos)[ul]))
+               const SPartPropSpecInfo *info = hmulpi.Value();
+               const SPartPropSpecInfo *info_other = hmulpi_other.Value();
+               if (!info->Equals(info_other))
                {
                        return false;
                }
        }
-
-       return true;
+       return hmulpi.Advance() == hmulpi_other.Advance();
 }
 
 CPartitionPropagationSpec::SPartPropSpecInfo *
@@ -114,18 +112,10 @@ CPartitionPropagationSpec::FindPartPropSpecInfo(ULONG 
scan_id) const
                return nullptr;
        }
 
-       // GPDB_12_MERGE_FIXME: Use binary search here instead!
-       for (ULONG ut = 0; ut < m_part_prop_spec_infos->Size(); ut++)
-       {
-               SPartPropSpecInfo *part_info = (*m_part_prop_spec_infos)[ut];
-
-               if (part_info->m_scan_id == scan_id)
-               {
-                       return part_info;
-               }
-       }
+       SPartPropSpecInfo *info = m_part_prop_spec_infos->Find(&scan_id);
+       GPOS_RTL_ASSERT(info != nullptr);
 
-       GPOS_RTL_ASSERT(!"Unreachable");
+       return info;
 }
 
 const CBitSet *
@@ -165,9 +155,7 @@ CPartitionPropagationSpec::Insert(ULONG scan_id, 
EPartPropSpecInfoType type,
        }
 
        m_scan_ids->ExchangeSet(scan_id);
-       m_part_prop_spec_infos->Append(info);
-       // GPDB_12_MERGE_FIXME: Use CKHeap or CHashMap instead of sorting every 
insert!
-       m_part_prop_spec_infos->Sort(SPartPropSpecInfo::CmpFunc);
+       m_part_prop_spec_infos->Insert(GPOS_NEW(mp) ULONG(scan_id), info);
 }
 
 void
@@ -180,9 +168,11 @@ CPartitionPropagationSpec::Insert(SPartPropSpecInfo *other)
 void
 CPartitionPropagationSpec::InsertAll(CPartitionPropagationSpec *pps)
 {
-       for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *other_info = 
(*pps->m_part_prop_spec_infos)[ul];
+               SPartPropSpecInfo *other_info =
+                       const_cast<SPartPropSpecInfo *>(hmulpi.Value());
 
                SPartPropSpecInfo *found_info =
                        FindPartPropSpecInfo(other_info->m_scan_id);
@@ -209,9 +199,11 @@ void
 CPartitionPropagationSpec::InsertAllowedConsumers(
        CPartitionPropagationSpec *pps, CBitSet *allowed_scan_ids)
 {
-       for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *other_info = 
(*pps->m_part_prop_spec_infos)[ul];
+               SPartPropSpecInfo *other_info =
+                       const_cast<SPartPropSpecInfo *>(hmulpi.Value());
 
                // only process allowed_scan_ids ...
                if (allowed_scan_ids != nullptr &&
@@ -251,9 +243,11 @@ void
 CPartitionPropagationSpec::InsertAllExcept(CPartitionPropagationSpec *pps,
                                                                                
   ULONG scan_id)
 {
-       for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *other_info = 
(*pps->m_part_prop_spec_infos)[ul];
+               SPartPropSpecInfo *other_info =
+                       const_cast<SPartPropSpecInfo *>(hmulpi.Value());
 
                if (other_info->m_scan_id == scan_id)
                {
@@ -284,9 +278,11 @@ 
CPartitionPropagationSpec::InsertAllExcept(CPartitionPropagationSpec *pps,
 void
 CPartitionPropagationSpec::InsertAllResolve(CPartitionPropagationSpec *pps)
 {
-       for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *other_info = 
(*pps->m_part_prop_spec_infos)[ul];
+               SPartPropSpecInfo *other_info =
+                       const_cast<SPartPropSpecInfo *>(hmulpi.Value());
 
                SPartPropSpecInfo *found_info =
                        FindPartPropSpecInfo(other_info->m_scan_id);
@@ -331,9 +327,10 @@ CPartitionPropagationSpec::FSatisfies(
                return true;
        }
 
-       for (ULONG ul = 0; ul < pps_reqd->m_part_prop_spec_infos->Size(); ul++)
+       UlongToSPartPropSpecInfoMapIter 
hmulpi(pps_reqd->m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *reqd_info = 
(*pps_reqd->m_part_prop_spec_infos)[ul];
+               const SPartPropSpecInfo *reqd_info = hmulpi.Value();
                SPartPropSpecInfo *found_info =
                        FindPartPropSpecInfo(reqd_info->m_scan_id);
 
@@ -360,9 +357,10 @@ CPartitionPropagationSpec::AppendEnforcers(CMemoryPool *mp,
                                                                                
   CExpressionArray *pdrgpexpr,
                                                                                
   CExpression *expr)
 {
-       for (ULONG ul = 0; ul < m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *info = (*m_part_prop_spec_infos)[ul];
+               const SPartPropSpecInfo *info = hmulpi.Value();
 
                if (info->m_type != CPartitionPropagationSpec::EpptPropagator)
                {
@@ -414,10 +412,11 @@ CPartitionPropagationSpec::OsPrint(IOstream &os) const
                return os;
        }
 
-       ULONG size = m_part_prop_spec_infos->Size();
-       for (ULONG ul = 0; ul < size; ++ul)
+       ULONG ul = 0;
+       UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *part_info = (*m_part_prop_spec_infos)[ul];
+               const SPartPropSpecInfo *part_info = hmulpi.Value();
 
                switch (part_info->m_type)
                {
@@ -435,10 +434,12 @@ CPartitionPropagationSpec::OsPrint(IOstream &os) const
                part_info->m_selector_ids->OsPrint(os);
                os << ")";
 
-               if (ul < (size - 1))
+               if (ul < (m_part_prop_spec_infos->Size() - 1))
                {
                        os << ", ";
                }
+
+               ul += 1;
        }
        return os;
 }
@@ -451,9 +452,10 @@ CPartitionPropagationSpec::ContainsAnyConsumers() const
                return false;
        }
 
-       for (ULONG ul = 0; ul < m_part_prop_spec_infos->Size(); ++ul)
+       UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos);
+       while (hmulpi.Advance())
        {
-               SPartPropSpecInfo *info = (*m_part_prop_spec_infos)[ul];
+               const SPartPropSpecInfo *info = hmulpi.Value();
 
                if (info->m_type == CPartitionPropagationSpec::EpptConsumer)
                {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to