This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 0600f616ec6ae3cb6f1cf64eadc87583e95137cc Author: David Kimura <[email protected]> AuthorDate: Thu Sep 8 23:54:48 2022 +0000 [ORCA] Resolve merge FIXMEs in CPartitionPropagationSpec Prior to this commit, partition propogation spec stored a partition info list in an array that needed to stay sorted. It needed to stay sorted in order to compare for equality against another partition propogation spec where order of the stored partition info list is relevant. By using an array implementation we pay a cost of O(N log N X N) to insert data that is sorted on each insert and at best O(log N)) to find using binary search. By contrast a hash map implementation costs O(N) to build and O(1) to find. This commit store partition info list in a hash map. --- .../include/gpopt/base/CPartitionPropagationSpec.h | 15 +++- .../src/base/CPartitionPropagationSpec.cpp | 86 +++++++++++----------- 2 files changed, 55 insertions(+), 46 deletions(-) diff --git a/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h b/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h index 918eb94d08..e8c167bdde 100644 --- a/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h +++ b/src/backend/gporca/libgpopt/include/gpopt/base/CPartitionPropagationSpec.h @@ -86,11 +86,18 @@ private: static INT CmpFunc(const void *val1, const void *val2); }; - using SPartPropSpecInfoArray = - CDynamicPtrArray<SPartPropSpecInfo, CleanupRelease>; - // partition required/derived info, sorted by scanid - SPartPropSpecInfoArray *m_part_prop_spec_infos = nullptr; + using UlongToSPartPropSpecInfoMap = + CHashMap<ULONG, SPartPropSpecInfo, gpos::HashValue<ULONG>, + gpos::Equals<ULONG>, CleanupDelete<ULONG>, + CleanupRelease<SPartPropSpecInfo>>; + + using UlongToSPartPropSpecInfoMapIter = + CHashMapIter<ULONG, SPartPropSpecInfo, gpos::HashValue<ULONG>, + gpos::Equals<ULONG>, CleanupDelete<ULONG>, + CleanupRelease<SPartPropSpecInfo>>; + + UlongToSPartPropSpecInfoMap *m_part_prop_spec_infos = nullptr; // Present scanids (for easy lookup) CBitSet *m_scan_ids = nullptr; diff --git a/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp b/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp index c2778b43b3..fb0d2deb91 100644 --- a/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp +++ b/src/backend/gporca/libgpopt/src/base/CPartitionPropagationSpec.cpp @@ -56,7 +56,7 @@ CPartitionPropagationSpec::SPartPropSpecInfo::CmpFunc(const void *val1, CPartitionPropagationSpec::CPartitionPropagationSpec(CMemoryPool *mp) { - m_part_prop_spec_infos = GPOS_NEW(mp) SPartPropSpecInfoArray(mp); + m_part_prop_spec_infos = GPOS_NEW(mp) UlongToSPartPropSpecInfoMap(mp); m_scan_ids = GPOS_NEW(mp) CBitSet(mp); } @@ -84,26 +84,24 @@ CPartitionPropagationSpec::Equals(const CPartitionPropagationSpec *pps) const GPOS_ASSERT(m_part_prop_spec_infos != nullptr && pps->m_part_prop_spec_infos != nullptr); - GPOS_ASSERT(m_part_prop_spec_infos->IsSorted(SPartPropSpecInfo::CmpFunc)); - GPOS_ASSERT( - pps->m_part_prop_spec_infos->IsSorted(SPartPropSpecInfo::CmpFunc)); if (m_part_prop_spec_infos->Size() != pps->m_part_prop_spec_infos->Size()) { return false; } - ULONG size = m_part_prop_spec_infos->Size(); - for (ULONG ul = 0; ul < size; ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos); + UlongToSPartPropSpecInfoMapIter hmulpi_other(pps->m_part_prop_spec_infos); + while (hmulpi.Advance() && hmulpi_other.Advance()) { - if (!(*m_part_prop_spec_infos)[ul]->Equals( - (*pps->m_part_prop_spec_infos)[ul])) + const SPartPropSpecInfo *info = hmulpi.Value(); + const SPartPropSpecInfo *info_other = hmulpi_other.Value(); + if (!info->Equals(info_other)) { return false; } } - - return true; + return hmulpi.Advance() == hmulpi_other.Advance(); } CPartitionPropagationSpec::SPartPropSpecInfo * @@ -114,18 +112,10 @@ CPartitionPropagationSpec::FindPartPropSpecInfo(ULONG scan_id) const return nullptr; } - // GPDB_12_MERGE_FIXME: Use binary search here instead! - for (ULONG ut = 0; ut < m_part_prop_spec_infos->Size(); ut++) - { - SPartPropSpecInfo *part_info = (*m_part_prop_spec_infos)[ut]; - - if (part_info->m_scan_id == scan_id) - { - return part_info; - } - } + SPartPropSpecInfo *info = m_part_prop_spec_infos->Find(&scan_id); + GPOS_RTL_ASSERT(info != nullptr); - GPOS_RTL_ASSERT(!"Unreachable"); + return info; } const CBitSet * @@ -165,9 +155,7 @@ CPartitionPropagationSpec::Insert(ULONG scan_id, EPartPropSpecInfoType type, } m_scan_ids->ExchangeSet(scan_id); - m_part_prop_spec_infos->Append(info); - // GPDB_12_MERGE_FIXME: Use CKHeap or CHashMap instead of sorting every insert! - m_part_prop_spec_infos->Sort(SPartPropSpecInfo::CmpFunc); + m_part_prop_spec_infos->Insert(GPOS_NEW(mp) ULONG(scan_id), info); } void @@ -180,9 +168,11 @@ CPartitionPropagationSpec::Insert(SPartPropSpecInfo *other) void CPartitionPropagationSpec::InsertAll(CPartitionPropagationSpec *pps) { - for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *other_info = (*pps->m_part_prop_spec_infos)[ul]; + SPartPropSpecInfo *other_info = + const_cast<SPartPropSpecInfo *>(hmulpi.Value()); SPartPropSpecInfo *found_info = FindPartPropSpecInfo(other_info->m_scan_id); @@ -209,9 +199,11 @@ void CPartitionPropagationSpec::InsertAllowedConsumers( CPartitionPropagationSpec *pps, CBitSet *allowed_scan_ids) { - for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *other_info = (*pps->m_part_prop_spec_infos)[ul]; + SPartPropSpecInfo *other_info = + const_cast<SPartPropSpecInfo *>(hmulpi.Value()); // only process allowed_scan_ids ... if (allowed_scan_ids != nullptr && @@ -251,9 +243,11 @@ void CPartitionPropagationSpec::InsertAllExcept(CPartitionPropagationSpec *pps, ULONG scan_id) { - for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *other_info = (*pps->m_part_prop_spec_infos)[ul]; + SPartPropSpecInfo *other_info = + const_cast<SPartPropSpecInfo *>(hmulpi.Value()); if (other_info->m_scan_id == scan_id) { @@ -284,9 +278,11 @@ CPartitionPropagationSpec::InsertAllExcept(CPartitionPropagationSpec *pps, void CPartitionPropagationSpec::InsertAllResolve(CPartitionPropagationSpec *pps) { - for (ULONG ul = 0; ul < pps->m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(pps->m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *other_info = (*pps->m_part_prop_spec_infos)[ul]; + SPartPropSpecInfo *other_info = + const_cast<SPartPropSpecInfo *>(hmulpi.Value()); SPartPropSpecInfo *found_info = FindPartPropSpecInfo(other_info->m_scan_id); @@ -331,9 +327,10 @@ CPartitionPropagationSpec::FSatisfies( return true; } - for (ULONG ul = 0; ul < pps_reqd->m_part_prop_spec_infos->Size(); ul++) + UlongToSPartPropSpecInfoMapIter hmulpi(pps_reqd->m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *reqd_info = (*pps_reqd->m_part_prop_spec_infos)[ul]; + const SPartPropSpecInfo *reqd_info = hmulpi.Value(); SPartPropSpecInfo *found_info = FindPartPropSpecInfo(reqd_info->m_scan_id); @@ -360,9 +357,10 @@ CPartitionPropagationSpec::AppendEnforcers(CMemoryPool *mp, CExpressionArray *pdrgpexpr, CExpression *expr) { - for (ULONG ul = 0; ul < m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *info = (*m_part_prop_spec_infos)[ul]; + const SPartPropSpecInfo *info = hmulpi.Value(); if (info->m_type != CPartitionPropagationSpec::EpptPropagator) { @@ -414,10 +412,11 @@ CPartitionPropagationSpec::OsPrint(IOstream &os) const return os; } - ULONG size = m_part_prop_spec_infos->Size(); - for (ULONG ul = 0; ul < size; ++ul) + ULONG ul = 0; + UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *part_info = (*m_part_prop_spec_infos)[ul]; + const SPartPropSpecInfo *part_info = hmulpi.Value(); switch (part_info->m_type) { @@ -435,10 +434,12 @@ CPartitionPropagationSpec::OsPrint(IOstream &os) const part_info->m_selector_ids->OsPrint(os); os << ")"; - if (ul < (size - 1)) + if (ul < (m_part_prop_spec_infos->Size() - 1)) { os << ", "; } + + ul += 1; } return os; } @@ -451,9 +452,10 @@ CPartitionPropagationSpec::ContainsAnyConsumers() const return false; } - for (ULONG ul = 0; ul < m_part_prop_spec_infos->Size(); ++ul) + UlongToSPartPropSpecInfoMapIter hmulpi(m_part_prop_spec_infos); + while (hmulpi.Advance()) { - SPartPropSpecInfo *info = (*m_part_prop_spec_infos)[ul]; + const SPartPropSpecInfo *info = hmulpi.Value(); if (info->m_type == CPartitionPropagationSpec::EpptConsumer) { --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
