This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 5fd7ed78e976ae54919fe5cd961b66ef53ec1368 Author: zhoujiaqi <[email protected]> AuthorDate: Thu Jan 2 14:36:15 2025 +0800 ORCA: Support multiple grouping sets distinct `group by distinct` behaved the same as `group by`, because the distinct flag was not passed into `TranslateGroupingSets`. In the old CBDB version, ORCA did not have this problem, because the old version of ORCA did not support multiple grouping sets (it would fall back). Now that multiple grouping sets are supported, it is also necessary to support distinct on multiple grouping sets. --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 6 ++-- src/backend/gpopt/translate/CTranslatorUtils.cpp | 34 ++++++++++++++++++++++ .../gpopt/translate/CTranslatorQueryToDXL.h | 2 +- src/include/gpopt/translate/CTranslatorUtils.h | 2 +- 4 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index fb293964fe..4afe36172e 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -604,7 +604,7 @@ CTranslatorQueryToDXL::TranslateSelectQueryToDXL() { child_dxlnode = TranslateGroupingSets( m_query->jointree, m_query->targetList, m_query->groupClause, - m_query->groupingSets, m_query->hasAggs, + m_query->groupingSets, m_query->groupDistinct, m_query->hasAggs, sort_group_attno_to_colid_mapping, output_attno_to_colid_mapping); } @@ -2414,7 +2414,7 @@ CTranslatorQueryToDXL::CheckNoDuplicateAliasGroupingColumn(List *target_list, CDXLNode * CTranslatorQueryToDXL::TranslateGroupingSets( FromExpr *from_expr, List *target_list, List *group_clause, - List *grouping_set, BOOL has_aggs, + List *grouping_set, bool grouping_distinct, BOOL has_aggs, IntToUlongMap *sort_grpref_to_colid_mapping, IntToUlongMap *output_attno_to_colid_mapping) { @@ -2457,7 +2457,7 @@ CTranslatorQueryToDXL::TranslateGroupingSets( GPOS_NEW(m_mp) UlongToUlongMap(m_mp); CBitSet *unique_grp_cols_bitset = 
GPOS_NEW(m_mp) CBitSet(m_mp, num_of_cols); CBitSetArray *bitset_array = CTranslatorUtils::GetColumnAttnosForGroupBy( - m_mp, group_clause, grouping_set, num_of_cols, + m_mp, group_clause, grouping_set, grouping_distinct, num_of_cols, grpcol_index_to_colid_mapping, unique_grp_cols_bitset); const ULONG num_of_grouping_sets = bitset_array->Size(); diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp b/src/backend/gpopt/translate/CTranslatorUtils.cpp index 43b54c6e42..593c4f6c80 100644 --- a/src/backend/gpopt/translate/CTranslatorUtils.cpp +++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp @@ -932,6 +932,7 @@ CTranslatorUtils::GetGroupingColidArray( CBitSetArray * CTranslatorUtils::GetColumnAttnosForGroupBy( CMemoryPool *mp, List *group_clause_list, List *grouping_set_list, + bool grouping_distinct, ULONG num_cols, UlongToUlongMap * group_col_pos, // mapping of grouping col positions to SortGroupRef ids @@ -1034,6 +1035,39 @@ CTranslatorUtils::GetColumnAttnosForGroupBy( } } + // Deduplicate the grouping sets result + // Can't do dedup when building the `col_attnos_arr` + if (grouping_distinct) + { + CBitSetArray *col_attnos_arr_dedup = GPOS_NEW(mp) CBitSetArray(mp); + for (ULONG ul = 0; ul < col_attnos_arr->Size(); ul++) + { + auto col_attnos = (*col_attnos_arr)[ul]; + bool exist = false; + + for (ULONG ulInner = 0; ulInner < col_attnos_arr_dedup->Size(); ulInner++) + { + auto col_attnos_dedup = (*col_attnos_arr_dedup)[ulInner]; + if (col_attnos_dedup->Equals(col_attnos)) + { + exist = true; + break; + } + } + + if (!exist) + { + // still need copy here + CBitSet *bset = + GPOS_NEW(mp) CBitSet(mp, *col_attnos); + col_attnos_arr_dedup->Append(bset); + } + } + + col_attnos_arr->Release(); + col_attnos_arr = col_attnos_arr_dedup; + } + return col_attnos_arr; } diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h b/src/include/gpopt/translate/CTranslatorQueryToDXL.h index fbee1b31b3..990a9f6e55 100644 --- 
a/src/include/gpopt/translate/CTranslatorQueryToDXL.h +++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h @@ -224,7 +224,7 @@ private: // translate a query with grouping sets CDXLNode *TranslateGroupingSets( FromExpr *from_expr, List *target_list, List *group_clause, - List *grouping_set, BOOL has_aggs, + List *grouping_set, bool grouping_distinct, BOOL has_aggs, IntToUlongMap *phmiulSortGrpColsColId, IntToUlongMap *output_attno_to_colid_mapping); diff --git a/src/include/gpopt/translate/CTranslatorUtils.h b/src/include/gpopt/translate/CTranslatorUtils.h index d005fe5e08..7a1927fa52 100644 --- a/src/include/gpopt/translate/CTranslatorUtils.h +++ b/src/include/gpopt/translate/CTranslatorUtils.h @@ -199,7 +199,7 @@ public: // construct a dynamic array of sets of column attnos corresponding // to the group by clause static CBitSetArray *GetColumnAttnosForGroupBy( - CMemoryPool *mp, List *group_clause, List *grouping_set_list, + CMemoryPool *mp, List *group_clause, List *grouping_set_list, bool grouping_distinct, ULONG num_cols, UlongToUlongMap *group_col_pos, CBitSet *group_cold); // return a copy of the query with constant of unknown type being coerced --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
