This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 5fd7ed78e976ae54919fe5cd961b66ef53ec1368
Author: zhoujiaqi <[email protected]>
AuthorDate: Thu Jan 2 14:36:15 2025 +0800

    ORCA: Support multiple grouping sets distinct
    
    `group by distinct` behaved the same as `group by`, because the
    `groupDistinct` flag was not passed into `TranslateGroupingSets`.
    
    In the old CBDB version, ORCA did not have this problem, because
    the old version of ORCA did not support multiple grouping sets (it
    would fall back). Now that multiple grouping sets are supported, it
    is also necessary to support `distinct` for multiple grouping sets.
---
 .../gpopt/translate/CTranslatorQueryToDXL.cpp      |  6 ++--
 src/backend/gpopt/translate/CTranslatorUtils.cpp   | 34 ++++++++++++++++++++++
 .../gpopt/translate/CTranslatorQueryToDXL.h        |  2 +-
 src/include/gpopt/translate/CTranslatorUtils.h     |  2 +-
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
index fb293964fe..4afe36172e 100644
--- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
@@ -604,7 +604,7 @@ CTranslatorQueryToDXL::TranslateSelectQueryToDXL()
        {
                child_dxlnode = TranslateGroupingSets(
                        m_query->jointree, m_query->targetList, 
m_query->groupClause,
-                       m_query->groupingSets, m_query->hasAggs,
+                       m_query->groupingSets, m_query->groupDistinct, 
m_query->hasAggs,
                        sort_group_attno_to_colid_mapping, 
output_attno_to_colid_mapping);
        }
 
@@ -2414,7 +2414,7 @@ 
CTranslatorQueryToDXL::CheckNoDuplicateAliasGroupingColumn(List *target_list,
 CDXLNode *
 CTranslatorQueryToDXL::TranslateGroupingSets(
        FromExpr *from_expr, List *target_list, List *group_clause,
-       List *grouping_set, BOOL has_aggs,
+       List *grouping_set, bool grouping_distinct, BOOL has_aggs,
        IntToUlongMap *sort_grpref_to_colid_mapping,
        IntToUlongMap *output_attno_to_colid_mapping)
 {
@@ -2457,7 +2457,7 @@ CTranslatorQueryToDXL::TranslateGroupingSets(
                GPOS_NEW(m_mp) UlongToUlongMap(m_mp);
        CBitSet *unique_grp_cols_bitset = GPOS_NEW(m_mp) CBitSet(m_mp, 
num_of_cols);
        CBitSetArray *bitset_array = 
CTranslatorUtils::GetColumnAttnosForGroupBy(
-               m_mp, group_clause, grouping_set, num_of_cols,
+               m_mp, group_clause, grouping_set, grouping_distinct, 
num_of_cols,
                grpcol_index_to_colid_mapping, unique_grp_cols_bitset);
 
        const ULONG num_of_grouping_sets = bitset_array->Size();
diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp 
b/src/backend/gpopt/translate/CTranslatorUtils.cpp
index 43b54c6e42..593c4f6c80 100644
--- a/src/backend/gpopt/translate/CTranslatorUtils.cpp
+++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp
@@ -932,6 +932,7 @@ CTranslatorUtils::GetGroupingColidArray(
 CBitSetArray *
 CTranslatorUtils::GetColumnAttnosForGroupBy(
        CMemoryPool *mp, List *group_clause_list, List *grouping_set_list,
+       bool grouping_distinct,
        ULONG num_cols,
        UlongToUlongMap *
                group_col_pos,  // mapping of grouping col positions to 
SortGroupRef ids
@@ -1034,6 +1035,39 @@ CTranslatorUtils::GetColumnAttnosForGroupBy(
                }
        }
 
+       // Deduplicate the grouping sets result
+       // Can't do dedup when building the `col_attnos_arr`
+       if (grouping_distinct)
+       {
+               CBitSetArray *col_attnos_arr_dedup = GPOS_NEW(mp) 
CBitSetArray(mp);
+               for (ULONG ul = 0; ul < col_attnos_arr->Size(); ul++)
+               {
+                       auto col_attnos = (*col_attnos_arr)[ul];
+                       bool exist = false;
+
+                       for (ULONG ulInner = 0; ulInner < 
col_attnos_arr_dedup->Size(); ulInner++)
+                       {
+                               auto col_attnos_dedup = 
(*col_attnos_arr_dedup)[ulInner];
+                               if (col_attnos_dedup->Equals(col_attnos))
+                               {
+                                       exist = true;
+                                       break;
+                               }
+                       }
+
+                       if (!exist)
+                       {
+                               // still need copy here
+                               CBitSet *bset =
+                                               GPOS_NEW(mp) CBitSet(mp, 
*col_attnos);
+                               col_attnos_arr_dedup->Append(bset);
+                       }
+               }
+
+               col_attnos_arr->Release();
+               col_attnos_arr = col_attnos_arr_dedup;
+       }
+
        return col_attnos_arr;
 }
 
diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h 
b/src/include/gpopt/translate/CTranslatorQueryToDXL.h
index fbee1b31b3..990a9f6e55 100644
--- a/src/include/gpopt/translate/CTranslatorQueryToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h
@@ -224,7 +224,7 @@ private:
        // translate a query with grouping sets
        CDXLNode *TranslateGroupingSets(
                FromExpr *from_expr, List *target_list, List *group_clause,
-               List *grouping_set, BOOL has_aggs,
+               List *grouping_set, bool grouping_distinct, BOOL has_aggs,
                IntToUlongMap *phmiulSortGrpColsColId,
                IntToUlongMap *output_attno_to_colid_mapping);
 
diff --git a/src/include/gpopt/translate/CTranslatorUtils.h 
b/src/include/gpopt/translate/CTranslatorUtils.h
index d005fe5e08..7a1927fa52 100644
--- a/src/include/gpopt/translate/CTranslatorUtils.h
+++ b/src/include/gpopt/translate/CTranslatorUtils.h
@@ -199,7 +199,7 @@ public:
        // construct a dynamic array of sets of column attnos corresponding
        // to the group by clause
        static CBitSetArray *GetColumnAttnosForGroupBy(
-               CMemoryPool *mp, List *group_clause, List *grouping_set_list,
+               CMemoryPool *mp, List *group_clause, List *grouping_set_list, 
bool grouping_distinct,
                ULONG num_cols, UlongToUlongMap *group_col_pos, CBitSet 
*group_cold);
 
        // return a copy of the query with constant of unknown type being 
coerced


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to