(cloudberry) 13/15: [ORCA] Enable multiple grouping set specs (#14366)

maxyang Thu, 02 Jan 2025 02:22:03 -0800

This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit e1ea1cf19a469f328380ded6f7b081963e301b43
Author: David Kimura <[email protected]>
AuthorDate: Wed Nov 9 11:44:55 2022 -0800

    [ORCA] Enable multiple grouping set specs (#14366)
    
    After this commit, following produces valid plan in ORCA:
      ```
      CREATE TABLE t AS SELECT i as a, i as b FROM generate_series(1,3)i;
      EXPLAIN SELECT a, b FROM t GROUP BY a, ROLLUP(b);
      ```
    
    In ORCA, multiple grouping set specs is implemented as the pairwise
    concatenation of the individual elements from the different grouping
    sets. In the above example, there are 2 sets:
      ```
      "a" that produces (1), (2), (3)
      "ROLLUP(b)" that produces (), (1), (2), (3).
      ```
    
    Pairwise concatenation of the two sets produces groups:
    
      (1,),(2,),(3,),(1,1),(2,1),(3,1),(1,2),(2,2),(3,2),(1,3),(2,3),(3,3)
    
    Based on the table contents and the groups, the query result produces:
    
      (1,),(2,),(3,),(1,1),(2,2),(3,3)
    
    NB: Fallback if multiple grouping set specs reference duplicate alias
    
    There is a known issue in upstream Postgres right now where planner
    fails to distinguish between two duplicate alias columns where one
    column is possibly nulled by the action of a grouping node. For example:
      ```
      SELECT i AS ai1, i AS ai2 FROM generate_series(1,3)i GROUP BY ai2, 
ROLLUP(ai1) ORDER BY ai1, ai2;
      ```
    
    Above query produces a range table entry (RTE) with a single target list
    entry. In other words it fails to distinguish between ai1 and ai2. This
    blunder can lead to wrong results.
    
    So, you may ask, why should we fallback to PLANNER that can lead to
    wrong results too? Because PLANNER tends to get it less wrong and
    there's work-in-progress [1] that ORCA doesn't want to have to invent
    separately.
    
    In the meantime the workaround is to coerce a separate var. That can be
    done using a temp table or view. Above query can be rewritten as:
      ```
      CREATE TEMP TABLE tempt AS SELECT i AS ai1, i AS ai2 FROM 
generate_series(1, 3)i;
      SELECT ai1, ai2 FROM tempt GROUP BY ai2, ROLLUP(ai1) ORDER BY ai1, ai2;
      ```
    
    [1] 
https://www.postgresql.org/message-id/flat/[email protected]
---
 .../gpopt/translate/CTranslatorQueryToDXL.cpp      | 138 +++++++++++++++++++++
 src/backend/gpopt/translate/CTranslatorUtils.cpp   | 135 ++++++++++++++------
 .../gpopt/translate/CTranslatorQueryToDXL.h        |   4 +
 src/include/gpopt/translate/CTranslatorUtils.h     |   4 +
 .../expected/orca_groupingsets_fallbacks.out       |  98 +++++----------
 .../orca_groupingsets_fallbacks_optimizer.out      | 104 +++++-----------
 .../regress/sql/orca_groupingsets_fallbacks.sql    |  18 ++-
 7 files changed, 321 insertions(+), 180 deletions(-)

diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp 
b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
index 58dedc3ca9..fb293964fe 100644
--- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
+++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
@@ -2268,6 +2268,141 @@ CTranslatorQueryToDXL::IsDuplicateDqaArg(List 
*dqa_list, Aggref *aggref)
        return false;
 }
 
+//---------------------------------------------------------------------------
+//     @function:
+//             GroupingSetContainsValue
+//
+//     @doc:
+//             Check if value is a member of the GroupingSet content. Content 
for
+//             SIMPLE nodes is an integer list of ressortgroupref values. 
Content
+//             CUBE, ROLLUP, and SET nodes are either SIMPLE nodes or other 
ROLLUP or
+//             CUBE nodes. See details in parsenodes.h GroupingSet for more 
details.
+//---------------------------------------------------------------------------
+static BOOL
+GroupingSetContainsValue(GroupingSet *group, INT value)
+{
+       ListCell *lc = nullptr;
+       if (group->kind == GROUPING_SET_SIMPLE)
+       {
+               ForEach(lc, group->content)
+               {
+                       if (lfirst_int(lc) == value)
+                       {
+                               return true;
+                       }
+               }
+       }
+       if (group->kind == GROUPING_SET_CUBE ||
+               group->kind == GROUPING_SET_ROLLUP || group->kind == 
GROUPING_SET_SETS)
+       {
+               ForEach(lc, group->content)
+               {
+                       if (GroupingSetContainsValue((GroupingSet *) 
lfirst(lc), value))
+                       {
+                               return true;
+                       }
+               }
+       }
+       return false;
+}
+
+//---------------------------------------------------------------------------
+//     @function:
+//             CTranslatorQueryToDXL::CheckNoDuplicateAliasGroupingColumn
+//
+//     @doc:
+//             Check if there are multiple grouping set specs that reference
+//             duplicate alias columns that may produce NULL values. This can 
lead to
+//             a known wrong results scenario even in Postgres. Punt until a 
proper
+//             solution is found in Postgres. See following threads [1][2] for 
more
+//             details.
+//
+//             [1] 
https://www.postgresql.org/message-id/flat/CAHnPFjSdFx_TtNpQturPMkRSJMYaD5rGP2=8ifh9v24-ojh...@mail.gmail.com
+//             [2] 
https://www.postgresql.org/message-id/flat/[email protected]
+//---------------------------------------------------------------------------
+void
+CTranslatorQueryToDXL::CheckNoDuplicateAliasGroupingColumn(List *target_list,
+                                                                               
                                   List *group_clause,
+                                                                               
                                   List *grouping_set)
+{
+       if (gpdb::ListLength(grouping_set) < 2)
+       {
+               // no duplicates in different grouping specs if only 1 grouping 
set
+               return;
+       }
+
+       if (gpdb::ListLength(group_clause) < 2)
+       {
+               // no duplicates referenced from grouping set if only 1 group 
clause
+               return;
+       }
+
+       // Find if there are duplicate aliases in the target list
+       ListCell *lc1 = nullptr;
+       ListCell *lc2 = nullptr;
+
+       CBitSet *bitset = GPOS_NEW(m_mp) CBitSet(m_mp);
+
+       List *processed_list = NIL;
+       ForEach(lc1, target_list)
+       {
+               TargetEntry *target_entry = (TargetEntry *) lfirst(lc1);
+
+               ForEach(lc2, processed_list)
+               {
+                       TargetEntry *target_entry_inner = (TargetEntry *) 
lfirst(lc2);
+                       if (gpdb::Equals(target_entry->expr, 
target_entry_inner->expr))
+                       {
+                               // ressortgroupref's point to alias'd columns
+                               
bitset->ExchangeSet(target_entry->ressortgroupref);
+                               
bitset->ExchangeSet(target_entry_inner->ressortgroupref);
+                       }
+               }
+
+               processed_list = gpdb::LAppend(processed_list, target_entry);
+       }
+
+       if (gpdb::ListLength(processed_list) < 1)
+       {
+               // no duplicates if no duplicates found in target list
+               return;
+       }
+
+       int countSimple = 0;
+       int countNonSimple = 0;
+       ForEach(lc1, grouping_set)
+       {
+               GroupingSet *group = (GroupingSet *) lfirst(lc1);
+               CBitSetIter bsiter(*bitset);
+
+               while (bsiter.Advance())
+               {
+                       if (GroupingSetContainsValue(group, bsiter.Bit()))
+                       {
+                               if (group->kind == GROUPING_SET_CUBE ||
+                                       group->kind == GROUPING_SET_ROLLUP ||
+                                       group->kind == GROUPING_SET_SETS)
+                               {
+                                       countNonSimple += 1;
+                               }
+                               else if (group->kind == GROUPING_SET_SIMPLE)
+                               {
+                                       countSimple += 1;
+                               }
+
+                               if (countNonSimple > 1 ||
+                                       (countNonSimple > 0 && countSimple > 0))
+                               {
+                                       GPOS_RAISE(
+                                               gpdxl::ExmaDXL, 
gpdxl::ExmiQuery2DXLUnsupportedFeature,
+                                               GPOS_WSZ_LIT(
+                                                       "Multiple grouping sets 
specifications with duplicate aliased columns"));
+                               }
+                       }
+               }
+       }
+}
+
 //---------------------------------------------------------------------------
 //     @function:
 //             CTranslatorQueryToDXL::TranslateGroupingSets
@@ -2313,6 +2448,9 @@ CTranslatorQueryToDXL::TranslateGroupingSets(
                return result_dxlnode;
        }
 
+       CheckNoDuplicateAliasGroupingColumn(target_list, group_clause,
+                                                                               
grouping_set);
+
        // grouping functions refer to grouping col positions, so construct a 
map pos->grouping column
        // while processing the grouping clause
        UlongToUlongMap *grpcol_index_to_colid_mapping =
diff --git a/src/backend/gpopt/translate/CTranslatorUtils.cpp 
b/src/backend/gpopt/translate/CTranslatorUtils.cpp
index e5b8b2cf31..43b54c6e42 100644
--- a/src/backend/gpopt/translate/CTranslatorUtils.cpp
+++ b/src/backend/gpopt/translate/CTranslatorUtils.cpp
@@ -953,50 +953,84 @@ CTranslatorUtils::GetColumnAttnosForGroupBy(
 
        GPOS_ASSERT(0 < gpdb::ListLength(grouping_set_list));
 
-       if (1 < gpdb::ListLength(grouping_set_list))
-       {
-               GPOS_RAISE(gpdxl::ExmaDXL, 
gpdxl::ExmiQuery2DXLUnsupportedFeature,
-                                  GPOS_WSZ_LIT("Multiple grouping sets 
specifications"));
-       }
-
-       Node *node = (Node *) LInitial(grouping_set_list);
-       GPOS_ASSERT(nullptr != node && IsA(node, GroupingSet));
-       GroupingSet *grouping_set = (GroupingSet *) node;
-       CBitSetArray *col_attnos_arr = nullptr;
+       CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp);
 
-       switch (grouping_set->kind)
+       ListCell *cell = nullptr;
+       ForEach(cell, grouping_set_list)
        {
-               case GROUPING_SET_EMPTY:
-               {
-                       col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp);
-                       CBitSet *bset = GPOS_NEW(mp) CBitSet(mp);
-                       col_attnos_arr->Append(bset);
-                       break;
-               }
-               case GROUPING_SET_ROLLUP:
-               {
-                       col_attnos_arr = CreateGroupingSetsForRollup(
-                               mp, grouping_set, num_cols, group_cols, 
group_col_pos);
-                       break;
-               }
-               case GROUPING_SET_CUBE:
+               Node *node = (Node *) lfirst(cell);
+               GPOS_ASSERT(nullptr != node && IsA(node, GroupingSet));
+               GroupingSet *grouping_set = (GroupingSet *) node;
+               CBitSetArray *col_attnos_arr_current = nullptr;
+
+               switch (grouping_set->kind)
                {
-                       col_attnos_arr = CreateGroupingSetsForCube(
-                               mp, grouping_set, num_cols, group_cols, 
group_col_pos);
-                       break;
+                       case GROUPING_SET_EMPTY:
+                       {
+                               col_attnos_arr_current = GPOS_NEW(mp) 
CBitSetArray(mp);
+                               CBitSet *bset = GPOS_NEW(mp) CBitSet(mp);
+                               col_attnos_arr_current->Append(bset);
+                               break;
+                       }
+                       case GROUPING_SET_ROLLUP:
+                       {
+                               col_attnos_arr_current = 
CreateGroupingSetsForRollup(
+                                       mp, grouping_set, num_cols, group_cols, 
group_col_pos);
+                               break;
+                       }
+                       case GROUPING_SET_CUBE:
+                       {
+                               col_attnos_arr_current = 
CreateGroupingSetsForCube(
+                                       mp, grouping_set, num_cols, group_cols, 
group_col_pos);
+                               break;
+                       }
+                       case GROUPING_SET_SETS:
+                       {
+                               col_attnos_arr_current = 
CreateGroupingSetsForSets(
+                                       mp, grouping_set, num_cols, group_cols, 
group_col_pos);
+                               break;
+                       }
+                       case GROUPING_SET_SIMPLE:
+                       {
+                               col_attnos_arr_current = 
CreateGroupingSetsForSimple(
+                                       mp, grouping_set, num_cols, group_cols, 
group_col_pos);
+                               break;
+                       }
+                       default:
+                       {
+                               /* can't happen */
+                               GPOS_RAISE(gpdxl::ExmaDXL, 
gpdxl::ExmiQuery2DXLError,
+                                                  GPOS_WSZ_LIT("Unrecognized 
grouping set kind"));
+                       }
                }
-               case GROUPING_SET_SETS:
+
+               // Multiple grouping set specs is implemented as the pairwise
+               // concatenation of the individual elements of the different 
grouping
+               // sets. Here we blend the last computed grouping set spec
+               // (col_attnos_arr_current) into the cumulated result 
(col_attnos_arr).
+               ULONG col_attnos_arr_size = col_attnos_arr->Size();
+               if (col_attnos_arr_size > 0)
                {
-                       col_attnos_arr = CreateGroupingSetsForSets(
-                               mp, grouping_set, num_cols, group_cols, 
group_col_pos);
-                       break;
+                       CBitSetArray *col_attnos_arr_temp = GPOS_NEW(mp) 
CBitSetArray(mp);
+
+                       for (ULONG ul = 0; ul < col_attnos_arr_size; ul++)
+                       {
+                               for (ULONG ulInner = 0;
+                                        ulInner < 
col_attnos_arr_current->Size(); ulInner++)
+                               {
+                                       CBitSet *bset =
+                                               GPOS_NEW(mp) CBitSet(mp, 
*(*col_attnos_arr)[ul]);
+                                       
bset->Union((*col_attnos_arr_current)[ulInner]);
+                                       col_attnos_arr_temp->Append(bset);
+                               }
+                       }
+                       col_attnos_arr_current->Release();
+                       col_attnos_arr->Release();
+                       col_attnos_arr = col_attnos_arr_temp;
                }
-               case GROUPING_SET_SIMPLE:
-               default:
+               else
                {
-                       /* can't happen */
-                       GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLError,
-                                          GPOS_WSZ_LIT("Unrecognized grouping 
set kind"));
+                       col_attnos_arr = col_attnos_arr_current;
                }
        }
 
@@ -1049,6 +1083,33 @@ CTranslatorUtils::CreateGroupingSetsForSets(CMemoryPool 
*mp,
        return col_attnos_arr;
 }
 
+//---------------------------------------------------------------------------
+//     @function:
+//             CTranslatorUtils::CreateGroupingSetsForSimple
+//
+//     @doc:
+//             Construct a dynamic array of sets of column attnos for a 
grouping sets
+//             subclause
+//
+//---------------------------------------------------------------------------
+CBitSetArray *
+CTranslatorUtils::CreateGroupingSetsForSimple(CMemoryPool *mp,
+                                                                               
          const GroupingSet *grouping_set,
+                                                                               
          ULONG num_cols,
+                                                                               
          CBitSet *group_cols,
+                                                                               
          UlongToUlongMap *group_col_pos)
+{
+       GPOS_ASSERT(nullptr != grouping_set);
+       GPOS_ASSERT(grouping_set->kind == GROUPING_SET_SIMPLE);
+       CBitSetArray *col_attnos_arr = GPOS_NEW(mp) CBitSetArray(mp);
+
+       CBitSet *bset = CreateAttnoSetForGroupingSet(
+               mp, grouping_set->content, num_cols, group_col_pos, group_cols,
+               false /* use_group_clause */);
+       col_attnos_arr->Append(bset);
+       return col_attnos_arr;
+}
+
 //---------------------------------------------------------------------------
 //     @function:
 //             CTranslatorUtils::CreateGroupingSetsForRollup
diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h 
b/src/include/gpopt/translate/CTranslatorQueryToDXL.h
index 631e1fcad6..fbee1b31b3 100644
--- a/src/include/gpopt/translate/CTranslatorQueryToDXL.h
+++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h
@@ -217,6 +217,10 @@ private:
        // check if the argument of a DQA has already being used by another DQA
        static BOOL IsDuplicateDqaArg(List *dqa_list, Aggref *aggref);
 
+       void CheckNoDuplicateAliasGroupingColumn(List *target_list,
+                                                                               
         List *group_clause,
+                                                                               
         List *grouping_set);
+
        // translate a query with grouping sets
        CDXLNode *TranslateGroupingSets(
                FromExpr *from_expr, List *target_list, List *group_clause,
diff --git a/src/include/gpopt/translate/CTranslatorUtils.h 
b/src/include/gpopt/translate/CTranslatorUtils.h
index 8c54788702..d005fe5e08 100644
--- a/src/include/gpopt/translate/CTranslatorUtils.h
+++ b/src/include/gpopt/translate/CTranslatorUtils.h
@@ -119,6 +119,10 @@ private:
                CMemoryPool *mp, const GroupingSet *grouping_set_node, ULONG 
num_cols,
                CBitSet *group_cols, UlongToUlongMap *group_col_pos);
 
+       static CBitSetArray *CreateGroupingSetsForSimple(
+               CMemoryPool *mp, const GroupingSet *grouping_set_node, ULONG 
num_cols,
+               CBitSet *group_cols, UlongToUlongMap *group_col_pos);
+
 public:
        struct SCmptypeStrategy
        {
diff --git a/src/test/regress/expected/orca_groupingsets_fallbacks.out 
b/src/test/regress/expected/orca_groupingsets_fallbacks.out
index 41fba478ec..d649d950b5 100644
--- a/src/test/regress/expected/orca_groupingsets_fallbacks.out
+++ b/src/test/regress/expected/orca_groupingsets_fallbacks.out
@@ -14,75 +14,39 @@ insert into gstest1 values (1, 5, 10, 0, 100);
 insert into gstest1 values (1, 42, 20, 7, 200);
 insert into gstest1 values (2, 5, 30, 21, 300);
 insert into gstest1 values (2, 42, 40, 53, 400);
--- Orca falls back due to Cube
-select a, b, c, sum(v) from gstest1 group by cube(a, b, c);
- a | b  | c  | sum  
----+----+----+------
- 1 |  5 | 10 |  100
- 1 |  5 |    |  100
- 1 | 42 | 20 |  200
- 1 | 42 |    |  200
- 1 |    |    |  300
- 2 |  5 | 30 |  300
- 2 |  5 |    |  300
- 2 | 42 | 40 |  400
- 2 | 42 |    |  400
- 2 |    |    |  700
-   |    |    | 1000
-   |    | 30 |  300
-   |    | 10 |  100
-   |    | 40 |  400
-   |    | 20 |  200
- 2 |    | 30 |  300
- 1 |    | 20 |  200
- 2 |    | 40 |  400
- 1 |    | 10 |  100
-   |  5 |    |  400
-   | 42 |    |  600
-   |  5 | 30 |  300
-   | 42 | 20 |  200
-   | 42 | 40 |  400
-   |  5 | 10 |  100
-(25 rows)
+-- Orca falls back due to multiple grouping sets specifications referencing
+-- duplicate alias columns where column is possibly nulled by ROLLUP or CUBE.
+-- This is also a known issue in Postgres. Following threads [1][2] have more
+-- details.
+--
+-- [1] 
https://www.postgresql.org/message-id/flat/CAHnPFjSdFx_TtNpQturPMkRSJMYaD5rGP2=8ifh9v24-ojh...@mail.gmail.com
+-- [2] 
https://www.postgresql.org/message-id/flat/[email protected]
+select a as alias1, a as alias2 from gstest1 group by alias1, rollup(alias2);
+ alias1 | alias2 
+--------+--------
+      1 |      1
+      1 |       
+      2 |      2
+      2 |       
+(4 rows)
 
--- Orca falls back due to multiple grouping sets specifications
-select sum(v), b, a, c from gstest1 group by c, grouping sets ((a, b), ());
- sum | b  | a | c  
------+----+---+----
- 400 | 42 | 2 | 40
- 300 |  5 | 2 | 30
- 200 | 42 | 1 | 20
- 300 |    |   | 30
- 400 |    |   | 40
- 200 |    |   | 20
- 100 |  5 | 1 | 10
- 100 |    |   | 10
-(8 rows)
+select a as alias1, a as alias2 from gstest1 group by alias1, cube(alias2);
+ alias1 | alias2 
+--------+--------
+      1 |      1
+      1 |       
+      2 |      2
+      2 |       
+(4 rows)
 
-select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b), rollup(c, 
d);
- sum | b  | a | c  | d  
------+----+---+----+----
- 100 |    | 1 | 10 |  0
- 100 |    | 1 | 10 |   
- 200 |    | 1 | 20 |  7
- 200 |    | 1 | 20 |   
- 300 |    | 1 |    |   
- 300 |    | 2 | 30 | 21
- 300 |    | 2 | 30 |   
- 400 |    | 2 | 40 | 53
- 400 |    | 2 | 40 |   
- 700 |    | 2 |    |   
- 100 |  5 |   | 10 |  0
- 100 |  5 |   | 10 |   
- 300 |  5 |   | 30 | 21
- 300 |  5 |   | 30 |   
- 400 |  5 |   |    |   
- 200 | 42 |   | 20 |  7
- 200 | 42 |   | 20 |   
- 400 | 42 |   | 40 | 53
- 400 | 42 |   | 40 |   
- 600 | 42 |   |    |   
-(20 rows)
+-- Following does not need to fallback because no ROLLUP/CUBE means neither
+-- column needs to be nulled.
+select a as alias1, a as alias2 from gstest1 group by alias1, alias2;
+ alias1 | alias2 
+--------+--------
+      2 |      2
+      1 |      1
+(2 rows)
 
 -- Orca falls back due to nested grouping sets
 select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b, rollup(c, 
d));
diff --git 
a/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out 
b/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out
index d4242bceb3..3b3c67d820 100644
--- a/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out
+++ b/src/test/regress/expected/orca_groupingsets_fallbacks_optimizer.out
@@ -14,79 +14,43 @@ insert into gstest1 values (1, 5, 10, 0, 100);
 insert into gstest1 values (1, 42, 20, 7, 200);
 insert into gstest1 values (2, 5, 30, 21, 300);
 insert into gstest1 values (2, 42, 40, 53, 400);
--- Orca falls back due to Cube
-select a, b, c, sum(v) from gstest1 group by cube(a, b, c);
- a | b  | c  | sum  
----+----+----+------
- 1 |  5 | 10 |  100
- 1 |  5 |    |  100
- 1 | 42 | 20 |  200
- 1 | 42 |    |  200
- 1 |    |    |  300
- 2 |  5 | 30 |  300
- 2 |  5 |    |  300
- 2 | 42 | 40 |  400
- 2 | 42 |    |  400
- 2 |    |    |  700
-   |    |    | 1000
-   |    | 30 |  300
-   |    | 10 |  100
-   |    | 40 |  400
-   |    | 20 |  200
- 2 |    | 30 |  300
- 1 |    | 20 |  200
- 2 |    | 40 |  400
- 1 |    | 10 |  100
-   |  5 |    |  400
-   | 42 |    |  600
-   |  5 | 30 |  300
-   | 42 | 20 |  200
-   | 42 | 40 |  400
-   |  5 | 10 |  100
-(25 rows)
-
--- Orca falls back due to multiple grouping sets specifications
-select sum(v), b, a, c from gstest1 group by c, grouping sets ((a, b), ());
+-- Orca falls back due to multiple grouping sets specifications referencing
+-- duplicate alias columns where column is possibly nulled by ROLLUP or CUBE.
+-- This is also a known issue in Postgres. Following threads [1][2] have more
+-- details.
+--
+-- [1] 
https://www.postgresql.org/message-id/flat/CAHnPFjSdFx_TtNpQturPMkRSJMYaD5rGP2=8ifh9v24-ojh...@mail.gmail.com
+-- [2] 
https://www.postgresql.org/message-id/flat/[email protected]
+select a as alias1, a as alias2 from gstest1 group by alias1, rollup(alias2);
 INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  Feature not supported: Multiple grouping sets specifications
- sum | b  | a | c  
------+----+---+----
- 400 | 42 | 2 | 40
- 300 |  5 | 2 | 30
- 200 | 42 | 1 | 20
- 300 |    |   | 30
- 400 |    |   | 40
- 200 |    |   | 20
- 100 |  5 | 1 | 10
- 100 |    |   | 10
-(8 rows)
+DETAIL:  Feature not supported: Multiple grouping sets specifications with 
duplicate aliased columns
+ alias1 | alias2 
+--------+--------
+      1 |      1
+      1 |       
+      2 |      2
+      2 |       
+(4 rows)
 
-select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b), rollup(c, 
d);
+select a as alias1, a as alias2 from gstest1 group by alias1, cube(alias2);
 INFO:  GPORCA failed to produce a plan, falling back to planner
-DETAIL:  Feature not supported: Multiple grouping sets specifications
- sum | b  | a | c  | d  
------+----+---+----+----
- 100 |    | 1 | 10 |  0
- 100 |    | 1 | 10 |   
- 200 |    | 1 | 20 |  7
- 200 |    | 1 | 20 |   
- 300 |    | 1 |    |   
- 300 |    | 2 | 30 | 21
- 300 |    | 2 | 30 |   
- 400 |    | 2 | 40 | 53
- 400 |    | 2 | 40 |   
- 700 |    | 2 |    |   
- 100 |  5 |   | 10 |  0
- 100 |  5 |   | 10 |   
- 300 |  5 |   | 30 | 21
- 300 |  5 |   | 30 |   
- 400 |  5 |   |    |   
- 200 | 42 |   | 20 |  7
- 200 | 42 |   | 20 |   
- 400 | 42 |   | 40 | 53
- 400 | 42 |   | 40 |   
- 600 | 42 |   |    |   
-(20 rows)
+DETAIL:  Feature not supported: Multiple grouping sets specifications with 
duplicate aliased columns
+ alias1 | alias2 
+--------+--------
+      2 |      2
+      2 |       
+      1 |      1
+      1 |       
+(4 rows)
+
+-- Following does not need to fallback because no ROLLUP/CUBE means neither
+-- column needs to be nulled.
+select a as alias1, a as alias2 from gstest1 group by alias1, alias2;
+ alias1 | alias2 
+--------+--------
+      1 |      1
+      2 |      2
+(2 rows)
 
 -- Orca falls back due to nested grouping sets
 select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b, rollup(c, 
d));
diff --git a/src/test/regress/sql/orca_groupingsets_fallbacks.sql 
b/src/test/regress/sql/orca_groupingsets_fallbacks.sql
index 5caa8dde1f..13f5aa66d8 100644
--- a/src/test/regress/sql/orca_groupingsets_fallbacks.sql
+++ b/src/test/regress/sql/orca_groupingsets_fallbacks.sql
@@ -15,12 +15,18 @@ insert into gstest1 values (1, 42, 20, 7, 200);
 insert into gstest1 values (2, 5, 30, 21, 300);
 insert into gstest1 values (2, 42, 40, 53, 400);
 
--- Orca falls back due to Cube
-select a, b, c, sum(v) from gstest1 group by cube(a, b, c);
-
--- Orca falls back due to multiple grouping sets specifications
-select sum(v), b, a, c from gstest1 group by c, grouping sets ((a, b), ());
-select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b), rollup(c, 
d);
+-- Orca falls back due to multiple grouping sets specifications referencing
+-- duplicate alias columns where column is possibly nulled by ROLLUP or CUBE.
+-- This is also a known issue in Postgres. Following threads [1][2] have more
+-- details.
+--
+-- [1] 
https://www.postgresql.org/message-id/flat/CAHnPFjSdFx_TtNpQturPMkRSJMYaD5rGP2=8ifh9v24-ojh...@mail.gmail.com
+-- [2] 
https://www.postgresql.org/message-id/flat/[email protected]
+select a as alias1, a as alias2 from gstest1 group by alias1, rollup(alias2);
+select a as alias1, a as alias2 from gstest1 group by alias1, cube(alias2);
+-- Following does not need to fallback because no ROLLUP/CUBE means neither
+-- column needs to be nulled.
+select a as alias1, a as alias2 from gstest1 group by alias1, alias2;
 
 -- Orca falls back due to nested grouping sets
 select sum(v), b, a, c, d from gstest1 group by grouping sets(a, b, rollup(c, 
d));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 13/15: [ORCA] Enable multiple grouping set specs (#14366)

Reply via email to