(calcite) branch main updated: [CALCITE-7126] The calculation result of grouping function is wrong

zhenchen Mon, 25 Aug 2025 17:43:32 -0700

This is an automated email from the ASF dual-hosted git repository.

zhenchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git



The following commit(s) were added to refs/heads/main by this push:
     new 87789d949f [CALCITE-7126] The calculation result of grouping function 
is wrong
87789d949f is described below

commit 87789d949ff305aa5eacc9e6bd835b4f25bcd769
Author: Zhen Chen <[email protected]>
AuthorDate: Tue Aug 26 07:28:31 2025 +0800

    [CALCITE-7126] The calculation result of grouping function is wrong
---
 .../java/org/apache/calcite/tools/RelBuilder.java  | 199 ++++++++++++++++-----
 .../org/apache/calcite/test/RelBuilderTest.java    |   5 +-
 .../apache/calcite/test/SqlToRelConverterTest.xml  |  32 ++--
 core/src/test/resources/sql/agg.iq                 | 179 ++++++++++++++----
 4 files changed, 318 insertions(+), 97 deletions(-)

diff --git a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java 
b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
index 9831e65a76..164ac0628a 100644
--- a/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
+++ b/core/src/main/java/org/apache/calcite/tools/RelBuilder.java
@@ -2817,10 +2817,12 @@ private RelBuilder aggregate_(ImmutableBitSet groupSet,
    *
    * <p>Also see the discussion in
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-1824";>[CALCITE-1824]
-   * GROUP_ID returns wrong result</a> and
+   * GROUP_ID returns wrong result</a>,
    * <a 
href="https://issues.apache.org/jira/browse/CALCITE-4748";>[CALCITE-4748]
    * If there are duplicate GROUPING SETS, Calcite should return duplicate
-   * rows</a>.
+   * rows</a> and
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-7126";>[CALCITE-7126]
+   * The calculation result of grouping function is wrong</a>.
    */
   private RelBuilder rewriteAggregateWithDuplicateGroupSets(
       ImmutableBitSet groupSet,
@@ -2832,23 +2834,11 @@ private RelBuilder 
rewriteAggregateWithDuplicateGroupSets(
             aggregateCalls.stream().map(AggCallPlus::aggregateCall)
                 .collect(toImmutableList())).getFieldNames();
 
-    // If n duplicates exist for a particular grouping, the {@code GROUP_ID()}
-    // function produces values in the range 0 to n-1. For each value,
-    // we need to figure out the corresponding group sets.
-    //
-    // For example, "... GROUPING SETS (a, a, b, c, c, c, c)"
-    // (i) The max value of the GROUP_ID() function returns is 3
-    // (ii) GROUPING SETS (a, b, c) produces value 0,
-    //      GROUPING SETS (a, c) produces value 1,
-    //      GROUPING SETS (c) produces value 2
-    //      GROUPING SETS (c) produces value 3
+    final Frame input = stack.pop();
+
     final Map<Integer, Set<ImmutableBitSet>> groupIdToGroupSets = new 
HashMap<>();
-    int maxGroupId = 0;
     for (Multiset.Entry<ImmutableBitSet> entry : groupSets.entrySet()) {
       int groupId = entry.getCount() - 1;
-      if (groupId > maxGroupId) {
-        maxGroupId = groupId;
-      }
       for (int i = 0; i <= groupId; i++) {
         groupIdToGroupSets.computeIfAbsent(i,
             k -> Sets.newTreeSet(ImmutableBitSet.COMPARATOR))
@@ -2856,43 +2846,148 @@ private RelBuilder 
rewriteAggregateWithDuplicateGroupSets(
       }
     }
 
-    // AggregateCall list without GROUP_ID function
-    final List<AggCall> aggregateCallsWithoutGroupId =
-        new ArrayList<>(aggregateCalls);
-    aggregateCallsWithoutGroupId.removeIf(RelBuilder::isGroupId);
-
-    // For each group id value, we first construct an Aggregate without
-    // GROUP_ID() function call, and then create a Project node on top of it.
-    // The Project adds literal value for group id in right position.
-    final Frame frame = stack.pop();
-    for (int groupId = 0; groupId <= maxGroupId; groupId++) {
-      // Create the Aggregate node without GROUP_ID() call
-      stack.push(frame);
-      aggregate(groupKey(groupSet, 
castNonNull(groupIdToGroupSets.get(groupId))),
-          aggregateCallsWithoutGroupId);
-
-      final List<RexNode> selectList = new ArrayList<>();
-      final int groupExprLength = groupSet.cardinality();
-      // Project fields in group by expressions
-      for (int i = 0; i < groupExprLength; i++) {
-        selectList.add(field(i));
+    // Create aggregate for each GROUP_ID value
+    for (Map.Entry<Integer, Set<ImmutableBitSet>> entry : 
groupIdToGroupSets.entrySet()) {
+      // If n duplicates exist for a particular grouping, the {@code 
GROUP_ID()}
+      // function produces values in the range 0 to n-1. For each value,
+      // we need to figure out the corresponding group sets.
+      //
+      // For example, "... GROUPING SETS (a, a, b, c, c, c, c)"
+      // (i) The max value of the GROUP_ID() function returns is 3
+      // (ii) GROUPING SETS (a, b, c) produces value 0,
+      //      GROUPING SETS (a, c) produces value 1,
+      //      GROUPING SETS (c) produces value 2
+      //      GROUPING SETS (c) produces value 3
+      int groupId = entry.getKey();
+      Set<ImmutableBitSet> newGroupSets = entry.getValue();
+      rewriteGroupAggCalls(newGroupSets, groupSet, aggregateCalls,
+          fieldNamesIfNoRewrite, input, groupId);
+    }
+
+    return union(true, groupIdToGroupSets.size());
+  }
+
+  private void rewriteGroupAggCalls(
+      Set<ImmutableBitSet> subGroupSets,
+      ImmutableBitSet oriGroupSet,
+      List<AggCallPlus> oriAggCalls,
+      List<String> fieldNames,
+      Frame input,
+      int groupId) {
+    stack.push(input);
+    List<AggCallPlus> subAggCalls = new ArrayList<>();
+    ImmutableBitSet subGroupSet = ImmutableBitSet.union(subGroupSets);
+    List<RexNode> subProjects = new ArrayList<>();
+
+    // 1. For GroupSet
+    RelDataType subAggregateGroupSetType =
+        Aggregate.deriveRowType(getTypeFactory(), peek().getRowType(), false,
+            subGroupSet, ImmutableList.copyOf(subGroupSets), 
ImmutableList.of());
+    for (int i = 0; i < oriGroupSet.cardinality(); i++) {
+      int groupKey = oriGroupSet.nth(i);
+      if (subGroupSet.get(groupKey)) {
+        subProjects.add(
+            RexInputRef.of(
+                subGroupSet.indexOf(groupKey),
+                subAggregateGroupSetType));
+      } else {
+        // If the groupKey is not in the GroupSet, use null as a placeholder.
+        
subProjects.add(getRexBuilder().makeNullLiteral(field(groupKey).getType()));
       }
-      // Project fields in aggregate calls
-      int groupIdCount = 0;
-      for (int i = 0; i < aggregateCalls.size(); i++) {
-        if (isGroupId(aggregateCalls.get(i))) {
-          selectList.add(
-              getRexBuilder().makeExactLiteral(BigDecimal.valueOf(groupId),
-                  getTypeFactory().createSqlType(SqlTypeName.BIGINT)));
-          groupIdCount++;
+    }
+
+    // 2. For AggregateCalls
+    int newGroupCount = subGroupSet.cardinality();
+    for (AggCallPlus oriAggCall : oriAggCalls) {
+      AggregateCall aggCall = oriAggCall.aggregateCall();
+      switch (oriAggCall.op().getKind()) {
+      case GROUPING:
+        if (!subGroupSet.contains(ImmutableBitSet.of(aggCall.getArgList()))) {
+          // If the parameters of the GROUPING function cannot be fully 
covered by the GroupSet,
+          // the GROUPING function needs to be split.
+          splitGrouping(subAggCalls, subProjects, aggCall, subGroupSet);
         } else {
-          selectList.add(field(groupExprLength + i - groupIdCount));
+          subProjects.add(
+              new RexInputRef(
+                  newGroupCount + subAggCalls.size(),
+                  aggCall.getType()));
+          subAggCalls.add(oriAggCall);
         }
+        break;
+      case GROUP_ID:
+        subProjects.add(
+            getRexBuilder().makeLiteral(
+                groupId,
+                aggCall.getType()));
+        break;
+      default:
+        subProjects.add(
+            new RexInputRef(
+                newGroupCount + subAggCalls.size(),
+                aggCall.getType()));
+        subAggCalls.add(oriAggCall);
+        break;
+      }
+    }
+
+    aggregate(groupKey(subGroupSet, subGroupSets), subAggCalls);
+    project(subProjects, fieldNames);
+  }
+
+  /**
+   * This method is used to expand the SQL GROUPING operator
+   * into a set of expressions. For example, it expands GROUPING(x, y, z)
+   * into 2^2 * GROUPING(x) + 2^1 * GROUPING(y) + 2^0 * GROUPING(z)
+   *
+   * <p>For example, in "GROUP BY GROUPING SETS ((a, b), (a), (), (b))":
+   * In the grouping set (a, b), GROUPING(a) = 0, GROUPING(b) = 0,
+   * GROUPING(a, b) = 2^1 * 0 + 2^0 * 0 => 0.
+   * In the grouping set (a), GROUPING(a) = 0, GROUPING(b) = 1,
+   * GROUPING(a, b) = 2^1 * 0 + 2^0 * 1 => 1.
+   * In the grouping set (), GROUPING(a) = 1, GROUPING(b) = 1,
+   * GROUPING(a, b) = 2^1 * 1 + 2^0 * 1 = 3.
+   * In the grouping set (b), GROUPING(a) = 1, GROUPING(b) = 0,
+   * GROUPING(a, b) = 2^1 * 1 + 2^0 * 0 = 2.
+   * Thus, GROUPING(a, b) produces 0, 1, 3, and 2 for each grouping set 
respectively.
+   */
+  private void splitGrouping(
+      List<AggCallPlus> aggCalls,
+      List<RexNode> projects,
+      AggregateCall grouping,
+      ImmutableBitSet groupSet) {
+    List<RexNode> splitOperands = new ArrayList<>();
+    List<Integer> groupingArgs = grouping.getArgList();
+    RelDataType groupingType = 
getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+    for (int i = 0; i < groupingArgs.size(); i++) {
+      int groupingArg = groupingArgs.get(i);
+      if (groupSet.get(groupingArg)) {
+        RexInputRef groupingRef =
+            new RexInputRef(groupSet.cardinality() + aggCalls.size(), 
groupingType);
+        splitOperands.add(
+            call(SqlStdOperatorTable.MULTIPLY,
+                literal(safeShift(groupingArgs.size() - 1 - i)),
+                groupingRef));
+        aggCalls.add((AggCallPlus) aggregateCall(SqlStdOperatorTable.GROUPING, 
field(groupingArg)));
+      } else {
+        // Sets GROUPING function value to 1 for parameters not in GroupSet 
and calculates offset.
+        splitOperands.add(literal(safeShift(groupingArgs.size() - 1 - i)));
       }
-      project(selectList, fieldNamesIfNoRewrite);
     }
 
-    return union(true, maxGroupId + 1);
+    // Plus all expanded expressions (including GROUPING functions and 
constants).
+    RexNode plus = splitOperands.get(0);
+    for (int i = 1; i < splitOperands.size(); i++) {
+      plus = call(SqlStdOperatorTable.PLUS, plus, splitOperands.get(i));
+    }
+    projects.add(plus);
+  }
+
+  private static int safeShift(int shift) {
+    if (shift < 0 || shift >= Integer.SIZE) {
+      throw new IllegalArgumentException(
+          "Too many grouping keys. Maximum is " + (Integer.SIZE - 1) + " for 
grouping functions.");
+    }
+    return 1 << shift;
   }
 
   private static boolean isGroupId(AggCall c) {
@@ -4447,8 +4542,14 @@ private class AggCallImpl implements AggCallPlus {
       // return a call that is "approximately equivalent ... and is good for
       // deriving field names", so dummy values are good enough.
       final RelCollation collation = RelCollations.EMPTY;
-      final RelDataType type =
-          getTypeFactory().createSqlType(SqlTypeName.BOOLEAN);
+      final RelDataType type;
+      if (aggFunction.getKind() == SqlKind.GROUP_ID) {
+        // The return type of GROUP_ID function is SqlTypeName.BIGINT,
+        // see SqlGroupIdFunction.
+        type = getTypeFactory().createSqlType(SqlTypeName.BIGINT);
+      } else {
+        type = getTypeFactory().createSqlType(SqlTypeName.BOOLEAN);
+      }
       return AggregateCall.create(pos, aggFunction, distinct, approximate,
           ignoreNulls, preOperands, ImmutableList.of(), -1,
           null, collation, type, alias);
diff --git a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java 
b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
index 0e105ed784..9ca3eec4c8 100644
--- a/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelBuilderTest.java
@@ -2057,8 +2057,9 @@ private RelNode buildRelWithDuplicateAggregates(
         + "LogicalUnion(all=[true])\n"
         + "  LogicalAggregate(group=[{6, 7}], groups=[[{6}, {7}]])\n"
         + "    LogicalTableScan(table=[[scott, EMP]])\n"
-        + "  LogicalAggregate(group=[{6, 7}], groups=[[{7}]])\n"
-        + "    LogicalTableScan(table=[[scott, EMP]])\n";
+        + "  LogicalProject(COMM=[null:DECIMAL(7, 2)], DEPTNO=[$0])\n"
+        + "    LogicalAggregate(group=[{7}])\n"
+        + "      LogicalTableScan(table=[[scott, EMP]])\n";
     assertThat(root, hasTree(expected));
   }
 
diff --git 
a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml 
b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
index 787bad3174..07292374bc 100644
--- a/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
+++ b/core/src/test/resources/org/apache/calcite/test/SqlToRelConverterTest.xml
@@ -1866,9 +1866,10 @@ LogicalProject(EXPR$0=[$3])
     LogicalAggregate(group=[{0, 1, 2}], groups=[[{0, 1, 2}, {0, 1}, {0, 2}]], 
EXPR$0=[SUM($0)])
       LogicalProject(SAL=[$5], DEPTNO=[$7], ENAME=[$1])
         LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalAggregate(group=[{0, 1, 2}], groups=[[{0, 2}]], EXPR$0=[SUM($0)])
-      LogicalProject(SAL=[$5], DEPTNO=[$7], ENAME=[$1])
-        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(SAL=[$0], DEPTNO=[null:INTEGER], ENAME=[$1], EXPR$0=[$2])
+      LogicalAggregate(group=[{0, 2}], EXPR$0=[SUM($0)])
+        LogicalProject(SAL=[$5], DEPTNO=[$7], ENAME=[$1])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
@@ -2743,8 +2744,8 @@ LogicalProject(DEPTNO=[$0], EXPR$1=[$2])
       LogicalAggregate(group=[{0, 1}], groups=[[{0}, {1}]])
         LogicalProject(DEPTNO=[$7], JOB=[$2])
           LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalProject(DEPTNO=[$0], JOB=[$1], EXPR$1=[2:BIGINT])
-      LogicalAggregate(group=[{0, 1}], groups=[[{0}]])
+    LogicalProject(DEPTNO=[$0], JOB=[null:VARCHAR(10)], EXPR$1=[2:BIGINT])
+      LogicalAggregate(group=[{0}])
         LogicalProject(DEPTNO=[$7], JOB=[$2])
           LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
@@ -2766,9 +2767,10 @@ LogicalUnion(all=[true])
   LogicalAggregate(group=[{0, 1}], groups=[[{0}, {1}]])
     LogicalProject(DEPTNO=[$7], JOB=[$2])
       LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-  LogicalAggregate(group=[{0, 1}], groups=[[{0}]])
-    LogicalProject(DEPTNO=[$7], JOB=[$2])
-      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+  LogicalProject(DEPTNO=[$0], JOB=[null:VARCHAR(10)])
+    LogicalAggregate(group=[{0}])
+      LogicalProject(DEPTNO=[$7], JOB=[$2])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
@@ -2799,9 +2801,10 @@ LogicalSort(sort0=[$1], dir0=[ASC])
     LogicalAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {1}, {}]], 
EXPR$2=[SUM($2)])
       LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
         LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalAggregate(group=[{0, 1}], groups=[[{0}]], EXPR$2=[SUM($2)])
-      LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
-        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(DEPTNO=[$0], ENAME=[null:VARCHAR(20)], EXPR$2=[$1])
+      LogicalAggregate(group=[{0}], EXPR$2=[SUM($2)])
+        LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
@@ -2872,9 +2875,10 @@ LogicalSort(sort0=[$1], dir0=[ASC])
     LogicalAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {1}, {}]], 
EXPR$2=[SUM($2)])
       LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
         LogicalTableScan(table=[[CATALOG, SALES, EMP]])
-    LogicalAggregate(group=[{0, 1}], groups=[[{}]], EXPR$2=[SUM($2)])
-      LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
-        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+    LogicalProject(DEPTNO=[null:INTEGER], ENAME=[null:VARCHAR(20)], 
EXPR$2=[$0])
+      LogicalAggregate(group=[{}], EXPR$2=[SUM($2)])
+        LogicalProject(DEPTNO=[$7], ENAME=[$1], SAL=[$5])
+          LogicalTableScan(table=[[CATALOG, SALES, EMP]])
 ]]>
     </Resource>
   </TestCase>
diff --git a/core/src/test/resources/sql/agg.iq 
b/core/src/test/resources/sql/agg.iq
index 553047b76e..3852b29472 100644
--- a/core/src/test/resources/sql/agg.iq
+++ b/core/src/test/resources/sql/agg.iq
@@ -765,7 +765,8 @@ select count(*) as c,
   grouping_id(gender, deptno) as ggd,
   grouping_id(deptno, gender) as gdg
 from emp
-group by rollup(deptno, gender);
+group by rollup(deptno, gender)
+order by c, g, gid, gd, gg, ggd, gdg;
 +---+---+-----+----+----+-----+-----+
 | C | G | GID | GD | GG | GGD | GDG |
 +---+---+-----+----+----+-----+-----+
@@ -776,14 +777,14 @@ group by rollup(deptno, gender);
 | 1 | 0 |   0 |  0 |  0 |   0 |   0 |
 | 1 | 0 |   0 |  0 |  0 |   0 |   0 |
 | 1 | 0 |   0 |  0 |  0 |   0 |   0 |
-| 2 | 0 |   0 |  0 |  0 |   0 |   0 |
-| 9 | 1 |   0 |  1 |  1 |   3 |   3 |
 | 1 | 0 |   0 |  0 |  1 |   2 |   1 |
 | 1 | 0 |   0 |  0 |  1 |   2 |   1 |
 | 1 | 0 |   0 |  0 |  1 |   2 |   1 |
+| 2 | 0 |   0 |  0 |  0 |   0 |   0 |
 | 2 | 0 |   0 |  0 |  1 |   2 |   1 |
 | 2 | 0 |   0 |  0 |  1 |   2 |   1 |
 | 2 | 0 |   0 |  0 |  1 |   2 |   1 |
+| 9 | 1 |   0 |  1 |  1 |   3 |   3 |
 +---+---+-----+----+----+-----+-----+
 (15 rows)
 
@@ -880,31 +881,32 @@ group by rollup(deptno);
 
 # GROUPING, GROUPING_ID and GROUP_ID
 select deptno, gender, grouping(deptno) gd, grouping(gender) gg,
-  grouping_id(deptno, gender) dg, grouping_id(gender, deptno) gd,
-  group_id() gid, count(*) c
+  grouping_id(deptno, gender) dg, grouping_id(gender, deptno) gid,
+  group_id() g, count(*) c
 from emp
-group by cube(deptno, gender);
-+--------+--------+----+----+----+----+-----+---+
-| DEPTNO | GENDER | GD | GG | DG | GD | GID | C |
-+--------+--------+----+----+----+----+-----+---+
-|     10 | F      |  0 |  0 |  0 |  0 |   0 | 1 |
-|     10 | M      |  0 |  0 |  0 |  0 |   0 | 1 |
-|     20 | M      |  0 |  0 |  0 |  0 |   0 | 1 |
-|     30 | F      |  0 |  0 |  0 |  0 |   0 | 2 |
-|     50 | F      |  0 |  0 |  0 |  0 |   0 | 1 |
-|     50 | M      |  0 |  0 |  0 |  0 |   0 | 1 |
-|     60 | F      |  0 |  0 |  0 |  0 |   0 | 1 |
-|        | F      |  0 |  0 |  0 |  0 |   0 | 1 |
-|        |        |  1 |  1 |  3 |  3 |   0 | 9 |
-|     10 |        |  0 |  1 |  1 |  2 |   0 | 2 |
-|     20 |        |  0 |  1 |  1 |  2 |   0 | 1 |
-|     30 |        |  0 |  1 |  1 |  2 |   0 | 2 |
-|     50 |        |  0 |  1 |  1 |  2 |   0 | 2 |
-|     60 |        |  0 |  1 |  1 |  2 |   0 | 1 |
-|        | F      |  1 |  0 |  2 |  1 |   0 | 6 |
-|        | M      |  1 |  0 |  2 |  1 |   0 | 3 |
-|        |        |  0 |  1 |  1 |  2 |   0 | 1 |
-+--------+--------+----+----+----+----+-----+---+
+group by cube(deptno, gender)
+order by deptno, gender, gd, gg, dg, gid, g, c;
++--------+--------+----+----+----+-----+---+---+
+| DEPTNO | GENDER | GD | GG | DG | GID | G | C |
++--------+--------+----+----+----+-----+---+---+
+|     10 | F      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     10 | M      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     10 |        |  0 |  1 |  1 |   2 | 0 | 2 |
+|     20 | M      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     20 |        |  0 |  1 |  1 |   2 | 0 | 1 |
+|     30 | F      |  0 |  0 |  0 |   0 | 0 | 2 |
+|     30 |        |  0 |  1 |  1 |   2 | 0 | 2 |
+|     50 | F      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     50 | M      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     50 |        |  0 |  1 |  1 |   2 | 0 | 2 |
+|     60 | F      |  0 |  0 |  0 |   0 | 0 | 1 |
+|     60 |        |  0 |  1 |  1 |   2 | 0 | 1 |
+|        | F      |  0 |  0 |  0 |   0 | 0 | 1 |
+|        | F      |  1 |  0 |  2 |   1 | 0 | 6 |
+|        | M      |  1 |  0 |  2 |   1 | 0 | 3 |
+|        |        |  0 |  1 |  1 |   2 | 0 | 1 |
+|        |        |  1 |  1 |  3 |   3 | 0 | 9 |
++--------+--------+----+----+----+-----+---+---+
 (17 rows)
 
 !ok
@@ -1339,11 +1341,6 @@ order by g, i, s desc;
 | 1900.00 | 0 | 0 |
 | 1300.00 | 0 | 0 |
 |  950.00 | 0 | 0 |
-| 8275.00 | 0 | 1 |
-| 6000.00 | 0 | 1 |
-| 5600.00 | 0 | 1 |
-| 5000.00 | 0 | 1 |
-| 4150.00 | 0 | 1 |
 | 6000.00 | 1 | 0 |
 | 5600.00 | 1 | 0 |
 | 5000.00 | 1 | 0 |
@@ -1358,6 +1355,11 @@ order by g, i, s desc;
 | 5600.00 | 2 | 0 |
 | 5000.00 | 2 | 0 |
 | 4150.00 | 2 | 0 |
+| 8275.00 | 2 | 1 |
+| 6000.00 | 2 | 1 |
+| 5600.00 | 2 | 1 |
+| 5000.00 | 2 | 1 |
+| 4150.00 | 2 | 1 |
 +---------+---+---+
 (28 rows)
 
@@ -3897,4 +3899,117 @@ EnumerableCalc(expr#0=[{inputs}], 
expr#1=[CAST($t0):BIGINT NOT NULL], M=[$t1])
       EnumerableTableScan(table=[[scott, EMP]])
 !plan
 
+# [CALCITE-7126] The calculation result of grouping function is wrong
+SELECT deptno, job, sal, SUM(comm),
+       GROUPING(deptno) AS deptno_flag,
+       GROUPING(job) AS job_flag,
+       GROUPING(sal) AS sal_flag
+FROM emp
+GROUP BY GROUPING SETS ((deptno, job), (deptno, sal), (deptno, job))
+ORDER BY deptno, job, sal;
++--------+-----------+---------+---------+-------------+----------+----------+
+| DEPTNO | JOB       | SAL     | EXPR$3  | DEPTNO_FLAG | JOB_FLAG | SAL_FLAG |
++--------+-----------+---------+---------+-------------+----------+----------+
+|     10 | CLERK     |         |         |           0 |        0 |        1 |
+|     10 | CLERK     |         |         |           0 |        0 |        1 |
+|     10 | MANAGER   |         |         |           0 |        0 |        1 |
+|     10 | MANAGER   |         |         |           0 |        0 |        1 |
+|     10 | PRESIDENT |         |         |           0 |        0 |        1 |
+|     10 | PRESIDENT |         |         |           0 |        0 |        1 |
+|     10 |           | 1300.00 |         |           0 |        1 |        0 |
+|     10 |           | 2450.00 |         |           0 |        1 |        0 |
+|     10 |           | 5000.00 |         |           0 |        1 |        0 |
+|     20 | ANALYST   |         |         |           0 |        0 |        1 |
+|     20 | ANALYST   |         |         |           0 |        0 |        1 |
+|     20 | CLERK     |         |         |           0 |        0 |        1 |
+|     20 | CLERK     |         |         |           0 |        0 |        1 |
+|     20 | MANAGER   |         |         |           0 |        0 |        1 |
+|     20 | MANAGER   |         |         |           0 |        0 |        1 |
+|     20 |           |  800.00 |         |           0 |        1 |        0 |
+|     20 |           | 1100.00 |         |           0 |        1 |        0 |
+|     20 |           | 2975.00 |         |           0 |        1 |        0 |
+|     20 |           | 3000.00 |         |           0 |        1 |        0 |
+|     30 | CLERK     |         |         |           0 |        0 |        1 |
+|     30 | CLERK     |         |         |           0 |        0 |        1 |
+|     30 | MANAGER   |         |         |           0 |        0 |        1 |
+|     30 | MANAGER   |         |         |           0 |        0 |        1 |
+|     30 | SALESMAN  |         | 2200.00 |           0 |        0 |        1 |
+|     30 | SALESMAN  |         | 2200.00 |           0 |        0 |        1 |
+|     30 |           |  950.00 |         |           0 |        1 |        0 |
+|     30 |           | 1250.00 | 1900.00 |           0 |        1 |        0 |
+|     30 |           | 1500.00 |    0.00 |           0 |        1 |        0 |
+|     30 |           | 1600.00 |  300.00 |           0 |        1 |        0 |
+|     30 |           | 2850.00 |         |           0 |        1 |        0 |
++--------+-----------+---------+---------+-------------+----------+----------+
+(30 rows)
+
+!ok
+
+EnumerableMergeUnion(all=[true])
+  EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], 
dir2=[ASC])
+    EnumerableCalc(expr#0..6=[{inputs}], DEPTNO=[$t2], JOB=[$t0], SAL=[$t1], 
EXPR$3=[$t3], DEPTNO_FLAG=[$t4], JOB_FLAG=[$t5], SAL_FLAG=[$t6])
+      EnumerableAggregate(group=[{2, 5, 7}], groups=[[{2, 7}, {5, 7}]], 
EXPR$3=[SUM($6)], DEPTNO_FLAG=[GROUPING($7)], JOB_FLAG=[GROUPING($2)], 
SAL_FLAG=[GROUPING($5)])
+        EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], 
dir2=[ASC])
+    EnumerableCalc(expr#0..4=[{inputs}], expr#5=[null:DECIMAL(7, 2)], 
expr#6=[1], DEPTNO=[$t1], JOB=[$t0], SAL=[$t5], EXPR$3=[$t2], 
DEPTNO_FLAG=[$t3], JOB_FLAG=[$t4], SAL_FLAG=[$t6])
+      EnumerableAggregate(group=[{2, 7}], EXPR$3=[SUM($6)], 
DEPTNO_FLAG=[GROUPING($7)], JOB_FLAG=[GROUPING($2)])
+        EnumerableTableScan(table=[[scott, EMP]])
+!plan
+
+# The subset of GROUPING SETS cannot contain all the fields in the GROUPING 
and GROUPING SETS has duplicate groupings
+SELECT deptno, job, sal,
+       SUM(comm) AS sum_comm,
+       GROUPING(deptno, job, sal) AS flag,
+       GROUP_ID() AS g
+FROM emp
+GROUP BY GROUPING SETS ((deptno, job), (deptno, sal), (deptno, job))
+ORDER BY deptno, job, sal;
++--------+-----------+---------+----------+------+---+
+| DEPTNO | JOB       | SAL     | SUM_COMM | FLAG | G |
++--------+-----------+---------+----------+------+---+
+|     10 | CLERK     |         |          |    1 | 0 |
+|     10 | CLERK     |         |          |    1 | 1 |
+|     10 | MANAGER   |         |          |    1 | 0 |
+|     10 | MANAGER   |         |          |    1 | 1 |
+|     10 | PRESIDENT |         |          |    1 | 0 |
+|     10 | PRESIDENT |         |          |    1 | 1 |
+|     10 |           | 1300.00 |          |    2 | 0 |
+|     10 |           | 2450.00 |          |    2 | 0 |
+|     10 |           | 5000.00 |          |    2 | 0 |
+|     20 | ANALYST   |         |          |    1 | 0 |
+|     20 | ANALYST   |         |          |    1 | 1 |
+|     20 | CLERK     |         |          |    1 | 0 |
+|     20 | CLERK     |         |          |    1 | 1 |
+|     20 | MANAGER   |         |          |    1 | 0 |
+|     20 | MANAGER   |         |          |    1 | 1 |
+|     20 |           |  800.00 |          |    2 | 0 |
+|     20 |           | 1100.00 |          |    2 | 0 |
+|     20 |           | 2975.00 |          |    2 | 0 |
+|     20 |           | 3000.00 |          |    2 | 0 |
+|     30 | CLERK     |         |          |    1 | 0 |
+|     30 | CLERK     |         |          |    1 | 1 |
+|     30 | MANAGER   |         |          |    1 | 0 |
+|     30 | MANAGER   |         |          |    1 | 1 |
+|     30 | SALESMAN  |         |  2200.00 |    1 | 0 |
+|     30 | SALESMAN  |         |  2200.00 |    1 | 1 |
+|     30 |           |  950.00 |          |    2 | 0 |
+|     30 |           | 1250.00 |  1900.00 |    2 | 0 |
+|     30 |           | 1500.00 |     0.00 |    2 | 0 |
+|     30 |           | 1600.00 |   300.00 |    2 | 0 |
+|     30 |           | 2850.00 |          |    2 | 0 |
++--------+-----------+---------+----------+------+---+
+(30 rows)
+
+!ok
+EnumerableMergeUnion(all=[true])
+  EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], 
dir2=[ASC])
+    EnumerableCalc(expr#0..4=[{inputs}], expr#5=[0:BIGINT], DEPTNO=[$t2], 
JOB=[$t0], SAL=[$t1], SUM_COMM=[$t3], FLAG=[$t4], G=[$t5])
+      EnumerableAggregate(group=[{2, 5, 7}], groups=[[{2, 7}, {5, 7}]], 
SUM_COMM=[SUM($6)], FLAG=[GROUPING($7, $2, $5)])
+        EnumerableTableScan(table=[[scott, EMP]])
+  EnumerableSort(sort0=[$0], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], 
dir2=[ASC])
+    EnumerableCalc(expr#0..4=[{inputs}], expr#5=[null:DECIMAL(7, 2)], 
expr#6=[4], expr#7=[*($t6, $t3)], expr#8=[2], expr#9=[*($t8, $t4)], 
expr#10=[+($t7, $t9)], expr#11=[1], expr#12=[+($t10, $t11)], 
expr#13=[1:BIGINT], DEPTNO=[$t1], JOB=[$t0], SAL=[$t5], SUM_COMM=[$t2], 
FLAG=[$t12], G=[$t13])
+      EnumerableAggregate(group=[{2, 7}], SUM_COMM=[SUM($6)], 
agg#1=[GROUPING($7)], agg#2=[GROUPING($2)])
+        EnumerableTableScan(table=[[scott, EMP]])
+!plan
+
 # End agg.iq

(calcite) branch main updated: [CALCITE-7126] The calculation result of grouping function is wrong

Reply via email to