[ 
https://issues.apache.org/jira/browse/DRILL-3962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18030435#comment-18030435
 ] 

ASF GitHub Bot commented on DRILL-3962:
---------------------------------------

cgivre commented on code in PR #3026:
URL: https://github.com/apache/drill/pull/3026#discussion_r2437141630


##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAggregateExpandGroupingSetsRule.java:
##########
@@ -0,0 +1,426 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that expands GROUPING SETS, ROLLUP, and CUBE into a UNION ALL
+ * of multiple aggregates, each with a single grouping set.
+ *
+ * This rule converts:
+ *   SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a), ())
+ *
+ * Into:
+ *   SELECT a, b, SUM(c), 0 AS $g FROM t GROUP BY a, b
+ *   UNION ALL
+ *   SELECT a, null, SUM(c), 1 AS $g FROM t GROUP BY a
+ *   UNION ALL
+ *   SELECT null, null, SUM(c), 3 AS $g FROM t GROUP BY ()
+ *
+ * The $g column is the grouping ID that can be used by GROUPING() and GROUPING_ID() functions.
+ * Currently, the $g column is generated internally but stripped from the final output.
+ *
+ * TODO: Implement GROUPING() and GROUPING_ID() functions by:
+ * 1. Detecting these functions in the SELECT list during expansion
+ * 2. Rewriting them to reference the $g column (e.g., GROUPING(a) becomes bit extraction from $g)
+ * 3. Preserving the $g column in the output when these functions are used
+ */
+public class DrillAggregateExpandGroupingSetsRule extends RelOptRule {
+
+  public static final DrillAggregateExpandGroupingSetsRule INSTANCE =
+      new DrillAggregateExpandGroupingSetsRule();
+
+  private DrillAggregateExpandGroupingSetsRule() {  // private: instances are obtained via the INSTANCE constant
+    super(operand(Aggregate.class, any()), DrillRelFactories.LOGICAL_BUILDER,  // operand is Aggregate; matches() narrows it further
+        "DrillAggregateExpandGroupingSetsRule");
+  }
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final Aggregate aggregate = call.rel(0);
+
+    // Fire only when the aggregate has more than one grouping set, so an aggregate with a single grouping set never matches.
+    // Additionally restrict the rule to DrillAggregateRel and LogicalAggregate nodes; other Aggregate subclasses are ignored.
+    return aggregate.getGroupSets().size() > 1
+        && (aggregate instanceof DrillAggregateRel || aggregate instanceof 
LogicalAggregate);
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Aggregate aggregate = call.rel(0);
+    final RelOptCluster cluster = aggregate.getCluster();
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
+
+    // Get the input
+    final RelNode input = aggregate.getInput();
+    final List<ImmutableBitSet> groupSets = aggregate.getGroupSets();
+    final ImmutableBitSet fullGroupSet = aggregate.getGroupSet();
+    final List<AggregateCall> aggCalls = aggregate.getAggCallList();
+
+    // Check if we have GROUPING, GROUPING_ID, or GROUP_ID functions
+    // These functions need the $g column to be preserved in the output
+    // We need to separate them from regular aggregate functions but preserve 
their original positions
+    List<AggregateCall> regularAggCalls = new ArrayList<>();
+    List<Integer> groupingFunctionPositions = new ArrayList<>();  // Original 
positions in aggCalls
+    List<AggregateCall> groupingFunctionCalls = new ArrayList<>();
+    boolean hasGroupingFunctions = false;
+
+    for (int i = 0; i < aggCalls.size(); i++) {
+      AggregateCall aggCall = aggCalls.get(i);
+      org.apache.calcite.sql.SqlKind kind = aggCall.getAggregation().getKind();
+      if (kind == org.apache.calcite.sql.SqlKind.GROUPING ||
+          kind == org.apache.calcite.sql.SqlKind.GROUPING_ID ||
+          kind == org.apache.calcite.sql.SqlKind.GROUP_ID) {
+        hasGroupingFunctions = true;
+        groupingFunctionPositions.add(i);
+        groupingFunctionCalls.add(aggCall);
+      } else {
+        regularAggCalls.add(aggCall);
+      }
+    }
+
+    // Create a separate aggregate for each grouping set
+    // Process grouping sets in order of decreasing cardinality (more columns 
first)
+    // This ensures that for UNION ALL, branches with actual data types come 
before
+    // branches with NULL placeholders, helping with type inference
+    //
+    // For GROUP_ID support, we need to track duplicate grouping sets and 
assign sequence numbers
+    List<RelNode> aggregates = new ArrayList<>();
+    List<ImmutableBitSet> sortedGroupSets = new ArrayList<>(groupSets);
+    // Sort by cardinality descending (more grouping columns first)
+    sortedGroupSets.sort((a, b) -> Integer.compare(b.cardinality(), 
a.cardinality()));
+
+    // Track GROUP_ID for duplicate grouping sets
+    // Map from grouping set to the count of times we've seen it so far
+    java.util.Map<ImmutableBitSet, Integer> groupSetOccurrences = new 
java.util.HashMap<>();
+    List<Integer> groupIds = new ArrayList<>();  // GROUP_ID value for each 
position in sortedGroupSets
+
+    for (int i = 0; i < sortedGroupSets.size(); i++) {
+      ImmutableBitSet groupSet = sortedGroupSets.get(i);
+
+      // Track GROUP_ID: how many times have we seen this grouping set before?
+      int groupId = groupSetOccurrences.getOrDefault(groupSet, 0);
+      groupIds.add(groupId);
+      groupSetOccurrences.put(groupSet, groupId + 1);
+
+      // Create the aggregate for this grouping set
+      // Use regularAggCalls (without GROUPING functions) because GROUPING 
functions
+      // will be evaluated later using the $g column
+      Aggregate newAggregate;
+      if (aggregate instanceof DrillAggregateRel) {
+        newAggregate = new DrillAggregateRel(
+            cluster,
+            aggregate.getTraitSet(),
+            input,
+            groupSet,
+            ImmutableList.of(groupSet),
+            regularAggCalls);
+      } else {
+        newAggregate = aggregate.copy(
+            aggregate.getTraitSet(),
+            input,
+            groupSet,
+            ImmutableList.of(groupSet),
+            regularAggCalls);
+      }
+
+      // Create a project to add NULLs for missing grouping columns
+      List<RexNode> projects = new ArrayList<>();
+      List<String> fieldNames = new ArrayList<>();
+
+      // Add grouping columns (with NULLs for columns not in this grouping set)
+      int aggOutputIdx = 0;
+      int outputColIdx = 0; // Index in the final output row type
+      for (int col : fullGroupSet) {
+        if (groupSet.get(col)) {
+          // Column is in this grouping set - project it directly from the 
aggregate output
+          RexNode inputRef = rexBuilder.makeInputRef(newAggregate, 
aggOutputIdx);
+          projects.add(inputRef);
+          aggOutputIdx++;
+        } else {
+          // Column is NOT in this grouping set - project a typed NULL literal
+          // Use the expected output type from the original aggregate to 
create a properly typed NULL
+          // This prevents type inference issues in the UNION ALL
+          org.apache.calcite.rel.type.RelDataType nullType =
+              
aggregate.getRowType().getFieldList().get(outputColIdx).getType();
+          // Use makeLiteral with null value and explicit type to create a 
typed NULL
+          projects.add(rexBuilder.makeNullLiteral(nullType));
+        }
+        
fieldNames.add(aggregate.getRowType().getFieldList().get(outputColIdx).getName());
+        outputColIdx++;
+      }
+
+      // Add aggregate result columns (only regular aggregates, not GROUPING 
functions)
+      // We'll use the alias from the original aggregate call
+      for (int j = 0; j < regularAggCalls.size(); j++) {
+        projects.add(rexBuilder.makeInputRef(newAggregate, aggOutputIdx));
+        AggregateCall regCall = regularAggCalls.get(j);
+        String fieldName = regCall.getName() != null ? regCall.getName() : 
("$f" + (fullGroupSet.cardinality() + j));
+        fieldNames.add(fieldName);
+        aggOutputIdx++;
+      }
+
+      // Add grouping ID column ($g)
+      // The grouping ID is a bitmap where bit i is 1 if column i is NOT in 
the grouping set
+      int groupingId = 0;
+      int bitPosition = 0;
+      for (int col : fullGroupSet) {
+        if (!groupSet.get(col)) {
+          groupingId |= (1 << bitPosition);
+        }
+        bitPosition++;
+      }
+      projects.add(rexBuilder.makeLiteral(groupingId, 
typeFactory.createSqlType(org.apache.calcite.sql.type.SqlTypeName.INTEGER), 
true));
+      fieldNames.add("$g");

Review Comment:
   Fixed





> Add support of ROLLUP, CUBE, GROUPING SETS, GROUPING, GROUPING_ID, GROUP_ID 
> support
> -----------------------------------------------------------------------------------
>
>                 Key: DRILL-3962
>                 URL: https://issues.apache.org/jira/browse/DRILL-3962
>             Project: Apache Drill
>          Issue Type: New Feature
>            Reporter: Jinfeng Ni
>            Assignee: Charles Givre
>            Priority: Major
>
> These functions are important for BI analytical workloads.  Currently, Calcite 
> supports those functions, but neither the planning nor the execution in Drill 
> supports them. 
> DRILL-3802 blocks those functions in Drill planning. But we should provide 
> the support for those functions in both planning and execution of Drill. 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to