[
https://issues.apache.org/jira/browse/DRILL-3962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18030435#comment-18030435
]
ASF GitHub Bot commented on DRILL-3962:
---------------------------------------
cgivre commented on code in PR #3026:
URL: https://github.com/apache/drill/pull/3026#discussion_r2437141630
##########
exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillAggregateExpandGroupingSetsRule.java:
##########
@@ -0,0 +1,426 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.planner.logical;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Planner rule that expands GROUPING SETS, ROLLUP, and CUBE into a UNION ALL
+ * of multiple aggregates, each with a single grouping set.
+ *
+ * This rule converts:
+ * SELECT a, b, SUM(c) FROM t GROUP BY GROUPING SETS ((a, b), (a), ())
+ *
+ * Into:
+ * SELECT a, b, SUM(c), 0 AS $g FROM t GROUP BY a, b
+ * UNION ALL
+ * SELECT a, null, SUM(c), 1 AS $g FROM t GROUP BY a
+ * UNION ALL
+ * SELECT null, null, SUM(c), 3 AS $g FROM t GROUP BY ()
+ *
+ * The $g column is the grouping ID that can be used by GROUPING() and
+ * GROUPING_ID() functions.
+ * Currently, the $g column is generated internally but stripped from the
+ * final output.
+ *
+ * TODO: Implement GROUPING() and GROUPING_ID() functions by:
+ * 1. Detecting these functions in the SELECT list during expansion
+ * 2. Rewriting them to reference the $g column (e.g., GROUPING(a) becomes bit
+ * extraction from $g)
+ * 3. Preserving the $g column in the output when these functions are used
+ */
+public class DrillAggregateExpandGroupingSetsRule extends RelOptRule {
+
+ public static final DrillAggregateExpandGroupingSetsRule INSTANCE =
+ new DrillAggregateExpandGroupingSetsRule();
+
+ private DrillAggregateExpandGroupingSetsRule() { // private: instances are obtained via INSTANCE
+ super(operand(Aggregate.class, any()), DrillRelFactories.LOGICAL_BUILDER, // operand pattern: an Aggregate with any inputs
+ "DrillAggregateExpandGroupingSetsRule"); // rule description string passed to RelOptRule
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final Aggregate aggregate = call.rel(0);
+
+ // Fire only when the aggregate carries more than one grouping set (i.e. there is something to expand),
+ // and only for logical aggregates (DrillAggregateRel or LogicalAggregate), not physical ones
+ return aggregate.getGroupSets().size() > 1
+ && (aggregate instanceof DrillAggregateRel || aggregate instanceof
LogicalAggregate);
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall call) {
+ final Aggregate aggregate = call.rel(0);
+ final RelOptCluster cluster = aggregate.getCluster();
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
+
+ // Get the input
+ final RelNode input = aggregate.getInput();
+ final List<ImmutableBitSet> groupSets = aggregate.getGroupSets();
+ final ImmutableBitSet fullGroupSet = aggregate.getGroupSet();
+ final List<AggregateCall> aggCalls = aggregate.getAggCallList();
+
+ // Check if we have GROUPING, GROUPING_ID, or GROUP_ID functions
+ // These functions need the $g column to be preserved in the output
+ // We need to separate them from regular aggregate functions but preserve
their original positions
+ List<AggregateCall> regularAggCalls = new ArrayList<>();
+ List<Integer> groupingFunctionPositions = new ArrayList<>(); // Original
positions in aggCalls
+ List<AggregateCall> groupingFunctionCalls = new ArrayList<>();
+ boolean hasGroupingFunctions = false;
+
+ for (int i = 0; i < aggCalls.size(); i++) {
+ AggregateCall aggCall = aggCalls.get(i);
+ org.apache.calcite.sql.SqlKind kind = aggCall.getAggregation().getKind();
+ if (kind == org.apache.calcite.sql.SqlKind.GROUPING ||
+ kind == org.apache.calcite.sql.SqlKind.GROUPING_ID ||
+ kind == org.apache.calcite.sql.SqlKind.GROUP_ID) {
+ hasGroupingFunctions = true;
+ groupingFunctionPositions.add(i);
+ groupingFunctionCalls.add(aggCall);
+ } else {
+ regularAggCalls.add(aggCall);
+ }
+ }
+
+ // Create a separate aggregate for each grouping set
+ // Process grouping sets in order of decreasing cardinality (more columns
first)
+ // This ensures that for UNION ALL, branches with actual data types come
before
+ // branches with NULL placeholders, helping with type inference
+ //
+ // For GROUP_ID support, we need to track duplicate grouping sets and
assign sequence numbers
+ List<RelNode> aggregates = new ArrayList<>();
+ List<ImmutableBitSet> sortedGroupSets = new ArrayList<>(groupSets);
+ // Sort by cardinality descending (more grouping columns first)
+ sortedGroupSets.sort((a, b) -> Integer.compare(b.cardinality(),
a.cardinality()));
+
+ // Track GROUP_ID for duplicate grouping sets
+ // Map from grouping set to the count of times we've seen it so far
+ java.util.Map<ImmutableBitSet, Integer> groupSetOccurrences = new
java.util.HashMap<>();
+ List<Integer> groupIds = new ArrayList<>(); // GROUP_ID value for each
position in sortedGroupSets
+
+ for (int i = 0; i < sortedGroupSets.size(); i++) {
+ ImmutableBitSet groupSet = sortedGroupSets.get(i);
+
+ // Track GROUP_ID: how many times have we seen this grouping set before?
+ int groupId = groupSetOccurrences.getOrDefault(groupSet, 0);
+ groupIds.add(groupId);
+ groupSetOccurrences.put(groupSet, groupId + 1);
+
+ // Create the aggregate for this grouping set
+ // Use regularAggCalls (without GROUPING functions) because GROUPING
functions
+ // will be evaluated later using the $g column
+ Aggregate newAggregate;
+ if (aggregate instanceof DrillAggregateRel) {
+ newAggregate = new DrillAggregateRel(
+ cluster,
+ aggregate.getTraitSet(),
+ input,
+ groupSet,
+ ImmutableList.of(groupSet),
+ regularAggCalls);
+ } else {
+ newAggregate = aggregate.copy(
+ aggregate.getTraitSet(),
+ input,
+ groupSet,
+ ImmutableList.of(groupSet),
+ regularAggCalls);
+ }
+
+ // Create a project to add NULLs for missing grouping columns
+ List<RexNode> projects = new ArrayList<>();
+ List<String> fieldNames = new ArrayList<>();
+
+ // Add grouping columns (with NULLs for columns not in this grouping set)
+ int aggOutputIdx = 0;
+ int outputColIdx = 0; // Index in the final output row type
+ for (int col : fullGroupSet) {
+ if (groupSet.get(col)) {
+ // Column is in this grouping set - project it directly from the
aggregate output
+ RexNode inputRef = rexBuilder.makeInputRef(newAggregate,
aggOutputIdx);
+ projects.add(inputRef);
+ aggOutputIdx++;
+ } else {
+ // Column is NOT in this grouping set - project a typed NULL literal
+ // Use the expected output type from the original aggregate to
create a properly typed NULL
+ // This prevents type inference issues in the UNION ALL
+ org.apache.calcite.rel.type.RelDataType nullType =
+
aggregate.getRowType().getFieldList().get(outputColIdx).getType();
+ // Use makeLiteral with null value and explicit type to create a
typed NULL
+ projects.add(rexBuilder.makeNullLiteral(nullType));
+ }
+
fieldNames.add(aggregate.getRowType().getFieldList().get(outputColIdx).getName());
+ outputColIdx++;
+ }
+
+ // Add aggregate result columns (only regular aggregates, not GROUPING
functions)
+ // We'll use the alias from the original aggregate call
+ for (int j = 0; j < regularAggCalls.size(); j++) {
+ projects.add(rexBuilder.makeInputRef(newAggregate, aggOutputIdx));
+ AggregateCall regCall = regularAggCalls.get(j);
+ String fieldName = regCall.getName() != null ? regCall.getName() :
("$f" + (fullGroupSet.cardinality() + j));
+ fieldNames.add(fieldName);
+ aggOutputIdx++;
+ }
+
+ // Add grouping ID column ($g)
+ // The grouping ID is a bitmap where bit i is 1 if column i is NOT in
the grouping set
+ int groupingId = 0;
+ int bitPosition = 0;
+ for (int col : fullGroupSet) {
+ if (!groupSet.get(col)) {
+ groupingId |= (1 << bitPosition);
+ }
+ bitPosition++;
+ }
+ projects.add(rexBuilder.makeLiteral(groupingId,
typeFactory.createSqlType(org.apache.calcite.sql.type.SqlTypeName.INTEGER),
true));
+ fieldNames.add("$g");
Review Comment:
Fixed
> Add support for ROLLUP, CUBE, GROUPING SETS, GROUPING, GROUPING_ID, GROUP_ID
> -----------------------------------------------------------------------------------
>
> Key: DRILL-3962
> URL: https://issues.apache.org/jira/browse/DRILL-3962
> Project: Apache Drill
> Issue Type: New Feature
> Reporter: Jinfeng Ni
> Assignee: Charles Givre
> Priority: Major
>
> These functions are important for BI analytical workloads. Currently, Calcite
> supports those functions, but neither the planning nor the execution in Drill
> supports them.
> DRILL-3802 blocks those functions in Drill planning, but we should provide
> support for them in both the planning and the execution of Drill.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)