This is an automated email from the ASF dual-hosted git repository.
jtuglu1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 12dcedf5507 fix: preserve GROUPING SET dimensions with single-value
filter (#19285)
12dcedf5507 is described below
commit 12dcedf55073096160d0b91e24b3ce971b8be6fc
Author: Shekhar Prasad Rajak <[email protected]>
AuthorDate: Mon May 18 21:31:38 2026 +0530
fix: preserve GROUPING SET dimensions with single-value filter (#19285)
Fixes #13204
Fixed a bug where a SQL query using GROUPING SETS with a single-value WHERE
filter on a grouped dimension would return the filtered value instead of null
in subtotal rows that exclude that dimension. For example, WHERE dim2 = 'a'
GROUP BY GROUPING SETS ((dim1, dim2), (dim1)) now correctly returns null for
dim2 in the (dim1) subtotal rows instead of repeating 'a'.
---
.../org/apache/druid/sql/calcite/rel/Grouping.java | 32 +++++-
.../apache/druid/sql/calcite/CalciteQueryTest.java | 88 ++++++++++++++-
.../grouping_sets_single_value_filter.iq | 118 +++++++++++++++++++++
3 files changed, 234 insertions(+), 4 deletions(-)
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
index 8bcf1544fc8..740085b8938 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
@@ -228,14 +228,42 @@ public class Grouping
// Remove literal dimensions that did not appear in the projection. This
is useful for queries
// like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can
generate, and for which we don't
// actually want to include a dimension 'dummy'.
+ //
+ // However, non-literal dimensions (column references) used in any
GROUPING SET should not be dropped,
+ // even if they are not in the projection. This ensures correct NULL value
formatting.
+ // See: https://github.com/apache/druid/issues/13204
final ImmutableBitSet aggregateProjectBits =
RelOptUtil.InputFinder.bits(project.getProjects(), null);
final int[] newDimIndexes = new int[dimensions.size()];
boolean droppedDimensions = false;
+ // Collect all dimension indices referenced in any non-empty subtotal, but
only when the subtotals
+ // spec has real effect (i.e. multiple grouping sets). A plain GROUP BY
with a single group produces
+ // subtotals = [[0, 1, ...]] which has no effect and must not prevent
literal dimensions from being dropped.
+ // See: https://github.com/apache/druid/issues/13204
+ final Set<Integer> dimensionsInSubtotals = new HashSet<>();
+ if (subtotals.hasEffect(dimensions.stream()
+
.map(DimensionExpression::toDimensionSpec)
+ .collect(Collectors.toList()))) {
+ for (IntList subtotal : subtotals.getSubtotals()) {
+ if (!subtotal.isEmpty()) {
+ for (int dimIndex : subtotal) {
+ dimensionsInSubtotals.add(dimIndex);
+ }
+ }
+ }
+ }
+
for (int i = 0; i < dimensions.size(); i++) {
final DimensionExpression dimension = dimensions.get(i);
- if
(plannerContext.parseExpression(dimension.getDruidExpression().getExpression()).isLiteral()
- && !aggregateProjectBits.get(i)) {
+ final boolean isLiteral = plannerContext.parseExpression(
+ dimension.getDruidExpression().getExpression()
+ ).isLiteral();
+ final boolean isUsedInSubtotals = dimensionsInSubtotals.contains(i);
+
+ // Drop if it's a literal AND not in projection AND not used in any
grouping set.
+ // Non-literal dimensions referenced in a GROUPING SET must be preserved
so that
+ // subtotals which omit them correctly emit null.
+ if (isLiteral && !aggregateProjectBits.get(i) && !isUsedInSubtotals) {
droppedDimensions = true;
newDimIndexes[i] = -1;
} else {
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index fcb94485245..808b06e9411 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -11661,6 +11661,11 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
"nvl(\"dim2\",'')",
ColumnType.STRING
),
+ expressionVirtualColumn(
+ "v1",
+ "'dummy'",
+ ColumnType.STRING
+ ),
expressionVirtualColumn(
"v2",
"timestamp_floor(\"__time\",'P1M',null,'UTC')",
@@ -11670,15 +11675,16 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0"),
+ new DefaultDimensionSpec("v1", "d1",
ColumnType.STRING),
new DefaultDimensionSpec("v2", "d2",
ColumnType.LONG)
)
)
.setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
.setSubtotalsSpec(
ImmutableList.of(
- ImmutableList.of("d0", "d2"),
+ ImmutableList.of("d0", "d1", "d2"),
ImmutableList.of("d0"),
- ImmutableList.of(),
+ ImmutableList.of("d1"),
ImmutableList.of("d2")
)
)
@@ -11958,6 +11964,84 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
);
}
+ @Test
+ public void testGroupingSetsWithSingleValueFilter()
+ {
+ msqIncompatible();
+ testQuery(
+ "SELECT dim1, dim2, SUM(cnt)\n"
+ + "FROM druid.foo\n"
+ + "WHERE dim2 = 'a'\n"
+ + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE1)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("dim1", "d0",
ColumnType.STRING),
+ new DefaultDimensionSpec("dim2", "d1",
ColumnType.STRING)
+ )
+ )
+ .setDimFilter(equality("dim2", "a", ColumnType.STRING))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setSubtotalsSpec(
+ ImmutableList.of(
+ ImmutableList.of("d0", "d1"),
+ ImmutableList.of("d0")
+ )
+ )
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"", "a", 1L},
+ new Object[]{"1", "a", 1L},
+ new Object[]{"", null, 1L},
+ new Object[]{"1", null, 1L}
+ )
+ );
+ }
+
+ @Test
+ public void testGroupingSetsWithSingleValueFilterUsingIn()
+ {
+ msqIncompatible();
+ testQuery(
+ "SELECT dim1, dim2, SUM(cnt)\n"
+ + "FROM druid.foo\n"
+ + "WHERE dim2 IN ('a')\n"
+ + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
+ ImmutableList.of(
+ GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE1)
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ dimensions(
+ new DefaultDimensionSpec("dim1", "d0",
ColumnType.STRING),
+ new DefaultDimensionSpec("dim2", "d1",
ColumnType.STRING)
+ )
+ )
+ .setDimFilter(equality("dim2", "a", ColumnType.STRING))
+ .setAggregatorSpecs(aggregators(new
LongSumAggregatorFactory("a0", "cnt")))
+ .setSubtotalsSpec(
+ ImmutableList.of(
+ ImmutableList.of("d0", "d1"),
+ ImmutableList.of("d0")
+ )
+ )
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{"", "a", 1L},
+ new Object[]{"1", "a", 1L},
+ new Object[]{"", null, 1L},
+ new Object[]{"1", null, 1L}
+ )
+ );
+ }
+
@Test
public void testTimeExtractWithTooFewArguments()
{
diff --git
a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
new file mode 100644
index 00000000000..9483f6a601d
--- /dev/null
+++
b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
@@ -0,0 +1,118 @@
+!use druidtest:///
+!set outputformat mysql
+
+# GROUPING SETS with single-value equality filter should preserve dim2 in
subtotals (fix for #13204)
+SELECT dim1, dim2, SUM(cnt)
+FROM druid.foo
+WHERE dim2 = 'a'
+GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) );
++------+------+--------+
+| dim1 | dim2 | EXPR$2 |
++------+------+--------+
+| | a | 1 |
+| | | 1 |
+| 1 | a | 1 |
+| 1 | | 1 |
++------+------+--------+
+(4 rows)
+
+!ok
+{
+ "queryType" : "groupBy",
+ "dataSource" : {
+ "type" : "table",
+ "name" : "foo"
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "filter" : {
+ "type" : "equals",
+ "column" : "dim2",
+ "matchValueType" : "STRING",
+ "matchValue" : "a"
+ },
+ "granularity" : {
+ "type" : "all"
+ },
+ "dimensions" : [ {
+ "type" : "default",
+ "dimension" : "dim1",
+ "outputName" : "d0",
+ "outputType" : "STRING"
+ }, {
+ "type" : "default",
+ "dimension" : "dim2",
+ "outputName" : "d1",
+ "outputType" : "STRING"
+ } ],
+ "aggregations" : [ {
+ "type" : "longSum",
+ "name" : "a0",
+ "fieldName" : "cnt"
+ } ],
+ "limitSpec" : {
+ "type" : "NoopLimitSpec"
+ },
+ "subtotalsSpec" : [ [ "d0", "d1" ], [ "d0" ] ]
+}
+!nativePlan
+
+# GROUPING SETS with single-value IN filter should behave identically to =
filter (fix for #13204)
+SELECT dim1, dim2, SUM(cnt)
+FROM druid.foo
+WHERE dim2 IN ('a')
+GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) );
++------+------+--------+
+| dim1 | dim2 | EXPR$2 |
++------+------+--------+
+| | a | 1 |
+| | | 1 |
+| 1 | a | 1 |
+| 1 | | 1 |
++------+------+--------+
+(4 rows)
+
+!ok
+{
+ "queryType" : "groupBy",
+ "dataSource" : {
+ "type" : "table",
+ "name" : "foo"
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "filter" : {
+ "type" : "equals",
+ "column" : "dim2",
+ "matchValueType" : "STRING",
+ "matchValue" : "a"
+ },
+ "granularity" : {
+ "type" : "all"
+ },
+ "dimensions" : [ {
+ "type" : "default",
+ "dimension" : "dim1",
+ "outputName" : "d0",
+ "outputType" : "STRING"
+ }, {
+ "type" : "default",
+ "dimension" : "dim2",
+ "outputName" : "d1",
+ "outputType" : "STRING"
+ } ],
+ "aggregations" : [ {
+ "type" : "longSum",
+ "name" : "a0",
+ "fieldName" : "cnt"
+ } ],
+ "limitSpec" : {
+ "type" : "NoopLimitSpec"
+ },
+ "subtotalsSpec" : [ [ "d0", "d1" ], [ "d0" ] ]
+}
+!nativePlan
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]