This is an automated email from the ASF dual-hosted git repository.

jtuglu1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 12dcedf5507 fix: preserve GROUPING SET dimensions with single-value 
filter  (#19285)
12dcedf5507 is described below

commit 12dcedf55073096160d0b91e24b3ce971b8be6fc
Author: Shekhar Prasad Rajak <[email protected]>
AuthorDate: Mon May 18 21:31:38 2026 +0530

    fix: preserve GROUPING SET dimensions with single-value filter  (#19285)
    
    Fixes #13204
    
    Fixed a bug where a SQL query using GROUPING SETS with a single-value WHERE 
filter on a grouped dimension would return the filtered value instead of null 
in subtotal rows that exclude that dimension. For example, WHERE dim2 = 'a' 
GROUP BY GROUPING SETS ((dim1, dim2), (dim1)) now correctly returns null for 
dim2 in the (dim1) subtotal rows instead of repeating 'a'.
---
 .../org/apache/druid/sql/calcite/rel/Grouping.java |  32 +++++-
 .../apache/druid/sql/calcite/CalciteQueryTest.java |  88 ++++++++++++++-
 .../grouping_sets_single_value_filter.iq           | 118 +++++++++++++++++++++
 3 files changed, 234 insertions(+), 4 deletions(-)

diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
index 8bcf1544fc8..740085b8938 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/Grouping.java
@@ -228,14 +228,42 @@ public class Grouping
     // Remove literal dimensions that did not appear in the projection. This 
is useful for queries
     // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can 
generate, and for which we don't
     // actually want to include a dimension 'dummy'.
+    //
+    // However, non-literal dimensions (column references) used in any 
GROUPING SET should not be dropped,
+    // even if they are not in the projection. This ensures correct NULL value 
formatting.
+    // See: https://github.com/apache/druid/issues/13204
     final ImmutableBitSet aggregateProjectBits = 
RelOptUtil.InputFinder.bits(project.getProjects(), null);
     final int[] newDimIndexes = new int[dimensions.size()];
     boolean droppedDimensions = false;
 
+    // Collect all dimension indices referenced in any non-empty subtotal, but 
only when the subtotals
+    // spec has real effect (i.e. multiple grouping sets). A plain GROUP BY 
with a single group produces
+    // subtotals = [[0, 1, ...]] which has no effect and must not prevent 
literal dimensions from being dropped.
+    // See: https://github.com/apache/druid/issues/13204
+    final Set<Integer> dimensionsInSubtotals = new HashSet<>();
+    if (subtotals.hasEffect(dimensions.stream()
+                                      
.map(DimensionExpression::toDimensionSpec)
+                                      .collect(Collectors.toList()))) {
+      for (IntList subtotal : subtotals.getSubtotals()) {
+        if (!subtotal.isEmpty()) {
+          for (int dimIndex : subtotal) {
+            dimensionsInSubtotals.add(dimIndex);
+          }
+        }
+      }
+    }
+
     for (int i = 0; i < dimensions.size(); i++) {
       final DimensionExpression dimension = dimensions.get(i);
-      if 
(plannerContext.parseExpression(dimension.getDruidExpression().getExpression()).isLiteral()
-          && !aggregateProjectBits.get(i)) {
+      final boolean isLiteral = plannerContext.parseExpression(
+          dimension.getDruidExpression().getExpression()
+      ).isLiteral();
+      final boolean isUsedInSubtotals = dimensionsInSubtotals.contains(i);
+
+      // Drop if it's a literal AND not in projection AND not used in any 
grouping set.
+      // Non-literal dimensions referenced in a GROUPING SET must be preserved 
so that
+      // subtotals which omit them correctly emit null.
+      if (isLiteral && !aggregateProjectBits.get(i) && !isUsedInSubtotals) {
         droppedDimensions = true;
         newDimIndexes[i] = -1;
       } else {
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index fcb94485245..808b06e9411 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -11661,6 +11661,11 @@ public class CalciteQueryTest extends 
BaseCalciteQueryTest
                                 "nvl(\"dim2\",'')",
                                 ColumnType.STRING
                             ),
+                            expressionVirtualColumn(
+                                "v1",
+                                "'dummy'",
+                                ColumnType.STRING
+                            ),
                             expressionVirtualColumn(
                                 "v2",
                                 "timestamp_floor(\"__time\",'P1M',null,'UTC')",
@@ -11670,15 +11675,16 @@ public class CalciteQueryTest extends 
BaseCalciteQueryTest
                         .setDimensions(
                             dimensions(
                                 new DefaultDimensionSpec("v0", "d0"),
+                                new DefaultDimensionSpec("v1", "d1", 
ColumnType.STRING),
                                 new DefaultDimensionSpec("v2", "d2", 
ColumnType.LONG)
                             )
                         )
                         .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))
                         .setSubtotalsSpec(
                             ImmutableList.of(
-                                ImmutableList.of("d0", "d2"),
+                                ImmutableList.of("d0", "d1", "d2"),
                                 ImmutableList.of("d0"),
-                                ImmutableList.of(),
+                                ImmutableList.of("d1"),
                                 ImmutableList.of("d2")
                             )
                         )
@@ -11958,6 +11964,84 @@ public class CalciteQueryTest extends 
BaseCalciteQueryTest
     );
   }
 
+  @Test
+  public void testGroupingSetsWithSingleValueFilter()
+  {
+    msqIncompatible();
+    testQuery(
+        "SELECT dim1, dim2, SUM(cnt)\n"
+        + "FROM druid.foo\n"
+        + "WHERE dim2 = 'a'\n"
+        + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(CalciteTests.DATASOURCE1)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("dim1", "d0", 
ColumnType.STRING),
+                                new DefaultDimensionSpec("dim2", "d1", 
ColumnType.STRING)
+                            )
+                        )
+                        .setDimFilter(equality("dim2", "a", ColumnType.STRING))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))
+                        .setSubtotalsSpec(
+                            ImmutableList.of(
+                                ImmutableList.of("d0", "d1"),
+                                ImmutableList.of("d0")
+                            )
+                        )
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"", "a", 1L},
+            new Object[]{"1", "a", 1L},
+            new Object[]{"", null, 1L},
+            new Object[]{"1", null, 1L}
+        )
+    );
+  }
+
+  @Test
+  public void testGroupingSetsWithSingleValueFilterUsingIn()
+  {
+    msqIncompatible();
+    testQuery(
+        "SELECT dim1, dim2, SUM(cnt)\n"
+        + "FROM druid.foo\n"
+        + "WHERE dim2 IN ('a')\n"
+        + "GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) )",
+        ImmutableList.of(
+            GroupByQuery.builder()
+                        .setDataSource(CalciteTests.DATASOURCE1)
+                        .setInterval(querySegmentSpec(Filtration.eternity()))
+                        .setGranularity(Granularities.ALL)
+                        .setDimensions(
+                            dimensions(
+                                new DefaultDimensionSpec("dim1", "d0", 
ColumnType.STRING),
+                                new DefaultDimensionSpec("dim2", "d1", 
ColumnType.STRING)
+                            )
+                        )
+                        .setDimFilter(equality("dim2", "a", ColumnType.STRING))
+                        .setAggregatorSpecs(aggregators(new 
LongSumAggregatorFactory("a0", "cnt")))
+                        .setSubtotalsSpec(
+                            ImmutableList.of(
+                                ImmutableList.of("d0", "d1"),
+                                ImmutableList.of("d0")
+                            )
+                        )
+                        .build()
+        ),
+        ImmutableList.of(
+            new Object[]{"", "a", 1L},
+            new Object[]{"1", "a", 1L},
+            new Object[]{"", null, 1L},
+            new Object[]{"1", null, 1L}
+        )
+    );
+  }
+
   @Test
   public void testTimeExtractWithTooFewArguments()
   {
diff --git 
a/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
 
b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
new file mode 100644
index 00000000000..9483f6a601d
--- /dev/null
+++ 
b/sql/src/test/quidem/org.apache.druid.quidem.SqlQuidemTest/grouping_sets_single_value_filter.iq
@@ -0,0 +1,118 @@
+!use druidtest:///
+!set outputformat mysql
+
+# GROUPING SETS with single-value equality filter should preserve dim2 in 
subtotals (fix for #13204)
+SELECT dim1, dim2, SUM(cnt)
+FROM druid.foo
+WHERE dim2 = 'a'
+GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) );
++------+------+--------+
+| dim1 | dim2 | EXPR$2 |
++------+------+--------+
+|      | a    |      1 |
+|      |      |      1 |
+| 1    | a    |      1 |
+| 1    |      |      1 |
++------+------+--------+
+(4 rows)
+
+!ok
+{
+  "queryType" : "groupBy",
+  "dataSource" : {
+    "type" : "table",
+    "name" : "foo"
+  },
+  "intervals" : {
+    "type" : "intervals",
+    "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+  },
+  "filter" : {
+    "type" : "equals",
+    "column" : "dim2",
+    "matchValueType" : "STRING",
+    "matchValue" : "a"
+  },
+  "granularity" : {
+    "type" : "all"
+  },
+  "dimensions" : [ {
+    "type" : "default",
+    "dimension" : "dim1",
+    "outputName" : "d0",
+    "outputType" : "STRING"
+  }, {
+    "type" : "default",
+    "dimension" : "dim2",
+    "outputName" : "d1",
+    "outputType" : "STRING"
+  } ],
+  "aggregations" : [ {
+    "type" : "longSum",
+    "name" : "a0",
+    "fieldName" : "cnt"
+  } ],
+  "limitSpec" : {
+    "type" : "NoopLimitSpec"
+  },
+  "subtotalsSpec" : [ [ "d0", "d1" ], [ "d0" ] ]
+}
+!nativePlan
+
+# GROUPING SETS with single-value IN filter should behave identically to = 
filter (fix for #13204)
+SELECT dim1, dim2, SUM(cnt)
+FROM druid.foo
+WHERE dim2 IN ('a')
+GROUP BY GROUPING SETS ( (dim1, dim2), (dim1) );
++------+------+--------+
+| dim1 | dim2 | EXPR$2 |
++------+------+--------+
+|      | a    |      1 |
+|      |      |      1 |
+| 1    | a    |      1 |
+| 1    |      |      1 |
++------+------+--------+
+(4 rows)
+
+!ok
+{
+  "queryType" : "groupBy",
+  "dataSource" : {
+    "type" : "table",
+    "name" : "foo"
+  },
+  "intervals" : {
+    "type" : "intervals",
+    "intervals" : [ 
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+  },
+  "filter" : {
+    "type" : "equals",
+    "column" : "dim2",
+    "matchValueType" : "STRING",
+    "matchValue" : "a"
+  },
+  "granularity" : {
+    "type" : "all"
+  },
+  "dimensions" : [ {
+    "type" : "default",
+    "dimension" : "dim1",
+    "outputName" : "d0",
+    "outputType" : "STRING"
+  }, {
+    "type" : "default",
+    "dimension" : "dim2",
+    "outputName" : "d1",
+    "outputType" : "STRING"
+  } ],
+  "aggregations" : [ {
+    "type" : "longSum",
+    "name" : "a0",
+    "fieldName" : "cnt"
+  } ],
+  "limitSpec" : {
+    "type" : "NoopLimitSpec"
+  },
+  "subtotalsSpec" : [ [ "d0", "d1" ], [ "d0" ] ]
+}
+!nativePlan


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to