This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 012b49d5e5 Fix the order of aggregator finalization in
GroupByPostShuffleFrameProcessor (MSQ) (#14022)
012b49d5e5 is described below
commit 012b49d5e5588d4ac2a8f588100d5c844727a05e
Author: Laksh Singla <[email protected]>
AuthorDate: Wed Apr 5 11:04:06 2023 +0530
Fix the order of aggregator finalization in
GroupByPostShuffleFrameProcessor (MSQ) (#14022)
* fix the order in which finalization is done
* add comment explaining the change
* null handling case
---
.../groupby/GroupByPostShuffleFrameProcessor.java | 11 ++--
.../org/apache/druid/msq/exec/MSQSelectTest.java | 68 ++++++++++++++++++++++
2 files changed, 75 insertions(+), 4 deletions(-)
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java
index 7a7a00a7dc..7aa7a72c55 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/groupby/GroupByPostShuffleFrameProcessor.java
@@ -221,14 +221,17 @@ public class GroupByPostShuffleFrameProcessor implements
FrameProcessor<Long>
outputRowAsMap.put(postAggregator.getName(), value);
}
- // Finalize aggregators.
- finalizeFn.accept(outputRow);
-
if (havingSpec != null && !havingSpec.eval(outputRow)) {
// Didn't match HAVING.
outputRow = null;
return false;
- } else if (frameWriter.addSelection()) {
+ }
+
+ // Finalize aggregators after checking if they are passing the havingSpec,
because havingSpec expects the
+ // unfinalized row (and finalizes it internally after making a copy of it)
+ finalizeFn.accept(outputRow);
+
+ if (frameWriter.addSelection()) {
outputRow = null;
return false;
} else if (frameWriter.getNumRows() > 0) {
diff --git
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java
index 080f726706..14cb7acbd6 100644
---
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java
+++
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/exec/MSQSelectTest.java
@@ -41,6 +41,7 @@ import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.aggregation.CountAggregatorFactory;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
+import
org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory;
import org.apache.druid.query.aggregation.post.ArithmeticPostAggregator;
import org.apache.druid.query.aggregation.post.ExpressionPostAggregator;
import org.apache.druid.query.aggregation.post.FieldAccessPostAggregator;
@@ -1149,6 +1150,73 @@ public class MSQSelectTest extends MSQTestBase
)).verifyResults();
}
+ @Test
+ public void testHavingOnApproximateCountDistinct()
+ {
+ RowSignature resultSignature = RowSignature.builder()
+ .add("dim2", ColumnType.STRING)
+ .add("col", ColumnType.LONG)
+ .build();
+
+ GroupByQuery query = GroupByQuery.builder()
+ .setDataSource(CalciteTests.DATASOURCE1)
+
.setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(dimensions(new
DefaultDimensionSpec("dim2", "d0")))
+ .setAggregatorSpecs(
+ aggregators(
+ new CardinalityAggregatorFactory(
+ "a0",
+ null,
+ ImmutableList.of(
+ new DefaultDimensionSpec(
+ "m1",
+ "m1",
+ ColumnType.FLOAT
+ )
+ ),
+ false,
+ true
+ )
+ )
+ )
+ .setHavingSpec(
+ having(
+ bound(
+ "a0",
+ "1",
+ null,
+ true,
+ false,
+ null,
+ StringComparators.NUMERIC
+ )
+ )
+ )
+ .setContext(context)
+ .build();
+
+ testSelectQuery()
+ .setSql("SELECT dim2, COUNT(DISTINCT m1) as col FROM foo GROUP BY dim2
HAVING COUNT(DISTINCT m1) > 1")
+ .setExpectedMSQSpec(MSQSpec.builder()
+ .query(query)
+ .columnMappings(new
ColumnMappings(ImmutableList.of(
+ new ColumnMapping("d0", "dim2"),
+ new ColumnMapping("a0", "col")
+ )))
+
.tuningConfig(MSQTuningConfig.defaultConfig())
+ .build())
+ .setQueryContext(context)
+ .setExpectedRowSignature(resultSignature)
+ .setExpectedResultRows(
+ NullHandling.replaceWithDefault()
+ ? ImmutableList.of(new Object[]{null, 3L}, new Object[]{"a", 2L})
+ : ImmutableList.of(new Object[]{null, 2L}, new Object[]{"a", 2L})
+
+ )
+ .verifyResults();
+ }
+
@Test
public void testGroupByWithMultiValue()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]