This is an automated email from the ASF dual-hosted git repository.
soumyava pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 06f40a00192 remove calcite AggregateRemoveRule to fix nested group by
query with order by in outer query (#15237)
06f40a00192 is described below
commit 06f40a0019250fd731e40ddfea75ea6651b64aaa
Author: Soumyava <[email protected]>
AuthorDate: Tue Oct 24 15:30:13 2023 -0700
remove calcite AggregateRemoveRule to fix nested group by query with order
by in outer query (#15237)
* Fixing nested group by query with order by in outer query
* Adding examples
---
.../sql/calcite/planner/CalciteRulesManager.java | 6 +-
.../druid/sql/calcite/CalciteArraysQueryTest.java | 81 +++++-----
.../druid/sql/calcite/CalciteJoinQueryTest.java | 37 +++--
.../apache/druid/sql/calcite/CalciteQueryTest.java | 163 ++++++++++++++++++++-
.../druid/sql/calcite/DrillWindowQueryTest.java | 1 -
.../drill/window/queries/nestedAggs/multiWin_5.e | 8 +-
6 files changed, 236 insertions(+), 60 deletions(-)
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
index 9a4abf5b226..b944f3cd535 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
@@ -177,7 +177,11 @@ public class CalciteRulesManager
private static final List<RelOptRule> ABSTRACT_RELATIONAL_RULES =
ImmutableList.of(
AbstractConverter.ExpandConversionRule.INSTANCE,
- CoreRules.AGGREGATE_REMOVE,
+ // Removing CoreRules.AGGREGATE_REMOVE rule here
+ // as after the Calcite upgrade, it would plan queries to a scan
over a group by
+ // with ordering on a non-time column
+ // which is not allowed in Druid. We should add that rule back
+ // once Druid starts to support non-time ordering over scan queries
CoreRules.UNION_TO_DISTINCT,
CoreRules.PROJECT_REMOVE,
CoreRules.AGGREGATE_JOIN_TRANSPOSE,
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
index 3a5da7d325f..ec4dda0dc69 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java
@@ -50,7 +50,6 @@ import
org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.aggregation.ExpressionLambdaAggregatorFactory;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
-import org.apache.druid.query.aggregation.post.ExpressionPostAggregator;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.extraction.SubstringDimExtractionFn;
@@ -3172,45 +3171,55 @@ public class CalciteArraysQueryTest extends
BaseCalciteQueryTest
public void testArrayAggGroupByArrayAggFromSubquery()
{
cannotVectorize();
-
+ skipVectorize();
testQuery(
"SELECT dim2, arr, COUNT(*) FROM (SELECT dim2, ARRAY_AGG(DISTINCT
dim1) as arr FROM foo WHERE dim1 is not null GROUP BY 1 LIMIT 5) GROUP BY 1,2",
QUERY_CONTEXT_NO_STRINGIFY_ARRAY,
ImmutableList.of(
- new TopNQueryBuilder()
- .dataSource(CalciteTests.DATASOURCE1)
- .dimension(new DefaultDimensionSpec(
- "dim2",
- "d0",
- ColumnType.STRING
- ))
- .metric(new DimensionTopNMetricSpec(
- null,
- StringComparators.LEXICOGRAPHIC
- ))
- .filters(notNull("dim1"))
- .threshold(5)
- .aggregators(new ExpressionLambdaAggregatorFactory(
- "a0",
- ImmutableSet.of("dim1"),
- "__acc",
- "ARRAY<STRING>[]",
- "ARRAY<STRING>[]",
- true,
- true,
- false,
- "array_set_add(\"__acc\", \"dim1\")",
- "array_set_add_all(\"__acc\", \"a0\")",
- null,
- null,
- new HumanReadableBytes(1024),
- ExprMacroTable.nil()
- ))
- .intervals(querySegmentSpec(Filtration.eternity()))
- .granularity(Granularities.ALL)
- .context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
- .postAggregators(new ExpressionPostAggregator("s0", "1", null,
ExprMacroTable.nil()))
- .build()
+ GroupByQuery.builder()
+ .setDataSource(new TopNQueryBuilder()
+
.dataSource(CalciteTests.DATASOURCE1)
+ .dimension(new DefaultDimensionSpec(
+ "dim2",
+ "d0",
+ ColumnType.STRING
+ ))
+ .metric(new DimensionTopNMetricSpec(
+ null,
+ StringComparators.LEXICOGRAPHIC
+ ))
+ .filters(notNull("dim1"))
+ .threshold(5)
+ .aggregators(new
ExpressionLambdaAggregatorFactory(
+ "a0",
+ ImmutableSet.of("dim1"),
+ "__acc",
+ "ARRAY<STRING>[]",
+ "ARRAY<STRING>[]",
+ true,
+ true,
+ false,
+ "array_set_add(\"__acc\",
\"dim1\")",
+ "array_set_add_all(\"__acc\",
\"a0\")",
+ null,
+ null,
+ new HumanReadableBytes(1024),
+ ExprMacroTable.nil()
+ ))
+
.intervals(querySegmentSpec(Filtration.eternity()))
+ .granularity(Granularities.ALL)
+
.context(QUERY_CONTEXT_NO_STRINGIFY_ARRAY)
+ .build()
+ )
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ new DefaultDimensionSpec("d0", "_d0",
ColumnType.STRING),
+ new DefaultDimensionSpec("a0", "_d1",
ColumnType.STRING_ARRAY)
+ )
+ .setAggregatorSpecs(new CountAggregatorFactory("_a0"))
+ .setContext(QUERY_CONTEXT_DEFAULT)
+ .build()
),
useDefault ?
ImmutableList.of(
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
index 0882f3c9cb1..ba55f7b4e4a 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
@@ -5381,6 +5381,8 @@ public class CalciteJoinQueryTest extends
BaseCalciteQueryTest
@Test
public void testPlanWithInFilterMoreThanInSubQueryThreshold()
{
+ skipVectorize();
+ cannotVectorize();
String query = "SELECT l1 FROM numfoo WHERE l1 IN (4842, 4844, 4845,
14905, 4853, 29064)";
Map<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
@@ -5397,21 +5399,32 @@ public class CalciteJoinQueryTest extends
BaseCalciteQueryTest
.dataSource(
JoinDataSource.create(
new TableDataSource(CalciteTests.DATASOURCE3),
- InlineDataSource.fromIterable(
- ImmutableList.of(
- new Object[]{4842L},
- new Object[]{4844L},
- new Object[]{4845L},
- new Object[]{14905L},
- new Object[]{4853L},
- new Object[]{29064L}
- ),
- RowSignature.builder()
- .add("ROW_VALUE", ColumnType.LONG)
+ new QueryDataSource(
+ GroupByQuery.builder()
+
.setDataSource(InlineDataSource.fromIterable(
+ ImmutableList.of(
+ new
Object[]{4842L},
+ new
Object[]{4844L},
+ new
Object[]{4845L},
+ new
Object[]{14905L},
+ new
Object[]{4853L},
+ new
Object[]{29064L}
+ ),
+
RowSignature.builder()
+
.add("ROW_VALUE", ColumnType.LONG)
+
.build()
+ )
+ )
+
.setInterval(querySegmentSpec(Filtration.eternity()))
+ .setDimensions(
+ new
DefaultDimensionSpec("ROW_VALUE", "d0", ColumnType.LONG)
+ )
+ .setGranularity(Granularities.ALL)
+
.setLimitSpec(NoopLimitSpec.instance())
.build()
),
"j0.",
- "(\"l1\" == \"j0.ROW_VALUE\")",
+ "(\"l1\" == \"j0.d0\")",
JoinType.INNER,
null,
ExprMacroTable.nil(),
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index 811227162ac..482b02f61ab 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -13949,15 +13949,32 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
+ "group by 1",
ImmutableList.of(
GroupByQuery.builder()
- .setDataSource(CalciteTests.DATASOURCE3)
+ .setDataSource(GroupByQuery.builder()
+
.setDataSource(CalciteTests.DATASOURCE3)
+
.setInterval(querySegmentSpec(Intervals.ETERNITY))
+
.setGranularity(Granularities.ALL)
+ .addDimension(new
DefaultDimensionSpec(
+ "dim1",
+ "_d0",
+ ColumnType.STRING
+ ))
+ .addAggregator(new
LongSumAggregatorFactory("a0", "l1"))
+ .build()
+ )
.setInterval(querySegmentSpec(Intervals.ETERNITY))
- .setGranularity(Granularities.ALL)
- .addDimension(new DefaultDimensionSpec("dim1", "_d0",
ColumnType.STRING))
- .addAggregator(new LongSumAggregatorFactory("a0",
"l1"))
- .setPostAggregatorSpecs(ImmutableList.of(
- expressionPostAgg("p0", "case_searched((\"a0\" ==
0),1,0)")
+ .setDimensions(new DefaultDimensionSpec("_d0", "d0",
ColumnType.STRING))
+ .setAggregatorSpecs(aggregators(
+ new FilteredAggregatorFactory(
+ new CountAggregatorFactory("_a0"),
+ useDefault ?
+ selector("a0", "0") :
+ equality("a0", 0, ColumnType.LONG)
+ )
))
+ .setGranularity(Granularities.ALL)
+ .setContext(QUERY_CONTEXT_DEFAULT)
.build()
+
),
useDefault ? ImmutableList.of(
new Object[]{"", 0L},
@@ -14275,4 +14292,138 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
.run());
assertThat(e, invalidSqlIs("ASCENDING ordering with NULLS LAST is not
supported! (line [1], column [41])"));
}
+
+ @Test
+ public void testInGroupByLimitOutGroupByOrderBy()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "with t AS (SELECT m2, COUNT(m1) as trend_score\n"
+ + "FROM \"foo\"\n"
+ + "GROUP BY 1 \n"
+ + "LIMIT 10\n"
+ + ")\n"
+ + "select m2, (MAX(trend_score)) from t\n"
+ + "where m2 > 2\n"
+ + "GROUP BY 1 \n"
+ + "ORDER BY 2 DESC",
+ QUERY_CONTEXT_DEFAULT,
+ ImmutableList.of(
+ new GroupByQuery.Builder()
+ .setDataSource(
+ new TopNQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .dimension(new DefaultDimensionSpec("m2", "d0",
ColumnType.DOUBLE))
+ .threshold(10)
+ .aggregators(aggregators(
+ useDefault
+ ? new CountAggregatorFactory("a0")
+ : new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a0"),
+ notNull("m1")
+ )
+ ))
+ .metric(new DimensionTopNMetricSpec(null,
StringComparators.NUMERIC))
+ .context(OUTER_LIMIT_CONTEXT)
+ .build()
+ )
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
+ )
+ .setDimFilter(
+ useDefault ?
+ bound("d0", "2", null, true, false, null,
StringComparators.NUMERIC) :
+ new RangeFilter("d0", ColumnType.LONG, 2L, null, true,
false, null)
+ )
+ .setAggregatorSpecs(aggregators(
+ new LongMaxAggregatorFactory("_a0", "a0")
+ ))
+ .setLimitSpec(
+ DefaultLimitSpec
+ .builder()
+ .orderBy(new OrderByColumnSpec("_a0",
Direction.DESCENDING, StringComparators.NUMERIC))
+ .build()
+ )
+ .setContext(OUTER_LIMIT_CONTEXT)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{3.0D, 1L},
+ new Object[]{4.0D, 1L},
+ new Object[]{5.0D, 1L},
+ new Object[]{6.0D, 1L}
+ )
+ );
+ }
+
+ @Test
+ public void testInGroupByOrderByLimitOutGroupByOrderByLimit()
+ {
+ skipVectorize();
+ cannotVectorize();
+ testQuery(
+ "with t AS (SELECT m2 as mo, COUNT(m1) as trend_score\n"
+ + "FROM \"foo\"\n"
+ + "GROUP BY 1\n"
+ + "ORDER BY trend_score DESC\n"
+ + "LIMIT 10)\n"
+ + "select mo, (MAX(trend_score)) from t\n"
+ + "where mo > 2\n"
+ + "GROUP BY 1 \n"
+ + "ORDER BY 2 DESC LIMIT 2\n",
+ QUERY_CONTEXT_DEFAULT,
+ ImmutableList.of(
+ new GroupByQuery.Builder()
+ .setDataSource(
+ new TopNQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .dimension(new DefaultDimensionSpec("m2", "d0",
ColumnType.DOUBLE))
+ .threshold(10)
+ .aggregators(aggregators(
+ useDefault
+ ? new CountAggregatorFactory("a0")
+ : new FilteredAggregatorFactory(
+ new CountAggregatorFactory("a0"),
+ notNull("m1")
+ )
+ ))
+ .metric(new NumericTopNMetricSpec("a0"))
+ .context(OUTER_LIMIT_CONTEXT)
+ .build()
+ )
+ .setInterval(querySegmentSpec(Filtration.eternity()))
+ .setGranularity(Granularities.ALL)
+ .setDimensions(
+ new DefaultDimensionSpec("d0", "_d0", ColumnType.DOUBLE)
+ )
+ .setDimFilter(
+ useDefault ?
+ bound("d0", "2", null, true, false, null,
StringComparators.NUMERIC) :
+ new RangeFilter("d0", ColumnType.LONG, 2L, null, true,
false, null)
+ )
+ .setAggregatorSpecs(aggregators(
+ new LongMaxAggregatorFactory("_a0", "a0")
+ ))
+ .setLimitSpec(
+ DefaultLimitSpec
+ .builder()
+ .orderBy(new OrderByColumnSpec("_a0",
Direction.DESCENDING, StringComparators.NUMERIC))
+ .limit(2)
+ .build()
+ )
+ .setContext(OUTER_LIMIT_CONTEXT)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{3.0D, 1L},
+ new Object[]{4.0D, 1L}
+ )
+ );
+ }
+
}
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java
index bf13b88e291..58138ffd1ee 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/DrillWindowQueryTest.java
@@ -4364,7 +4364,6 @@ public class DrillWindowQueryTest extends
BaseCalciteQueryTest
windowQueryTest();
}
- @NotYetSupported(Modes.CANNOT_APPLY_VIRTUAL_COL)
@DrillTest("nestedAggs/multiWin_5")
@Test
public void test_nestedAggs_multiWin_5()
diff --git
a/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e
b/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e
index 03d3e377f3d..d0d59256426 100644
--- a/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e
+++ b/sql/src/test/resources/drill/window/queries/nestedAggs/multiWin_5.e
@@ -8,15 +8,15 @@ false 280487665.00748299428571428571
false 295757331.64717262000000000000
false 302662813.16785714333333333333
false 304608131.82107142900000000000
-false 303537640.51502361272727272727
+false 303537640.515023651272727272727
true 4.0000000000000000
true 4.5000000000000000
true 5.2222222222222222
true 4101.1041666666666667
true 5907.4433333333333333
true 284524.6472222222222778
-true 449300.4527210884353810
+true 449300.4527210885000000
true 550414.3805059523809583
-true 613525.0542768959435185
+true 613525.0542768961000000
true 652829.5088492063491667
-true 676669.0989538239537879
+true 676669.0989538240000000
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]