This is an automated email from the ASF dual-hosted git repository.
vogievetsky pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 68d6e682e86 Fix TimeBoundary planning when filters require virtual columns. (#16337)
68d6e682e86 is described below
commit 68d6e682e86f1c16c1f3a3e2d00a629c11645689
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Apr 25 16:49:40 2024 -0700
Fix TimeBoundary planning when filters require virtual columns. (#16337)
The timeBoundary query does not support virtual columns, so we should
avoid it if the query requires virtual columns.
---
.../apache/druid/sql/calcite/rel/DruidQuery.java | 6 +++
.../sql/calcite/CalciteTimeBoundaryQueryTest.java | 63 +++++++++++++++++++++-
2 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index 95a33792189..691e9518659 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -1078,6 +1078,12 @@ public class DruidQuery
virtualColumnRegistry,
plannerContext.getJoinableFactoryWrapper()
);
+
+ if (!getVirtualColumns(true).isEmpty()) {
+ // timeBoundary query does not support virtual columns.
+ return null;
+ }
+
final DataSource newDataSource = dataSourceFiltrationPair.lhs;
final Filtration filtration = dataSourceFiltrationPair.rhs;
String bound = minTime ? TimeBoundaryQuery.MIN_TIME : TimeBoundaryQuery.MAX_TIME;
diff --git a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
index ff0555cb358..54ff1a2c00e 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
@@ -30,6 +30,7 @@ import org.apache.druid.query.aggregation.LongMaxAggregatorFactory;
import org.apache.druid.query.aggregation.LongMinAggregatorFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeboundary.TimeBoundaryQuery;
+import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.filtration.Filtration;
import org.apache.druid.sql.calcite.util.CalciteTests;
@@ -83,7 +84,7 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest
}
@Test
- public void testMinTimeQueryWithFilters()
+ public void testMinTimeQueryWithTimeFilters()
{
HashMap<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
@@ -108,6 +109,66 @@ public class CalciteTimeBoundaryQueryTest extends BaseCalciteQueryTest
);
}
+ @Test
+ public void testMinTimeQueryWithTimeAndColumnFilters()
+ {
+ HashMap<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
+ queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
+ HashMap<String, Object> expectedContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
+ expectedContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0");
+ testQuery(
+ "SELECT MIN(__time) AS minTime FROM foo\n"
+ + "where __time >= '2001-01-01' and __time < '2003-01-01'\n"
+ + "and dim2 = 'abc'",
+ queryContext,
+ ImmutableList.of(
+ Druids.newTimeBoundaryQueryBuilder()
+ .dataSource("foo")
+ .intervals(
+ new MultipleIntervalSegmentSpec(
+ ImmutableList.of(Intervals.of("2001-01-01T00:00:00.000Z/2003-01-01T00:00:00.000Z"))
+ )
+ )
+ .bound(TimeBoundaryQuery.MIN_TIME)
+ .filters(equality("dim2", "abc", ColumnType.STRING))
+ .context(expectedContext)
+ .build()
+ ),
+ ImmutableList.of(new Object[]{DateTimes.of("2001-01-02").getMillis()})
+ );
+ }
+
+ @Test
+ public void testMinTimeQueryWithTimeAndExpressionFilters()
+ {
+ // Cannot vectorize due to UPPER expression.
+ cannotVectorize();
+
+ HashMap<String, Object> queryContext = new HashMap<>(QUERY_CONTEXT_DEFAULT);
+ queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
+ testQuery(
+ "SELECT MIN(__time) AS minTime FROM foo\n"
+ + "where __time >= '2001-01-01' and __time < '2003-01-01'\n"
+ + "and upper(dim2) = 'ABC'",
+ queryContext,
+ ImmutableList.of(
+ Druids.newTimeseriesQueryBuilder()
+ .dataSource("foo")
+ .intervals(
+ new MultipleIntervalSegmentSpec(
+ ImmutableList.of(Intervals.of("2001-01-01T00:00:00.000Z/2003-01-01T00:00:00.000Z"))
+ )
+ )
+ .virtualColumns(expressionVirtualColumn("v0", "upper(\"dim2\")", ColumnType.STRING))
+ .filters(equality("v0", "ABC", ColumnType.STRING))
+ .aggregators(new LongMinAggregatorFactory("a0", "__time"))
+ .context(queryContext)
+ .build()
+ ),
+ ImmutableList.of(new Object[]{DateTimes.of("2001-01-02").getMillis()})
+ );
+ }
+
// Currently, if both min(__time) and max(__time) are present, we don't convert it
// to a timeBoundary query. (ref : https://github.com/apache/druid/issues/12479)
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]