This is an automated email from the ASF dual-hosted git repository.
abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new dc223f22dbd SQL: Use regular filters for time filtering in subqueries.
(#17173)
dc223f22dbd is described below
commit dc223f22dbdd15a3a75d95311fb34d63384066b1
Author: Gian Merlino <[email protected]>
AuthorDate: Thu Sep 26 22:02:30 2024 -0700
SQL: Use regular filters for time filtering in subqueries. (#17173)
* SQL: Use regular filters for time filtering in subqueries.
Using the "intervals" feature on subqueries, or any non-table, should be
avoided because it isn't a meaningful optimization in those cases, and
it's simpler for runtime implementations if they can assume all filters
are located in the regular filter object.
Two changes:
1) Fix the logic in DruidQuery.canUseIntervalFiltering. It was intended
to return false for QueryDataSource, but actually returned true.
2) Add a validation to ScanQueryFrameProcessor to ensure that when running
on an input channel (which would include any subquery), the query has
"intervals" set to ONLY_ETERNITY.
Prior to this patch, the new test case in testTimeFilterOnSubquery would
throw a "Can only handle a single interval" error in the native engine,
and "QueryNotSupported" in the MSQ engine.
* Mark new case as having extra columns in decoupled mode.
* Adjust test.
---
.../msq/querykit/scan/ScanQueryFrameProcessor.java | 15 ++--
.../querykit/scan/ScanQueryFrameProcessorTest.java | 18 +++--
.../apache/druid/sql/calcite/rel/DruidQuery.java | 4 +-
.../apache/druid/sql/calcite/CalciteQueryTest.java | 51 ++++++++++++
...estTimeFilterOnSubquery@NullHandling=default.iq | 92 ++++++++++++++++++++++
.../testTimeFilterOnSubquery@NullHandling=sql.iq | 88 +++++++++++++++++++++
6 files changed, 255 insertions(+), 13 deletions(-)
diff --git
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
index 06dce22a189..05f80b9805d 100644
---
a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
+++
b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessor.java
@@ -25,6 +25,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import it.unimi.dsi.fastutil.ints.IntSet;
import org.apache.druid.collections.ResourceHolder;
+import org.apache.druid.error.DruidException;
import org.apache.druid.frame.Frame;
import org.apache.druid.frame.channel.FrameWithPartition;
import org.apache.druid.frame.channel.ReadableFrameChannel;
@@ -64,7 +65,6 @@ import org.apache.druid.query.Order;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.scan.ScanQueryEngine;
import org.apache.druid.query.scan.ScanResultValue;
-import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.SpecificSegmentSpec;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.CompleteSegment;
@@ -312,13 +312,14 @@ public class ScanQueryFrameProcessor extends
BaseLeafFrameProcessor
);
}
+ if (!Intervals.ONLY_ETERNITY.equals(query.getIntervals())) {
+ // runWithInputChannel is for running on subquery results, where we
don't expect to see "intervals" set.
+ // The SQL planner avoid it for subqueries; see
DruidQuery#canUseIntervalFiltering.
+ throw DruidException.defensive("Expected eternity intervals, but
got[%s]", query.getIntervals());
+ }
+
final CursorHolder nextCursorHolder =
- cursorFactory.makeCursorHolder(
- ScanQueryEngine.makeCursorBuildSpec(
- query.withQuerySegmentSpec(new
MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY)),
- null
- )
- );
+
cursorFactory.makeCursorHolder(ScanQueryEngine.makeCursorBuildSpec(query,
null));
final Cursor nextCursor = nextCursorHolder.asCursor();
if (nextCursor == null) {
diff --git
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java
index af0a7203570..96957da5321 100644
---
a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java
+++
b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/querykit/scan/ScanQueryFrameProcessorTest.java
@@ -59,8 +59,11 @@ import org.apache.druid.segment.TestIndex;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory;
import org.apache.druid.timeline.SegmentId;
+import org.hamcrest.CoreMatchers;
+import org.hamcrest.MatcherAssert;
import org.junit.Assert;
import org.junit.Test;
+import org.junit.internal.matchers.ThrowableMessageMatcher;
import java.io.IOException;
import java.util.Collections;
@@ -177,7 +180,7 @@ public class ScanQueryFrameProcessorTest extends
FrameProcessorTestBase
}
}
- // put funny intervals on query to ensure it is adjusted to the segment
interval before building cursor
+ // put funny intervals on query to ensure it is validated before building
cursor
final ScanQuery query =
Druids.newScanQueryBuilder()
.dataSource("test")
@@ -240,11 +243,16 @@ public class ScanQueryFrameProcessorTest extends
FrameProcessorTestBase
FrameReader.create(signature)
);
- FrameTestUtil.assertRowsEqual(
- FrameTestUtil.readRowsFromCursorFactory(cursorFactory, signature,
false),
- rowsFromProcessor
+ final RuntimeException e = Assert.assertThrows(
+ RuntimeException.class,
+ rowsFromProcessor::toList
);
- Assert.assertEquals(Unit.instance(), retVal.get());
+ MatcherAssert.assertThat(
+ e,
+ ThrowableMessageMatcher.hasMessage(CoreMatchers.containsString(
+ "Expected eternity intervals, but
got[[2001-01-01T00:00:00.000Z/2011-01-01T00:00:00.000Z, "
+ + "2011-01-02T00:00:00.000Z/2021-01-01T00:00:00.000Z]]"))
+ );
}
}
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index 3ce33e72245..eb85dc83f30 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -76,6 +76,7 @@ import org.apache.druid.query.operator.OperatorFactory;
import org.apache.druid.query.operator.ScanOperatorFactory;
import org.apache.druid.query.operator.WindowOperatorQuery;
import org.apache.druid.query.ordering.StringComparator;
+import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.LegacySegmentSpec;
import org.apache.druid.query.timeboundary.TimeBoundaryQuery;
@@ -884,7 +885,8 @@ public class DruidQuery
*/
private static boolean canUseIntervalFiltering(final DataSource dataSource)
{
- return dataSource.getAnalysis().isTableBased();
+ final DataSourceAnalysis analysis = dataSource.getAnalysis();
+ return !analysis.getBaseQuery().isPresent() && analysis.isTableBased();
}
private static Filtration toFiltration(
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
index 5af5ec3097c..412f378e8b5 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java
@@ -116,6 +116,7 @@ import org.apache.druid.query.topn.NumericTopNMetricSpec;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.join.JoinType;
@@ -7764,6 +7765,56 @@ public class CalciteQueryTest extends
BaseCalciteQueryTest
);
}
+ @DecoupledTestConfig(quidemReason =
QuidemTestCaseReason.EQUIV_PLAN_EXTRA_COLUMNS, separateDefaultModeTest = true)
+ @Test
+ public void testTimeFilterOnSubquery()
+ {
+ testQuery(
+ "SELECT __time, m1 FROM (SELECT * FROM \"foo\" LIMIT 100)\n"
+ + "WHERE TIME_IN_INTERVAL(__time, '2000/P1D') OR
TIME_IN_INTERVAL(__time, '2001/P1D')",
+ ImmutableList.of(
+ newScanQueryBuilder()
+ .dataSource(
+ newScanQueryBuilder()
+ .dataSource(CalciteTests.DATASOURCE1)
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .columns("__time", "m1")
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .limit(100)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .build()
+ )
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .filters(or(
+ range(
+ ColumnHolder.TIME_COLUMN_NAME,
+ ColumnType.LONG,
+ DateTimes.of("2000").getMillis(),
+ DateTimes.of("2000-01-02").getMillis(),
+ false,
+ true
+ ),
+ range(
+ ColumnHolder.TIME_COLUMN_NAME,
+ ColumnType.LONG,
+ DateTimes.of("2001").getMillis(),
+ DateTimes.of("2001-01-02").getMillis(),
+ false,
+ true
+ )
+ ))
+ .columns("__time", "m1")
+
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
+ .context(QUERY_CONTEXT_DEFAULT)
+ .build()
+ ),
+ ImmutableList.of(
+ new Object[]{DateTimes.of("2000-01-01").getMillis(), 1.0f},
+ new Object[]{DateTimes.of("2001-01-01").getMillis(), 4.0f}
+ )
+ );
+ }
+
@SqlTestFrameworkConfig.NumMergeBuffers(4)
@Test
public void testMultipleExactCountDistinctWithGroupingUsingGroupingSets()
diff --git
a/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=default.iq
b/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=default.iq
new file mode 100644
index 00000000000..9f63bc9a222
--- /dev/null
+++
b/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=default.iq
@@ -0,0 +1,92 @@
+# testTimeFilterOnSubquery@NullHandling=default case-crc:73448efc
+# quidem testcase reason: EQUIV_PLAN_EXTRA_COLUMNS
+!set debug true
+!set defaultTimeout 300000
+!set maxScatterGatherBytes 9223372036854775807
+!set plannerStrategy DECOUPLED
+!set sqlCurrentTimestamp 2000-01-01T00:00:00Z
+!set sqlQueryId dummy
+!set outputformat mysql
+!use druidtest:///
+SELECT __time, m1 FROM (SELECT * FROM "foo" LIMIT 100)
+WHERE TIME_IN_INTERVAL(__time, '2000/P1D') OR TIME_IN_INTERVAL(__time,
'2001/P1D');
++-------------------------+-----+
+| __time | m1 |
++-------------------------+-----+
+| 2000-01-01 00:00:00.000 | 1.0 |
+| 2001-01-01 00:00:00.000 | 4.0 |
++-------------------------+-----+
+(2 rows)
+
+!ok
+LogicalProject(__time=[$0], m1=[$5])
+ LogicalFilter(condition=[SEARCH($0, Sarg[[2000-01-01
00:00:00:TIMESTAMP(3)..2000-01-02 00:00:00:TIMESTAMP(3)), [2001-01-01
00:00:00:TIMESTAMP(3)..2001-01-02 00:00:00:TIMESTAMP(3))]:TIMESTAMP(3))])
+ LogicalSort(fetch=[100])
+ LogicalTableScan(table=[[druid, foo]])
+
+!logicalPlan
+DruidProject(__time=[$0], m1=[$5], druid=[logical])
+ DruidFilter(condition=[SEARCH($0, Sarg[[2000-01-01
00:00:00:TIMESTAMP(3)..2000-01-02 00:00:00:TIMESTAMP(3)), [2001-01-01
00:00:00:TIMESTAMP(3)..2001-01-02 00:00:00:TIMESTAMP(3))]:TIMESTAMP(3))])
+ DruidSort(fetch=[100], druid=[logical])
+ DruidTableScan(table=[[druid, foo]], druid=[logical])
+
+!druidPlan
+{
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "query",
+ "query" : {
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "table",
+ "name" : "foo"
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "limit" : 100,
+ "columns" : [ "__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2",
"unique_dim1" ],
+ "columnTypes" : [ "LONG", "LONG", "STRING", "STRING", "STRING", "FLOAT",
"DOUBLE", "COMPLEX<hyperUnique>" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+ }
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "filter" : {
+ "type" : "or",
+ "fields" : [ {
+ "type" : "bound",
+ "dimension" : "__time",
+ "lower" : "946684800000",
+ "upper" : "946771200000",
+ "upperStrict" : true,
+ "ordering" : {
+ "type" : "numeric"
+ }
+ }, {
+ "type" : "bound",
+ "dimension" : "__time",
+ "lower" : "978307200000",
+ "upper" : "978393600000",
+ "upperStrict" : true,
+ "ordering" : {
+ "type" : "numeric"
+ }
+ } ]
+ },
+ "columns" : [ "__time", "m1" ],
+ "columnTypes" : [ "LONG", "FLOAT" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+}
+!nativePlan
diff --git
a/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=sql.iq
b/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=sql.iq
new file mode 100644
index 00000000000..40b9d874777
--- /dev/null
+++
b/sql/src/test/quidem/org.apache.druid.sql.calcite.DecoupledPlanningCalciteQueryTest/testTimeFilterOnSubquery@NullHandling=sql.iq
@@ -0,0 +1,88 @@
+# testTimeFilterOnSubquery@NullHandling=sql case-crc:73448efc
+# quidem testcase reason: EQUIV_PLAN_EXTRA_COLUMNS
+!set debug true
+!set defaultTimeout 300000
+!set maxScatterGatherBytes 9223372036854775807
+!set plannerStrategy DECOUPLED
+!set sqlCurrentTimestamp 2000-01-01T00:00:00Z
+!set sqlQueryId dummy
+!set outputformat mysql
+!use druidtest:///
+SELECT __time, m1 FROM (SELECT * FROM "foo" LIMIT 100)
+WHERE TIME_IN_INTERVAL(__time, '2000/P1D') OR TIME_IN_INTERVAL(__time,
'2001/P1D');
++-------------------------+-----+
+| __time | m1 |
++-------------------------+-----+
+| 2000-01-01 00:00:00.000 | 1.0 |
+| 2001-01-01 00:00:00.000 | 4.0 |
++-------------------------+-----+
+(2 rows)
+
+!ok
+LogicalProject(__time=[$0], m1=[$5])
+ LogicalFilter(condition=[SEARCH($0, Sarg[[2000-01-01
00:00:00:TIMESTAMP(3)..2000-01-02 00:00:00:TIMESTAMP(3)), [2001-01-01
00:00:00:TIMESTAMP(3)..2001-01-02 00:00:00:TIMESTAMP(3))]:TIMESTAMP(3))])
+ LogicalSort(fetch=[100])
+ LogicalTableScan(table=[[druid, foo]])
+
+!logicalPlan
+DruidProject(__time=[$0], m1=[$5], druid=[logical])
+ DruidFilter(condition=[SEARCH($0, Sarg[[2000-01-01
00:00:00:TIMESTAMP(3)..2000-01-02 00:00:00:TIMESTAMP(3)), [2001-01-01
00:00:00:TIMESTAMP(3)..2001-01-02 00:00:00:TIMESTAMP(3))]:TIMESTAMP(3))])
+ DruidSort(fetch=[100], druid=[logical])
+ DruidTableScan(table=[[druid, foo]], druid=[logical])
+
+!druidPlan
+{
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "query",
+ "query" : {
+ "queryType" : "scan",
+ "dataSource" : {
+ "type" : "table",
+ "name" : "foo"
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "limit" : 100,
+ "columns" : [ "__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2",
"unique_dim1" ],
+ "columnTypes" : [ "LONG", "LONG", "STRING", "STRING", "STRING", "FLOAT",
"DOUBLE", "COMPLEX<hyperUnique>" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+ }
+ },
+ "intervals" : {
+ "type" : "intervals",
+ "intervals" : [
"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z" ]
+ },
+ "resultFormat" : "compactedList",
+ "filter" : {
+ "type" : "or",
+ "fields" : [ {
+ "type" : "range",
+ "column" : "__time",
+ "matchValueType" : "LONG",
+ "lower" : 946684800000,
+ "upper" : 946771200000,
+ "upperOpen" : true
+ }, {
+ "type" : "range",
+ "column" : "__time",
+ "matchValueType" : "LONG",
+ "lower" : 978307200000,
+ "upper" : 978393600000,
+ "upperOpen" : true
+ } ]
+ },
+ "columns" : [ "__time", "m1" ],
+ "columnTypes" : [ "LONG", "FLOAT" ],
+ "granularity" : {
+ "type" : "all"
+ },
+ "legacy" : false
+}
+!nativePlan
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]