[druid] branch master updated: Add feature flag for sql planning of TimeBoundary queries (#12491)

abhishek Tue, 10 May 2022 02:54:03 -0700

This is an automated email from the ASF dual-hosted git repository.

abhishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git



The following commit(s) were added to refs/heads/master by this push:
     new 75836a5a06 Add feature flag for sql planning of TimeBoundary queries 
(#12491)
75836a5a06 is described below

commit 75836a5a06b7d41b4151eca41b83d580cdd8486f
Author: Rohan Garg <[email protected]>
AuthorDate: Tue May 10 15:23:42 2022 +0530

    Add feature flag for sql planning of TimeBoundary queries (#12491)
    
    * Add feature flag for sql planning of TimeBoundary queries
    
    * fixup! Add feature flag for sql planning of TimeBoundary queries
    
    * Add documentation for enableTimeBoundaryPlanning
    
    * fixup! Add documentation for enableTimeBoundaryPlanning
---
 docs/querying/sql-query-context.md                 |  1 +
 .../java/org/apache/druid/query/QueryContexts.java |  7 +++++
 .../org/apache/druid/query/QueryContextsTest.java  |  7 +++++
 .../apache/druid/sql/calcite/rel/DruidQuery.java   |  7 +++--
 .../druid/sql/calcite/run/NativeQueryMaker.java    |  3 ++
 .../apache/druid/sql/calcite/run/QueryFeature.java |  5 ++++
 .../druid/sql/calcite/CalciteJoinQueryTest.java    | 20 +++++++++----
 .../sql/calcite/CalciteTimeBoundaryQueryTest.java  | 33 +++++++++++++++-------
 .../druid/sql/calcite/TestInsertQueryMaker.java    |  1 +
 website/.spelling                                  |  3 ++
 10 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/docs/querying/sql-query-context.md 
b/docs/querying/sql-query-context.md
index 59b2f74a95..bc1fdf7a07 100644
--- a/docs/querying/sql-query-context.md
+++ b/docs/querying/sql-query-context.md
@@ -41,6 +41,7 @@ Configure Druid SQL query planning using the parameters in 
the table below.
 |`useApproximateCountDistinct`|Whether to use an approximate cardinality 
algorithm for `COUNT(DISTINCT 
foo)`.|druid.sql.planner.useApproximateCountDistinct on the Broker (default: 
true)|
 |`useGroupingSetForExactDistinct`|Whether to use grouping sets to execute 
queries with multiple exact distinct 
aggregations.|druid.sql.planner.useGroupingSetForExactDistinct on the Broker 
(default: false)|
 |`useApproximateTopN`|Whether to use approximate [TopN queries](topnquery.md) 
when a SQL query could be expressed as such. If false, exact [GroupBy 
queries](groupbyquery.md) will be used 
instead.|druid.sql.planner.useApproximateTopN on the Broker (default: true)|
+|`enableTimeBoundaryPlanning`|If true, SQL queries will get converted to 
TimeBoundary queries wherever possible. TimeBoundary queries are very efficient 
for min-max calculation on the `__time` column of a datasource.
|druid.query.default.context.enableTimeBoundaryPlanning on the Broker (default: 
false)|
 
 ## Setting the query context
 The query context parameters can be specified as a "context" object in the 
[JSON API](sql-api.md) or as a [JDBC connection properties object](sql-jdbc.md).
diff --git a/processing/src/main/java/org/apache/druid/query/QueryContexts.java 
b/processing/src/main/java/org/apache/druid/query/QueryContexts.java
index 5768c6a067..a4defa7b08 100644
--- a/processing/src/main/java/org/apache/druid/query/QueryContexts.java
+++ b/processing/src/main/java/org/apache/druid/query/QueryContexts.java
@@ -70,6 +70,7 @@ public class QueryContexts
   public static final String BY_SEGMENT_KEY = "bySegment";
   public static final String BROKER_SERVICE_NAME = "brokerService";
   public static final String IN_SUB_QUERY_THRESHOLD_KEY = 
"inSubQueryThreshold";
+  public static final String TIME_BOUNDARY_PLANNING_KEY = 
"enableTimeBoundaryPlanning";
 
   public static final boolean DEFAULT_BY_SEGMENT = false;
   public static final boolean DEFAULT_POPULATE_CACHE = true;
@@ -93,6 +94,7 @@ public class QueryContexts
   public static final boolean DEFAULT_SECONDARY_PARTITION_PRUNING = true;
   public static final boolean DEFAULT_ENABLE_DEBUG = false;
   public static final int DEFAULT_IN_SUB_QUERY_THRESHOLD = Integer.MAX_VALUE;
+  public static final boolean DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING = false;
 
   @SuppressWarnings("unused") // Used by Jackson serialization
   public enum Vectorize
@@ -347,6 +349,11 @@ public class QueryContexts
     return parseInt(context, IN_SUB_QUERY_THRESHOLD_KEY, defaultValue);
   }
 
+  public static boolean isTimeBoundaryPlanningEnabled(Map<String, Object> 
queryContext)
+  {
+    return parseBoolean(queryContext, TIME_BOUNDARY_PLANNING_KEY, 
DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING);
+  }
+
   public static <T> Query<T> withMaxScatterGatherBytes(Query<T> query, long 
maxScatterGatherBytesLimit)
   {
     Object obj = query.getContextValue(MAX_SCATTER_GATHER_BYTES_KEY);
diff --git 
a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java 
b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java
index 7cf1f94efb..c9f0c8b482 100644
--- a/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java
+++ b/processing/src/test/java/org/apache/druid/query/QueryContextsTest.java
@@ -140,6 +140,13 @@ public class QueryContextsTest
                         
QueryContexts.getInSubQueryThreshold(ImmutableMap.of()));
   }
 
+  @Test
+  public void testDefaultPlanTimeBoundarySql()
+  {
+    Assert.assertEquals(QueryContexts.DEFAULT_ENABLE_TIME_BOUNDARY_PLANNING,
+                        
QueryContexts.isTimeBoundaryPlanningEnabled(ImmutableMap.of()));
+  }
+
   @Test
   public void testGetEnableJoinLeftScanDirect()
   {
diff --git a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
index f9f5083f17..08aa08ed87 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/rel/DruidQuery.java
@@ -804,7 +804,7 @@ public class DruidQuery
       }
     }
 
-    final TimeBoundaryQuery timeBoundaryQuery = toTimeBoundaryQuery();
+    final TimeBoundaryQuery timeBoundaryQuery = 
toTimeBoundaryQuery(queryFeatureInspector);
     if (timeBoundaryQuery != null) {
       return timeBoundaryQuery;
     }
@@ -838,9 +838,10 @@ public class DruidQuery
    * @return a TimeBoundaryQuery if possible. null if it is not possible to 
construct one.
    */
   @Nullable
-  private TimeBoundaryQuery toTimeBoundaryQuery()
+  private TimeBoundaryQuery toTimeBoundaryQuery(QueryFeatureInspector 
queryFeatureInspector)
   {
-    if (grouping == null
+    if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TIME_BOUNDARY)
+        || grouping == null
         || grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs())
         || grouping.getHavingFilter() != null
         || selectProjection != null) {
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java
index a74a68b518..3e3dc0b15e 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/NativeQueryMaker.java
@@ -42,6 +42,7 @@ import org.apache.druid.java.util.common.guava.Sequences;
 import org.apache.druid.math.expr.Evals;
 import org.apache.druid.query.InlineDataSource;
 import org.apache.druid.query.Query;
+import org.apache.druid.query.QueryContexts;
 import org.apache.druid.query.QueryToolChest;
 import org.apache.druid.query.filter.BoundDimFilter;
 import org.apache.druid.query.filter.DimFilter;
@@ -112,6 +113,8 @@ public class NativeQueryMaker implements QueryMaker
       case CAN_READ_EXTERNAL_DATA:
       case SCAN_CAN_ORDER_BY_NON_TIME:
         return false;
+      case CAN_RUN_TIME_BOUNDARY:
+        return 
QueryContexts.isTimeBoundaryPlanningEnabled(plannerContext.getQueryContext().getMergedParams());
       default:
         throw new IAE("Unrecognized feature: %s", feature);
     }
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java 
b/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java
index 7a1cdee319..363837435d 100644
--- a/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java
+++ b/sql/src/main/java/org/apache/druid/sql/calcite/run/QueryFeature.java
@@ -46,4 +46,9 @@ public enum QueryFeature
    * other than the "__time" column.
    */
   SCAN_CAN_ORDER_BY_NON_TIME,
+
+  /**
+   * Queries of type {@link 
org.apache.druid.query.timeboundary.TimeBoundaryQuery} are usable.
+   */
+  CAN_RUN_TIME_BOUNDARY
 }
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
index 1c4227102f..aad48cff19 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteJoinQueryTest.java
@@ -2388,11 +2388,13 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
       cannotVectorize();
     }
 
+    Map<String, Object> updatedQueryContext = new HashMap<>(queryContext);
+    updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     Map<String, Object> maxTimeQueryContext = new HashMap<>(queryContext);
     maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, 
"a0");
     testQuery(
         "SELECT DISTINCT __time FROM druid.foo WHERE __time IN (SELECT 
MAX(__time) FROM druid.foo)",
-        queryContext,
+        updatedQueryContext,
         ImmutableList.of(
             GroupByQuery.builder()
                 .setDataSource(
@@ -2434,11 +2436,13 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
     // Cannot vectorize JOIN operator.
     cannotVectorize();
 
+    Map<String, Object> updatedQueryContext = new HashMap<>(queryContext);
+    updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     Map<String, Object> maxTimeQueryContext = new HashMap<>(queryContext);
     maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, 
"a0");
     testQuery(
         "SELECT DISTINCT __time FROM druid.foo WHERE __time NOT IN (SELECT 
MAX(__time) FROM druid.foo)",
-        queryContext,
+        updatedQueryContext,
         ImmutableList.of(
             GroupByQuery.builder()
                 .setDataSource(
@@ -3568,6 +3572,8 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
       cannotVectorize();
     }
 
+    Map<String, Object> updatedQueryContext = new HashMap<>(queryContext);
+    updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     Map<String, Object> maxTimeQueryContext = new HashMap<>(queryContext);
     maxTimeQueryContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, 
"a0");
     testQuery(
@@ -3576,7 +3582,7 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
             + "AND __time IN (SELECT MAX(__time) FROM foo WHERE cnt = 1)\n"
             + "AND __time IN (SELECT MAX(__time) FROM foo WHERE cnt <> 2)\n"
             + "GROUP BY 1",
-        queryContext,
+        updatedQueryContext,
         ImmutableList.of(
             GroupByQuery.builder()
                 .setDataSource(
@@ -3628,6 +3634,8 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
   {
     cannotVectorize();
 
+    Map<String, Object> updatedQueryContext = new HashMap<>(queryContext);
+    updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     Map<String, Object> minTimeQueryContext = new HashMap<>(queryContext);
     minTimeQueryContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, 
"a0");
     Map<String, Object> maxTimeQueryContext = new HashMap<>(queryContext);
@@ -3638,7 +3646,7 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
             + "AND __time IN (SELECT MAX(__time) FROM foo)\n"
             + "AND __time NOT IN (SELECT MIN(__time) FROM foo)\n"
             + "GROUP BY 1",
-        queryContext,
+        updatedQueryContext,
         ImmutableList.of(
             GroupByQuery.builder()
                 .setDataSource(
@@ -3732,6 +3740,8 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
   {
     cannotVectorize();
 
+    Map<String, Object> updatedQueryContext = new HashMap<>(queryContext);
+    updatedQueryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     Map<String, Object> minTimeQueryContext = new HashMap<>(queryContext);
     minTimeQueryContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, 
"a0");
     Map<String, Object> maxTimeQueryContext = new HashMap<>(queryContext);
@@ -3743,7 +3753,7 @@ public class CalciteJoinQueryTest extends 
BaseCalciteQueryTest
             + "LEFT JOIN (SELECT MIN(__time) t FROM foo) t1 on t1.t = 
foo.__time\n"
             + "WHERE dim1 IN ('abc', 'def') AND t1.t is null\n"
             + "GROUP BY 1",
-        queryContext,
+        updatedQueryContext,
         ImmutableList.of(
             GroupByQuery.builder()
                 .setDataSource(
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
index 74e42c5365..6ed157d349 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/CalciteTimeBoundaryQueryTest.java
@@ -23,6 +23,7 @@ import com.google.common.collect.ImmutableList;
 import org.apache.druid.java.util.common.DateTimes;
 import org.apache.druid.java.util.common.Intervals;
 import org.apache.druid.query.Druids;
+import org.apache.druid.query.QueryContexts;
 import org.apache.druid.query.aggregation.LongMaxAggregatorFactory;
 import org.apache.druid.query.aggregation.LongMinAggregatorFactory;
 import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
@@ -38,15 +39,18 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testMaxTimeQuery() throws Exception
   {
-    HashMap<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
-    context.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0");
+    HashMap<String, Object> queryContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
+    HashMap<String, Object> expectedContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    expectedContext.put(TimeBoundaryQuery.MAX_TIME_ARRAY_OUTPUT_NAME, "a0");
     testQuery(
         "SELECT MAX(__time) AS maxTime FROM foo",
+        queryContext,
         ImmutableList.of(
             Druids.newTimeBoundaryQueryBuilder()
                   .dataSource("foo")
                   .bound(TimeBoundaryQuery.MAX_TIME)
-                  .context(context)
+                  .context(expectedContext)
                   .build()
         ),
         ImmutableList.of(new Object[]{DateTimes.of("2001-01-03").getMillis()})
@@ -56,15 +60,18 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testMinTimeQuery() throws Exception
   {
-    HashMap<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
-    context.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0");
+    HashMap<String, Object> queryContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
+    HashMap<String, Object> expectedContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    expectedContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0");
     testQuery(
         "SELECT MIN(__time) AS minTime FROM foo",
+        queryContext,
         ImmutableList.of(
             Druids.newTimeBoundaryQueryBuilder()
                   .dataSource("foo")
                   .bound(TimeBoundaryQuery.MIN_TIME)
-                  .context(context)
+                  .context(expectedContext)
                   .build()
         ),
         ImmutableList.of(new Object[]{DateTimes.of("2000-01-01").getMillis()})
@@ -74,10 +81,13 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testMinTimeQueryWithFilters() throws Exception
   {
-    HashMap<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
-    context.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0");
+    HashMap<String, Object> queryContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    queryContext.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
+    HashMap<String, Object> expectedContext = new 
HashMap<>(QUERY_CONTEXT_DEFAULT);
+    expectedContext.put(TimeBoundaryQuery.MIN_TIME_ARRAY_OUTPUT_NAME, "a0");
     testQuery(
         "SELECT MIN(__time) AS minTime FROM foo where __time >= '2001-01-01' 
and __time < '2003-01-01'",
+        queryContext,
         ImmutableList.of(
             Druids.newTimeBoundaryQueryBuilder()
                   .dataSource("foo")
@@ -87,7 +97,7 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
                       )
                   )
                   .bound(TimeBoundaryQuery.MIN_TIME)
-                  .context(context)
+                  .context(expectedContext)
                   .build()
         ),
         ImmutableList.of(new Object[]{DateTimes.of("2001-01-01").getMillis()})
@@ -99,8 +109,11 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
   @Test
   public void testMinMaxTimeQuery() throws Exception
   {
+    HashMap<String, Object> context = new HashMap<>(QUERY_CONTEXT_DEFAULT);
+    context.put(QueryContexts.TIME_BOUNDARY_PLANNING_KEY, true);
     testQuery(
         "SELECT MIN(__time) AS minTime, MAX(__time) as maxTime FROM foo",
+        context,
         ImmutableList.of(
             Druids.newTimeseriesQueryBuilder()
                   .dataSource("foo")
@@ -109,7 +122,7 @@ public class CalciteTimeBoundaryQueryTest extends 
BaseCalciteQueryTest
                       new LongMinAggregatorFactory("a0", "__time"),
                       new LongMaxAggregatorFactory("a1", "__time")
                   )
-                  .context(QUERY_CONTEXT_DEFAULT)
+                  .context(context)
                   .build()
         ),
         ImmutableList.of(new Object[]{
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java 
b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java
index 5169a4d53d..31b9c31631 100644
--- a/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java
+++ b/sql/src/test/java/org/apache/druid/sql/calcite/TestInsertQueryMaker.java
@@ -65,6 +65,7 @@ public class TestInsertQueryMaker implements QueryMaker
       // INSERT queries should stick to groupBy, scan.
       case CAN_RUN_TIMESERIES:
       case CAN_RUN_TOPN:
+      case CAN_RUN_TIME_BOUNDARY:
         return false;
 
       // INSERT uses external data.
diff --git a/website/.spelling b/website/.spelling
index d23986e9a0..7097c7bd26 100644
--- a/website/.spelling
+++ b/website/.spelling
@@ -560,6 +560,9 @@ useApproximateCountDistinct
 useGroupingSetForExactDistinct
 useApproximateTopN
 wikipedia
+enableTimeBoundaryPlanning
+TimeBoundary
+druid.query.default.context.enableTimeBoundaryPlanning
 IEC
  - ../docs/comparisons/druid-vs-elasticsearch.md
 100x


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[druid] branch master updated: Add feature flag for sql planning of TimeBoundary queries (#12491)

Reply via email to