(druid) branch master updated: Feature configurable calcite bloat (#16248)

karan Mon, 06 May 2024 08:13:49 -0700

This is an automated email from the ASF dual-hosted git repository.

karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git



The following commit(s) were added to refs/heads/master by this push:
     new b5958b6b077 Feature configurable calcite bloat (#16248)
b5958b6b077 is described below

commit b5958b6b077821ee327f421febebe5b7f2741734
Author: Misha <[email protected]>
AuthorDate: Mon May 6 17:13:39 2024 +0200

    Feature configurable calcite bloat (#16248)
    
    * Configurable bloat for calcite ProjectMergeRule implemented
    
    * Comment added
    
    * Default bloat value increased to 1000
    
    * Implemented bloat configuration from QueryContext
    
    * Code refactored, docs updated
    
    ---------
    
    Co-authored-by: sviatahorau <[email protected]>
---
 docs/querying/query-context.md                     |  1 +
 .../sql/calcite/planner/CalciteRulesManager.java   | 19 +++++++-
 .../calcite/planner/CalcitePlannerModuleTest.java  | 52 ++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md
index 98d9c9c6aa7..d5ddea04f27 100644
--- a/docs/querying/query-context.md
+++ b/docs/querying/query-context.md
@@ -65,6 +65,7 @@ See [SQL query context](sql-query-context.md) for other query 
context parameters
 |`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the 
Broker. The Broker will always prune unnecessary segments from the input scan 
based on a filter on time intervals, but if the data is further partitioned 
with hash or range partitioning, this option will enable additional pruning 
based on a filter on secondary partition dimensions.|
 |`debug`| `false` | Flag indicating whether to enable debugging outputs for 
the query. When set to false, no additional logs will be produced (logs 
produced will be entirely dependent on your logging level). When set to true, 
the following addition logs will be produced:<br />- Log the stack trace of the 
exception (if any) produced by the query |
 |`setProcessingThreadNames`|`true`| Whether processing thread names will be 
set to `queryType_dataSource_intervals` while processing a query. This aids in 
interpreting thread dumps, and is on by default. Query overhead can be reduced 
slightly by setting this to `false`. This has a tiny effect in most scenarios, 
but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. |
+|`sqlPlannerBloat`|`1000`|Calcite parameter that controls whether to merge 
two Project operators when inlining expressions causes complexity to increase. 
Implemented as a workaround for the exception `There are not enough rules to produce 
a node with desired properties: convention=DRUID, sort=[]`, which is thrown after 
the merge of two projects is rejected.|
 
 ## Parameters by query type
 
diff --git 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
 
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
index 7faaa69581b..4326f63340d 100644
--- 
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
+++ 
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
@@ -38,6 +38,7 @@ import 
org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
 import org.apache.calcite.rel.rules.CoreRules;
 import org.apache.calcite.rel.rules.DateRangeRules;
 import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
+import org.apache.calcite.rel.rules.ProjectMergeRule;
 import org.apache.calcite.rel.rules.PruneEmptyRules;
 import org.apache.calcite.runtime.Hook;
 import org.apache.calcite.sql.SqlExplainFormat;
@@ -83,6 +84,8 @@ public class CalciteRulesManager
   private static final int HEP_DEFAULT_MATCH_LIMIT = Integer.parseInt(
       System.getProperty(HEP_DEFAULT_MATCH_LIMIT_CONFIG_STRING, "1200")
   );
+  public static final String BLOAT_PROPERTY = "sqlPlannerBloat";
+  public static final int DEFAULT_BLOAT = 1000;
 
   /**
    * Rules from {@link org.apache.calcite.plan.RelOptRules#BASE_RULES}, minus:
@@ -96,12 +99,14 @@ public class CalciteRulesManager
    * and {@link CoreRules#FILTER_INTO_JOIN}, which are part of {@link 
#FANCY_JOIN_RULES}.
    * 4) {@link CoreRules#PROJECT_FILTER_TRANSPOSE} because PartialDruidQuery 
would like to have the Project on top of the Filter -
    * this rule could create a lot of non-useful plans.
+   * 5) {@link CoreRules#PROJECT_MERGE}, which is added later with its bloat parameter 
configured from the query context, as a workaround for the Calcite exception
+   * (there are not enough rules to produce a node with desired properties) 
thrown while running complex SQL queries with
+   * a large number of subqueries.
    */
   private static final List<RelOptRule> BASE_RULES =
       ImmutableList.of(
           CoreRules.AGGREGATE_STAR_TABLE,
           CoreRules.AGGREGATE_PROJECT_STAR_TABLE,
-          CoreRules.PROJECT_MERGE,
           CoreRules.FILTER_SCAN,
           CoreRules.FILTER_PROJECT_TRANSPOSE,
           CoreRules.JOIN_PUSH_EXPRESSIONS,
@@ -452,6 +457,17 @@ public class CalciteRulesManager
                         .build();
   }
 
+  public List<RelOptRule> configurableRuleSet(PlannerContext plannerContext)
+  {
+    return 
ImmutableList.of(ProjectMergeRule.Config.DEFAULT.withBloat(getBloatProperty(plannerContext)).toRule());
+  }
+
+  private int getBloatProperty(PlannerContext plannerContext)
+  {
+    final Integer bloat = plannerContext.queryContext().getInt(BLOAT_PROPERTY);
+    return (bloat != null) ? bloat : DEFAULT_BLOAT;
+  }
+
   public List<RelOptRule> baseRuleSet(final PlannerContext plannerContext)
   {
     final PlannerConfig plannerConfig = plannerContext.getPlannerConfig();
@@ -461,6 +477,7 @@ public class CalciteRulesManager
     rules.addAll(BASE_RULES);
     rules.addAll(ABSTRACT_RULES);
     rules.addAll(ABSTRACT_RELATIONAL_RULES);
+    rules.addAll(configurableRuleSet(plannerContext));
 
     if (plannerContext.getJoinAlgorithm().requiresSubquery()) {
       rules.addAll(FANCY_JOIN_RULES);
diff --git 
a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
 
b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
index 06d8cf761ab..8ef3ad3106f 100644
--- 
a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
+++ 
b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
@@ -30,6 +30,7 @@ import com.google.inject.multibindings.Multibinder;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.rel.logical.LogicalTableScan;
+import org.apache.calcite.rel.rules.ProjectMergeRule;
 import org.apache.calcite.schema.Schema;
 import org.apache.druid.guice.LazySingleton;
 import org.apache.druid.jackson.DefaultObjectMapper;
@@ -61,10 +62,13 @@ import javax.validation.Validation;
 import javax.validation.Validator;
 
 import java.util.Collections;
+import java.util.Optional;
 import java.util.Set;
 
 import static org.apache.calcite.plan.RelOptRule.any;
 import static org.apache.calcite.plan.RelOptRule.operand;
+import static 
org.apache.druid.sql.calcite.planner.CalciteRulesManager.BLOAT_PROPERTY;
+import static 
org.apache.druid.sql.calcite.planner.CalciteRulesManager.DEFAULT_BLOAT;
 
 @ExtendWith(EasyMockExtension.class)
 public class CalcitePlannerModuleTest extends CalciteTestBase
@@ -72,6 +76,7 @@ public class CalcitePlannerModuleTest extends CalciteTestBase
   private static final String SCHEMA_1 = "SCHEMA_1";
   private static final String SCHEMA_2 = "SCHEMA_2";
   private static final String DRUID_SCHEMA_NAME = "DRUID_SCHEMA_NAME";
+  private static final int BLOAT = 1200;
 
   @Mock
   private NamedSchema druidSchema1;
@@ -204,4 +209,51 @@ public class CalcitePlannerModuleTest extends 
CalciteTestBase
                                          .contains(customRule);
     Assert.assertTrue(containsCustomRule);
   }
+
+  @Test
+  public void testConfigurableBloat()
+  {
+    ObjectMapper mapper = new DefaultObjectMapper();
+    PlannerToolbox toolbox = new PlannerToolbox(
+            injector.getInstance(DruidOperatorTable.class),
+            macroTable,
+            mapper,
+            injector.getInstance(PlannerConfig.class),
+            rootSchema,
+            joinableFactoryWrapper,
+            CatalogResolver.NULL_RESOLVER,
+            "druid",
+            new CalciteRulesManager(ImmutableSet.of()),
+            CalciteTests.TEST_AUTHORIZER_MAPPER,
+            AuthConfig.newBuilder().build()
+    );
+
+    PlannerContext contextWithBloat = PlannerContext.create(
+            toolbox,
+            "SELECT 1",
+            new NativeSqlEngine(queryLifecycleFactory, mapper),
+            Collections.singletonMap(BLOAT_PROPERTY, BLOAT),
+            null
+    );
+
+    PlannerContext contextWithoutBloat = PlannerContext.create(
+            toolbox,
+            "SELECT 1",
+            new NativeSqlEngine(queryLifecycleFactory, mapper),
+            Collections.emptyMap(),
+            null
+    );
+
+    assertBloat(contextWithBloat, BLOAT);
+    assertBloat(contextWithoutBloat, DEFAULT_BLOAT);
+  }
+
+  private void assertBloat(PlannerContext context, int expectedBloat)
+  {
+    Optional<ProjectMergeRule> firstProjectMergeRule = 
injector.getInstance(CalciteRulesManager.class).baseRuleSet(context).stream()
+            .filter(rule -> rule instanceof ProjectMergeRule)
+            .map(rule -> (ProjectMergeRule) rule)
+            .findAny();
+    Assert.assertTrue(firstProjectMergeRule.isPresent() && 
firstProjectMergeRule.get().config.bloat() == expectedBloat);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(druid) branch master updated: Feature configurable calcite bloat (#16248)

Reply via email to