This is an automated email from the ASF dual-hosted git repository.
karan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new b5958b6b077 Feature configurable calcite bloat (#16248)
b5958b6b077 is described below
commit b5958b6b077821ee327f421febebe5b7f2741734
Author: Misha <[email protected]>
AuthorDate: Mon May 6 17:13:39 2024 +0200
Feature configurable calcite bloat (#16248)
* Configurable bloat for calcite ProjectMergeRule implemented
* Comment added
* Default bloat value increased to 1000
* Implemented bloat configuration from QueryContext
* Code refactored, docs updated
---------
Co-authored-by: sviatahorau <[email protected]>
---
docs/querying/query-context.md | 1 +
.../sql/calcite/planner/CalciteRulesManager.java | 19 +++++++-
.../calcite/planner/CalcitePlannerModuleTest.java | 52 ++++++++++++++++++++++
3 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/docs/querying/query-context.md b/docs/querying/query-context.md
index 98d9c9c6aa7..d5ddea04f27 100644
--- a/docs/querying/query-context.md
+++ b/docs/querying/query-context.md
@@ -65,6 +65,7 @@ See [SQL query context](sql-query-context.md) for other query
context parameters
|`secondaryPartitionPruning`|`true`|Enable secondary partition pruning on the
Broker. The Broker will always prune unnecessary segments from the input scan
based on a filter on time intervals, but if the data is further partitioned
with hash or range partitioning, this option will enable additional pruning
based on a filter on secondary partition dimensions.|
|`debug`| `false` | Flag indicating whether to enable debugging outputs for
the query. When set to false, no additional logs will be produced (logs
produced will be entirely dependent on your logging level). When set to true,
the following addition logs will be produced:<br />- Log the stack trace of the
exception (if any) produced by the query |
|`setProcessingThreadNames`|`true`| Whether processing thread names will be
set to `queryType_dataSource_intervals` while processing a query. This aids in
interpreting thread dumps, and is on by default. Query overhead can be reduced
slightly by setting this to `false`. This has a tiny effect in most scenarios,
but can be meaningful in high-QPS, low-per-segment-processing-time scenarios. |
+|`sqlPlannerBloat`|`1000`|Calcite parameter which controls whether to merge
two Project operators when inlining expressions causes complexity to increase.
Implemented as a workaround to exception `There are not enough rules to produce
a node with desired properties: convention=DRUID, sort=[]` thrown after
rejecting the merge of two projects.|
## Parameters by query type
diff --git
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
index 7faaa69581b..4326f63340d 100644
---
a/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
+++
b/sql/src/main/java/org/apache/druid/sql/calcite/planner/CalciteRulesManager.java
@@ -38,6 +38,7 @@ import
org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rel.rules.DateRangeRules;
import org.apache.calcite.rel.rules.JoinPushThroughJoinRule;
+import org.apache.calcite.rel.rules.ProjectMergeRule;
import org.apache.calcite.rel.rules.PruneEmptyRules;
import org.apache.calcite.runtime.Hook;
import org.apache.calcite.sql.SqlExplainFormat;
@@ -83,6 +84,8 @@ public class CalciteRulesManager
private static final int HEP_DEFAULT_MATCH_LIMIT = Integer.parseInt(
System.getProperty(HEP_DEFAULT_MATCH_LIMIT_CONFIG_STRING, "1200")
);
+ public static final String BLOAT_PROPERTY = "sqlPlannerBloat";
+ public static final int DEFAULT_BLOAT = 1000;
/**
* Rules from {@link org.apache.calcite.plan.RelOptRules#BASE_RULES}, minus:
@@ -96,12 +99,14 @@ public class CalciteRulesManager
* and {@link CoreRules#FILTER_INTO_JOIN}, which are part of {@link
#FANCY_JOIN_RULES}.
* 4) {@link CoreRules#PROJECT_FILTER_TRANSPOSE} because PartialDruidQuery
would like to have the Project on top of the Filter -
* this rule could create a lot of non-useful plans.
+ * 5) {@link CoreRules#PROJECT_MERGE} added later with bloat parameter
configured from query context as a workaround for Calcite exception
+ * (there are not enough rules to produce a node with desired properties)
thrown while running complex sql-queries with
+ * big amount of subqueries.
*/
private static final List<RelOptRule> BASE_RULES =
ImmutableList.of(
CoreRules.AGGREGATE_STAR_TABLE,
CoreRules.AGGREGATE_PROJECT_STAR_TABLE,
- CoreRules.PROJECT_MERGE,
CoreRules.FILTER_SCAN,
CoreRules.FILTER_PROJECT_TRANSPOSE,
CoreRules.JOIN_PUSH_EXPRESSIONS,
@@ -452,6 +457,17 @@ public class CalciteRulesManager
.build();
}
+ public List<RelOptRule> configurableRuleSet(PlannerContext plannerContext)
+ {
+ return
ImmutableList.of(ProjectMergeRule.Config.DEFAULT.withBloat(getBloatProperty(plannerContext)).toRule());
+ }
+
+ private int getBloatProperty(PlannerContext plannerContext)
+ {
+ final Integer bloat = plannerContext.queryContext().getInt(BLOAT_PROPERTY);
+ return (bloat != null) ? bloat : DEFAULT_BLOAT;
+ }
+
public List<RelOptRule> baseRuleSet(final PlannerContext plannerContext)
{
final PlannerConfig plannerConfig = plannerContext.getPlannerConfig();
@@ -461,6 +477,7 @@ public class CalciteRulesManager
rules.addAll(BASE_RULES);
rules.addAll(ABSTRACT_RULES);
rules.addAll(ABSTRACT_RELATIONAL_RULES);
+ rules.addAll(configurableRuleSet(plannerContext));
if (plannerContext.getJoinAlgorithm().requiresSubquery()) {
rules.addAll(FANCY_JOIN_RULES);
diff --git
a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
index 06d8cf761ab..8ef3ad3106f 100644
---
a/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
+++
b/sql/src/test/java/org/apache/druid/sql/calcite/planner/CalcitePlannerModuleTest.java
@@ -30,6 +30,7 @@ import com.google.inject.multibindings.Multibinder;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.logical.LogicalTableScan;
+import org.apache.calcite.rel.rules.ProjectMergeRule;
import org.apache.calcite.schema.Schema;
import org.apache.druid.guice.LazySingleton;
import org.apache.druid.jackson.DefaultObjectMapper;
@@ -61,10 +62,13 @@ import javax.validation.Validation;
import javax.validation.Validator;
import java.util.Collections;
+import java.util.Optional;
import java.util.Set;
import static org.apache.calcite.plan.RelOptRule.any;
import static org.apache.calcite.plan.RelOptRule.operand;
+import static
org.apache.druid.sql.calcite.planner.CalciteRulesManager.BLOAT_PROPERTY;
+import static
org.apache.druid.sql.calcite.planner.CalciteRulesManager.DEFAULT_BLOAT;
@ExtendWith(EasyMockExtension.class)
public class CalcitePlannerModuleTest extends CalciteTestBase
@@ -72,6 +76,7 @@ public class CalcitePlannerModuleTest extends CalciteTestBase
private static final String SCHEMA_1 = "SCHEMA_1";
private static final String SCHEMA_2 = "SCHEMA_2";
private static final String DRUID_SCHEMA_NAME = "DRUID_SCHEMA_NAME";
+ private static final int BLOAT = 1200;
@Mock
private NamedSchema druidSchema1;
@@ -204,4 +209,51 @@ public class CalcitePlannerModuleTest extends
CalciteTestBase
.contains(customRule);
Assert.assertTrue(containsCustomRule);
}
+
+ @Test
+ public void testConfigurableBloat()
+ {
+ ObjectMapper mapper = new DefaultObjectMapper();
+ PlannerToolbox toolbox = new PlannerToolbox(
+ injector.getInstance(DruidOperatorTable.class),
+ macroTable,
+ mapper,
+ injector.getInstance(PlannerConfig.class),
+ rootSchema,
+ joinableFactoryWrapper,
+ CatalogResolver.NULL_RESOLVER,
+ "druid",
+ new CalciteRulesManager(ImmutableSet.of()),
+ CalciteTests.TEST_AUTHORIZER_MAPPER,
+ AuthConfig.newBuilder().build()
+ );
+
+ PlannerContext contextWithBloat = PlannerContext.create(
+ toolbox,
+ "SELECT 1",
+ new NativeSqlEngine(queryLifecycleFactory, mapper),
+ Collections.singletonMap(BLOAT_PROPERTY, BLOAT),
+ null
+ );
+
+ PlannerContext contextWithoutBloat = PlannerContext.create(
+ toolbox,
+ "SELECT 1",
+ new NativeSqlEngine(queryLifecycleFactory, mapper),
+ Collections.emptyMap(),
+ null
+ );
+
+ assertBloat(contextWithBloat, BLOAT);
+ assertBloat(contextWithoutBloat, DEFAULT_BLOAT);
+ }
+
+ private void assertBloat(PlannerContext context, int expectedBloat)
+ {
+ Optional<ProjectMergeRule> firstProjectMergeRule =
injector.getInstance(CalciteRulesManager.class).baseRuleSet(context).stream()
+ .filter(rule -> rule instanceof ProjectMergeRule)
+ .map(rule -> (ProjectMergeRule) rule)
+ .findAny();
+ Assert.assertTrue(firstProjectMergeRule.isPresent() &&
firstProjectMergeRule.get().config.bloat() == expectedBloat);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]