This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new b7eca8a HIVE-25880: Add property to exclude CBO rules by a regex on
their description (Alessandro Solimando, reviewed by Stamatis Zampetakis)
b7eca8a is described below
commit b7eca8ab5280c5b59d473b8c5fd98be8da5c1195
Author: Alessandro Solimando <[email protected]>
AuthorDate: Wed Jan 19 17:16:55 2022 +0100
HIVE-25880: Add property to exclude CBO rules by a regex on their
description (Alessandro Solimando, reviewed by Stamatis Zampetakis)
Closes #2955
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../hadoop/hive/ql/parse/CalcitePlanner.java | 9 ++
.../queries/clientpositive/rule_exclusion_config.q | 44 ++++++
.../llap/rule_exclusion_config.q.out | 150 +++++++++++++++++++++
4 files changed, 207 insertions(+)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a174653..6e4bbcc7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1883,6 +1883,10 @@ public class HiveConf extends Configuration {
+ " expressed as multiple of Local FS write cost"),
HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;"
+ " expressed as multiple of Local FS read cost"),
+ HIVE_CBO_RULE_EXCLUSION_REGEX("hive.cbo.rule.exclusion.regex", "",
+ "Regex over rule descriptions to exclude them from planning. "
+ + "The intended usage is to allow to disable rules from problematic queries, it is *not* a performance tuning property. "
+ + "The property is experimental, it can be changed or removed without any notice."),
HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true,
"Toggle display of CBO warnings like missing column stats"),
HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS("hive.cbo.stats.correlated.multi.key.joins", true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index dc88027..ab4506e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1958,6 +1958,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
final boolean useMaterializedViewsRegistry =
!conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname)
.equals("DUMMY");
+ final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
final RelNode calcitePreMVRewritingPlan = basePlan;
final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan);
@@ -2023,6 +2024,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
planner.addRule(new HivePartitionPruneRule(conf));
// Optimize plan
+ if (!ruleExclusionRegex.isEmpty()) {
+ planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
+ }
planner.setRoot(basePlan);
basePlan = planner.findBestExp();
// Remove view-based rewriting rules from planner
@@ -2416,6 +2420,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelMetadataProvider mdProvider, RexExecutor executorProvider,
List<HiveRelOptMaterialization> materializations) {
+ final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+
// Create planner and copy context
HepPlanner planner = new HepPlanner(program,
basePlan.getCluster().getPlanner().getContext());
@@ -2441,6 +2447,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
}
+ if (!ruleExclusionRegex.isEmpty()) {
+ planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
+ }
planner.setRoot(basePlan);
return planner.findBestExp();
diff --git a/ql/src/test/queries/clientpositive/rule_exclusion_config.q b/ql/src/test/queries/clientpositive/rule_exclusion_config.q
new file mode 100644
index 0000000..2fb4418
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/rule_exclusion_config.q
@@ -0,0 +1,44 @@
+--! qt:dataset:src
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule|HiveJoinAddNotNullRule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoin.*Rule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=.*;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
diff --git a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
new file mode 100644
index 0000000..a656edf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
@@ -0,0 +1,150 @@
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src1])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src2])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src3])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
+ HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, src]], table:alias=[src3])
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, src]], table:alias=[src2])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
+ HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveTableScan(table=[[default, src]], table:alias=[src3])
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveTableScan(table=[[default, src]], table:alias=[src2])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], value1=[$1])
+ HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveTableScan(table=[[default, src]], table:alias=[src3])
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(key=[$0], value=[$1])
+ HiveTableScan(table=[[default, src]], table:alias=[src2])
+ HiveProject(key=[$0], value=[$1])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+ HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+ JOIN src src2 ON (src1.key = src2.key)
+ JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$0], value=[$1], key1=[$6], value1=[$7], key2=[$12], value2=[$13])
+ HiveFilter(condition=[AND(>(CAST($0):DOUBLE, CAST(10):DOUBLE), <(CAST($0):DOUBLE, CAST(20):DOUBLE))])
+ HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveTableScan(table=[[default, src]], table:alias=[src1])
+ HiveTableScan(table=[[default, src]], table:alias=[src2])
+ HiveTableScan(table=[[default, src]], table:alias=[src3])
+