This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new b7eca8a  HIVE-25880: Add property to exclude CBO rules by a regex on their description (Alessandro Solimando, reviewed by Stamatis Zampetakis)
b7eca8a is described below

commit b7eca8ab5280c5b59d473b8c5fd98be8da5c1195
Author: Alessandro Solimando <[email protected]>
AuthorDate: Wed Jan 19 17:16:55 2022 +0100

    HIVE-25880: Add property to exclude CBO rules by a regex on their description (Alessandro Solimando, reviewed by Stamatis Zampetakis)
    
    Closes #2955
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   4 +
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |   9 ++
 .../queries/clientpositive/rule_exclusion_config.q |  44 ++++++
 .../llap/rule_exclusion_config.q.out               | 150 +++++++++++++++++++++
 4 files changed, 207 insertions(+)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a174653..6e4bbcc7 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1883,6 +1883,10 @@ public class HiveConf extends Configuration {
                                                                  + " expressed 
as multiple of Local FS write cost"),
     HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", 
"Default cost of reading a byte from HDFS;"
                                                                  + " expressed 
as multiple of Local FS read cost"),
+    HIVE_CBO_RULE_EXCLUSION_REGEX("hive.cbo.rule.exclusion.regex", "",
+        "Regex over rule descriptions to exclude them from planning. "
+            + "The intended usage is to allow to disable rules from problematic queries, it is *not* a performance tuning property. "
+            + "The property is experimental, it can be changed or removed without any notice."),
     HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true,
          "Toggle display of CBO warnings like missing column stats"),
     
HIVE_CBO_STATS_CORRELATED_MULTI_KEY_JOINS("hive.cbo.stats.correlated.multi.key.joins",
 true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index dc88027..ab4506e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1958,6 +1958,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       final boolean useMaterializedViewsRegistry = 
!conf.get(HiveConf.ConfVars.HIVE_SERVER2_MATERIALIZED_VIEWS_REGISTRY_IMPL.varname)
               .equals("DUMMY");
+      final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
       final RelNode calcitePreMVRewritingPlan = basePlan;
       final Set<TableName> tablesUsedQuery = getTablesUsed(basePlan);
 
@@ -2023,6 +2024,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       planner.addRule(new HivePartitionPruneRule(conf));
 
       // Optimize plan
+      if (!ruleExclusionRegex.isEmpty()) {
+        planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
+      }
       planner.setRoot(basePlan);
       basePlan = planner.findBestExp();
       // Remove view-based rewriting rules from planner
@@ -2416,6 +2420,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
         RelMetadataProvider mdProvider, RexExecutor executorProvider,
         List<HiveRelOptMaterialization> materializations) {
 
+      final String ruleExclusionRegex = conf.get(ConfVars.HIVE_CBO_RULE_EXCLUSION_REGEX.varname, "");
+
       // Create planner and copy context
       HepPlanner planner = new HepPlanner(program,
           basePlan.getCluster().getPlanner().getContext());
@@ -2441,6 +2447,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
         }
       }
 
+      if (!ruleExclusionRegex.isEmpty()) {
+        planner.setRuleDescExclusionFilter(Pattern.compile(ruleExclusionRegex));
+      }
       planner.setRoot(basePlan);
 
       return planner.findBestExp();
diff --git a/ql/src/test/queries/clientpositive/rule_exclusion_config.q b/ql/src/test/queries/clientpositive/rule_exclusion_config.q
new file mode 100644
index 0000000..2fb4418
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/rule_exclusion_config.q
@@ -0,0 +1,44 @@
+--! qt:dataset:src
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoinPushTransitivePredicatesRule|HiveJoinAddNotNullRule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=HiveJoin.*Rule;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
+
+set hive.cbo.rule.exclusion.regex=.*;
+
+EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20;
diff --git a/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
new file mode 100644
index 0000000..a656edf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/rule_exclusion_config.q.out
@@ -0,0 +1,150 @@
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not 
available])
+  HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], 
cost=[not available])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 
20))])
+        HiveTableScan(table=[[default, src]], table:alias=[src1])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 
20))])
+        HiveTableScan(table=[[default, src]], table:alias=[src2])
+  HiveProject(key=[$0], value=[$1])
+    HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 20))])
+      HiveTableScan(table=[[default, src]], table:alias=[src3])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], 
value1=[$1])
+  HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+    HiveProject(key=[$0], value=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, src]], table:alias=[src3])
+    HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+      HiveProject(key=[$0], value=[$1])
+        HiveFilter(condition=[IS NOT NULL($0)])
+          HiveTableScan(table=[[default, src]], table:alias=[src2])
+      HiveProject(key=[$0], value=[$1])
+        HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 
20))])
+          HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], 
value1=[$1])
+  HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+    HiveProject(key=[$0], value=[$1])
+      HiveTableScan(table=[[default, src]], table:alias=[src3])
+    HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+      HiveProject(key=[$0], value=[$1])
+        HiveTableScan(table=[[default, src]], table:alias=[src2])
+      HiveProject(key=[$0], value=[$1])
+        HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 
20))])
+          HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$4], value=[$5], key0=[$2], value0=[$3], key1=[$0], 
value1=[$1])
+  HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+    HiveProject(key=[$0], value=[$1])
+      HiveTableScan(table=[[default, src]], table:alias=[src3])
+    HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], 
cost=[not available])
+      HiveProject(key=[$0], value=[$1])
+        HiveTableScan(table=[[default, src]], table:alias=[src2])
+      HiveProject(key=[$0], value=[$1])
+        HiveFilter(condition=[AND(>(CAST($0):DOUBLE, 10), <(CAST($0):DOUBLE, 
20))])
+          HiveTableScan(table=[[default, src]], table:alias=[src1])
+
+PREHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT *
+FROM src src1
+  JOIN src src2 ON (src1.key = src2.key)
+  JOIN src src3 ON (src1.key = src3.key)
+WHERE src1.key > 10 and src1.key < 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(key=[$0], value=[$1], key1=[$6], value1=[$7], key2=[$12], 
value2=[$13])
+  HiveFilter(condition=[AND(>(CAST($0):DOUBLE, CAST(10):DOUBLE), 
<(CAST($0):DOUBLE, CAST(20):DOUBLE))])
+    HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], 
cost=[not available])
+      HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], 
cost=[not available])
+        HiveTableScan(table=[[default, src]], table:alias=[src1])
+        HiveTableScan(table=[[default, src]], table:alias=[src2])
+      HiveTableScan(table=[[default, src]], table:alias=[src3])
+

Reply via email to