This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 192a9a9aa82 HIVE-26160: Materialized View rewrite does not check 
tables scanned in sub-query expressions (Krisztian Kasa, reviewed by Stamatis 
Zampetakis)
192a9a9aa82 is described below

commit 192a9a9aa822bd724b2fd8070435dcd6e2f2cc03
Author: Krisztian Kasa <[email protected]>
AuthorDate: Thu Apr 28 06:10:11 2022 +0200

    HIVE-26160: Materialized View rewrite does not check tables scanned in 
sub-query expressions (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
 .../hadoop/hive/ql/parse/CalcitePlanner.java       | 15 ++--
 .../materialized_view_rewrite_by_text_9.q          | 22 ++++++
 .../llap/materialized_view_rewrite_by_text_9.q.out | 92 ++++++++++++++++++++++
 3 files changed, 124 insertions(+), 5 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index cb3fddb60b7..e522fc9cea7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1680,8 +1680,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
         LOG.debug("Initial CBO Plan:\n" + RelOptUtil.toString(calcitePlan));
       }
 
-      calcitePlan = applyMaterializedViewRewritingByText(ast, calcitePlan, 
optCluster);
-
       // Create executor
       RexExecutor executorProvider = new HiveRexExecutorImpl();
       calcitePlan.getCluster().getPlanner().setExecutor(executorProvider);
@@ -1691,6 +1689,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
       optCluster.invalidateMetadataQuery();
 
+      calcitePlan = applyMaterializedViewRewritingByText(
+              ast, calcitePlan, optCluster, mdProvider.getMetadataProvider());
+
       // We need to get the ColumnAccessInfo and viewToTableSchema for views.
       HiveRelFieldTrimmer.get()
           .trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
@@ -2108,7 +2109,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
 
     private RelNode applyMaterializedViewRewritingByText(
-            ASTNode queryToRewriteAST, RelNode originalPlan, RelOptCluster 
optCluster) {
+            ASTNode queryToRewriteAST,
+            RelNode originalPlan,
+            RelOptCluster optCluster,
+            RelMetadataProvider metadataProvider) {
       if (!isMaterializedViewRewritingByTextEnabled()) {
         return originalPlan;
       }
@@ -2122,8 +2126,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
                 queryToRewriteAST.getTokenStopIndex());
 
         ASTNode expandedAST = ParseUtils.parse(expandedQueryText, new 
Context(conf));
-        Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(originalPlan);
-        RelNode mvScan = getMaterializedViewByAST(expandedAST, optCluster, 
ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
+        Set<TableName> tablesUsedByOriginalPlan = 
getTablesUsed(removeSubqueries(originalPlan, metadataProvider));
+        RelNode mvScan = getMaterializedViewByAST(
+                expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, 
getTxnMgr());
         if (mvScan != null) {
           return mvScan;
         }
diff --git 
a/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q 
b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q
new file mode 100644
index 00000000000..ca4ba36b0c2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q
@@ -0,0 +1,22 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.materializedview.rewriting=false;
+
+create table t1(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true');
+
+create table t2(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true');
+
+create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2;
+
+-- View can be used -> rewrite
+explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2);
+
+insert into t1(col0) values (2);
+
+-- View can not be used since it is outdated
+explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2);
diff --git 
a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out
 
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out
new file mode 100644
index 00000000000..52ce097a039
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out
@@ -0,0 +1,92 @@
+PREHOOK: query: create table t1(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(col0 int) STORED AS ORC
+                          TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+Only query text based automatic rewriting is available for materialized view. 
Statement has unsupported operator: union.
+PREHOOK: query: create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+POSTHOOK: query: create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1.col0 EXPRESSION []
+PREHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
+  HiveProject(col0=[$0])
+    HiveFilter(condition=[IS NOT NULL($0)])
+      HiveTableScan(table=[[default, t2]], table:alias=[t2])
+  HiveProject(col0=[$0])
+    HiveFilter(condition=[IS NOT NULL($0)])
+      HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+
+PREHOOK: query: insert into t1(col0) values (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(col0) values (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.col0 SCRIPT []
+PREHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union 
select col0 from t1 where col0 = 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
+  HiveProject(col0=[$0])
+    HiveFilter(condition=[IN($0, 1, 2)])
+      HiveTableScan(table=[[default, t2]], table:alias=[t2])
+  HiveProject($f0=[$0])
+    HiveUnion(all=[true])
+      HiveProject($f0=[CAST(1):INTEGER])
+        HiveFilter(condition=[=($0, 1)])
+          HiveTableScan(table=[[default, t1]], table:alias=[t1])
+      HiveProject($f0=[CAST(2):INTEGER])
+        HiveFilter(condition=[=($0, 2)])
+          HiveTableScan(table=[[default, t1]], table:alias=[t1])
+

Reply via email to