This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 192a9a9aa82 HIVE-26160: Materialized View rewrite does not check
tables scanned in sub-query expressions (Krisztian Kasa, reviewed by Stamatis
Zampetakis)
192a9a9aa82 is described below
commit 192a9a9aa822bd724b2fd8070435dcd6e2f2cc03
Author: Krisztian Kasa <[email protected]>
AuthorDate: Thu Apr 28 06:10:11 2022 +0200
HIVE-26160: Materialized View rewrite does not check tables scanned in
sub-query expressions (Krisztian Kasa, reviewed by Stamatis Zampetakis)
---
.../hadoop/hive/ql/parse/CalcitePlanner.java | 15 ++--
.../materialized_view_rewrite_by_text_9.q | 22 ++++++
.../llap/materialized_view_rewrite_by_text_9.q.out | 92 ++++++++++++++++++++++
3 files changed, 124 insertions(+), 5 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index cb3fddb60b7..e522fc9cea7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1680,8 +1680,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
LOG.debug("Initial CBO Plan:\n" + RelOptUtil.toString(calcitePlan));
}
- calcitePlan = applyMaterializedViewRewritingByText(ast, calcitePlan, optCluster);
-
// Create executor
RexExecutor executorProvider = new HiveRexExecutorImpl();
calcitePlan.getCluster().getPlanner().setExecutor(executorProvider);
@@ -1691,6 +1689,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider()));
optCluster.invalidateMetadataQuery();
+ calcitePlan = applyMaterializedViewRewritingByText(
+ ast, calcitePlan, optCluster, mdProvider.getMetadataProvider());
+
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
@@ -2108,7 +2109,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
}
private RelNode applyMaterializedViewRewritingByText(
- ASTNode queryToRewriteAST, RelNode originalPlan, RelOptCluster optCluster) {
+ ASTNode queryToRewriteAST,
+ RelNode originalPlan,
+ RelOptCluster optCluster,
+ RelMetadataProvider metadataProvider) {
if (!isMaterializedViewRewritingByTextEnabled()) {
return originalPlan;
}
@@ -2122,8 +2126,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
queryToRewriteAST.getTokenStopIndex());
ASTNode expandedAST = ParseUtils.parse(expandedQueryText, new Context(conf));
- Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(originalPlan);
- RelNode mvScan = getMaterializedViewByAST(expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
+ Set<TableName> tablesUsedByOriginalPlan = getTablesUsed(removeSubqueries(originalPlan, metadataProvider));
+ RelNode mvScan = getMaterializedViewByAST(
+ expandedAST, optCluster, ANY, db, tablesUsedByOriginalPlan, getTxnMgr());
if (mvScan != null) {
return mvScan;
}
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q
new file mode 100644
index 00000000000..ca4ba36b0c2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_by_text_9.q
@@ -0,0 +1,22 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.materializedview.rewriting=false;
+
+create table t1(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+
+create table t2(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true');
+
+create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2;
+
+-- View can be used -> rewrite
+explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2);
+
+insert into t1(col0) values (2);
+
+-- View can not be used since it is outdated
+explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2);
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out
new file mode 100644
index 00000000000..52ce097a039
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_by_text_9.q.out
@@ -0,0 +1,92 @@
+PREHOOK: query: create table t1(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(col0 int) STORED AS ORC
+ TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+Only query text based automatic rewriting is available for materialized view. Statement has unsupported operator: union.
+PREHOOK: query: create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mat1
+POSTHOOK: query: create materialized view mat1 as
+select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mat1
+POSTHOOK: Lineage: mat1.col0 EXPRESSION []
+PREHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@mat1
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@mat1
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
+ HiveProject(col0=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
+ HiveProject(col0=[$0])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, mat1]], table:alias=[default.mat1])
+
+PREHOOK: query: insert into t1(col0) values (2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1(col0) values (2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.col0 SCRIPT []
+PREHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo
+select col0 from t2 where col0 in (select col0 from t1 where col0 = 1 union select col0 from t1 where col0 = 2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+CBO PLAN:
+HiveSemiJoin(condition=[=($0, $1)], joinType=[semi])
+ HiveProject(col0=[$0])
+ HiveFilter(condition=[IN($0, 1, 2)])
+ HiveTableScan(table=[[default, t2]], table:alias=[t2])
+ HiveProject($f0=[$0])
+ HiveUnion(all=[true])
+ HiveProject($f0=[CAST(1):INTEGER])
+ HiveFilter(condition=[=($0, 1)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+ HiveProject($f0=[CAST(2):INTEGER])
+ HiveFilter(condition=[=($0, 2)])
+ HiveTableScan(table=[[default, t1]], table:alias=[t1])
+