Repository: hive Updated Branches: refs/heads/branch-3 431c92d19 -> 7ead1685b
HIVE-20102: Add a couple of additional tests for query parsing (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7ead1685 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7ead1685 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7ead1685 Branch: refs/heads/branch-3 Commit: 7ead1685b0a84c3efe71b16001df203227df001b Parents: 431c92d Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Wed Jul 11 16:38:51 2018 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Thu Jul 12 13:10:41 2018 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/ParseDriver.java | 11 + .../org/apache/hadoop/hive/ql/parse/QB.java | 4 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 33 ++- ql/src/test/queries/clientpositive/masking_13.q | 28 +++ .../clientpositive/results_cache_with_masking.q | 3 +- .../llap/results_cache_with_masking.q.out | 51 ++++- .../results/clientpositive/masking_13.q.out | 206 +++++++++++++++++++ 7 files changed, 323 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java index bda3c21..895c2f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java @@ -153,6 +153,17 @@ public class ParseDriver { }; @Override + public Object dupTree(Object t, Object parent) { + // Overridden to copy start index / end index, that is needed through optimization, + // e.g., for masking/filtering + ASTNode astNode = (ASTNode) t; 
+ ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent); + astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex()); + astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex()); + return astNodeCopy; + } + + @Override public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { return new ASTErrorNode(input, start, stop, e); }; http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index 64b3541..a2f6fbb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -423,6 +423,10 @@ public class QB { return viewDesc != null && !viewDesc.isMaterialized(); } + public boolean isMultiDestQuery() { + return qbp != null && qbp.getClauseNamesForDest() != null && qbp.getClauseNamesForDest().size() > 1; + } + public HashMap<String, Table> getViewToTabSchema() { return viewAliasToViewSchema; } http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 85ecbd6..644ac1f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12117,8 +12117,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws SemanticException { - // 1. Generate Resolved Parse tree from syntax tree LOG.info("Starting Semantic Analysis"); + // 1. 
Generate Resolved Parse tree from syntax tree + boolean needsTransform = needsTransform(); //change the location of position alias process here processPositionAlias(ast); PlannerContext plannerCtx = pcf.create(); @@ -12138,7 +12139,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // Otherwise we have to wait until after the masking/filtering step. boolean isCacheEnabled = isResultsCacheEnabled(); QueryResultsCache.LookupInfo lookupInfo = null; - boolean needsTransform = needsTransform(); if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) { lookupInfo = createLookupInfoForQuery(ast); if (checkResultsCache(lookupInfo)) { @@ -12146,32 +12146,45 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } + ASTNode astForMasking; + if (isCBOExecuted() && needsTransform && + (qb.isCTAS() || qb.isView() || qb.isMaterializedView() || qb.isMultiDestQuery())) { + // If we use CBO and we may apply masking/filtering policies, we create a copy of the ast. + // The reason is that the generation of the operator tree may modify the initial ast, + // but if we need to parse for a second time, we would like to parse the unmodified ast. + astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast); + } else { + astForMasking = ast; + } + // 2. 
Gen OP Tree from resolved Parse Tree Operator sinkOp = genOPTree(ast, plannerCtx); + boolean usesMasking = false; if (!unparseTranslator.isEnabled() && (tableMask.isEnabled() && analyzeRewrite == null)) { // Here we rewrite the * and also the masking table - ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, ctx.getTokenRewriteStream(), + ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, astForMasking, ctx.getTokenRewriteStream(), ctx, db, tabNameToTabObject, ignoredTokens); - if (tree != ast) { + if (astForMasking != rewrittenAST) { + usesMasking = true; plannerCtx = pcf.create(); ctx.setSkipTableMasking(true); init(true); //change the location of position alias process here - processPositionAlias(tree); - genResolvedParseTree(tree, plannerCtx); + processPositionAlias(rewrittenAST); + genResolvedParseTree(rewrittenAST, plannerCtx); if (this instanceof CalcitePlanner) { ((CalcitePlanner) this).resetCalciteConfiguration(); } - sinkOp = genOPTree(tree, plannerCtx); + sinkOp = genOPTree(rewrittenAST, plannerCtx); } } // Check query results cache - // In the case that row or column masking/filtering was required, the cache must be checked - // here, after applying the masking/filtering rewrite rules to the AST. - if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) { + // In the case that row or column masking/filtering was required, we do not support caching. 
+ // TODO: Enable caching for queries with masking/filtering + if (isCacheEnabled && needsTransform && !usesMasking && queryTypeCanUseCache()) { lookupInfo = createLookupInfoForQuery(ast); if (checkResultsCache(lookupInfo)) { return; http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/test/queries/clientpositive/masking_13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/masking_13.q b/ql/src/test/queries/clientpositive/masking_13.q new file mode 100644 index 0000000..bb050b5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_13.q @@ -0,0 +1,28 @@ +--! qt:dataset:srcpart +--! qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create table masking_test as select cast(key as int) as key, value from src; + +explain select * from masking_test; +select * from masking_test; + +create table new_masking_test_nx as +select * from masking_test; +select * from new_masking_test_nx; + +create view `masking_test_view` as select key from `masking_test`; + +explain +select key from `masking_test_view`; +select key from `masking_test_view`; + +create table `my_table_masked` (key int); +insert into `my_table_masked` select key from `masking_test_view`; +select * from `my_table_masked`; + +create table new_masking_test_nx_2 as +select * from masking_test_view; + +select * from new_masking_test_nx_2; http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/test/queries/clientpositive/results_cache_with_masking.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q b/ql/src/test/queries/clientpositive/results_cache_with_masking.q index 1bb9c9a..688ea24 100644 --- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q +++ 
b/ql/src/test/queries/clientpositive/results_cache_with_masking.q @@ -11,7 +11,8 @@ explain select key, count(*) from masking_test_n7 group by key; select key, count(*) from masking_test_n7 group by key; --- This time we should use the cache +-- It will not use the cache as it is masked +-- TODO: We should use the cache explain select key, count(*) from masking_test_n7 group by key; select key, count(*) from masking_test_n7 group by key; http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out index 20a2e8d..e0e48b0 100644 --- a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out +++ b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out @@ -92,15 +92,62 @@ POSTHOOK: query: explain select key, count(*) from masking_test_n7 group by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: masking_test_n7 + filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 500 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 250 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: no inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink - Cached Query Result: true PREHOOK: query: select key, count(*) from masking_test_n7 group by key PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/7ead1685/ql/src/test/results/clientpositive/masking_13.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/masking_13.q.out b/ql/src/test/results/clientpositive/masking_13.q.out new file mode 100644 index 0000000..8fa3a41 --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_13.q.out @@ -0,0 +1,206 @@ +PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@masking_test +POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain select * from masking_test +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from masking_test +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from masking_test +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +#### A masked pattern was here #### +POSTHOOK: query: select * from masking_test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create table new_masking_test_nx as +select * from masking_test +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Output: 
database:default +PREHOOK: Output: default@new_masking_test_nx +POSTHOOK: query: create table new_masking_test_nx as +select * from masking_test +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx +POSTHOOK: Lineage: new_masking_test_nx.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: new_masking_test_nx.value EXPRESSION [(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx +#### A masked pattern was here #### +0 0_lav +4 4_lav +8 8_lav +0 0_lav +0 0_lav +2 2_lav +PREHOOK: query: create view `masking_test_view` as select key from `masking_test` +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@masking_test +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test_view +POSTHOOK: query: create view `masking_test_view` as select key from `masking_test` +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@masking_test +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test_view +POSTHOOK: Lineage: masking_test_view.key SIMPLE [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: explain +select key from `masking_test_view` +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key from `masking_test_view` +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + properties: + insideView TRUE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Filter Operator + predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: boolean) + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger((UDFToDouble(key) / 2.0D)) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +POSTHOOK: query: select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +#### A masked pattern was here #### +4 +PREHOOK: query: create table `my_table_masked` (key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: create table `my_table_masked` (key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@my_table_masked +PREHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: default@my_table_masked +POSTHOOK: query: insert into `my_table_masked` select key from `masking_test_view` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: default@my_table_masked 
+POSTHOOK: Lineage: my_table_masked.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from `my_table_masked` +PREHOOK: type: QUERY +PREHOOK: Input: default@my_table_masked +#### A masked pattern was here #### +POSTHOOK: query: select * from `my_table_masked` +POSTHOOK: type: QUERY +POSTHOOK: Input: default@my_table_masked +#### A masked pattern was here #### +4 +PREHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@masking_test +PREHOOK: Input: default@masking_test_view +PREHOOK: Output: database:default +PREHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: query: create table new_masking_test_nx_2 as +select * from masking_test_view +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@masking_test +POSTHOOK: Input: default@masking_test_view +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_masking_test_nx_2 +POSTHOOK: Lineage: new_masking_test_nx_2.key EXPRESSION [(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from new_masking_test_nx_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from new_masking_test_nx_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_masking_test_nx_2 +#### A masked pattern was here #### +4