Repository: hive
Updated Branches:
  refs/heads/master 3fa7f0c6e -> a3eacb9dd


HIVE-20102: Add a couple of additional tests for query parsing (Jesus Camacho 
Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a3eacb9d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a3eacb9d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a3eacb9d

Branch: refs/heads/master
Commit: a3eacb9ddc1039dd0866928d0860f31e38a3d7aa
Parents: 3fa7f0c
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Wed Jul 11 16:38:51 2018 -0700
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Thu Jul 12 12:58:05 2018 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/ParseDriver.java       |  11 +
 .../org/apache/hadoop/hive/ql/parse/QB.java     |   4 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  33 ++-
 ql/src/test/queries/clientpositive/masking_13.q |  28 +++
 .../clientpositive/results_cache_with_masking.q |   3 +-
 .../llap/results_cache_with_masking.q.out       |  51 ++++-
 .../results/clientpositive/masking_13.q.out     | 208 +++++++++++++++++++
 7 files changed, 325 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
index bda3c21..895c2f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
@@ -153,6 +153,17 @@ public class ParseDriver {
     };
 
     @Override
+    public Object dupTree(Object t, Object parent) {
+      // Overridden to copy the token start/stop indexes, which are needed throughout optimization,
+      // e.g., for masking/filtering
+      ASTNode astNode = (ASTNode) t;
+      ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent);
+      astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex());
+      astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex());
+      return astNodeCopy;
+    }
+
+    @Override
     public Object errorNode(TokenStream input, Token start, Token stop, 
RecognitionException e) {
       return new ASTErrorNode(input, start, stop, e);
     };

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
index 64b3541..a2f6fbb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java
@@ -423,6 +423,10 @@ public class QB {
     return viewDesc != null && !viewDesc.isMaterialized();
   }
 
+  public boolean isMultiDestQuery() {
+    return qbp != null && qbp.getClauseNamesForDest() != null && 
qbp.getClauseNamesForDest().size() > 1;
+  }
+
   public HashMap<String, Table> getViewToTabSchema() {
     return viewAliasToViewSchema;
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 12a915c..0ca9b58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -12126,8 +12126,9 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
   }
 
   void analyzeInternal(ASTNode ast, PlannerContextFactory pcf) throws 
SemanticException {
-    // 1. Generate Resolved Parse tree from syntax tree
     LOG.info("Starting Semantic Analysis");
+    // 1. Generate Resolved Parse tree from syntax tree
+    boolean needsTransform = needsTransform();
     //change the location of position alias process here
     processPositionAlias(ast);
     PlannerContext plannerCtx = pcf.create();
@@ -12147,7 +12148,6 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
     // Otherwise we have to wait until after the masking/filtering step.
     boolean isCacheEnabled = isResultsCacheEnabled();
     QueryResultsCache.LookupInfo lookupInfo = null;
-    boolean needsTransform = needsTransform();
     if (isCacheEnabled && !needsTransform && queryTypeCanUseCache()) {
       lookupInfo = createLookupInfoForQuery(ast);
       if (checkResultsCache(lookupInfo)) {
@@ -12155,32 +12155,45 @@ public class SemanticAnalyzer extends 
BaseSemanticAnalyzer {
       }
     }
 
+    ASTNode astForMasking;
+    if (isCBOExecuted() && needsTransform &&
+        (qb.isCTAS() || qb.isView() || qb.isMaterializedView() || 
qb.isMultiDestQuery())) {
+      // If we use CBO and we may apply masking/filtering policies, we create 
a copy of the ast.
+      // The reason is that the generation of the operator tree may modify the 
initial ast,
+      // but if we need to parse for a second time, we would like to parse the 
unmodified ast.
+      astForMasking = (ASTNode) ParseDriver.adaptor.dupTree(ast);
+    } else {
+      astForMasking = ast;
+    }
+
     // 2. Gen OP Tree from resolved Parse Tree
     Operator sinkOp = genOPTree(ast, plannerCtx);
 
+    boolean usesMasking = false;
     if (!unparseTranslator.isEnabled() &&
         (tableMask.isEnabled() && analyzeRewrite == null)) {
       // Here we rewrite the * and also the masking table
-      ASTNode tree = rewriteASTWithMaskAndFilter(tableMask, ast, 
ctx.getTokenRewriteStream(),
+      ASTNode rewrittenAST = rewriteASTWithMaskAndFilter(tableMask, 
astForMasking, ctx.getTokenRewriteStream(),
           ctx, db, tabNameToTabObject, ignoredTokens);
-      if (tree != ast) {
+      if (astForMasking != rewrittenAST) {
+        usesMasking = true;
         plannerCtx = pcf.create();
         ctx.setSkipTableMasking(true);
         init(true);
         //change the location of position alias process here
-        processPositionAlias(tree);
-        genResolvedParseTree(tree, plannerCtx);
+        processPositionAlias(rewrittenAST);
+        genResolvedParseTree(rewrittenAST, plannerCtx);
         if (this instanceof CalcitePlanner) {
           ((CalcitePlanner) this).resetCalciteConfiguration();
         }
-        sinkOp = genOPTree(tree, plannerCtx);
+        sinkOp = genOPTree(rewrittenAST, plannerCtx);
       }
     }
 
     // Check query results cache
-    // In the case that row or column masking/filtering was required, the 
cache must be checked
-    // here, after applying the masking/filtering rewrite rules to the AST.
-    if (isCacheEnabled && needsTransform && queryTypeCanUseCache()) {
+    // In the case that row or column masking/filtering was required, we do 
not support caching.
+    // TODO: Enable caching for queries with masking/filtering
+    if (isCacheEnabled && needsTransform && !usesMasking && 
queryTypeCanUseCache()) {
       lookupInfo = createLookupInfoForQuery(ast);
       if (checkResultsCache(lookupInfo)) {
         return;

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/test/queries/clientpositive/masking_13.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_13.q 
b/ql/src/test/queries/clientpositive/masking_13.q
new file mode 100644
index 0000000..bb050b5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_13.q
@@ -0,0 +1,28 @@
+--! qt:dataset:srcpart
+--! qt:dataset:src
+set hive.mapred.mode=nonstrict;
+set 
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+create table new_masking_test_nx as
+select * from masking_test;
+select * from new_masking_test_nx;
+
+create view `masking_test_view` as select key from `masking_test`;
+
+explain
+select key from `masking_test_view`;
+select key from `masking_test_view`;
+
+create table `my_table_masked` (key int);
+insert into `my_table_masked` select key from `masking_test_view`;
+select * from `my_table_masked`;
+
+create table new_masking_test_nx_2 as
+select * from masking_test_view;
+
+select * from new_masking_test_nx_2;

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/test/queries/clientpositive/results_cache_with_masking.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_with_masking.q 
b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
index d078092..e84098c 100644
--- a/ql/src/test/queries/clientpositive/results_cache_with_masking.q
+++ b/ql/src/test/queries/clientpositive/results_cache_with_masking.q
@@ -12,7 +12,8 @@ explain
 select key, count(*) from masking_test_n7 group by key;
 select key, count(*) from masking_test_n7 group by key;
 
--- This time we should use the cache
+-- It will not use the cache as it is masked
+-- TODO: We should use the cache
 explain
 select key, count(*) from masking_test_n7 group by key;
 select key, count(*) from masking_test_n7 group by key;

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out 
b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
index dc63ed0..de9910f 100644
--- a/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
+++ b/ql/src/test/results/clientpositive/llap/results_cache_with_masking.q.out
@@ -93,15 +93,62 @@ POSTHOOK: query: explain
 select key, count(*) from masking_test_n7 group by key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: masking_test_n7
+                  filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean)
+                  Statistics: Num rows: 500 Data size: 1904 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 952 Basic stats: 
COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count()
+                      keys: key (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 952 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 250 Data size: 952 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 125 Data size: 476 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 125 Data size: 476 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
         ListSink
-      Cached Query Result: true
 
 PREHOOK: query: select key, count(*) from masking_test_n7 group by key
 PREHOOK: type: QUERY

http://git-wip-us.apache.org/repos/asf/hive/blob/a3eacb9d/ql/src/test/results/clientpositive/masking_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_13.q.out 
b/ql/src/test/results/clientpositive/masking_13.q.out
new file mode 100644
index 0000000..ee4f6d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/masking_13.q.out
@@ -0,0 +1,208 @@
+PREHOOK: query: create table masking_test as select cast(key as int) as key, 
value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test as select cast(key as int) as key, 
value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: explain select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            filterExpr: (((key % 2) = 0) and (key < 10)) (type: boolean)
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: key (type: int), reverse(value) (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 83 Data size: 881 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+0      0_lav
+4      4_lav
+8      8_lav
+0      0_lav
+0      0_lav
+2      2_lav
+PREHOOK: query: create table new_masking_test_nx as
+select * from masking_test
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@masking_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_masking_test_nx
+POSTHOOK: query: create table new_masking_test_nx as
+select * from masking_test
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_masking_test_nx
+POSTHOOK: Lineage: new_masking_test_nx.key SIMPLE 
[(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: new_masking_test_nx.value EXPRESSION 
[(masking_test)masking_test.FieldSchema(name:value, type:string, comment:null), 
]
+PREHOOK: query: select * from new_masking_test_nx
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_masking_test_nx
+#### A masked pattern was here ####
+POSTHOOK: query: select * from new_masking_test_nx
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_masking_test_nx
+#### A masked pattern was here ####
+0      0_lav
+4      4_lav
+8      8_lav
+0      0_lav
+0      0_lav
+2      2_lav
+PREHOOK: query: create view `masking_test_view` as select key from 
`masking_test`
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@masking_test
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test_view
+POSTHOOK: query: create view `masking_test_view` as select key from 
`masking_test`
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test_view
+POSTHOOK: Lineage: masking_test_view.key SIMPLE 
[(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: explain
+select key from `masking_test_view`
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select key from `masking_test_view`
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: masking_test
+            filterExpr: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: 
boolean)
+            properties:
+              insideView TRUE
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: (((key % 2) = 0) and (key < 10) and (key > 6)) (type: 
boolean)
+              Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: UDFToInteger((UDFToDouble(key) / 2.0D)) (type: 
int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 27 Data size: 286 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key from `masking_test_view`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+#### A masked pattern was here ####
+POSTHOOK: query: select key from `masking_test_view`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+#### A masked pattern was here ####
+4
+PREHOOK: query: create table `my_table_masked` (key int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@my_table_masked
+POSTHOOK: query: create table `my_table_masked` (key int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@my_table_masked
+PREHOOK: query: insert into `my_table_masked` select key from 
`masking_test_view`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Output: default@my_table_masked
+POSTHOOK: query: insert into `my_table_masked` select key from 
`masking_test_view`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Output: default@my_table_masked
+POSTHOOK: Lineage: my_table_masked.key EXPRESSION 
[(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from `my_table_masked`
+PREHOOK: type: QUERY
+PREHOOK: Input: default@my_table_masked
+#### A masked pattern was here ####
+POSTHOOK: query: select * from `my_table_masked`
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@my_table_masked
+#### A masked pattern was here ####
+4
+PREHOOK: query: create table new_masking_test_nx_2 as
+select * from masking_test_view
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@masking_test_view
+PREHOOK: Output: database:default
+PREHOOK: Output: default@new_masking_test_nx_2
+POSTHOOK: query: create table new_masking_test_nx_2 as
+select * from masking_test_view
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@masking_test_view
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@new_masking_test_nx_2
+POSTHOOK: Lineage: new_masking_test_nx_2.key EXPRESSION 
[(masking_test)masking_test.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: select * from new_masking_test_nx_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@new_masking_test_nx_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from new_masking_test_nx_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@new_masking_test_nx_2
+#### A masked pattern was here ####
+4

Reply via email to