This is an automated email from the ASF dual-hosted git repository. kgyrtkirk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
commit 2f8649728df309b1dac5ad0dea22c6d38486356a Author: Zoltan Matyus <[email protected]> AuthorDate: Tue Oct 1 11:21:54 2019 +0000 HIVE-22208: Column name with reserved keyword is unescaped when query including join on table with mask column is re-written (Zoltan Matyus via Zoltan Haindrich) Signed-off-by: Zoltan Haindrich <[email protected]> --- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 18 ++ .../test/queries/clientpositive/masking_reserved.q | 12 ++ .../results/clientpositive/masking_reserved.q.out | 202 +++++++++++++++++++++ 3 files changed, 232 insertions(+) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8cd4ba0..d4db6ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12191,6 +12191,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } // 2. rewrite the AST, replace TABREF with masking/filtering if (tableMask.needsRewrite()) { + quoteIdentifierTokens(tokenRewriteStream); tableMask.applyTranslations(tokenRewriteStream); String rewrittenQuery = tokenRewriteStream.toString( ast.getTokenStartIndex(), ast.getTokenStopIndex()); @@ -14948,6 +14949,23 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { return sb.toString(); } + private void quoteIdentifierTokens(TokenRewriteStream tokenRewriteStream) { + if (conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT).equals("none")) { + return; + } + + for (int idx = tokenRewriteStream.MIN_TOKEN_INDEX; idx <= tokenRewriteStream.size()-1; idx++) { + Token curTok = tokenRewriteStream.get(idx); + if (curTok.getType() == HiveLexer.Identifier) { + // The Tokens have no distinction between Identifiers and QuotedIdentifiers. + // Ugly solution is just to surround all identifiers with quotes. + // Re-escape any backtick (`) characters in the identifier. + String escapedTokenText = curTok.getText().replaceAll("`", "``"); + tokenRewriteStream.replace(curTok, "`" + escapedTokenText + "`"); + } + } + } + /** * Generate the query string for this query (with fully resolved table references). * @return The query string with resolved references. NULL if an error occurred. diff --git a/ql/src/test/queries/clientpositive/masking_reserved.q b/ql/src/test/queries/clientpositive/masking_reserved.q new file mode 100644 index 0000000..7fe94fa --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_reserved.q @@ -0,0 +1,12 @@ +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +create table keyword_test_off (id int, `etad` string, key int); +create table keyword_test_on (id int, `date` string, key int); +create table masking_test_n_masking_reserved (id int, value string, key int); + +explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id; +select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id; + +explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id; +select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id; diff --git a/ql/src/test/results/clientpositive/masking_reserved.q.out b/ql/src/test/results/clientpositive/masking_reserved.q.out new file mode 100644 index 0000000..5f12914 --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_reserved.q.out @@ -0,0 +1,202 @@ +PREHOOK: query: create table keyword_test_off (id int, `etad` string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@keyword_test_off +POSTHOOK: query: create table keyword_test_off (id int, `etad` string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@keyword_test_off +PREHOOK: query: create table keyword_test_on (id int, `date` string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@keyword_test_on +POSTHOOK: query: create table keyword_test_on (id int, `date` string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@keyword_test_on +PREHOOK: query: create table masking_test_n_masking_reserved (id int, value string, key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test_n_masking_reserved +POSTHOOK: query: create table masking_test_n_masking_reserved (id int, value string, key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test_n_masking_reserved +PREHOOK: query: explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_off +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: explain select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_off +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), etad (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: masking_test_n_masking_reserved + filterExpr: ((key < 10) and ((key % 2) = 0) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and ((key % 2) = 0) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_off +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: select a.`etad`, b.value from keyword_test_off a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_off +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +PREHOOK: query: explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_on +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: explain select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_on +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: a + filterExpr: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: id is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), date (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: masking_test_n_masking_reserved + filterExpr: ((key < 10) and ((key % 2) = 0) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 10) and ((key % 2) = 0) and id is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +PREHOOK: type: QUERY +PREHOOK: Input: default@keyword_test_on +PREHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here #### +POSTHOOK: query: select a.`date`, b.value from keyword_test_on a join masking_test_n_masking_reserved b on b.id = a.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@keyword_test_on +POSTHOOK: Input: default@masking_test_n_masking_reserved +#### A masked pattern was here ####
