HIVE-12489 : ANALYZE TABLE ... COMPUTE STATISTICS FOR COLUMNS fails for a partition whose value contains special characters such as single or double quotes (Thomas Friedrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cf6fbbd2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cf6fbbd2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cf6fbbd2 Branch: refs/heads/master-fixed Commit: cf6fbbd2da8f3eebf1054c3da1bb76b6cb540bd1 Parents: f15d4e1 Author: Thomas Friedrich <tfried...@yahoo.com> Authored: Fri Nov 20 13:55:00 2015 -0800 Committer: Owen O'Malley <omal...@apache.org> Committed: Tue Nov 24 12:10:09 2015 -0800 ---------------------------------------------------------------------- .../ql/parse/ColumnStatsSemanticAnalyzer.java | 6 +-- .../queries/clientpositive/analyze_tbl_part.q | 12 +++++ .../clientpositive/analyze_tbl_part.q.out | 52 ++++++++++++++++++++ 3 files changed, 67 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/cf6fbbd2/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java index 543bc0f..832a5bc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java @@ -199,7 +199,7 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { if (partColType.equals(serdeConstants.STRING_TYPE_NAME) || partColType.contains(serdeConstants.VARCHAR_TYPE_NAME) || partColType.contains(serdeConstants.CHAR_TYPE_NAME)) { - returnVal = "'" + partVal + "'"; + returnVal = "'" + escapeSQLString(partVal) + "'"; } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) { returnVal = partVal+"Y"; } else if 
(partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) { @@ -212,10 +212,10 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer { returnVal = partVal + "BD"; } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME) || partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { - returnVal = partColType + " '" + partVal + "'"; + returnVal = partColType + " '" + escapeSQLString(partVal) + "'"; } else { //for other usually not used types, just quote the value - returnVal = "'" + partVal + "'"; + returnVal = "'" + escapeSQLString(partVal) + "'"; } return returnVal; http://git-wip-us.apache.org/repos/asf/hive/blob/cf6fbbd2/ql/src/test/queries/clientpositive/analyze_tbl_part.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/analyze_tbl_part.q b/ql/src/test/queries/clientpositive/analyze_tbl_part.q index c9e45b6..ecf1389 100644 --- a/ql/src/test/queries/clientpositive/analyze_tbl_part.q +++ b/ql/src/test/queries/clientpositive/analyze_tbl_part.q @@ -15,3 +15,15 @@ ANALYZE TABLE src_stat_part partition (partitionId) COMPUTE STATISTICS for colum describe formatted src_stat_part.key PARTITION(partitionId=1); describe formatted src_stat_part.value PARTITION(partitionId=2); + +create table src_stat_string_part(key string, value string) partitioned by (partitionName string); + +insert overwrite table src_stat_string_part partition (partitionName="p'1") +select * from src1; + +insert overwrite table src_stat_string_part partition (partitionName="p\"1") +select * from src1; + +ANALYZE TABLE src_stat_string_part partition (partitionName="p'1") COMPUTE STATISTICS for columns key, value; + +ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/cf6fbbd2/ql/src/test/results/clientpositive/analyze_tbl_part.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out index 40b926c..464bdf7 100644 --- a/ql/src/test/results/clientpositive/analyze_tbl_part.q.out +++ b/ql/src/test/results/clientpositive/analyze_tbl_part.q.out @@ -81,3 +81,55 @@ POSTHOOK: Input: default@src_stat_part # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment value string 0 14 4.92 7 from deserializer +PREHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@src_stat_string_part +POSTHOOK: query: create table src_stat_string_part(key string, value string) partitioned by (partitionName string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@src_stat_string_part +PREHOOK: query: insert overwrite table src_stat_string_part partition (partitionName="p'1") +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@src_stat_string_part@partitionname=p%271 +POSTHOOK: query: insert overwrite table src_stat_string_part partition (partitionName="p'1") +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_string_part@partitionname=p%271 +POSTHOOK: Lineage: src_stat_string_part PARTITION(partitionname=p'1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_string_part PARTITION(partitionname=p'1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table src_stat_string_part partition (partitionName="p\"1") +select * from src1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: 
default@src_stat_string_part@partitionname=p%221 +POSTHOOK: query: insert overwrite table src_stat_string_part partition (partitionName="p\"1") +select * from src1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@src_stat_string_part@partitionname=p%221 +POSTHOOK: Lineage: src_stat_string_part PARTITION(partitionname=p"1).key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_stat_string_part PARTITION(partitionname=p"1).value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p'1") COMPUTE STATISTICS for columns key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_string_part +PREHOOK: Input: default@src_stat_string_part@partitionname=p%271 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p'1") COMPUTE STATISTICS for columns key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_string_part +POSTHOOK: Input: default@src_stat_string_part@partitionname=p%271 +#### A masked pattern was here #### +PREHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src_stat_string_part +PREHOOK: Input: default@src_stat_string_part@partitionname=p%221 +#### A masked pattern was here #### +POSTHOOK: query: ANALYZE TABLE src_stat_string_part partition (partitionName="p\"1") COMPUTE STATISTICS for columns key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_stat_string_part +POSTHOOK: Input: default@src_stat_string_part@partitionname=p%221 +#### A masked pattern was here ####