Repository: hive Updated Branches: refs/heads/master 17842e3d5 -> de3d86cdd
HIVE-14013: Describe table doesn't show unicode properly (Reviewed by Yongzhi Chen) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de3d86cd Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de3d86cd Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de3d86cd Branch: refs/heads/master Commit: de3d86cdd3db174d6bc5d8c65796a1e981171124 Parents: 17842e3 Author: Aihua Xu <aihu...@apache.org> Authored: Tue Jun 14 16:37:54 2016 -0400 Committer: Aihua Xu <aihu...@apache.org> Committed: Tue Jun 28 13:22:07 2016 -0400 ---------------------------------------------------------------------- common/pom.xml | 5 + .../hive/common/util/HiveStringUtils.java | 23 ++- .../org/apache/hadoop/hive/ql/exec/DDLTask.java | 9 +- .../formatting/MetaDataFormatUtils.java | 22 ++- .../queries/clientpositive/unicode_comments.q | 17 ++ .../clientpositive/unicode_comments.q.out | 166 +++++++++++++++++++ 6 files changed, 235 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/common/pom.xml ---------------------------------------------------------------------- diff --git a/common/pom.xml b/common/pom.xml index b7244aa..a8fdd27 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -61,6 +61,11 @@ <version>${commons-lang.version}</version> </dependency> <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>${commons-lang3.version}</version> + </dependency> + <dependency> <groupId>org.eclipse.jetty.aggregate</groupId> <artifactId>jetty-all</artifactId> <version>${jetty.version}</version> http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/common/src/java/org/apache/hive/common/util/HiveStringUtils.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java 
b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index bba14e2..c2ff635 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -43,11 +43,13 @@ import java.util.regex.Pattern; import com.google.common.collect.Interner; import com.google.common.collect.Interners; +import org.apache.commons.lang3.text.translate.CharSequenceTranslator; +import org.apache.commons.lang3.text.translate.EntityArrays; +import org.apache.commons.lang3.text.translate.LookupTranslator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; import org.apache.hadoop.io.Text; -import org.apache.hadoop.util.StringUtils; /** * HiveStringUtils @@ -66,6 +68,14 @@ public class HiveStringUtils { private static final DecimalFormat decimalFormat; + private static final CharSequenceTranslator ESCAPE_JAVA = + new LookupTranslator( + new String[][] { + {"\"", "\\\""}, + {"\\", "\\\\"}, + }).with( + new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); + /** * Maintain a String pool to reduce memory. */ @@ -603,6 +613,17 @@ public class HiveStringUtils { } /** + * Escape non-unicode characters. StringEscapeUtil.escapeJava() will escape + * unicode characters as well but in some cases it's not desired. 
+ * + * @param str Original string + * @return Escaped string + */ + public static String escapeJava(String str) { + return ESCAPE_JAVA.translate(str); +} + + /** * Unescape commas in the string using the default escape char * @param str a string * @return an unescaped string http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 493e3a0..7099b2a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -31,6 +31,7 @@ import java.io.Serializable; import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.sql.SQLException; import java.util.AbstractList; import java.util.ArrayList; @@ -2076,7 +2077,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { if (tbl.isView()) { String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + tbl.getViewExpandedText(); - outStream.writeBytes(createTab_stmt.toString()); + outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8)); return 0; } @@ -2225,7 +2226,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable { } createTab_stmt.add(TBL_PROPERTIES, tbl_properties); - outStream.writeBytes(createTab_stmt.render()); + outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { LOG.info("show create table: " + stringifyException(e)); return 1; @@ -2288,14 +2289,14 @@ public class DDLTask extends Task<DDLWork> implements Serializable { try { if (showIndexes.isFormatted()) { // column headers - outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader()); + 
outStream.write(MetaDataFormatUtils.getIndexColumnsHeader().getBytes(StandardCharsets.UTF_8)); outStream.write(terminator); outStream.write(terminator); } for (Index index : indexes) { - outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index)); + outStream.write(MetaDataFormatUtils.getAllColumnsInformation(index).getBytes(StandardCharsets.UTF_8)); } } catch (FileNotFoundException e) { LOG.info("show indexes: " + stringifyException(e)); http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java index a2ccd56..03803bb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.metadata.formatting; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.DescTableDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hive.common.util.HiveStringUtils; import java.math.BigInteger; import java.util.ArrayList; @@ -438,7 +440,7 @@ public final class MetaDataFormatUtils { if (tbl.getParameters().size() > 0) { tableInfo.append("Table Parameters:").append(LINE_DELIM); - displayAllParameters(tbl.getParameters(), tableInfo); + displayAllParameters(tbl.getParameters(), tableInfo, false); } } 
@@ -457,12 +459,28 @@ public final class MetaDataFormatUtils { } } + /** + * Display key, value pairs of the parameters. The characters will be escaped + * including unicode. + */ private static void displayAllParameters(Map<String, String> params, StringBuilder tableInfo) { + displayAllParameters(params, tableInfo, true); + } + + /** + * Display key, value pairs of the parameters. The characters will be escaped + * including unicode if escapeUnicode is true; otherwise the characters other + * than unicode will be escaped. + */ + + private static void displayAllParameters(Map<String, String> params, StringBuilder tableInfo, boolean escapeUnicode) { List<String> keys = new ArrayList<String>(params.keySet()); Collections.sort(keys); for (String key : keys) { tableInfo.append(FIELD_DELIM); // Ensures all params are indented. - formatOutput(key, StringEscapeUtils.escapeJava(params.get(key)), tableInfo); + formatOutput(key, + escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) : HiveStringUtils.escapeJava(params.get(key)), + tableInfo); } } http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/test/queries/clientpositive/unicode_comments.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/unicode_comments.q b/ql/src/test/queries/clientpositive/unicode_comments.q new file mode 100644 index 0000000..4d958e4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/unicode_comments.q @@ -0,0 +1,17 @@ +create database unicode_comments_db comment '数据库'; +use unicode_comments_db; +create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割'); +create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1; +create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引'; + +describe database extended unicode_comments_db; +show 
create table unicode_comments_tbl1; +describe formatted unicode_comments_tbl1; +show create table unicode_comments_view1; +describe formatted unicode_comments_view1; +show formatted index on unicode_comments_tbl1; + +drop database unicode_comments_db cascade; http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/test/results/clientpositive/unicode_comments.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/unicode_comments.q.out b/ql/src/test/results/clientpositive/unicode_comments.q.out new file mode 100644 index 0000000..4872cd3 --- /dev/null +++ b/ql/src/test/results/clientpositive/unicode_comments.q.out @@ -0,0 +1,166 @@ +PREHOOK: query: create database unicode_comments_db comment '数据库' +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:unicode_comments_db +POSTHOOK: query: create database unicode_comments_db comment '数据库' +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:unicode_comments_db +PREHOOK: query: use unicode_comments_db +PREHOOK: type: SWITCHDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: use unicode_comments_db +POSTHOOK: type: SWITCHDATABASE +POSTHOOK: Input: database:unicode_comments_db +PREHOOK: query: create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create table unicode_comments_tbl1 +(col1 string comment '第一列') comment '表格' +partitioned by (p1 string comment '分割') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +PREHOOK: type: CREATEVIEW +PREHOOK: Input: 
unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') comment '视图' +as select col1 from unicode_comments_tbl1 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1 +PREHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +PREHOOK: type: CREATEINDEX +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with deferred rebuild comment '索引' +POSTHOOK: type: CREATEINDEX +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: query: describe database extended unicode_comments_db +PREHOOK: type: DESCDATABASE +PREHOOK: Input: database:unicode_comments_db +POSTHOOK: query: describe database extended unicode_comments_db +POSTHOOK: type: DESCDATABASE +POSTHOOK: Input: database:unicode_comments_db +unicode_comments_db 数据库 location/in/test hive_test_user USER +PREHOOK: query: show create table unicode_comments_tbl1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: show create table unicode_comments_tbl1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +CREATE TABLE `unicode_comments_tbl1`( + `col1` string COMMENT '第一列') +COMMENT '表格' +PARTITIONED BY ( + `p1` string COMMENT '分割') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +STORED AS INPUTFORMAT + 'org.apache.hadoop.mapred.TextInputFormat' +OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' 
+LOCATION +#### A masked pattern was here #### +TBLPROPERTIES ( +#### A masked pattern was here #### +PREHOOK: query: describe formatted unicode_comments_tbl1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: query: describe formatted unicode_comments_tbl1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1 +# col_name data_type comment + +col1 string 第一列 + +# Partition Information +# col_name data_type comment + +p1 string 分割 + +# Detailed Table Information +Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + comment 表格 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: show create table unicode_comments_view1 +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: show create table unicode_comments_view1 +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +CREATE VIEW `unicode_comments_view1` AS SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: describe formatted unicode_comments_view1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: describe formatted unicode_comments_view1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: unicode_comments_db@unicode_comments_view1 +# col_name data_type comment + +col1 string 第一列 + +# Detailed Table Information 
+Database: unicode_comments_db +#### A masked pattern was here #### +Retention: 0 +Table Type: VIRTUAL_VIEW +Table Parameters: + comment 视图 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: null +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] + +# View Information +View Original Text: select col1 from unicode_comments_tbl1 +View Expanded Text: SELECT `col1` AS `col1` FROM (select `unicode_comments_tbl1`.`col1` from `unicode_comments_db`.`unicode_comments_tbl1`) `unicode_comments_db.unicode_comments_view1` +PREHOOK: query: show formatted index on unicode_comments_tbl1 +PREHOOK: type: SHOWINDEXES +POSTHOOK: query: show formatted index on unicode_comments_tbl1 +POSTHOOK: type: SHOWINDEXES +idx_name tab_name col_names idx_tab_name idx_type comment + + +index2 unicode_comments_tbl1 col1 unicode_comments_db__unicode_comments_tbl1_index2__ compact 索引 +PREHOOK: query: drop database unicode_comments_db cascade +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:unicode_comments_db +PREHOOK: Output: database:unicode_comments_db +PREHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +PREHOOK: Output: unicode_comments_db@unicode_comments_view1 +POSTHOOK: query: drop database unicode_comments_db cascade +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:unicode_comments_db +POSTHOOK: Output: database:unicode_comments_db +POSTHOOK: Output: unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__ +POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1 +POSTHOOK: Output: unicode_comments_db@unicode_comments_view1