Repository: hive
Updated Branches:
  refs/heads/master 17842e3d5 -> de3d86cdd


HIVE-14013: Describe table doesn't show unicode properly (Reviewed by Yongzhi 
Chen)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de3d86cd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de3d86cd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de3d86cd

Branch: refs/heads/master
Commit: de3d86cdd3db174d6bc5d8c65796a1e981171124
Parents: 17842e3
Author: Aihua Xu <aihu...@apache.org>
Authored: Tue Jun 14 16:37:54 2016 -0400
Committer: Aihua Xu <aihu...@apache.org>
Committed: Tue Jun 28 13:22:07 2016 -0400

----------------------------------------------------------------------
 common/pom.xml                                  |   5 +
 .../hive/common/util/HiveStringUtils.java       |  23 ++-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   9 +-
 .../formatting/MetaDataFormatUtils.java         |  22 ++-
 .../queries/clientpositive/unicode_comments.q   |  17 ++
 .../clientpositive/unicode_comments.q.out       | 166 +++++++++++++++++++
 6 files changed, 235 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/common/pom.xml
----------------------------------------------------------------------
diff --git a/common/pom.xml b/common/pom.xml
index b7244aa..a8fdd27 100644
--- a/common/pom.xml
+++ b/common/pom.xml
@@ -61,6 +61,11 @@
       <version>${commons-lang.version}</version>
     </dependency>
     <dependency>
+       <groupId>org.apache.commons</groupId>
+       <artifactId>commons-lang3</artifactId>
+       <version>${commons-lang3.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.eclipse.jetty.aggregate</groupId>
       <artifactId>jetty-all</artifactId>
       <version>${jetty.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java 
b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index bba14e2..c2ff635 100644
--- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -43,11 +43,13 @@ import java.util.regex.Pattern;
 import com.google.common.collect.Interner;
 import com.google.common.collect.Interners;
 
+import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
+import org.apache.commons.lang3.text.translate.EntityArrays;
+import org.apache.commons.lang3.text.translate.LookupTranslator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * HiveStringUtils
@@ -66,6 +68,14 @@ public class HiveStringUtils {
 
   private static final DecimalFormat decimalFormat;
 
+  private static final CharSequenceTranslator ESCAPE_JAVA =
+      new LookupTranslator(
+        new String[][] {
+          {"\"", "\\\""},
+          {"\\", "\\\\"},
+      }).with(
+        new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE()));
+
   /**
    * Maintain a String pool to reduce memory.
    */
@@ -603,6 +613,17 @@ public class HiveStringUtils {
   }
 
   /**
+   * Escape double quotes, backslashes and Java control characters but leave
+   * unicode characters as is, unlike StringEscapeUtils.escapeJava().
+   *
+   * @param str Original string
+   * @return Escaped string
+   */
+  public static String escapeJava(String str) {
+    return ESCAPE_JAVA.translate(str);
+}
+
+  /**
    * Unescape commas in the string using the default escape char
    * @param str a string
    * @return an unescaped string

http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 493e3a0..7099b2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -31,6 +31,7 @@ import java.io.Serializable;
 import java.io.Writer;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.charset.StandardCharsets;
 import java.sql.SQLException;
 import java.util.AbstractList;
 import java.util.ArrayList;
@@ -2076,7 +2077,7 @@ public class DDLTask extends Task<DDLWork> implements 
Serializable {
 
       if (tbl.isView()) {
         String createTab_stmt = "CREATE VIEW `" + tableName + "` AS " + 
tbl.getViewExpandedText();
-        outStream.writeBytes(createTab_stmt.toString());
+        outStream.write(createTab_stmt.getBytes(StandardCharsets.UTF_8));
         return 0;
       }
 
@@ -2225,7 +2226,7 @@ public class DDLTask extends Task<DDLWork> implements 
Serializable {
       }
       createTab_stmt.add(TBL_PROPERTIES, tbl_properties);
 
-      outStream.writeBytes(createTab_stmt.render());
+      
outStream.write(createTab_stmt.render().getBytes(StandardCharsets.UTF_8));
     } catch (IOException e) {
       LOG.info("show create table: " + stringifyException(e));
       return 1;
@@ -2288,14 +2289,14 @@ public class DDLTask extends Task<DDLWork> implements 
Serializable {
     try {
       if (showIndexes.isFormatted()) {
         // column headers
-        outStream.writeBytes(MetaDataFormatUtils.getIndexColumnsHeader());
+        
outStream.write(MetaDataFormatUtils.getIndexColumnsHeader().getBytes(StandardCharsets.UTF_8));
         outStream.write(terminator);
         outStream.write(terminator);
       }
 
       for (Index index : indexes)
       {
-        
outStream.writeBytes(MetaDataFormatUtils.getAllColumnsInformation(index));
+        
outStream.write(MetaDataFormatUtils.getAllColumnsInformation(index).getBytes(StandardCharsets.UTF_8));
       }
     } catch (FileNotFoundException e) {
       LOG.info("show indexes: " + stringifyException(e));

http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
index a2ccd56..03803bb 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/MetaDataFormatUtils.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.metadata.formatting;
 
 import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -47,6 +48,7 @@ import org.apache.hadoop.hive.ql.plan.DescTableDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ShowIndexesDesc;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hive.common.util.HiveStringUtils;
 
 import java.math.BigInteger;
 import java.util.ArrayList;
@@ -438,7 +440,7 @@ public final class MetaDataFormatUtils {
 
     if (tbl.getParameters().size() > 0) {
       tableInfo.append("Table Parameters:").append(LINE_DELIM);
-      displayAllParameters(tbl.getParameters(), tableInfo);
+      displayAllParameters(tbl.getParameters(), tableInfo, false);
     }
   }
 
@@ -457,12 +459,28 @@ public final class MetaDataFormatUtils {
     }
   }
 
+  /**
+   * Display key, value pairs of the parameters, sorted by key. The values are
+   * escaped, including unicode characters.
+   */
   private static void displayAllParameters(Map<String, String> params, 
StringBuilder tableInfo) {
+    displayAllParameters(params, tableInfo, true);
+  }
+
+  /**
+   * Display key, value pairs of the parameters, sorted by key. Values are escaped
+   * including unicode if escapeUnicode is true; otherwise unicode is left as is
+   * and only the other characters are escaped (HiveStringUtils.escapeJava()).
+   */
+
+  private static void displayAllParameters(Map<String, String> params, 
StringBuilder tableInfo, boolean escapeUnicode) {
     List<String> keys = new ArrayList<String>(params.keySet());
     Collections.sort(keys);
     for (String key : keys) {
       tableInfo.append(FIELD_DELIM); // Ensures all params are indented.
-      formatOutput(key, StringEscapeUtils.escapeJava(params.get(key)), 
tableInfo);
+      formatOutput(key,
+          escapeUnicode ? StringEscapeUtils.escapeJava(params.get(key)) : 
HiveStringUtils.escapeJava(params.get(key)),
+          tableInfo);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/test/queries/clientpositive/unicode_comments.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/unicode_comments.q 
b/ql/src/test/queries/clientpositive/unicode_comments.q
new file mode 100644
index 0000000..4d958e4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/unicode_comments.q
@@ -0,0 +1,17 @@
+create database unicode_comments_db comment '数据库';
+use unicode_comments_db;
+create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割');
+create view unicode_comments_view1 (col1 comment '第一列') comment '视图'
+as select col1 from unicode_comments_tbl1;
+create index index2 on table unicode_comments_tbl1(col1) as 'COMPACT' with 
deferred rebuild comment '索引';
+
+describe database extended unicode_comments_db;
+show create table unicode_comments_tbl1;
+describe formatted unicode_comments_tbl1;
+show create table unicode_comments_view1;
+describe formatted unicode_comments_view1;
+show formatted index on unicode_comments_tbl1;
+
+drop database unicode_comments_db cascade;

http://git-wip-us.apache.org/repos/asf/hive/blob/de3d86cd/ql/src/test/results/clientpositive/unicode_comments.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/unicode_comments.q.out 
b/ql/src/test/results/clientpositive/unicode_comments.q.out
new file mode 100644
index 0000000..4872cd3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/unicode_comments.q.out
@@ -0,0 +1,166 @@
+PREHOOK: query: create database unicode_comments_db comment '数据库'
+PREHOOK: type: CREATEDATABASE
+PREHOOK: Output: database:unicode_comments_db
+POSTHOOK: query: create database unicode_comments_db comment '数据库'
+POSTHOOK: type: CREATEDATABASE
+POSTHOOK: Output: database:unicode_comments_db
+PREHOOK: query: use unicode_comments_db
+PREHOOK: type: SWITCHDATABASE
+PREHOOK: Input: database:unicode_comments_db
+POSTHOOK: query: use unicode_comments_db
+POSTHOOK: type: SWITCHDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+PREHOOK: query: create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: create table unicode_comments_tbl1
+(col1 string comment '第一列') comment '表格'
+partitioned by (p1 string comment '分割')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') 
comment '视图'
+as select col1 from unicode_comments_tbl1
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: create view unicode_comments_view1 (col1 comment '第一列') 
comment '视图'
+as select col1 from unicode_comments_tbl1
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: unicode_comments_db@unicode_comments_view1
+PREHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 
'COMPACT' with deferred rebuild comment '索引'
+PREHOOK: type: CREATEINDEX
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: create index index2 on table unicode_comments_tbl1(col1) as 
'COMPACT' with deferred rebuild comment '索引'
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: 
unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+PREHOOK: query: describe database extended unicode_comments_db
+PREHOOK: type: DESCDATABASE
+PREHOOK: Input: database:unicode_comments_db
+POSTHOOK: query: describe database extended unicode_comments_db
+POSTHOOK: type: DESCDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+unicode_comments_db    数据库       location/in/test        hive_test_user  
USER    
+PREHOOK: query: show create table unicode_comments_tbl1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: show create table unicode_comments_tbl1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+CREATE TABLE `unicode_comments_tbl1`(
+  `col1` string COMMENT '第一列')
+COMMENT '表格'
+PARTITIONED BY ( 
+  `p1` string COMMENT '分割')
+ROW FORMAT SERDE 
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
+STORED AS INPUTFORMAT 
+  'org.apache.hadoop.mapred.TextInputFormat' 
+OUTPUTFORMAT 
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+#### A masked pattern was here ####
+TBLPROPERTIES (
+#### A masked pattern was here ####
+PREHOOK: query: describe formatted unicode_comments_tbl1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: query: describe formatted unicode_comments_tbl1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_tbl1
+# col_name             data_type               comment             
+                
+col1                   string                  第一列                 
+                
+# Partition Information                 
+# col_name             data_type               comment             
+                
+p1                     string                  分割                  
+                
+# Detailed Table Information            
+Database:              unicode_comments_db      
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       comment                 表格                  
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe      
 
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: show create table unicode_comments_view1
+PREHOOK: type: SHOW_CREATETABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: show create table unicode_comments_view1
+POSTHOOK: type: SHOW_CREATETABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_view1
+CREATE VIEW `unicode_comments_view1` AS SELECT `col1` AS `col1` FROM (select 
`unicode_comments_tbl1`.`col1` from 
`unicode_comments_db`.`unicode_comments_tbl1`) 
`unicode_comments_db.unicode_comments_view1`
+PREHOOK: query: describe formatted unicode_comments_view1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: describe formatted unicode_comments_view1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: unicode_comments_db@unicode_comments_view1
+# col_name             data_type               comment             
+                
+col1                   string                  第一列                 
+                
+# Detailed Table Information            
+Database:              unicode_comments_db      
+#### A masked pattern was here ####
+Retention:             0                        
+Table Type:            VIRTUAL_VIEW             
+Table Parameters:               
+       comment                 视图                  
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         null                     
+InputFormat:           org.apache.hadoop.mapred.TextInputFormat         
+OutputFormat:          
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat       
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+                
+# View Information              
+View Original Text:    select col1 from unicode_comments_tbl1   
+View Expanded Text:    SELECT `col1` AS `col1` FROM (select 
`unicode_comments_tbl1`.`col1` from 
`unicode_comments_db`.`unicode_comments_tbl1`) 
`unicode_comments_db.unicode_comments_view1`     
+PREHOOK: query: show formatted index on unicode_comments_tbl1
+PREHOOK: type: SHOWINDEXES
+POSTHOOK: query: show formatted index on unicode_comments_tbl1
+POSTHOOK: type: SHOWINDEXES
+idx_name               tab_name                col_names               
idx_tab_name            idx_type                comment             
+                                        
+                                        
+index2                 unicode_comments_tbl1   col1                    
unicode_comments_db__unicode_comments_tbl1_index2__     compact                 
索引                  
+PREHOOK: query: drop database unicode_comments_db cascade
+PREHOOK: type: DROPDATABASE
+PREHOOK: Input: database:unicode_comments_db
+PREHOOK: Output: database:unicode_comments_db
+PREHOOK: Output: 
unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+PREHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+PREHOOK: Output: unicode_comments_db@unicode_comments_view1
+POSTHOOK: query: drop database unicode_comments_db cascade
+POSTHOOK: type: DROPDATABASE
+POSTHOOK: Input: database:unicode_comments_db
+POSTHOOK: Output: database:unicode_comments_db
+POSTHOOK: Output: 
unicode_comments_db@unicode_comments_db__unicode_comments_tbl1_index2__
+POSTHOOK: Output: unicode_comments_db@unicode_comments_tbl1
+POSTHOOK: Output: unicode_comments_db@unicode_comments_view1

Reply via email to