Repository: hive Updated Branches: refs/heads/master 53fc31931 -> 99a043a05
HIVE-12245: Support column comments for an HBase backed table (Chaoyu Tang, reviewed by Jimmy Xiang) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99a043a0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99a043a0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99a043a0 Branch: refs/heads/master Commit: 99a043a05b4d823589e403de9779cf3a4b881ca3 Parents: 53fc319 Author: ctang <ctang...@gmail.com> Authored: Wed Oct 28 22:46:55 2015 -0400 Committer: ctang <ctang...@gmail.com> Committed: Wed Oct 28 22:46:55 2015 -0400 ---------------------------------------------------------------------- .../hive/hbase/HBaseLazyObjectFactory.java | 28 +++++++++++++++ .../apache/hadoop/hive/hbase/HBaseSerDe.java | 5 +-- .../src/test/queries/positive/hbase_queries.q | 4 ++- .../results/positive/external_table_ppd.q.out | 16 ++++----- .../positive/hbase_binary_storage_queries.q.out | 32 ++++++++--------- .../test/results/positive/hbase_queries.q.out | 37 +++++++++++++++----- .../test/results/positive/hbase_timestamp.q.out | 6 ++-- .../positive/hbase_timestamp_format.q.out | 12 +++---- 8 files changed, 93 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java ---------------------------------------------------------------------- diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java index cb9f9d3..841e8ba 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java @@ -19,7 +19,10 @@ package org.apache.hadoop.hive.hbase; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Properties; import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory; import org.apache.hadoop.hive.serde2.SerDeException; @@ -53,4 +56,29 @@ public class HBaseLazyObjectFactory { serdeParams.getColumnNames(), columnObjectInspectors, null, serdeParams.getSeparators()[0], serdeParams, ObjectInspectorOptions.JAVA); } + + public static ObjectInspector createLazyHBaseStructInspector(HBaseSerDeParameters hSerdeParams, + Properties tbl) + throws SerDeException { + List<TypeInfo> columnTypes = hSerdeParams.getColumnTypes(); + ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>( + columnTypes.size()); + for (int i = 0; i < columnTypes.size(); i++) { + if (i == hSerdeParams.getKeyIndex()) { + columnObjectInspectors.add(hSerdeParams.getKeyFactory() + .createKeyObjectInspector(columnTypes.get(i))); + } else { + columnObjectInspectors.add(hSerdeParams.getValueFactories().get(i) + .createValueObjectInspector(columnTypes.get(i))); + } + } + List<String> structFieldComments = tbl.getProperty("columns.comments") == null ? + new ArrayList<String>(Collections.nCopies(columnTypes.size(), "")) + : Arrays.asList(tbl.getProperty("columns.comments").split("\0", columnTypes.size())); + + return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( + hSerdeParams.getColumnNames(), columnObjectInspectors, structFieldComments, + hSerdeParams.getSerdeParams().getSeparators()[0], + hSerdeParams.getSerdeParams(), ObjectInspectorOptions.JAVA); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java ---------------------------------------------------------------------- diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java index 41d6302..466aabe 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java @@ -125,10 +125,7 @@ public class HBaseSerDe extends AbstractSerDe { serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName()); cachedObjectInspector = - HBaseLazyObjectFactory - .createLazyHBaseStructInspector(serdeParams.getSerdeParams(), - serdeParams.getKeyIndex(), serdeParams.getKeyFactory(), - serdeParams.getValueFactories()); + HBaseLazyObjectFactory.createLazyHBaseStructInspector(serdeParams, tbl); cachedHBaseRow = new LazyHBaseRow( (LazySimpleStructObjectInspector) cachedObjectInspector, serdeParams); http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/queries/positive/hbase_queries.q ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/queries/positive/hbase_queries.q b/hbase-handler/src/test/queries/positive/hbase_queries.q index 6ef9325..b445c4b 100644 --- a/hbase-handler/src/test/queries/positive/hbase_queries.q +++ b/hbase-handler/src/test/queries/positive/hbase_queries.q @@ -1,5 +1,5 @@ DROP TABLE hbase_table_1; -CREATE TABLE hbase_table_1(key int, value string) +CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value') STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") TBLPROPERTIES ("hbase.table.name" = "hbase_table_0"); @@ -162,6 +162,8 @@ DESCRIBE EXTENDED hbase_table_1_like; INSERT OVERWRITE TABLE hbase_table_1_like SELECT * FROM hbase_table_1; SELECT COUNT(*) FROM hbase_table_1_like; +SHOW CREATE TABLE hbase_table_1_like; + DROP TABLE hbase_table_1; DROP TABLE hbase_table_1_like; DROP TABLE hbase_table_2; http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/external_table_ppd.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/external_table_ppd.q.out b/hbase-handler/src/test/results/positive/external_table_ppd.q.out index 83eb2f5..fd4b6ac 100644 --- a/hbase-handler/src/test/results/positive/external_table_ppd.q.out +++ b/hbase-handler/src/test/results/positive/external_table_ppd.q.out @@ -40,14 +40,14 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@t_hbase # col_name data_type comment -key string from deserializer -tinyint_col tinyint from deserializer -smallint_col smallint from deserializer -int_col int from deserializer -bigint_col bigint from deserializer -float_col float from deserializer -double_col double from deserializer -boolean_col boolean from deserializer +key string +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +boolean_col boolean # Detailed Table Information Database: default http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out index f212331..24df908 100644 --- a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out @@ -40,14 +40,14 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@t_hbase # col_name data_type comment -key string from deserializer -tinyint_col tinyint from deserializer -smallint_col smallint from deserializer -int_col int from deserializer -bigint_col bigint from deserializer -float_col float from deserializer -double_col double from deserializer -boolean_col boolean from deserializer +key string +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +boolean_col boolean # Detailed Table Information Database: default @@ -215,14 +215,14 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@t_hbase_1 # col_name data_type comment -key string from deserializer -tinyint_col tinyint from deserializer -smallint_col smallint from deserializer -int_col int from deserializer -bigint_col bigint from deserializer -float_col float from deserializer -double_col double from deserializer -boolean_col boolean from deserializer +key string +tinyint_col tinyint +smallint_col smallint +int_col int +bigint_col bigint +float_col float +double_col double +boolean_col boolean # Detailed Table Information Database: default http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_queries.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index ce6e526..d887566 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -2,14 +2,14 @@ PREHOOK: query: DROP TABLE hbase_table_1 PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE hbase_table_1 POSTHOOK: type: DROPTABLE -PREHOOK: query: CREATE TABLE hbase_table_1(key int, value string) +PREHOOK: query: CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value') STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") TBLPROPERTIES ("hbase.table.name" = "hbase_table_0") PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@hbase_table_1 -POSTHOOK: query: CREATE TABLE hbase_table_1(key int, value string) +POSTHOOK: query: CREATE TABLE hbase_table_1(key int comment 'It is a column key', value string comment 'It is the column string value') STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") TBLPROPERTIES ("hbase.table.name" = "hbase_table_0") @@ -22,8 +22,8 @@ PREHOOK: Input: default@hbase_table_1 POSTHOOK: query: DESCRIBE EXTENDED hbase_table_1 POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_table_1 -key int from deserializer -value string from deserializer +key int It is a column key +value string It is the column string value #### A masked pattern was here #### PREHOOK: query: select * from hbase_table_1 @@ -834,9 +834,9 @@ PREHOOK: Input: default@hbase_table_3_like POSTHOOK: query: DESCRIBE EXTENDED hbase_table_3_like POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_table_3_like -key int from deserializer -value string from deserializer -count int from deserializer +key int +value string +count int #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_3_like SELECT * FROM hbase_table_3 @@ -878,8 +878,8 @@ PREHOOK: Input: default@hbase_table_1_like POSTHOOK: query: DESCRIBE EXTENDED hbase_table_1_like POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_table_1_like -key int from deserializer -value string from deserializer +key int It is a column key +value string It is the column string value #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_1_like SELECT * FROM hbase_table_1 @@ -899,6 +899,25 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@hbase_table_1_like #### A masked pattern was here #### 155 +PREHOOK: query: SHOW CREATE TABLE hbase_table_1_like +PREHOOK: type: SHOW_CREATETABLE +PREHOOK: Input: default@hbase_table_1_like +POSTHOOK: query: SHOW CREATE TABLE hbase_table_1_like +POSTHOOK: type: SHOW_CREATETABLE +POSTHOOK: Input: default@hbase_table_1_like +CREATE EXTERNAL TABLE `hbase_table_1_like`( + `key` int COMMENT 'It is a column key', + `value` string COMMENT 'It is the column string value') +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:string', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_0', +#### A masked pattern was here #### PREHOOK: query: DROP TABLE hbase_table_1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@hbase_table_1 http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_timestamp.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_timestamp.q.out b/hbase-handler/src/test/results/positive/hbase_timestamp.q.out index 538e551..6c42fc3 100644 --- a/hbase-handler/src/test/results/positive/hbase_timestamp.q.out +++ b/hbase-handler/src/test/results/positive/hbase_timestamp.q.out @@ -20,9 +20,9 @@ PREHOOK: Input: default@hbase_table POSTHOOK: query: DESC extended hbase_table POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_table -key string from deserializer -value string from deserializer -time timestamp from deserializer +key string +value string +time timestamp #### A masked pattern was here #### PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table SELECT key, value, "2012-02-23 10:14:52" WHERE (key % 17) = 0 http://git-wip-us.apache.org/repos/asf/hive/blob/99a043a0/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out b/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out index 138cfe6..0428e41 100644 --- a/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out +++ b/hbase-handler/src/test/results/positive/hbase_timestamp_format.q.out @@ -18,9 +18,9 @@ PREHOOK: Input: default@hbase_str POSTHOOK: query: describe hbase_str POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_str -rowkey string from deserializer -mytime string from deserializer -mystr string from deserializer +rowkey string +mytime string +mystr string PREHOOK: query: insert overwrite table hbase_str select key, '2001-02-03-04.05.06.123456', value from src limit 3 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -62,9 +62,9 @@ PREHOOK: Input: default@hbase_ts POSTHOOK: query: describe hbase_ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@hbase_ts -rowkey string from deserializer -mytime timestamp from deserializer -mystr string from deserializer +rowkey string +mytime timestamp +mystr string PREHOOK: query: select * from hbase_ts PREHOOK: type: QUERY PREHOOK: Input: default@hbase_ts