HIVE-14743: ArrayIndexOutOfBoundsException - HBASE-backed views' query with JOINs (Yongzhi Chen, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7f8263e2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7f8263e2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7f8263e2 Branch: refs/heads/hive-14535 Commit: 7f8263e280cf1ff16340f795d19009c38891bcc7 Parents: 0e91e28 Author: Yongzhi Chen <ych...@apache.org> Authored: Tue Sep 13 13:05:13 2016 -0400 Committer: Yongzhi Chen <ych...@apache.org> Committed: Thu Sep 15 10:00:47 2016 -0400 ---------------------------------------------------------------------- .../hive/hbase/HiveHBaseTableInputFormat.java | 5 +- .../src/test/queries/positive/hbase_viewjoins.q | 56 ++++++ .../test/results/positive/hbase_viewjoins.q.out | 172 +++++++++++++++++++ 3 files changed, 232 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/7f8263e2/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java ---------------------------------------------------------------------- diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java index d9db624..1ef4545 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HiveHBaseTableInputFormat.java @@ -58,6 +58,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; @@ -221,7 +223,8 @@ public class HiveHBaseTableInputFormat extends TableInputFormatBase SerializationUtilities.deserializeExpression(filterExprSerialized); String keyColName = jobConf.get(serdeConstants.LIST_COLUMNS).split(",")[iKey]; - String colType = jobConf.get(serdeConstants.LIST_COLUMN_TYPES).split(",")[iKey]; + ArrayList<TypeInfo> cols = TypeInfoUtils.getTypeInfosFromTypeString(jobConf.get(serdeConstants.LIST_COLUMN_TYPES)); + String colType = cols.get(iKey).getTypeName(); boolean isKeyComparable = isKeyBinary || colType.equalsIgnoreCase("string"); String tsColName = null; http://git-wip-us.apache.org/repos/asf/hive/blob/7f8263e2/hbase-handler/src/test/queries/positive/hbase_viewjoins.q ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/queries/positive/hbase_viewjoins.q b/hbase-handler/src/test/queries/positive/hbase_viewjoins.q new file mode 100644 index 0000000..5c98903 --- /dev/null +++ b/hbase-handler/src/test/queries/positive/hbase_viewjoins.q @@ -0,0 +1,56 @@ +DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_2; +DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_1; +DROP TABLE IF EXISTS HBASE_TABLE_TEST_2; +DROP TABLE IF EXISTS HBASE_TABLE_TEST_1; +CREATE TABLE HBASE_TABLE_TEST_1( + cvalue string , + pk string, + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_1', + 'serialization.null.format'='' ); + +CREATE VIEW VIEW_HBASE_TABLE_TEST_1 AS SELECT hbase_table_test_1.cvalue,hbase_table_test_1.pk,hbase_table_test_1.ccount FROM hbase_table_test_1 WHERE hbase_table_test_1.ccount IS NOT NULL; + +CREATE TABLE HBASE_TABLE_TEST_2( + cvalue string , + pk string , + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_2', + 'serialization.null.format'=''); + +CREATE VIEW VIEW_HBASE_TABLE_TEST_2 AS SELECT hbase_table_test_2.cvalue,hbase_table_test_2.pk,hbase_table_test_2.ccount +FROM hbase_table_test_2 WHERE hbase_table_test_2.pk >='3-0000h-0' AND hbase_table_test_2.pk <= '3-0000h-g' AND +hbase_table_test_2.ccount IS NOT NULL; + +set hive.auto.convert.join=false; + +SELECT p.cvalue cvalue +FROM `VIEW_HBASE_TABLE_TEST_1` `p` +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_2` `A1` +ON `p`.cvalue = `A1`.cvalue +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_1` `A2` +ON `p`.cvalue = `A2`.cvalue; +DROP VIEW VIEW_HBASE_TABLE_TEST_2; +DROP VIEW VIEW_HBASE_TABLE_TEST_1; +DROP TABLE HBASE_TABLE_TEST_2; +DROP TABLE HBASE_TABLE_TEST_1; http://git-wip-us.apache.org/repos/asf/hive/blob/7f8263e2/hbase-handler/src/test/results/positive/hbase_viewjoins.q.out ---------------------------------------------------------------------- diff --git a/hbase-handler/src/test/results/positive/hbase_viewjoins.q.out b/hbase-handler/src/test/results/positive/hbase_viewjoins.q.out new file mode 100644 index 0000000..908024c --- /dev/null +++ b/hbase-handler/src/test/results/positive/hbase_viewjoins.q.out @@ -0,0 +1,172 @@ +PREHOOK: query: DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_2 +PREHOOK: type: DROPVIEW +POSTHOOK: query: DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_2 +POSTHOOK: type: DROPVIEW +PREHOOK: query: DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_1 +PREHOOK: type: DROPVIEW +POSTHOOK: query: DROP VIEW IF EXISTS VIEW_HBASE_TABLE_TEST_1 +POSTHOOK: type: DROPVIEW +PREHOOK: query: DROP TABLE IF EXISTS HBASE_TABLE_TEST_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS HBASE_TABLE_TEST_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS HBASE_TABLE_TEST_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS HBASE_TABLE_TEST_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE HBASE_TABLE_TEST_1( + cvalue string , + pk string, + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_1', + 'serialization.null.format'='' ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@HBASE_TABLE_TEST_1 +POSTHOOK: query: CREATE TABLE HBASE_TABLE_TEST_1( + cvalue string , + pk string, + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_1', + 'serialization.null.format'='' ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@HBASE_TABLE_TEST_1 +PREHOOK: query: CREATE VIEW VIEW_HBASE_TABLE_TEST_1 AS SELECT hbase_table_test_1.cvalue,hbase_table_test_1.pk,hbase_table_test_1.ccount FROM hbase_table_test_1 WHERE hbase_table_test_1.ccount IS NOT NULL +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@hbase_table_test_1 +PREHOOK: Output: database:default +PREHOOK: Output: default@VIEW_HBASE_TABLE_TEST_1 +POSTHOOK: query: CREATE VIEW VIEW_HBASE_TABLE_TEST_1 AS SELECT hbase_table_test_1.cvalue,hbase_table_test_1.pk,hbase_table_test_1.ccount FROM hbase_table_test_1 WHERE hbase_table_test_1.ccount IS NOT NULL +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@hbase_table_test_1 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@VIEW_HBASE_TABLE_TEST_1 +PREHOOK: query: CREATE TABLE HBASE_TABLE_TEST_2( + cvalue string , + pk string , + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_2', + 'serialization.null.format'='') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@HBASE_TABLE_TEST_2 +POSTHOOK: query: CREATE TABLE HBASE_TABLE_TEST_2( + cvalue string , + pk string , + ccount int ) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.hbase.HBaseSerDe' +STORED BY + 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' +WITH SERDEPROPERTIES ( + 'hbase.columns.mapping'='cf:val,:key,cf2:count', + 'hbase.scan.cache'='500', + 'hbase.scan.cacheblocks'='false', + 'serialization.format'='1') +TBLPROPERTIES ( + 'hbase.table.name'='hbase_table_test_2', + 'serialization.null.format'='') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@HBASE_TABLE_TEST_2 +PREHOOK: query: CREATE VIEW VIEW_HBASE_TABLE_TEST_2 AS SELECT hbase_table_test_2.cvalue,hbase_table_test_2.pk,hbase_table_test_2.ccount +FROM hbase_table_test_2 WHERE hbase_table_test_2.pk >='3-0000h-0' AND hbase_table_test_2.pk <= '3-0000h-g' AND +hbase_table_test_2.ccount IS NOT NULL +PREHOOK: type: CREATEVIEW +PREHOOK: Input: default@hbase_table_test_2 +PREHOOK: Output: database:default +PREHOOK: Output: default@VIEW_HBASE_TABLE_TEST_2 +POSTHOOK: query: CREATE VIEW VIEW_HBASE_TABLE_TEST_2 AS SELECT hbase_table_test_2.cvalue,hbase_table_test_2.pk,hbase_table_test_2.ccount +FROM hbase_table_test_2 WHERE hbase_table_test_2.pk >='3-0000h-0' AND hbase_table_test_2.pk <= '3-0000h-g' AND +hbase_table_test_2.ccount IS NOT NULL +POSTHOOK: type: CREATEVIEW +POSTHOOK: Input: default@hbase_table_test_2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@VIEW_HBASE_TABLE_TEST_2 +PREHOOK: query: SELECT p.cvalue cvalue +FROM `VIEW_HBASE_TABLE_TEST_1` `p` +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_2` `A1` +ON `p`.cvalue = `A1`.cvalue +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_1` `A2` +ON `p`.cvalue = `A2`.cvalue +PREHOOK: type: QUERY +PREHOOK: Input: default@hbase_table_test_1 +PREHOOK: Input: default@hbase_table_test_2 +PREHOOK: Input: default@view_hbase_table_test_1 +PREHOOK: Input: default@view_hbase_table_test_2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT p.cvalue cvalue +FROM `VIEW_HBASE_TABLE_TEST_1` `p` +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_2` `A1` +ON `p`.cvalue = `A1`.cvalue +LEFT OUTER JOIN `VIEW_HBASE_TABLE_TEST_1` `A2` +ON `p`.cvalue = `A2`.cvalue +POSTHOOK: type: QUERY +POSTHOOK: Input: default@hbase_table_test_1 +POSTHOOK: Input: default@hbase_table_test_2 +POSTHOOK: Input: default@view_hbase_table_test_1 +POSTHOOK: Input: default@view_hbase_table_test_2 +#### A masked pattern was here #### +PREHOOK: query: DROP VIEW VIEW_HBASE_TABLE_TEST_2 +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@view_hbase_table_test_2 +PREHOOK: Output: default@view_hbase_table_test_2 +POSTHOOK: query: DROP VIEW VIEW_HBASE_TABLE_TEST_2 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@view_hbase_table_test_2 +POSTHOOK: Output: default@view_hbase_table_test_2 +PREHOOK: query: DROP VIEW VIEW_HBASE_TABLE_TEST_1 +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@view_hbase_table_test_1 +PREHOOK: Output: default@view_hbase_table_test_1 +POSTHOOK: query: DROP VIEW VIEW_HBASE_TABLE_TEST_1 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@view_hbase_table_test_1 +POSTHOOK: Output: default@view_hbase_table_test_1 +PREHOOK: query: DROP TABLE HBASE_TABLE_TEST_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hbase_table_test_2 +PREHOOK: Output: default@hbase_table_test_2 +POSTHOOK: query: DROP TABLE HBASE_TABLE_TEST_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hbase_table_test_2 +POSTHOOK: Output: default@hbase_table_test_2 +PREHOOK: query: DROP TABLE HBASE_TABLE_TEST_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@hbase_table_test_1 +PREHOOK: Output: default@hbase_table_test_1 +POSTHOOK: query: DROP TABLE HBASE_TABLE_TEST_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@hbase_table_test_1 +POSTHOOK: Output: default@hbase_table_test_1