HIVE-11285 : ObjectInspector for partition columns in FetchOperator in SMBJoin causes exception (Pengcheng Xiong via Ashutosh Chauhan)
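The patch itself is small: FetchOperator now builds the partition-key struct inspector from writable primitive object inspectors instead of the standard Java ones, presumably so the partition-column values it exposes use the same representation as the rest of the SMB join path; the new q-test exercises SMB joins against tables partitioned by DECIMAL and DOUBLE keys. As a rough, self-contained sketch of the inspector construction (the class name and the hard-coded partition name/type below are illustrative stand-ins for what FetchOperator reads from the table properties):

    // Hypothetical demo, not part of the patch; shows the post-patch style of
    // building the partition-key ObjectInspector from *writable* primitive OIs.
    import java.util.Arrays;

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

    public class PartitionKeyOIDemo {
      public static void main(String[] args) {
        // Stand-ins for the partition column names and colon-separated types
        // that FetchOperator derives from the table properties.
        String[] partKeys = { "p1" };
        String[] partKeyTypes = { "decimal(10,0)" };

        ObjectInspector[] inspectors = new ObjectInspector[partKeys.length];
        for (int i = 0; i < partKeys.length; i++) {
          // Post-patch: writable inspectors (e.g. HiveDecimalWritable for DECIMAL);
          // the pre-patch code used TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo.
          inspectors[i] = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
              TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
        }

        StructObjectInspector partKeyOI = ObjectInspectorFactory.getStandardStructObjectInspector(
            Arrays.asList(partKeys), Arrays.asList(inspectors));
        System.out.println(partKeyOI.getTypeName()); // struct<p1:decimal(10,0)>
      }
    }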

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8646c12f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8646c12f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8646c12f

Branch: refs/heads/hbase-metastore
Commit: 8646c12f7438a166d32cf8527733fe68d152a831
Parents: b0247cf
Author: Pengcheng Xiong <[email protected]>
Authored: Thu Jul 16 02:09:00 2015 +0700
Committer: Ashutosh Chauhan <[email protected]>
Committed: Fri Jul 17 13:06:10 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FetchOperator.java      |   5 +-
 .../clientpositive/smb_join_partition_key.q     |  35 +++++
 .../clientpositive/smb_join_partition_key.q.out | 128 +++++++++++++++++++
 3 files changed, 166 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8646c12f/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index 258d28e..4c6f7ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -229,8 +229,9 @@ public class FetchOperator implements Serializable {
     String[] partKeyTypes = pcolTypes.trim().split(":");
     ObjectInspector[] inspectors = new ObjectInspector[partKeys.length];
     for (int i = 0; i < partKeys.length; i++) {
-      inspectors[i] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
-          TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+      inspectors[i] = PrimitiveObjectInspectorFactory
+          .getPrimitiveWritableObjectInspector(TypeInfoFactory
+              .getPrimitiveTypeInfo(partKeyTypes[i]));
     }
     return ObjectInspectorFactory.getStandardStructObjectInspector(
         Arrays.asList(partKeys), Arrays.asList(inspectors));

http://git-wip-us.apache.org/repos/asf/hive/blob/8646c12f/ql/src/test/queries/clientpositive/smb_join_partition_key.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/smb_join_partition_key.q b/ql/src/test/queries/clientpositive/smb_join_partition_key.q
new file mode 100644
index 0000000..49e2d2f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/smb_join_partition_key.q
@@ -0,0 +1,35 @@
+SET hive.execution.engine=mr;
+SET hive.enforce.sortmergebucketmapjoin=false;
+SET hive.auto.convert.sortmerge.join=true;
+SET hive.optimize.bucketmapjoin = true;
+SET hive.optimize.bucketmapjoin.sortedmerge = true;
+SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|';
+
+insert into table data_table values(1, 'one');
+insert into table data_table values(2, 'two');
+
+CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key)
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC;
+
+CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table;
+
+INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table;
+
+SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key;
+
+drop table smb_table_part;
+
+CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table;
+
+SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8646c12f/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/smb_join_partition_key.q.out b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
new file mode 100644
index 0000000..a4ab8c3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out
@@ -0,0 +1,128 @@
+PREHOOK: query: CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@data_table
+POSTHOOK: query: CREATE TABLE data_table (key INT, value STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@data_table
+PREHOOK: query: insert into table data_table values(1, 'one')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@data_table
+POSTHOOK: query: insert into table data_table values(1, 'one')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@data_table
+POSTHOOK: Lineage: data_table.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: data_table.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table data_table values(2, 'two')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@data_table
+POSTHOOK: query: insert into table data_table values(2, 'two')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@data_table
+POSTHOOK: Lineage: data_table.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: data_table.value SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key)
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table
+POSTHOOK: query: CREATE TABLE smb_table (key INT, value STRING) CLUSTERED BY (key)
+SORTED BY (key) INTO 1 BUCKETS STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table
+PREHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 DECIMAL)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table SELECT * FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table
+POSTHOOK: Lineage: smb_table.key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table.value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table_part@p1=100
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_table
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_table
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+1	100
+2	100
+PREHOOK: query: drop table smb_table_part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: drop table smb_table_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: CREATE TABLE smb_table_part (key INT, value STRING) PARTITIONED BY (p1 double)
+CLUSTERED BY (key) SORTED BY (key) INTO 1 BUCKETS
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@smb_table_part
+PREHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+PREHOOK: type: QUERY
+PREHOOK: Input: default@data_table
+PREHOOK: Output: default@smb_table_part
+POSTHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@data_table
+POSTHOOK: Output: default@smb_table_part@p1=100
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ]
+POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@smb_table
+PREHOOK: Input: default@smb_table_part
+PREHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@smb_table
+POSTHOOK: Input: default@smb_table_part
+POSTHOOK: Input: default@smb_table_part@p1=100
+#### A masked pattern was here ####
+1	100.0
+2	100.0
