This is an automated email from the ASF dual-hosted git repository. zabetak pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 9b5feec HIVE-25410: CommonMergeJoin fails for ARRAY join keys with varying sizes (okumin reviewed by Stamatis Zampetakis) 9b5feec is described below commit 9b5feec4dc475d6ee38c6d11130fb19d351a718d Author: okumin <g...@okumin.com> AuthorDate: Sat Jul 31 01:31:11 2021 +0900 HIVE-25410: CommonMergeJoin fails for ARRAY join keys with varying sizes (okumin reviewed by Stamatis Zampetakis) CommonMergeJoinOperator fails with ArrayIndexOutOfBoundsException when the column contains ARRAYs of different sizes. Before this change, the comparators were created only once for the first comparison and were reused afterwards, causing an exception when subsequent ARRAYs had more elements. STRUCTs are not affected since the number of elements is consistent across records. Closes #2551 --- .../hadoop/hive/ql/exec/HiveStructComparator.java | 19 ++- .../hive/ql/exec/WritableComparatorFactory.java | 3 +- .../clientpositive/smb_mapjoin_complex_type.q | 4 +- .../clientpositive/test_join_complex_type.q | 8 +- .../llap/test_join_complex_type.q.out | 186 +++++++++++++++------ .../clientpositive/smb_mapjoin_complex_type.q.out | 19 ++- 6 files changed, 162 insertions(+), 77 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java index 50d02cc..d8abf72 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/HiveStructComparator.java @@ -19,12 +19,15 @@ package org.apache.hadoop.hive.ql.exec; import org.apache.hadoop.hive.ql.util.NullOrdering; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; +import java.util.ArrayList; import java.util.List; +/** + * A WritableComparator to compare STRUCT or ARRAY objects. 
+ */ final class HiveStructComparator extends HiveWritableComparator { - private WritableComparator[] comparator = null; + private final List<WritableComparator> comparators = new ArrayList<>(); HiveStructComparator(boolean nullSafe, NullOrdering nullOrdering) { super(nullSafe, nullOrdering); @@ -45,16 +48,14 @@ final class HiveStructComparator extends HiveWritableComparator { if (a1.size() == 0) { return 0; } - if (comparator == null) { - comparator = new WritableComparator[a1.size()]; - // For struct all elements may not be of same type, so create comparator for each entry. - for (int i = 0; i < a1.size(); i++) { - comparator[i] = WritableComparatorFactory.get(a1.get(i), nullSafe, nullOrdering); - } + // For array, the length may not be fixed, so extend comparators on demand + for (int i = comparators.size(); i < a1.size(); i++) { + // For struct, all elements may not be of same type, so create comparator for each entry. + comparators.add(i, WritableComparatorFactory.get(a1.get(i), nullSafe, nullOrdering)); } result = 0; for (int i = 0; i < a1.size(); i++) { - result = comparator[i].compare(a1.get(i), a2.get(i)); + result = comparators.get(i).compare(a1.get(i), a2.get(i)); if (result != 0) { return result; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java index 17ae06d..ff9ada4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/WritableComparatorFactory.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.exec; import org.apache.hadoop.hive.ql.util.NullOrdering; import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; import java.util.List; import java.util.Map; @@ -27,7 +26,7 @@ import java.util.Map; public final class 
WritableComparatorFactory { public static WritableComparator get(Object key, boolean nullSafe, NullOrdering nullOrdering) { if (key instanceof List) { - // For array type struct is used as we do not know if all elements of array are of same type. + // STRUCT or ARRAY are expressed as java.util.List return new HiveStructComparator(nullSafe, nullOrdering); } else if (key instanceof Map) { // TODO : https://issues.apache.org/jira/browse/HIVE-25042 diff --git a/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q b/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q index 002746c..4842008 100644 --- a/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q +++ b/ql/src/test/queries/clientpositive/smb_mapjoin_complex_type.q @@ -10,10 +10,10 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; CREATE TABLE test_list1 (key INT, value array<int>, col_1 STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; -INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'); +INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory'); CREATE TABLE test_list2 (key INT, value array<int>, col_2 STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; -INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'); +INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor'); EXPLAIN SELECT * diff --git a/ql/src/test/queries/clientpositive/test_join_complex_type.q b/ql/src/test/queries/clientpositive/test_join_complex_type.q index 7b72a5a..af0cd7c 100644 --- a/ql/src/test/queries/clientpositive/test_join_complex_type.q +++ b/ql/src/test/queries/clientpositive/test_join_complex_type.q @@ -4,11 +4,17 @@ insert into 
table_list_types VALUES (2, array(1,2), array(2,2)); insert into table_list_types VALUES (3, array(1,3), array(2,3)); insert into table_list_types VALUES (4, array(1,4), array(1,4)); insert into table_list_types VALUES (5, array(1,4), array(null,4)); +insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3)); +insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1)); +insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1)); create table table_list_types1 (id int, c1 array<int>, c2 array<int>); insert into table_list_types1 VALUES (1, array(1,1), array(2,1)); insert into table_list_types1 VALUES (2, array(1,2), array(2,2)); insert into table_list_types1 VALUES (3, array(1,4), array(1,3)); +insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3)); +insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2)); +insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2)); set hive.cbo.enable=false; set hive.auto.convert.join=false; @@ -78,4 +84,4 @@ explain select * from table_struct_types t1 inner join table_struct_types1 t2 on select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c1 = t2.c1; explain select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2; -select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2; \ No newline at end of file +select * from table_struct_types t1 inner join table_struct_types1 t2 on t1.c2 = t2.c2; diff --git a/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out b/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out index cbd7ae1..da9179b 100644 --- a/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out +++ b/ql/src/test/results/clientpositive/llap/test_join_complex_type.q.out @@ -61,6 +61,39 @@ POSTHOOK: Output: default@table_list_types POSTHOOK: Lineage: table_list_types.c1 SCRIPT [] POSTHOOK: Lineage: table_list_types.c2 SCRIPT [] 
POSTHOOK: Lineage: table_list_types.id SCRIPT [] +PREHOOK: query: insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types +POSTHOOK: query: insert into table_list_types VALUES (6, array(1,1,1), array(1,2,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types +POSTHOOK: Lineage: table_list_types.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types.id SCRIPT [] +PREHOOK: query: insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types +POSTHOOK: query: insert into table_list_types VALUES (7, array(1,2,3), array(3,2,1)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types +POSTHOOK: Lineage: table_list_types.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types.id SCRIPT [] +PREHOOK: query: insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types +POSTHOOK: query: insert into table_list_types VALUES (8, array(1,1,1,1), array(4,3,2,1)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types +POSTHOOK: Lineage: table_list_types.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types.id SCRIPT [] PREHOOK: query: create table table_list_types1 (id int, c1 array<int>, c2 array<int>) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -102,6 +135,39 @@ POSTHOOK: Output: default@table_list_types1 POSTHOOK: Lineage: table_list_types1.c1 SCRIPT [] POSTHOOK: Lineage: table_list_types1.c2 
SCRIPT [] POSTHOOK: Lineage: table_list_types1.id SCRIPT [] +PREHOOK: query: insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types1 +POSTHOOK: query: insert into table_list_types1 VALUES (4, array(1,1,1), array(1,2,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types1 +POSTHOOK: Lineage: table_list_types1.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.id SCRIPT [] +PREHOOK: query: insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types1 +POSTHOOK: query: insert into table_list_types1 VALUES (5, array(1,2,3), array(2,2,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types1 +POSTHOOK: Lineage: table_list_types1.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.id SCRIPT [] +PREHOOK: query: insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2)) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@table_list_types1 +POSTHOOK: query: insert into table_list_types1 VALUES (6, array(1,1,1,1), array(2,2,2,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@table_list_types1 +POSTHOOK: Lineage: table_list_types1.c1 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.c2 SCRIPT [] +POSTHOOK: Lineage: table_list_types1.id SCRIPT [] PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1 PREHOOK: type: QUERY PREHOOK: Input: default@table_list_types @@ -128,16 +194,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 5 
Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: c1 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: c1 (type: array<int>) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE value expressions: id (type: int), c2 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -145,16 +211,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: c1 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: c1 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: id (type: int), c2 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -168,14 +234,14 @@ STAGE PLANS: 0 c1 (type: array<int>) 1 c1 (type: array<int>) outputColumnNames: _col0, _col1, _col2, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: _col0 (type: int), _col1 (type: array<int>), _col2 (type: array<int>), _col7 (type: int), _col8 (type: array<int>), _col9 (type: array<int>) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -198,7 +264,10 @@ POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### 1 [1,1] [2,1] 1 [1,1] [2,1] +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] +8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2] 2 [1,2] [2,2] 2 [1,2] [2,2] +7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2] 5 [1,4] [null,4] 3 [1,4] [1,3] 4 [1,4] [1,4] 3 [1,4] [1,3] PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c2 = t2.c2 @@ -227,16 +296,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: c2 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: c2 (type: array<int>) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE value 
expressions: id (type: int), c1 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -244,16 +313,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: c2 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: c2 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: id (type: int), c1 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -267,14 +336,14 @@ STAGE PLANS: 0 c2 (type: array<int>) 1 c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2, _col7, _col8, _col9 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: array<int>), _col2 (type: array<int>), _col7 (type: int), _col8 (type: array<int>), _col9 (type: array<int>) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -296,6 +365,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] 1 [1,1] [2,1] 1 [1,1] [2,1] 2 [1,2] [2,2] 2 [1,2] [2,2] PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1 @@ -324,14 +394,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -341,10 +411,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -355,20 +425,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 732 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -389,6 +459,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### +8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2] +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] +7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2] 5 [1,4] [null,4] 3 [1,4] [1,3] 1 [1,1] [2,1] 1 [1,1] [2,1] 2 [1,2] [2,2] 2 [1,2] [2,2] @@ -419,14 +492,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) 
outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -436,10 +509,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -450,20 +523,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: 
int), _col1 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -484,6 +557,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] 1 [1,1] [2,1] 1 [1,1] [2,1] 2 [1,2] [2,2] 2 [1,2] [2,2] PREHOOK: query: explain select * from table_list_types t1 inner join table_list_types1 t2 on t1.c1 = t2.c1 @@ -513,14 +587,14 @@ STAGE PLANS: TableScan alias: t1 filterExpr: c1 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -530,10 +604,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -545,20 +619,20 @@ STAGE PLANS: TableScan alias: t2 filterExpr: c1 is not null 
(type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c1 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -579,6 +653,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### +8 [1,1,1,1] [4,3,2,1] 6 [1,1,1,1] [2,2,2,2] +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] +7 [1,2,3] [3,2,1] 5 [1,2,3] [2,2,2] 5 [1,4] [null,4] 3 [1,4] [1,3] 1 [1,1] [2,1] 1 [1,1] [2,1] 2 [1,2] [2,2] 2 [1,2] [2,2] @@ -610,14 +687,14 @@ STAGE PLANS: TableScan alias: t1 filterExpr: c2 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Select 
Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 1220 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -627,10 +704,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 1342 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2147 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -642,20 +719,20 @@ STAGE PLANS: TableScan alias: t2 filterExpr: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: c2 is not null (type: boolean) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: id (type: int), c1 (type: array<int>), c2 (type: array<int>) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: array<int>) null sort order: z sort order: + Map-reduce partition columns: _col2 (type: array<int>) - Statistics: Num rows: 3 Data size: 732 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 6 Data size: 1464 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: array<int>) Execution mode: vectorized, llap LLAP IO: all inputs @@ -676,6 +753,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@table_list_types POSTHOOK: Input: default@table_list_types1 #### A masked pattern was here #### +6 [1,1,1] [1,2,3] 4 [1,1,1] [1,2,3] 1 [1,1] [2,1] 1 [1,1] [2,1] 2 [1,2] [2,2] 2 [1,2] [2,2] PREHOOK: query: create table table_struct_types (id int, c1 struct<f1: int,f2: string>, c2 struct<f1: int,f2: string>) diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out index b65f64f..5cf76de 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_complex_type.q.out @@ -6,11 +6,11 @@ POSTHOOK: query: CREATE TABLE test_list1 (key INT, value array<int>, col_1 STRIN POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_list1 -PREHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car') +PREHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@test_list1 -POSTHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car') +POSTHOOK: query: INSERT INTO test_list1 VALUES (99, array(0,0), 'Alice'), (99, array(2,2), 'Mat'), (100, array(0,0), 'Bob'), (101, array(2,2), 'Car'), (102, array(1, 2, 3, 4), 'Mallory') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_list1 @@ -25,11 +25,11 @@ 
POSTHOOK: query: CREATE TABLE test_list2 (key INT, value array<int>, col_2 STRIN POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_list2 -PREHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli') +PREHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor') PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table PREHOOK: Output: default@test_list2 -POSTHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli') +POSTHOOK: query: INSERT INTO test_list2 VALUES (102, array(2,2), 'Del'), (103, array(2,2), 'Ema'), (104, array(3,3), 'Fli'), (105, array(1, 2, 3, 4), 'Victor') POSTHOOK: type: QUERY POSTHOOK: Input: _dummy_database@_dummy_table POSTHOOK: Output: default@test_list2 @@ -63,14 +63,14 @@ STAGE PLANS: TableScan alias: test_list1 filterExpr: value is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: array<int>), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1540 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -78,10 +78,10 @@ STAGE PLANS: 0 _col1 (type: array<int>) 1 _col1 (type: array<int>) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 4 
Data size: 1355 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1694 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1355 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 1694 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -107,6 +107,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test_list1 POSTHOOK: Input: default@test_list2 #### A masked pattern was here #### +102 [1,2,3,4] Mallory 105 [1,2,3,4] Victor 101 [2,2] Car 103 [2,2] Ema 101 [2,2] Car 102 [2,2] Del 99 [2,2] Mat 103 [2,2] Ema