Repository: hive Updated Branches: refs/heads/master b98fb1f1a -> 9559306c3
HIVE-16491 : CBO cant handle join involving complex types in on condition (Miklos Gergely via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9559306c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9559306c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9559306c Branch: refs/heads/master Commit: 9559306c3698a453609fe1ea47fddf219ca397b3 Parents: b98fb1f Author: Miklos Gergely <mgerg...@hortonworks.com> Authored: Wed Feb 14 08:48:46 2018 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Wed Feb 14 08:49:45 2018 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 2 +- .../calcite/translator/JoinTypeCheckCtx.java | 2 +- .../clientpositive/vector_complex_join.q | 1 - .../llap/vector_complex_join.q.out | 128 +++--- .../clientpositive/vector_complex_join.q.out | 405 ------------------- 5 files changed, 74 insertions(+), 464 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 391170f..c2252f3 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -323,7 +323,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_coalesce_2.q,\ vector_coalesce_3.q,\ vector_complex_all.q,\ - vector_complex_join.q,\ vector_count.q,\ vector_count_distinct.q,\ vector_data_types.q,\ @@ -714,6 +713,7 @@ minillaplocal.query.files=\ vector_auto_smb_mapjoin_14.q,\ vector_char_varchar_1.q,\ vector_complex_all.q,\ + vector_complex_join.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ vector_groupby_cube1.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java index 4e42197..871518c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java @@ -54,7 +54,7 @@ public class JoinTypeCheckCtx extends TypeCheckCtx { public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType) throws SemanticException { super(RowResolver.getCombinedRR(leftRR, rightRR), true, false, false, false, false, false, false, false, - false, false); + true, false); this.inputRRLst = ImmutableList.of(leftRR, rightRR); this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER) || (hiveJoinType == JoinType.FULLOUTER); http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/queries/clientpositive/vector_complex_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q index db407bc..dbdc36a 100644 --- a/ql/src/test/queries/clientpositive/vector_complex_join.q +++ b/ql/src/test/queries/clientpositive/vector_complex_join.q @@ -23,7 +23,6 @@ INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src LIMIT 1; CREATE TABLE test2b (a INT) STORED AS ORC; INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4); -set hive.cbo.enable=false; explain vectorization expression select * from test2b join test2a on test2b.a = test2a.a[1]; http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 3a0c6a4..98e7dc0 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -233,27 +233,27 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 a (type: int) - 1 a[1] (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col4, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col4 (type: array<int>), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator + projectedOutputColumnNums: [0] + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0[1] (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator native: true - projectedOutputColumnNums: [0, 2, 3] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -290,17 +290,25 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 3:int)(children: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int) predicate: a[1] is not null (type: boolean) Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a[1] (type: int) - sort order: + - Map-reduce partition columns: a[1] (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyExpressions: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int + Select Operator + expressions: a (type: array<int>), index (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: array<int>), index (type: int) + Reduce Output Operator + key expressions: _col0[1] (type: int) + sort order: + + Map-reduce partition columns: _col0[1] (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array<int>), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -368,27 +376,27 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 a (type: int) - 1 a[index] (type: int) - Map Join Vectorization: - className: VectorMapJoinInnerLongOperator + Select Operator + expressions: a (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - outputColumnNames: _col0, _col4, _col5 - input vertices: - 1 Map 2 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col4 (type: array<int>), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator + projectedOutputColumnNums: [0] + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0[_col1] (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator native: true - projectedOutputColumnNums: [0, 2, 3] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 2 Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -425,17 +433,25 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 3:int)(children: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int) predicate: a[index] is not null (type: boolean) Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: a[index] (type: int) - sort order: + - Map-reduce partition columns: a[index] (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyExpressions: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int + Select Operator + expressions: a (type: array<int>), index (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE - value expressions: a (type: array<int>), index (type: int) + Reduce Output Operator + key expressions: _col0[_col1] (type: int) + sort order: + + Map-reduce partition columns: _col0[_col1] (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyExpressions: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array<int>), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/results/clientpositive/vector_complex_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out deleted file mode 100644 index 487ba5b..0000000 --- a/ql/src/test/results/clientpositive/vector_complex_join.q.out +++ /dev/null @@ -1,405 +0,0 @@ -PREHOOK: query: CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test -POSTHOOK: query: CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test -PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test -POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 2, "val_2") FROM src LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test -POSTHOOK: Lineage: test.a SIMPLE [] -POSTHOOK: Lineage: test.b EXPRESSION [] -_c0 _c1 -PREHOOK: query: explain vectorization expression -select * from alltypesorc join test where alltypesorc.cint=test.a -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select * from alltypesorc join test where alltypesorc.cint=test.a -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test - TableScan - alias: test - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: int), b (type: map<int,string>) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: alltypesorc - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 2:int) - predicate: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 2:int - bigTableValueExpressions: col 0:tinyint, col 1:smallint, col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:string, col 7:string, col 8:timestamp, col 9:timestamp, col 10:boolean, col 11:boolean - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a -PREHOOK: type: QUERY -PREHOOK: Input: default@alltypesorc -PREHOOK: Input: default@test -#### A masked pattern was here #### -POSTHOOK: query: select * from alltypesorc join test where alltypesorc.cint=test.a -POSTHOOK: type: QUERY -POSTHOOK: Input: default@alltypesorc -POSTHOOK: Input: default@test -#### A masked pattern was here #### -alltypesorc.ctinyint alltypesorc.csmallint alltypesorc.cint alltypesorc.cbigint alltypesorc.cfloat alltypesorc.cdouble alltypesorc.cstring1 alltypesorc.cstring2 alltypesorc.ctimestamp1 alltypesorc.ctimestamp2 alltypesorc.cboolean1 alltypesorc.cboolean2 test.a test.b --51 NULL 199408978 -1800989684 -51.0 NULL 34N4EY63M1GFWuW0boW P4PL5h1eXR4mMLr2 1969-12-31 16:00:08.451 NULL false true 199408978 {1:"val_1",2:"val_2"} -PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>, index INT) STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test2a -POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>, index INT) STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test2a -PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src LIMIT 1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test2a -POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src LIMIT 1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test2a -POSTHOOK: Lineage: test2a.a EXPRESSION [] -POSTHOOK: Lineage: test2a.index SIMPLE [] -_c0 _c1 -PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@test2b -POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@test2b -PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) -PREHOOK: type: QUERY -PREHOOK: Input: _dummy_database@_dummy_table -PREHOOK: Output: default@test2b -POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4) -POSTHOOK: type: QUERY -POSTHOOK: Input: _dummy_database@_dummy_table -POSTHOOK: Output: default@test2b -POSTHOOK: Lineage: test2b.a SCRIPT [] -col1 -PREHOOK: query: explain vectorization expression -select * from test2b join test2a on test2b.a = test2a.a[1] -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select * from test2b join test2a on test2b.a = test2a.a[1] -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - test2b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - test2b - TableScan - alias: test2b - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 a (type: int) - 1 a[1] (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: test2a - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int) - predicate: a[1] is not null (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 a (type: int) - 1 a[1] (type: int) - Map Join Vectorization: - bigTableKeyExpressions: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int - bigTableValueExpressions: col 0:array<int>, col 1:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col4, _col5 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col4 (type: array<int>), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] -PREHOOK: type: QUERY -PREHOOK: Input: default@test2a -PREHOOK: Input: default@test2b -#### A masked pattern was here #### -POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[1] -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test2a -POSTHOOK: Input: default@test2b -#### A masked pattern was here #### -test2b.a test2a.a test2a.index -2 [1,2] 1 -PREHOOK: query: explain vectorization expression -select * from test2b join test2a on test2b.a = test2a.a[test2a.index] -PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression -select * from test2b join test2a on test2b.a = test2a.a[test2a.index] -POSTHOOK: type: QUERY -Explain -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - -STAGE DEPENDENCIES: - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-4 - Map Reduce Local Work - Alias -> Map Local Tables: - test2b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - test2b - TableScan - alias: test2b - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 a (type: int) - 1 a[index] (type: int) - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: test2a - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int) - predicate: a[index] is not null (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 a (type: int) - 1 a[index] (type: int) - Map Join Vectorization: - bigTableKeyExpressions: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int - bigTableValueExpressions: col 0:array<int>, col 1:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - outputColumnNames: _col0, _col4, _col5 - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col4 (type: array<int>), _col5 (type: int) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: vectorized - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[test2a.index] -PREHOOK: type: QUERY -PREHOOK: Input: default@test2a -PREHOOK: Input: default@test2b -#### A masked pattern was here #### -POSTHOOK: query: select * from test2b join test2a on test2b.a = test2a.a[test2a.index] -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test2a -POSTHOOK: Input: default@test2b -#### A masked pattern was here #### -test2b.a test2a.a test2a.index -2 [1,2] 1