hive git commit: HIVE-16491 : CBO can't handle join involving complex types in on condition (Miklos Gergely via Ashutosh Chauhan)

hashutosh Wed, 14 Feb 2018 08:50:10 -0800

Repository: hive
Updated Branches:
  refs/heads/master b98fb1f1a -> 9559306c3



HIVE-16491 : CBO can't handle join involving complex types in on condition (Miklos Gergely via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashut...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9559306c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9559306c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9559306c

Branch: refs/heads/master
Commit: 9559306c3698a453609fe1ea47fddf219ca397b3
Parents: b98fb1f
Author: Miklos Gergely <mgerg...@hortonworks.com>
Authored: Wed Feb 14 08:48:46 2018 -0800
Committer: Ashutosh Chauhan <hashut...@apache.org>
Committed: Wed Feb 14 08:49:45 2018 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +-
 .../calcite/translator/JoinTypeCheckCtx.java    |   2 +-
 .../clientpositive/vector_complex_join.q        |   1 -
 .../llap/vector_complex_join.q.out              | 128 +++---
 .../clientpositive/vector_complex_join.q.out    | 405 -------------------
 5 files changed, 74 insertions(+), 464 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 391170f..c2252f3 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -323,7 +323,6 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   vector_coalesce_2.q,\
   vector_coalesce_3.q,\
   vector_complex_all.q,\
-  vector_complex_join.q,\
   vector_count.q,\
   vector_count_distinct.q,\
   vector_data_types.q,\
@@ -714,6 +713,7 @@ minillaplocal.query.files=\
   vector_auto_smb_mapjoin_14.q,\
   vector_char_varchar_1.q,\
   vector_complex_all.q,\
+  vector_complex_join.q,\
   vector_decimal_2.q,\
   vector_decimal_udf.q,\
   vector_groupby_cube1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java
index 4e42197..871518c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/JoinTypeCheckCtx.java
@@ -54,7 +54,7 @@ public class JoinTypeCheckCtx extends TypeCheckCtx {
  public JoinTypeCheckCtx(RowResolver leftRR, RowResolver rightRR, JoinType hiveJoinType)
       throws SemanticException {
    super(RowResolver.getCombinedRR(leftRR, rightRR), true, false, false, false, false, false, false, false,
-        false, false);
+        true, false);
     this.inputRRLst = ImmutableList.of(leftRR, rightRR);
    this.outerJoin = (hiveJoinType == JoinType.LEFTOUTER) || (hiveJoinType == JoinType.RIGHTOUTER)
         || (hiveJoinType == JoinType.FULLOUTER);

http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/queries/clientpositive/vector_complex_join.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_complex_join.q b/ql/src/test/queries/clientpositive/vector_complex_join.q
index db407bc..dbdc36a 100644
--- a/ql/src/test/queries/clientpositive/vector_complex_join.q
+++ b/ql/src/test/queries/clientpositive/vector_complex_join.q
@@ -23,7 +23,6 @@ INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src LIMIT 1;
 CREATE TABLE test2b (a INT) STORED AS ORC;
 INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4);
 
-set hive.cbo.enable=false;
 explain vectorization expression
 select *  from test2b join test2a on test2b.a = test2a.a[1];
 

http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
index 3a0c6a4..98e7dc0 100644
--- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
@@ -233,27 +233,27 @@ STAGE PLANS:
                         predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: a is not null (type: boolean)
                     Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 a (type: int)
-                        1 a[1] (type: int)
-                      Map Join Vectorization:
-                          className: VectorMapJoinInnerLongOperator
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
-                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
-                      outputColumnNames: _col0, _col4, _col5
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col4 (type: 
array<int>), _col5 (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        Select Vectorization:
-                            className: VectorSelectOperator
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0[1] (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinInnerLongOperator
                             native: true
-                            projectedOutputColumnNums: [0, 2, 3]
+                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+                        outputColumnNames: _col0, _col1, _col2
+                        input vertices:
+                          1 Map 2
                         Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
@@ -290,17 +290,25 @@ STAGE PLANS:
                         predicateExpression: SelectColumnIsNotNull(col 
3:int)(children: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int)
                     predicate: a[1] is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: a[1] (type: int)
-                      sort order: +
-                      Map-reduce partition columns: a[1] (type: int)
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkLongOperator
-                          keyExpressions: ListIndexColScalar(col 0:array<int>, 
col 1:int) -> 3:int
+                    Select Operator
+                      expressions: a (type: array<int>), index (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
-                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: a (type: array<int>), index (type: 
int)
+                      Reduce Output Operator
+                        key expressions: _col0[1] (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0[1] (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyExpressions: ListIndexColScalar(col 
0:array<int>, col 1:int) -> 3:int
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: array<int>), _col1 
(type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -368,27 +376,27 @@ STAGE PLANS:
                         predicateExpression: SelectColumnIsNotNull(col 0:int)
                     predicate: a is not null (type: boolean)
                     Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    Map Join Operator
-                      condition map:
-                           Inner Join 0 to 1
-                      keys:
-                        0 a (type: int)
-                        1 a[index] (type: int)
-                      Map Join Vectorization:
-                          className: VectorMapJoinInnerLongOperator
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
-                          nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
-                      outputColumnNames: _col0, _col4, _col5
-                      input vertices:
-                        1 Map 2
-                      Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col0 (type: int), _col4 (type: 
array<int>), _col5 (type: int)
-                        outputColumnNames: _col0, _col1, _col2
-                        Select Vectorization:
-                            className: VectorSelectOperator
+                          projectedOutputColumnNums: [0]
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: int)
+                          1 _col0[_col1] (type: int)
+                        Map Join Vectorization:
+                            className: VectorMapJoinInnerLongOperator
                             native: true
-                            projectedOutputColumnNums: [0, 2, 3]
+                            nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+                        outputColumnNames: _col0, _col1, _col2
+                        input vertices:
+                          1 Map 2
                         Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
@@ -425,17 +433,25 @@ STAGE PLANS:
                         predicateExpression: SelectColumnIsNotNull(col 
3:int)(children: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int)
                     predicate: a[index] is not null (type: boolean)
                     Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: a[index] (type: int)
-                      sort order: +
-                      Map-reduce partition columns: a[index] (type: int)
-                      Reduce Sink Vectorization:
-                          className: VectorReduceSinkLongOperator
-                          keyExpressions: ListIndexColColumn(col 0:array<int>, 
col 1:int) -> 3:int
+                    Select Operator
+                      expressions: a (type: array<int>), index (type: int)
+                      outputColumnNames: _col0, _col1
+                      Select Vectorization:
+                          className: VectorSelectOperator
                           native: true
-                          nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                          projectedOutputColumnNums: [0, 1]
                       Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: a (type: array<int>), index (type: 
int)
+                      Reduce Output Operator
+                        key expressions: _col0[_col1] (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0[_col1] (type: int)
+                        Reduce Sink Vectorization:
+                            className: VectorReduceSinkLongOperator
+                            keyExpressions: ListIndexColColumn(col 
0:array<int>, col 1:int) -> 3:int
+                            native: true
+                            nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        Statistics: Num rows: 1 Data size: 124 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: array<int>), _col1 
(type: int)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/9559306c/ql/src/test/results/clientpositive/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_complex_join.q.out b/ql/src/test/results/clientpositive/vector_complex_join.q.out
deleted file mode 100644
index 487ba5b..0000000
--- a/ql/src/test/results/clientpositive/vector_complex_join.q.out
+++ /dev/null
@@ -1,405 +0,0 @@
-PREHOOK: query: CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@test
-POSTHOOK: query: CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@test
-PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 
2, "val_2") FROM src LIMIT 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@test
-POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 
2, "val_2") FROM src LIMIT 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@test
-POSTHOOK: Lineage: test.a SIMPLE []
-POSTHOOK: Lineage: test.b EXPRESSION []
-_c0    _c1
-PREHOOK: query: explain vectorization expression
-select * from alltypesorc join test where alltypesorc.cint=test.a
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
-select * from alltypesorc join test where alltypesorc.cint=test.a
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-3 depends on stages: Stage-4
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-4
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        $hdt$_1:test 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        $hdt$_1:test 
-          TableScan
-            alias: test
-            Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-            Filter Operator
-              predicate: a is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-              Select Operator
-                expressions: a (type: int), b (type: map<int,string>)
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col2 (type: int)
-                    1 _col0 (type: int)
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-            Filter Operator
-              Filter Vectorization:
-                  className: VectorFilterOperator
-                  native: true
-                  predicateExpression: SelectColumnIsNotNull(col 2:int)
-              predicate: cint is not null (type: boolean)
-              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
-              Select Operator
-                expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 
10, 11]
-                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
-                Map Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  keys:
-                    0 _col2 (type: int)
-                    1 _col0 (type: int)
-                  Map Join Vectorization:
-                      bigTableKeyExpressions: col 2:int
-                      bigTableValueExpressions: col 0:tinyint, col 1:smallint, 
col 2:int, col 3:bigint, col 4:float, col 5:double, col 6:string, col 7:string, 
col 8:timestamp, col 9:timestamp, col 10:boolean, col 11:boolean
-                      className: VectorMapJoinOperator
-                      native: false
-                      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
-                      nativeConditionsNotMet: hive.execution.engine mr IN 
[tez, spark] IS false
-                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-                  Statistics: Num rows: 13516 Data size: 2906160 Basic stats: 
COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 13516 Data size: 2906160 Basic 
stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select * from alltypesorc join test where 
alltypesorc.cint=test.a
-PREHOOK: type: QUERY
-PREHOOK: Input: default@alltypesorc
-PREHOOK: Input: default@test
-#### A masked pattern was here ####
-POSTHOOK: query: select * from alltypesorc join test where 
alltypesorc.cint=test.a
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@alltypesorc
-POSTHOOK: Input: default@test
-#### A masked pattern was here ####
-alltypesorc.ctinyint   alltypesorc.csmallint   alltypesorc.cint        
alltypesorc.cbigint     alltypesorc.cfloat      alltypesorc.cdouble     
alltypesorc.cstring1    alltypesorc.cstring2    alltypesorc.ctimestamp1 
alltypesorc.ctimestamp2 alltypesorc.cboolean1   alltypesorc.cboolean2   test.a  
test.b
--51    NULL    199408978       -1800989684     -51.0   NULL    
34N4EY63M1GFWuW0boW     P4PL5h1eXR4mMLr2        1969-12-31 16:00:08.451 NULL    
false   true    199408978       {1:"val_1",2:"val_2"}
-PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>, index INT) STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@test2a
-POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>, index INT) STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@test2a
-PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src 
LIMIT 1
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-PREHOOK: Output: default@test2a
-POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2), 1 FROM src 
LIMIT 1
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-POSTHOOK: Output: default@test2a
-POSTHOOK: Lineage: test2a.a EXPRESSION []
-POSTHOOK: Lineage: test2a.index SIMPLE []
-_c0    _c1
-PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
-PREHOOK: type: CREATETABLE
-PREHOOK: Output: database:default
-PREHOOK: Output: default@test2b
-POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
-POSTHOOK: type: CREATETABLE
-POSTHOOK: Output: database:default
-POSTHOOK: Output: default@test2b
-PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
-PREHOOK: type: QUERY
-PREHOOK: Input: _dummy_database@_dummy_table
-PREHOOK: Output: default@test2b
-POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
-POSTHOOK: type: QUERY
-POSTHOOK: Input: _dummy_database@_dummy_table
-POSTHOOK: Output: default@test2b
-POSTHOOK: Lineage: test2b.a SCRIPT []
-col1
-PREHOOK: query: explain vectorization expression
-select *  from test2b join test2a on test2b.a = test2a.a[1]
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
-select *  from test2b join test2a on test2b.a = test2a.a[1]
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-3 depends on stages: Stage-4
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-4
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        test2b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        test2b 
-          TableScan
-            alias: test2b
-            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
-            Filter Operator
-              predicate: a is not null (type: boolean)
-              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 a (type: int)
-                  1 a[1] (type: int)
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: test2a
-            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
-            TableScan Vectorization:
-                native: true
-            Filter Operator
-              Filter Vectorization:
-                  className: VectorFilterOperator
-                  native: true
-                  predicateExpression: SelectColumnIsNotNull(col 
3:int)(children: ListIndexColScalar(col 0:array<int>, col 1:int) -> 3:int)
-              predicate: a[1] is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 a (type: int)
-                  1 a[1] (type: int)
-                Map Join Vectorization:
-                    bigTableKeyExpressions: ListIndexColScalar(col 
0:array<int>, col 1:int) -> 3:int
-                    bigTableValueExpressions: col 0:array<int>, col 1:int
-                    className: VectorMapJoinOperator
-                    native: false
-                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true
-                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, 
spark] IS false
-                outputColumnNames: _col0, _col4, _col5
-                Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE 
Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col4 (type: array<int>), 
_col5 (type: int)
-                  outputColumnNames: _col0, _col1, _col2
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 2]
-                  Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE 
Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
-                    table:
-                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
-PREHOOK: type: QUERY
-PREHOOK: Input: default@test2a
-PREHOOK: Input: default@test2b
-#### A masked pattern was here ####
-POSTHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@test2a
-POSTHOOK: Input: default@test2b
-#### A masked pattern was here ####
-test2b.a       test2a.a        test2a.index
-2      [1,2]   1
-PREHOOK: query: explain vectorization expression
-select *  from test2b join test2a on test2b.a = test2a.a[test2a.index]
-PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
-select *  from test2b join test2a on test2b.a = test2a.a[test2a.index]
-POSTHOOK: type: QUERY
-Explain
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-4 is a root stage
-  Stage-3 depends on stages: Stage-4
-  Stage-0 depends on stages: Stage-3
-
-STAGE PLANS:
-  Stage: Stage-4
-    Map Reduce Local Work
-      Alias -> Map Local Tables:
-        test2b 
-          Fetch Operator
-            limit: -1
-      Alias -> Map Local Operator Tree:
-        test2b 
-          TableScan
-            alias: test2b
-            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column 
stats: NONE
-            Filter Operator
-              predicate: a is not null (type: boolean)
-              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
-              HashTable Sink Operator
-                keys:
-                  0 a (type: int)
-                  1 a[index] (type: int)
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: test2a
-            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-            Filter Operator
-              Filter Vectorization:
-                  className: VectorFilterOperator
-                  native: true
-                  predicateExpression: SelectColumnIsNotNull(col 3:int)(children: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int)
-              predicate: a[index] is not null (type: boolean)
-              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-              Map Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 a (type: int)
-                  1 a[index] (type: int)
-                Map Join Vectorization:
-                    bigTableKeyExpressions: ListIndexColColumn(col 0:array<int>, col 1:int) -> 3:int
-                    bigTableValueExpressions: col 0:array<int>, col 1:int
-                    className: VectorMapJoinOperator
-                    native: false
-                    nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                outputColumnNames: _col0, _col4, _col5
-                Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                Select Operator
-                  expressions: _col0 (type: int), _col4 (type: array<int>), _col5 (type: int)
-                  outputColumnNames: _col0, _col1, _col2
-                  Select Vectorization:
-                      className: VectorSelectOperator
-                      native: true
-                      projectedOutputColumnNums: [0, 1, 2]
-                  Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
-                    File Sink Vectorization:
-                        className: VectorFileSinkOperator
-                        native: false
-                    Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
-      Map Vectorization:
-          enabled: true
-          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          inputFormatFeatureSupport: []
-          featureSupportInUse: []
-          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: false
-          vectorized: true
-      Local Work:
-        Map Reduce Local Work
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[test2a.index]
-PREHOOK: type: QUERY
-PREHOOK: Input: default@test2a
-PREHOOK: Input: default@test2b
-#### A masked pattern was here ####
-POSTHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[test2a.index]
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@test2a
-POSTHOOK: Input: default@test2b
-#### A masked pattern was here ####
-test2b.a       test2a.a        test2a.index
-2      [1,2]   1

hive git commit: HIVE-16491 : CBO cant handle join involving complex types in on condition (Miklos Gergely via Ashutosh Chauhan)

Reply via email to