Repository: hive
Updated Branches:
  refs/heads/master f80f65da9 -> a5502b26f


HIVE-13744: LLAP IO - add complex types support (Prasanth Jayachandran, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a5502b26
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a5502b26
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a5502b26

Branch: refs/heads/master
Commit: a5502b26f234ed76966b2e13c7713fdbb7d8be95
Parents: f80f65d
Author: Prasanth Jayachandran <[email protected]>
Authored: Wed Jun 22 11:16:28 2016 -0700
Committer: Prasanth Jayachandran <[email protected]>
Committed: Wed Jun 22 11:16:32 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../llap/io/decode/OrcEncodedDataConsumer.java  |  37 ++-
 .../llap/vector_complex_all.q.out               | 258 +++++++++++++++++++
 .../llap/vector_complex_join.q.out              | 233 +++++++++++++++++
 4 files changed, 525 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a5502b26/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 46350a3..2b40cd9 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -515,6 +515,8 @@ minillap.shared.query.files=bucket_map_join_tez1.q,\
   tez_union_group_by.q,\
   tez_smb_main.q,\
   tez_smb_1.q,\
+  vector_complex_all.q,\
+  vector_complex_join.q,\
   vector_join_part_col_char.q,\
   vectorized_dynamic_partition_pruning.q,\
   tez_multi_union.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/a5502b26/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
index 619d1a4..3dfab63 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java
@@ -32,8 +32,12 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.UnionColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.orc.CompressionCodec;
 import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer;
@@ -77,8 +81,9 @@ public class OrcEncodedDataConsumer
     stripes[m.getStripeIx()] = m;
   }
 
-  private static ColumnVector createColumn(OrcProto.Type type,
-                                           int batchSize) {
+  private static ColumnVector createColumn(List<OrcProto.Type> types,
+      final int columnId, int batchSize) {
+    OrcProto.Type type = types.get(columnId);
     switch (type.getKind()) {
       case BOOLEAN:
       case BYTE:
@@ -100,6 +105,28 @@ public class OrcEncodedDataConsumer
       case DECIMAL:
         return new DecimalColumnVector(batchSize, type.getPrecision(),
             type.getScale());
+      case STRUCT: {
+        List<Integer> subtypeIdxs = type.getSubtypesList();
+        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = createColumn(types, subtypeIdxs.get(i), batchSize);
+        }
+        return new StructColumnVector(batchSize, fieldVector);
+      }
+      case UNION: {
+        List<Integer> subtypeIdxs = type.getSubtypesList();
+        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
+        for(int i=0; i < fieldVector.length; ++i) {
+          fieldVector[i] = createColumn(types, subtypeIdxs.get(i), batchSize);
+        }
+        return new UnionColumnVector(batchSize, fieldVector);
+      }
+      case LIST:
+        return new ListColumnVector(batchSize, createColumn(types, type.getSubtypes(0), batchSize));
+      case MAP:
+        return new MapColumnVector(batchSize,
+            createColumn(types, type.getSubtypes(0), batchSize),
+            createColumn(types, type.getSubtypes(1), batchSize));
       default:
         throw new IllegalArgumentException("LLAP does not support " +
             type.getKind());
@@ -151,9 +178,9 @@ public class OrcEncodedDataConsumer
         int[] columnMapping = batch.getColumnIxs();
         for (int idx = 0; idx < batch.getColumnIxs().length; idx++) {
           if (cvb.cols[idx] == null) {
-            // skip over the top level struct, but otherwise assume no complex
-            // types
-            cvb.cols[idx] = createColumn(types.get(columnMapping[idx]), batchSize);
+            // ORC stores rows inside a root struct (Hive writes it this way).
+            // When we populate column vectors we skip over the root struct.
+            cvb.cols[idx] = createColumn(types, columnMapping[idx], batchSize);
           }
           cvb.cols[idx].ensureSize(batchSize, false);
           columnReaders[idx].nextVector(cvb.cols[idx], null, batchSize);

http://git-wip-us.apache.org/repos/asf/hive/blob/a5502b26/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
new file mode 100644
index 0000000..08d49bc
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_complex_all.q.out
@@ -0,0 +1,258 @@
+PREHOOK: query: CREATE TABLE orc_create_staging (
+  str STRING,
+  mp  MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>
+) ROW FORMAT DELIMITED
+    FIELDS TERMINATED BY '|'
+    COLLECTION ITEMS TERMINATED BY ','
+    MAP KEYS TERMINATED BY ':'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_create_staging
+POSTHOOK: query: CREATE TABLE orc_create_staging (
+  str STRING,
+  mp  MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>
+) ROW FORMAT DELIMITED
+    FIELDS TERMINATED BY '|'
+    COLLECTION ITEMS TERMINATED BY ','
+    MAP KEYS TERMINATED BY ':'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_create_staging
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' 
OVERWRITE INTO TABLE orc_create_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@orc_create_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' 
OVERWRITE INTO TABLE orc_create_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@orc_create_staging
+PREHOOK: query: CREATE TABLE orc_create_complex (
+  str STRING,
+  mp  MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>
+) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_create_complex
+POSTHOOK: query: CREATE TABLE orc_create_complex (
+  str STRING,
+  mp  MAP<STRING,STRING>,
+  lst ARRAY<STRING>,
+  strct STRUCT<A:STRING,B:STRING>
+) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_create_complex
+PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM 
orc_create_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_staging
+PREHOOK: Output: default@orc_create_complex
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT * FROM 
orc_create_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_staging
+POSTHOOK: Output: default@orc_create_complex
+POSTHOOK: Lineage: orc_create_complex.lst SIMPLE 
[(orc_create_staging)orc_create_staging.FieldSchema(name:lst, 
type:array<string>, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.mp SIMPLE 
[(orc_create_staging)orc_create_staging.FieldSchema(name:mp, 
type:map<string,string>, comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.str SIMPLE 
[(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, 
comment:null), ]
+POSTHOOK: Lineage: orc_create_complex.strct SIMPLE 
[(orc_create_staging)orc_create_staging.FieldSchema(name:strct, 
type:struct<A:string,B:string>, comment:null), ]
+orc_create_staging.str orc_create_staging.mp   orc_create_staging.lst  
orc_create_staging.strct
+PREHOOK: query: -- Since complex types are not supported, this query should 
not vectorize.
+EXPLAIN
+SELECT * FROM orc_create_complex
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Since complex types are not supported, this query should 
not vectorize.
+EXPLAIN
+SELECT * FROM orc_create_complex
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_create_complex
+                  Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: str (type: string), mp (type: 
map<string,string>), lst (type: array<string>), strct (type: 
struct<a:string,b:string>)
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                      table:
+                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT * FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+orc_create_complex.str orc_create_complex.mp   orc_create_complex.lst  
orc_create_complex.strct
+line1  {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"]   
{"a":"one","b":"two"}
+line2  {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"]   
{"a":"three","b":"four"}
+line3  {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"]   
{"a":"five","b":"six"}
+PREHOOK: query: -- However, since this query is not referencing the complex 
fields, it should vectorize.
+EXPLAIN
+SELECT COUNT(*) FROM orc_create_complex
+PREHOOK: type: QUERY
+POSTHOOK: query: -- However, since this query is not referencing the complex 
fields, it should vectorize.
+EXPLAIN
+SELECT COUNT(*) FROM orc_create_complex
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_create_complex
+                  Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: count()
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT COUNT(*) FROM orc_create_complex
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT COUNT(*) FROM orc_create_complex
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+c0
+3
+PREHOOK: query: -- Also, since this query is not referencing the complex 
fields, it should vectorize.
+EXPLAIN
+SELECT str FROM orc_create_complex ORDER BY str
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Also, since this query is not referencing the complex 
fields, it should vectorize.
+EXPLAIN
+SELECT str FROM orc_create_complex ORDER BY str
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: orc_create_complex
+                  Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: str (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 3 Data size: 3177 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 3177 Basic stats: 
COMPLETE Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT str FROM orc_create_complex ORDER BY str
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT str FROM orc_create_complex ORDER BY str
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_create_complex
+#### A masked pattern was here ####
+str
+line1
+line2
+line3

http://git-wip-us.apache.org/repos/asf/hive/blob/a5502b26/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
new file mode 100644
index 0000000..480627f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out
@@ -0,0 +1,233 @@
+PREHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+POSTHOOK: query: -- From HIVE-10729.  Not expected to vectorize this query.
+--
+CREATE TABLE test (a INT, b MAP<INT, STRING>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test
+PREHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 
2, "val_2") FROM src LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test
+POSTHOOK: query: INSERT OVERWRITE TABLE test SELECT 199408978, MAP(1, "val_1", 
2, "val_2") FROM src LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test
+POSTHOOK: Lineage: test.a SIMPLE []
+POSTHOOK: Lineage: test.b EXPRESSION []
+c0     c1
+PREHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from alltypesorc join test where alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: 
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cint is not null (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: 
smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), 
cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: 
boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic 
stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col2 (type: int)
+                          1 _col0 (type: int)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 13516 Data size: 2906160 Basic 
stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 13516 Data size: 2906160 Basic 
stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test
+                  Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: map<int,string>)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 190 Basic stats: 
COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: map<int,string>)
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from alltypesorc join test where 
alltypesorc.cint=test.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from alltypesorc join test where 
alltypesorc.cint=test.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+alltypesorc.ctinyint   alltypesorc.csmallint   alltypesorc.cint        
alltypesorc.cbigint     alltypesorc.cfloat      alltypesorc.cdouble     
alltypesorc.cstring1    alltypesorc.cstring2    alltypesorc.ctimestamp1 
alltypesorc.ctimestamp2 alltypesorc.cboolean1   alltypesorc.cboolean2   test.a  
test.b
+-51    NULL    199408978       -1800989684     -51.0   NULL    
34N4EY63M1GFWuW0boW     P4PL5h1eXR4mMLr2        1969-12-31 16:00:08.451 NULL    
false   true    199408978       {1:"val_1",2:"val_2"}
+PREHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2a
+POSTHOOK: query: CREATE TABLE test2a (a ARRAY<INT>) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2a
+PREHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src 
LIMIT 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test2a
+POSTHOOK: query: INSERT OVERWRITE TABLE test2a SELECT ARRAY(1, 2) FROM src 
LIMIT 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test2a
+POSTHOOK: Lineage: test2a.a EXPRESSION []
+c0
+PREHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2b
+POSTHOOK: query: CREATE TABLE test2b (a INT) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2b
+PREHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test2b
+POSTHOOK: query: INSERT OVERWRITE TABLE test2b VALUES (2), (3), (4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test2b
+POSTHOOK: Lineage: test2b.a EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+_col0
+PREHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+Explain
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test2b
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: 
COMPLETE Column stats: NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      keys:
+                        0 a (type: int)
+                        1 a[1] (type: int)
+                      outputColumnNames: _col0, _col4
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: int), _col4 (type: 
array<int>)
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 3 Data size: 13 Basic stats: 
COMPLETE Column stats: NONE
+                          table:
+                              input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test2a
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Filter Operator
+                    predicate: a[1] is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a[1] (type: int)
+                      sort order: +
+                      Map-reduce partition columns: a[1] (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: a (type: array<int>)
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test2a
+PREHOOK: Input: default@test2b
+#### A masked pattern was here ####
+POSTHOOK: query: select *  from test2b join test2a on test2b.a = test2a.a[1]
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test2a
+POSTHOOK: Input: default@test2b
+#### A masked pattern was here ####
+test2b.a       test2a.a
+2      [1,2]

Reply via email to