Repository: hive Updated Branches: refs/heads/master 66b373a82 -> 3ac6a1ae3
HIVE-11371: Null pointer exception for nested table query when using ORC versus text (Matt McCline, reviewed by Prasanth J) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3ac6a1ae Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3ac6a1ae Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3ac6a1ae Branch: refs/heads/master Commit: 3ac6a1ae35f21d71a809cedcf5f0c8c516903557 Parents: 66b373a Author: Matt McCline <[email protected]> Authored: Wed Dec 2 16:55:40 2015 -0800 Committer: Matt McCline <[email protected]> Committed: Wed Dec 2 17:34:24 2015 -0800 ---------------------------------------------------------------------- data/files/TJOIN1 | 3 + data/files/TJOIN2 | 4 + data/files/TJOIN3 | 2 + data/files/TJOIN4 | 1 + .../test/resources/testconfiguration.properties | 1 + .../hive/ql/exec/vector/VectorCopyRow.java | 15 + .../VectorMapJoinGenerateResultOperator.java | 22 +- .../queries/clientpositive/vector_outer_join6.q | 42 +++ .../clientpositive/tez/vector_outer_join6.q.out | 357 +++++++++++++++++++ .../clientpositive/vector_outer_join6.q.out | 348 ++++++++++++++++++ 10 files changed, 776 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/data/files/TJOIN1 ---------------------------------------------------------------------- diff --git a/data/files/TJOIN1 b/data/files/TJOIN1 new file mode 100644 index 0000000..20e0ed5 --- /dev/null +++ b/data/files/TJOIN1 @@ -0,0 +1,3 @@ +0|10|15 +1|20|25 +2|\N|50 http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/data/files/TJOIN2 ---------------------------------------------------------------------- diff --git a/data/files/TJOIN2 b/data/files/TJOIN2 new file mode 100644 index 0000000..bbf3927 --- /dev/null +++ b/data/files/TJOIN2 @@ -0,0 +1,4 @@ +0|10|BB +1|15|DD +2|\N|EE +3|10|FF 
http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/data/files/TJOIN3 ---------------------------------------------------------------------- diff --git a/data/files/TJOIN3 b/data/files/TJOIN3 new file mode 100644 index 0000000..a7e688b --- /dev/null +++ b/data/files/TJOIN3 @@ -0,0 +1,2 @@ +0|10|XX +1|15|YY http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/data/files/TJOIN4 ---------------------------------------------------------------------- diff --git a/data/files/TJOIN4 b/data/files/TJOIN4 new file mode 100644 index 0000000..972c8aa --- /dev/null +++ b/data/files/TJOIN4 @@ -0,0 +1 @@ +0|20|ZZ http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3dfb6f3..1031655 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -290,6 +290,7 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_outer_join3.q,\ vector_outer_join4.q,\ vector_outer_join5.q,\ + vector_outer_join6.q,\ vector_partition_diff_num_cols.q,\ vector_partitioned_date_time.q,\ vector_reduce_groupby_decimal.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index 34b81e7..c56903e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -260,4 +260,19 @@ public class VectorCopyRow { copyRow.copy(inBatch, inBatchIndex, outBatch, outBatchIndex); } } + + 
@Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("VectorCopyRow "); + for (CopyRow copyRow : subRowToBatchCopiersByValue) { + if (sb.length() > 0) { + sb.append(", "); + } + sb.append(copyRow.getClass().getName()); + sb.append(" inColumnIndex " + copyRow.inColumnIndex); + sb.append(" outColumnIndex " + copyRow.outColumnIndex); + } + return sb.toString(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 2d9da84..c1c137b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -166,6 +166,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC int batchIndex = allMatchs[allMatchesIndex + i]; + // Outer key copying is only used when we are using the input BigTable batch as the output. + // if (bigTableVectorCopyOuterKeys != null) { // Copy within row. bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); @@ -232,17 +234,12 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. + // Note this includes any outer join keys that need to go into the small table "area". if (bigTableRetainedVectorCopy != null) { bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, overflowBatch, overflowBatch.size); } - // Reference the keys we just copied above. 
- if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(overflowBatch, overflowBatch.size, - overflowBatch, overflowBatch.size); - } - if (smallTableVectorDeserializeRow != null) { byte[] bytes = byteSegmentRef.getBytes(); @@ -333,12 +330,6 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC overflowBatch.cols[column].isRepeating = true; } } - if (bigTableVectorCopyOuterKeys != null) { - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableOuterKeyOutputVectorColumns) { - overflowBatch.cols[column].isRepeating = true; - } - } // Crucial here that we don't reset the overflow batch, or we will loose the small table // values we put in above. @@ -349,13 +340,6 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } - - if (bigTableVectorCopyOuterKeys != null) { - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector colVector = overflowBatch.cols[column]; - colVector.reset(); - } - } } if (hashMapResult.isEof()) { http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/ql/src/test/queries/clientpositive/vector_outer_join6.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_outer_join6.q b/ql/src/test/queries/clientpositive/vector_outer_join6.q new file mode 100644 index 0000000..b430108 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_outer_join6.q @@ -0,0 +1,42 @@ +set hive.explain.user=false; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +SET hive.auto.convert.join=true; + +-- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) 
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n'; + +load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt; +load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt; +load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt; +load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt; + +create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt; +create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt; +create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt; +create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt; + +explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; + +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1; http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out new file mode 100644 index 0000000..e3b8cf5 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_outer_join6.q.out @@ -0,0 +1,357 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1_txt +PREHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2_txt +POSTHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2_txt +PREHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3_txt +POSTHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3_txt +PREHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT 
DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4_txt +POSTHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin3_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin3_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin4_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin4_txt +PREHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin2_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin2_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin3_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3 +POSTHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin3_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3 +PREHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin4_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4 +POSTHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin4_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4 +PREHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain 
+select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum 
tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 0 +0 3 0 +1 NULL NULL +2 NULL NULL +PREHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 
_col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized + Map 3 + Map Operator Tree: + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on 
tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 +0 3 +1 NULL +2 NULL http://git-wip-us.apache.org/repos/asf/hive/blob/3ac6a1ae/ql/src/test/results/clientpositive/vector_outer_join6.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_outer_join6.q.out b/ql/src/test/results/clientpositive/vector_outer_join6.q.out new file mode 100644 index 0000000..b7c15e8 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_outer_join6.q.out @@ -0,0 +1,348 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1_txt +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table TJOIN1_txt (RNUM int , C1 int, C2 int) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1_txt +PREHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2_txt +POSTHOOK: query: create table TJOIN2_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: 
type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2_txt +PREHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3_txt +POSTHOOK: query: create table if not exists TJOIN3_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3_txt +PREHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4_txt +POSTHOOK: query: create table TJOIN4_txt (RNUM int , C1 int, C2 char(2)) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN1' into table TJOIN1_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN2' into table TJOIN2_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### 
+PREHOOK: Output: default@tjoin3_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN3' into table TJOIN3_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin3_txt +PREHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin4_txt +POSTHOOK: query: load data local inpath '../../data/files/TJOIN4' into table TJOIN4_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin4_txt +PREHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin1_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table TJOIN1 stored as orc AS SELECT * FROM TJOIN1_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin1_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin2_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table TJOIN2 stored as orc AS SELECT * FROM TJOIN2_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin2_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin3_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN3 +POSTHOOK: query: create table TJOIN3 stored as orc AS SELECT * FROM TJOIN3_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin3_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN3 +PREHOOK: query: create table TJOIN4 stored as orc AS SELECT * 
FROM TJOIN4_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@tjoin4_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN4 +POSTHOOK: query: create table TJOIN4 stored as orc AS SELECT * FROM TJOIN4_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@tjoin4_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN4 +PREHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:tjoin2 + Fetch Operator + limit: -1 + $hdt$_1:tjoin3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_1:tjoin3 + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + HashTable 
Sink Operator + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join 
tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum, tjoin3.rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 0 +0 3 0 +1 NULL NULL +2 NULL NULL +PREHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_1:tjoin2 + Fetch Operator + limit: -1 + $hdt$_1:tjoin3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_1:tjoin2 + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_1:tjoin3 + TableScan + alias: tjoin3 + Statistics: Num rows: 2 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: rnum (type: int), c1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 449 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +PREHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +POSTHOOK: query: select tj1rnum, tj2rnum as rnumt3 from + (select tjoin1.rnum tj1rnum, tjoin2.rnum tj2rnum, tjoin2.c1 tj2c1 from tjoin1 left outer join tjoin2 on tjoin1.c1 = tjoin2.c1 ) tj left outer join tjoin3 on tj2c1 = tjoin3.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +POSTHOOK: Input: default@tjoin3 +#### A masked pattern was here #### +0 0 +0 3 +1 NULL +2 NULL
