[2/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index c64adbf..b11ad87 100644 --- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -140,23 +140,46 @@ STAGE PLANS: TableScan alias: timestamps Statistics: Num rows: 51 Data size: 12597 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] Select Operator expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN (null) ELSE (null) END (type: string), 
if((ctimestamp1 < TIMESTAMP'1974-10-04 17:21:03.989'), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [1, 3, 9, 10, 11, 8, 12, 7, 6, 17, 2] +selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.9) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.9) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 
23:59:59.9')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.9) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.9) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.9) -> 8:boolean) ->
[1/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 857259ed0 -> 97f0513c4 http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/test/results/clientpositive/vector_case_when_1.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out index 270f5eb..9949de7 100644 --- a/ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -202,23 +202,44 @@ STAGE PLANS: TableScan alias: lineitem_test Statistics: Num rows: 101 Data size: 78500 Basic stats: COMPLETE Column stats: NONE +TableScan Vectorization: +native: true +vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2)/DECIMAL_64, 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] Select Operator expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - 
l_discount))) ELSE (0.0D) END (type: double), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 1), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 1), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 21, 22, 23, 20, 24, 25, 27, 28, 29, 30, 31, 32, 35, 37, 38, 19] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 
17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 22:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 23:string,
[3/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97f0513c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97f0513c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97f0513c Branch: refs/heads/master Commit: 97f0513c4c8ff1c251b1bdd1b84bd238557f03b0 Parents: 857259e Author: Matt McCline Authored: Thu Oct 4 14:37:21 2018 -0500 Committer: Matt McCline Committed: Thu Oct 4 14:37:21 2018 -0500 -- .../test/resources/testconfiguration.properties | 1 + .../ql/exec/vector/VectorizationContext.java| 92 ++- .../expressions/CastTimestampToString.java | 10 +- .../hive/ql/optimizer/physical/Vectorizer.java | 15 +- .../exec/vector/TestVectorizationContext.java | 31 +- .../vector_case_when_conversion.q | 136 .../llap/vector_case_when_1.q.out | 36 +- .../llap/vector_case_when_2.q.out | 45 +- .../llap/vector_case_when_conversion.q.out | 616 +++ .../llap/vector_decimal_expressions.q.out | 2 +- .../llap/vector_udf_adaptor_1.q.out | 52 +- .../clientpositive/llap/vectorized_case.q.out | 12 +- .../clientpositive/spark/vectorized_case.q.out | 12 +- .../clientpositive/vector_case_when_1.q.out | 35 +- .../clientpositive/vector_case_when_2.q.out | 39 +- .../vector_decimal_expressions.q.out| 2 +- .../clientpositive/vectorized_case.q.out| 12 +- 17 files changed, 1061 insertions(+), 87 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fdd8ecc..d444c99 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -763,6 +763,7 @@ minillaplocal.query.files=\ vector_acid4.q,\ 
vector_annotate_stats_select.q,\ vector_auto_smb_mapjoin_14.q,\ + vector_case_when_conversion.q,\ vector_char_varchar_1.q,\ vector_complex_all.q,\ vector_complex_join.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6ca1248..488f277 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -546,6 +546,7 @@ public class VectorizationContext { private final int initialOutputCol; private int outputColCount = 0; private boolean reuseScratchColumns = true; +private boolean dontReuseTrackedScratchColumns = false; protected OutputColumnManager(int initialOutputCol) { this.initialOutputCol = initialOutputCol; @@ -558,6 +559,7 @@ public class VectorizationContext { private String[] scratchVectorTypeNames = new String[100]; private DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = new DataTypePhysicalVariation[100]; +private boolean[] scratchColumnTrackWasUsed = new boolean[100]; private final Set usedOutputColumns = new HashSet(); @@ -589,6 +591,9 @@ public class VectorizationContext { scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation)) { continue; } +if (dontReuseTrackedScratchColumns && scratchColumnTrackWasUsed[i]) { + continue; +} //Use i usedOutputColumns.add(i); return i; @@ -597,16 +602,19 @@ public class VectorizationContext { if (outputColCount < scratchVectorTypeNames.length) { int newIndex = outputColCount; scratchVectorTypeNames[outputColCount] = columnType; -scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; +scratchDataTypePhysicalVariations[outputColCount] = dataTypePhysicalVariation; 
+scratchColumnTrackWasUsed[outputColCount++] = true; usedOutputColumns.add(newIndex); return newIndex; } else { //Expand the array scratchVectorTypeNames = Arrays.copyOf(scratchVectorTypeNames, 2*outputColCount); scratchDataTypePhysicalVariations = Arrays.copyOf(scratchDataTypePhysicalVariations, 2*outputColCount); +
[12/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorization_0.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index fbcbd64..5e95f39 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -63,10 +63,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -119,10 +118,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -244,10 +243,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -300,10 +298,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -575,10 +572,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type:
[33/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index b075ecf..801948c 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -983,7 +983,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1153,7 +1153,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1 A masked pattern was here 12744278 500 652447 25 PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 4 
<- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +value expressions: _col1 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 5 +Map Operator Tree: +TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 12 Data size: 1128 Basic
[28/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index e33101c..0a8a8a8 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -271,8 +271,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_8: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_7: 2100 - RECORDS_OUT_OPERATOR_SEL_6: 2100 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_6: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 26 @@ -327,13 +327,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 8 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 8 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 8 - RECORDS_OUT_OPERATOR_SEL_9: 8 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1048576 ALLOCATED_USED_BYTES: 2731 @@ -367,13 +367,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 22 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 22 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 22 - RECORDS_OUT_OPERATOR_SEL_9: 22 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -405,13 +405,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 16 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 16 + 
RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 16 - RECORDS_OUT_OPERATOR_SEL_9: 16 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -443,13 +443,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 18 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 18 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 18 - RECORDS_OUT_OPERATOR_SEL_9: 18 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -487,7 +487,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1 RECORDS_OUT_OPERATOR_SEL_9: 1 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -519,13 +519,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -557,13 +557,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - 
RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -595,13 +595,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 1697 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 1697 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1697 - RECORDS_OUT_OPERATOR_SEL_9: 1697 -
[11/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index 6b25672..2f5eb26 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1_n14 POSTHOOK: Lineage: test1_n14.col_1 SCRIPT [] POSTHOOK: Lineage: test1_n14.key SCRIPT [] POSTHOOK: Lineage: test1_n14.value SCRIPT [] +col1 col2col3 PREHOOK: query: CREATE TABLE test2_n9 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2_n9 POSTHOOK: Lineage: test2_n9.col_2 SCRIPT [] POSTHOOK: Lineage: test2_n9.key SCRIPT [] POSTHOOK: Lineage: test2_n9.value SCRIPT [] -PREHOOK: query: EXPLAIN +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,9 +74,14 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map 
Join Operator condition map: @@ -77,12 +89,19 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -90,23 +109,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 +
[26/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 000..6ddcef6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1228 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum,
[38/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 000..586c850 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * An multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. 
+ */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { +random = new Random(82733); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; +random.nextBytes(key); +byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); + +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Second value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Third value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { +random = new Random(29383); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +int keyCount = 100 + random.nextInt(1000); +for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. 
+break; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); +} + +verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { +addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { +for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { +value = new byte[generateLargeCount() - 1]; + } else { +value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { +byte[] key; +while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. +
[45/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..5733688 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; @@ -55,14 +54,17 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTabl import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -124,6 +126,10 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +155,70 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + *The Big Table key columns are from the key expressions. + *The Big Table value columns are from the getExpr(posBigTable) expressions. + *Any calculations needed for those will be scratch columns. + * + *The Small Table key and value output columns are scratch columns. 
+ * + * Big Table Retain Column Map / TypeInfos: + *Any Big Table Batch columns that will be in the output result. + *0, 1, ore more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + *For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + *result but are needed for the Small Table output result, they are put in this mapping + *as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping + *For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + *output result from the Big Table key columns. The Big Table keys cannot be projected since + *on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping + *For FULL OUTER MapJoin, the mapping from any needed Small Table key
[27/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index ddcabd8..4c81131 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns:
[42/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 000..b9e86eb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Joiner; + +/** + * FULL OUTER MapJoin planning. 
+ */ +public class FullOuterMapJoinOptimization { + + FullOuterMapJoinOptimization() { + } + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { +int[][] filterMaps = mapJoinDesc.getFilterMap(); +if (filterMaps == null) { + return; +} +final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); +final int numAliases = mapJoinDesc.getExprs().size(); +List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); +for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { +int[] filterMap = filterMaps[pos]; +TableDesc tableDesc = valueFilteredTblDescs.get(pos); +Properties properties = tableDesc.getProperties(); +String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); +String columnNameDelimiter = +properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? +properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + +String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); +List columnNameList; +if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); +} else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); +} +List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); +String truncatedColumnNameProperty = +Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + +List columnTypeList; +if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); +} else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); +} +if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); +} +List truncatedColumnTypeList = +columnTypeList.subList(0, columnTypeList.size() - 1); +String truncatedColumnTypeProperty = +Joiner.on(",").join(truncatedColumnTypeList); + 
+properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); +properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } +} +mapJoinDesc.setFilterMap(null); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
[16/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_like_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 1a20a35..31b7326 100644 --- a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out index 630f3f7..8ac3a11 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out @@ -104,10 +104,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:varchar(10), 1:int, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index 45bfc6b..29c4bc1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -167,11 +167,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0] -valueColumnNums: [1] +partitionColumns: 0:int +valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -223,14 +223,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [0] -bigTableRetainedColumnNums: [0, 1] -bigTableValueColumnNums: [1] +bigTableKeyColumns: 0:int +bigTableRetainColumnNums: [1] +bigTableValueColumns: 1:string
[07/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 18e75aa..87f0ca8 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -289,74 +289,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 103val_103 val_103 103val_103 val_103 103val_103 val_103 103val_103 val_103 105val_105 val_105 +11 val_11 val_11 114val_114 val_114 116val_116 val_116 118val_118 val_118 @@ -398,6 +336,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 149val_149 val_149 149val_149 val_149 149val_149 val_149 
+15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 150val_150 val_150 152val_152 val_152 152val_152 val_152 @@ -435,6 +377,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 169val_169 val_169 169val_169 val_169 169val_169 val_169 +17 val_17 val_17 170val_170 val_170 172val_172 val_172 172val_172 val_172 @@ -461,10 +404,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 187val_187 val_187 187val_187 val_187 189val_189 val_189 +19 val_19 val_19 190val_190 val_190 192val_192 val_192 194val_194 val_194 196val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 200val_200 val_200 200val_200 val_200 200val_200 val_200 @@ -511,6 +457,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 239val_239 val_239 239val_239 val_239 239val_239 val_239 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 242val_242 val_242 242val_242 val_242 242val_242 val_242 @@ -522,6 +472,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 255val_255 val_255 255val_255 val_255 257val_257 val_257 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 260val_260 val_260 262val_262 val_262 266val_266 val_266 @@ -551,6 +505,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 277val_277 val_277 277val_277 val_277 277val_277 val_277 +28 val_28 val_28 280val_280 val_280 280val_280 val_280 280val_280 val_280 @@ -612,11 +567,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 327val_327 val_327 327val_327 val_327 327val_327 val_327 +33 val_33 val_33 332val_332 val_332 336val_336 val_336 338val_338 val_338 341val_341 val_341 345val_345 val_345 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 356val_356 val_356 365val_365 val_365 367val_367 val_367 @@ -632,6 +597,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 369val_369 val_369 369val_369 val_369 369val_369 val_369 +37 val_37 val_37 +37 val_37 val_37 +37 
val_37 val_37 +37 val_37 val_37 374val_374 val_374 378val_378 val_378 389val_389 val_389 @@ -646,6
[35/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out -- diff --git a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out index 6b85e13..2288b4b 100644 --- a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out @@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:tbl_n1 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:tbl_n1 + TableScan +alias: tbl_n1 +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: n (type: bigint), t (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 +1 + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: tbl_n1 +filterExpr: (n = 1L) (type: boolean) +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + 
predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: t (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: +0 +1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: +Map Reduce Local Work + + Stage: Stage-0 +Fetch Operator + limit: -1 + Processor Tree: +ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1 A masked pattern was here 1 one truetrue Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from
[37/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin2.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q b/ql/src/test/queries/clientpositive/mapjoin2.q index e194bd0..014dabe 100644 --- a/ql/src/test/queries/clientpositive/mapjoin2.q +++ b/ql/src/test/queries/clientpositive/mapjoin2.q @@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string); insert into tbl_n1 values (1, 'one'); insert into tbl_n1 values(2, 'two'); +explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin46.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin46.q b/ql/src/test/queries/clientpositive/mapjoin46.q index 9de7113..81f9610 100644 --- a/ql/src/test/queries/clientpositive/mapjoin46.q +++ b/ql/src/test/queries/clientpositive/mapjoin46.q @@ -3,6 +3,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING); INSERT INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * 
+FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN
[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 12db036..b8d76ed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4 A masked pattern was here 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 - A masked pattern was here -13630578 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: 
VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: +0 +1 {true} + keys: +0 +1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet:
[06/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join33.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 13cd446..09198b0 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -393,88 +393,88 @@ POSTHOOK: query: select * from dest_j1_n7 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n7 A masked pattern was here -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 +146val_146 +146val_146 +146val_146 +146val_146 150val_150 val_150 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 
val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 +213val_213 +213val_213 +213val_213 +213val_213 +238val_238 +238val_238 +238val_238 +238val_238 +255val_255 +255val_255 +255val_255 +255val_255 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +278val_278 +278val_278 +278val_278 +278val_278 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join6.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 6075e5f..caa0849 100644 ---
[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 4b2f06f..9238bc7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc_n0 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -72,6 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -86,6 +88,12 @@ STAGE PLANS: allNative: 
true usesVectorUDFAdaptor: false vectorized: true +rowBatchContext: +dataColumnCount: 2 +includeColumns: [0] +dataColumns: key:string, value:string +partitionColumnCount: 0 +scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -94,6 +102,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -116,9 +125,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: +bigTableKeyColumns: 0:string +bigTableRetainColumnNums: [0, 1] +bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +nonOuterSmallTableKeyMapping: [] +projectedOutput: 0:string, 1:string +hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -141,6 +156,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +valueColumns: 0:bigint
[30/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index 07c4a62..95d3611 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -1633,7 +1633,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1746,7 +1746,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1857,7 +1857,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1970,7 +1970,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -2148,7 +2148,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index c0c9f95..c3b1eb7 100644 --- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -230,6 +230,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -318,6 +319,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE 
+DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -407,6 +409,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -495,6 +498,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/llap_acid.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/llap_acid.q.out b/ql/src/test/results/clientpositive/llap/llap_acid.q.out index 0d1a331..d441ab0 100644 --- a/ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [4, 0] +keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS
[47/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a37827ec Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a37827ec Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a37827ec Branch: refs/heads/master Commit: a37827ecd557c7f7d69f3b2ccdbf6535908b1461 Parents: 45163ee Author: Matt McCline Authored: Sun Sep 16 09:04:59 2018 -0500 Committer: Matt McCline Committed: Sun Sep 16 09:04:59 2018 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 10 +- data/files/fullouter_long_big_1a.txt| 11 + data/files/fullouter_long_big_1a_nonull.txt | 10 + data/files/fullouter_long_big_1b.txt| 13 + data/files/fullouter_long_big_1c.txt| 11 + data/files/fullouter_long_big_1d.txt| 12 + data/files/fullouter_long_small_1a.txt | 54 + data/files/fullouter_long_small_1a_nonull.txt | 51 + data/files/fullouter_long_small_1b.txt | 72 + data/files/fullouter_long_small_1c.txt | 81 + data/files/fullouter_long_small_1d.txt | 39 + data/files/fullouter_multikey_big_1a.txt| 13 + data/files/fullouter_multikey_big_1a_nonull.txt | 10 + data/files/fullouter_multikey_big_1b.txt| 17 + data/files/fullouter_multikey_small_1a.txt | 92 + .../fullouter_multikey_small_1a_nonull.txt | 90 + data/files/fullouter_multikey_small_1b.txt | 118 + data/files/fullouter_string_big_1a.txt | 13 + data/files/fullouter_string_big_1a_nonull.txt | 12 + data/files/fullouter_string_big_1a_old.txt | 13 + data/files/fullouter_string_small_1a.txt| 38 + data/files/fullouter_string_small_1a_nonull.txt | 35 + data/files/fullouter_string_small_1a_old.txt| 38 + .../vectorization/mapjoin/AbstractMapJoin.java | 66 +- .../mapjoin/MapJoinMultiKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneLongKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneStringKeyBenchBase.java |3 +- .../test/resources/testconfiguration.properties |5 + .../hadoop/hive/ql/exec/CommonJoinOperator.java | 11 +- 
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 187 +- .../apache/hadoop/hive/ql/exec/JoinUtil.java|2 +- .../hadoop/hive/ql/exec/MapJoinOperator.java| 199 +- .../apache/hadoop/hive/ql/exec/Operator.java| 86 +- .../hadoop/hive/ql/exec/TableScanOperator.java |6 +- .../persistence/BytesBytesMultiHashMap.java | 71 +- .../ql/exec/persistence/HashMapWrapper.java | 22 + .../persistence/HybridHashTableContainer.java | 118 +- .../persistence/MapJoinBytesTableContainer.java | 199 +- .../hive/ql/exec/persistence/MapJoinKey.java| 15 + .../persistence/MapJoinObjectSerDeContext.java | 17 + .../exec/persistence/MapJoinTableContainer.java | 61 +- .../hive/ql/exec/persistence/MatchTracker.java | 154 + .../ReusableGetAdaptorDirectAccess.java |4 +- .../ql/exec/persistence/UnwrapRowContainer.java | 12 +- .../hive/ql/exec/tez/ReduceRecordProcessor.java |2 +- .../vector/VectorAppMasterEventOperator.java|2 +- .../hive/ql/exec/vector/VectorAssignRow.java| 12 + .../hive/ql/exec/vector/VectorCopyRow.java | 21 +- .../ql/exec/vector/VectorDeserializeRow.java| 32 + .../ql/exec/vector/VectorFilterOperator.java|2 +- .../ql/exec/vector/VectorGroupByOperator.java |2 +- .../ql/exec/vector/VectorLimitOperator.java |2 +- .../exec/vector/VectorMapJoinBaseOperator.java | 23 +- .../ql/exec/vector/VectorMapJoinOperator.java | 23 +- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorSelectOperator.java|4 +- .../ql/exec/vector/VectorTopNKeyOperator.java |2 +- .../mapjoin/VectorMapJoinCommonOperator.java| 409 +- .../VectorMapJoinFullOuterLongOperator.java | 68 + .../VectorMapJoinFullOuterMultiKeyOperator.java | 71 + .../VectorMapJoinFullOuterStringOperator.java | 71 + .../VectorMapJoinGenerateResultOperator.java| 163 +- ...pJoinInnerBigOnlyGenerateResultOperator.java | 18 +- .../VectorMapJoinInnerBigOnlyLongOperator.java | 63 +- ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 60 +- ...VectorMapJoinInnerBigOnlyStringOperator.java | 50 +- 
...ectorMapJoinInnerGenerateResultOperator.java | 22 +- .../mapjoin/VectorMapJoinInnerLongOperator.java | 64 +- .../VectorMapJoinInnerMultiKeyOperator.java | 58 +- .../VectorMapJoinInnerStringOperator.java | 50 +- ...orMapJoinLeftSemiGenerateResultOperator.java | 12 +- .../VectorMapJoinLeftSemiLongOperator.java | 64 +- .../VectorMapJoinLeftSemiMultiKeyOperator.java | 58 +- .../VectorMapJoinLeftSemiStringOperator.java| 50 +-
[04/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 352e74f..5954629 100644 --- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -89,10 +89,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -372,10 +371,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -424,10 +422,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -575,10 +572,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -627,10 +623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
[34/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 194fc5d..a160428 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -188,6 +188,139 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n2 A masked pattern was here 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 4 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, 
_col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Merge Join Operator +condition map: + Full Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col1 (type: int) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: sum(_col0) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) +Reducer 3 +Execution mode: vectorized, llap +Reduce Operator Tree: + Group By Operator +aggregations: sum(VALUE._col0) +mode: mergepartial +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat +
[09/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index febb6c7..b6f8b19 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -234,12 +234,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -340,12 +340,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -430,10 +430,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product @@ -528,10 +528,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -540,6 +536,10 @@ NULL NULLNoneNULLNULLNULL 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -635,11 +635,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -648,6 +643,11 @@ NULL NULLNone102 2 Del 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNone102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULLNone102 2 Del Warning:
[36/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_join30.q -- diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q index 9672a47..74c4433 100644 --- a/ql/src/test/queries/clientpositive/vector_join30.q +++ b/ql/src/test/queries/clientpositive/vector_join30.q @@ -11,7 +11,7 @@ SET hive.auto.convert.join.noconditionaltask.size=10; CREATE TABLE orcsrc_n0 STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -19,14 +19,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -34,116 +34,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT 
orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- 
(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); +
[23/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 000..52ca0fb --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,3923 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] 
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE
[31/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..c387af5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3139 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: 
fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here
[14/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6637d33..ef1e653 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -43,11 +43,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [2] -valueColumnNums: [5, 7] +partitionColumns: 2:string +valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -279,11 +279,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution 
mode: vectorized, llap @@ -339,11 +339,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -529,11 +529,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode:
[39/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..a37b5a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -25,7 +25,6 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; @@ -33,9 +32,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -48,17 +45,23 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import 
org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -69,7 +72,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; @@ -86,14 +88,13 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import
[08/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index 1eab962..a7fdfda 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE(((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -83,7 +83,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -135,12 +134,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true @@ -169,7 +162,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:_col0:timestamp, 1:_col1:float, 2:_col2:string, 3:_col3:boolean, 4:_col4:double, 5:_col5:double, 6:_col6:double, 7:_col7:double, 8:_col8:float, 9:_col9:float, 10:_col10:float, 11:_col11:float, 12:_col12:double, 13:_col13:double, 14:_col14:bigint, 15:_col15:double, 16:_col16:double, 17:_col17:double, 18:_col18:double, 19:_col19:double, 20:_col20:double, 21:_col21:double] Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: @@ -190,12 +182,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 22 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - dataColumns: _col0:timestamp, _col1:float, _col2:string, _col3:boolean, _col4:double, _col5:double, _col6:double, _col7:double, _col8:float, _col9:float, _col10:float, _col11:float, _col12:double, _col13:double, _col14:bigint, _col15:double, _col16:double, _col17:double, _col18:double, _col19:double, _col20:double, _col21:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 2d306cf..6974ee8 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cboolean1, cdouble, 
@@ -29,7 +29,7 @@ WHERE(((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN
[40/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..93fdb28 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,20 +28,23 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { +DYNAMIC_PARTITION_HASH_JOIN, +SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +86,103 @@ public class MapJoinTestDescription extends DescriptionTest { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. 
- public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. - public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector 
smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { +this( +hiveConf, +vectorMapJoinVariation, +/* bigTableColumnNames */ null, +bigTableTypeInfos, +bigTableKeyColumnNums, +smallTableValueTypeInfos, +smallTableRetainKeyColumnNums, +
[22/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 5c0d6bb..6eaf7ad 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -72,10 +72,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -209,10 +209,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -372,10 +372,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -632,11 +632,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [4] -valueColumnNums: [3] +partitionColumns: 4:double +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -692,11 +692,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization:
[15/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 5e26637..0f40378 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -353,11 +352,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -517,10 +516,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [2] +valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -576,11 +575,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -747,10 +746,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0] +keyColumns: 0:decimal(15,2) native: true
[13/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index c07f4d9..6660d73 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -62,10 +62,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1] +valueColumns: 1:bigint Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -129,12 +129,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:int, 1:bigint keyExpressions: ConstantVectorExpression(val 0) -> 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [3] -valueColumnNums: [] +partitionColumns: 3:int Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -304,10 +303,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator 
-keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2] +valueColumns: 1:string, 2:bigint Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap @@ -363,11 +362,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2] + keyColumns: 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:string + valueColumns: 0:int Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -540,10 +539,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true
[43/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..8dce5b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public class VectorMapJoinFastLongHashSet return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + +// Ignore NULL keys (HashSet not used for FULL OUTER). +adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public class VectorMapJoinFastLongHashSet optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); -long existance = findReadSlot(key, hashCode); +int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; -if (existance == -1) { +if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { +matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,13 @@ public class VectorMapJoinFastLongHashSet } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean isFullOuter, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { -super(minMaxEnabled, isOuterJoin, hashTableKeyType, +super( +isFullOuter, +minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..03ef249 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ public abstract class VectorMapJoinFastLongHashTable private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public abstract class VectorMapJoinFastLongHashTable return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, 
keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { -return; +return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public abstract class VectorMapJoinFastLongHashTable keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); +return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ public abstract class VectorMapJoinFastLongHashTable largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; -// LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot =
[46/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..114cea9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashP import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import 
org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +112,23 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + /* + * FULL OUTER MapJoin members. + */ + protected transient boolean isFullOuterMapJoin; // Are we doing a FULL OUTER MapJoin? + + protected transient int fullOuterBigTableRetainSize; + // The number of Big Table columns being + // retained in the output result for + // FULL OUTER MapJoin. + + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared among vertces, we require this non-shared object + * for our vertex (i.e. operator private) key match tracking. + */ + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. 
private transient int numBuckets = -1; @@ -177,6 +201,8 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem hybridMapJoinLeftover = false; firstSmallTable = null; +doFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +278,24 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem } } + /* + * Do initialization for FULL OUTER MapJoin. + * + * Currently, we do not support FULL OUTER MapJoin for N-way. + */ + private void doFullOuterMapJoinInit() { + +// This will be set during the first process call or during closeOp if no rows
[21/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index bbfba28..07c4eed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -158,10 +157,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -280,10 +279,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: 
className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -350,10 +348,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -499,10 +497,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No
[19/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 7c1780b..a49e8e2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1 A masked pattern was here 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 - A masked pattern was here -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: 
+Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator +Filter Vectorization: +className: VectorFilterOperator +native: true +predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Reduce Sink Vectorization: +className: VectorReduceSinkEmptyKeyOperator +native: true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true +
[10/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 8efe78d..c9b9e81 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -146,14 +146,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -217,10 +219,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -281,10 
+282,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -351,14 +351,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +
[29/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index 52eb609..d0d9c87 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -128,14 +128,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -239,12 +239,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -344,12 +344,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -438,10 +438,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product @@ -535,18 +535,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012 Car 104 3 Fli 1012 Car 103 2 Ema +1012 Car 104 3 Fli +1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -644,19 +644,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012
[17/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 006a51a..960f5f5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,18 +128,100 @@ POSTHOOK: query: select * from t4_n19 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4_n19 A masked pattern was here -PREHOOK: query: explain vectorization only summary - +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary - +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: +0 key (type: int) +1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: +1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator +key expressions: _col0 (type: int), _col1 (type: string) +sort order: ++ +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Map 3 +Map Operator Tree: +TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator +keys: _col0 (type: int) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Select Operator +expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +
[05/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 903e74b..a445b44 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE(((cbigint > -23) OR (cfloat = cdouble ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -69,7 +69,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,10 +90,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 
(type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized @@ -107,27 +104,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 12 -includeColumns: [0, 1, 2, 3, 4, 5, 6, 8] -dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean -partitionColumnCount: 0 -scratchColumnTypeNames: [decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -reduceColumnNullOrder: zz -reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 14 -dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double -partitionColumnCount: 0 -scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type:
[24/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..da513db --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: 
Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A
[03/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union14.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out b/ql/src/test/results/clientpositive/spark/union14.q.out index 6a95e4a..49d6cb1 100644 --- a/ql/src/test/results/clientpositive/spark/union14.q.out +++ b/ql/src/test/results/clientpositive/spark/union14.q.out @@ -126,20 +126,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union7.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out b/ql/src/test/results/clientpositive/spark/union7.q.out index 549075c..8556f84 100644 --- a/ql/src/test/results/clientpositive/spark/union7.q.out +++ b/ql/src/test/results/clientpositive/spark/union7.q.out @@ -122,20 +122,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_null.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_null.q.out b/ql/src/test/results/clientpositive/spark/union_null.q.out index d37adbb..696641c 100644 --- a/ql/src/test/results/clientpositive/spark/union_null.q.out +++ b/ql/src/test/results/clientpositive/spark/union_null.q.out @@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src A masked pattern was here -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 
+val_0 +val_0 +val_10 +val_100 PREHOOK: query: select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a PREHOOK: type: QUERY PREHOOK: Input: default@src1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_view.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out b/ql/src/test/results/clientpositive/spark/union_view.q.out index 591ebfa..97a5bef 100644 --- a/ql/src/test/results/clientpositive/spark/union_view.q.out +++ b/ql/src/test/results/clientpositive/spark/union_view.q.out @@ -483,10 +483,10 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -86 val_86 2 -86 val_86 2 86 val_86 3 86 val_86 3 +86 val_86 2 +86 val_86 2 86 val_86 1 STAGE DEPENDENCIES: Stage-1 is a root stage http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index e3d815b..6e33ead 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] +valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 
8:decimal(33,14), 9:bigint Statistics: Num rows: 12289
[44/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe // @Override - public void process(Object row, int tag) throws HiveException { - -try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { -// Our one time process method initialization. -commonSetup(batch); + protected void commonSetup() throws HiveException { +super.commonSetup(); -/* - * Initialize Single-Column String members for this specialized class. - */ - -singleJoinColumn = bigTableKeyColumnMap[0]; +/* + * Initialize Single-Column String members for this specialized class. + */ -needCommonSetup = false; - } +singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { -// Setup our hash table specialization. It will be the first time the process -// method is called, or after a Hybrid Grace reload. 
+ @Override + public void hashTableSetup() throws HiveException { +super.hashTableSetup(); -/* - * Get our Single-Column String hash set information for this specialized class. - */ +/* + * Get our Single-Column String hash set information for this specialized class. + */ -hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; +hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } -needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; +try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { -if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); -} return; } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 2e5c568..61bcbf0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,13 +24,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import
[25/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 000..169d94c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: 
fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was
[01/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 45163ee4c -> a37827ecd http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out new file mode 100644 index 000..53c32ff --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out @@ -0,0 +1,2050 @@ +PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1 +POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.col_1 SCRIPT [] +POSTHOOK: Lineage: test1.key SCRIPT [] +POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2col3 +PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@test2 +POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test2 +POSTHOOK: Lineage: test2.col_2 SCRIPT [] +POSTHOOK: Lineage: test2.key SCRIPT [] +POSTHOOK: Lineage: test2.value SCRIPT [] +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:test2 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:test2 + TableScan +alias: test2 +Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: test1 +Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE +TableScan Vectorization: +native: true +Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Left 
Outer Join 0 to 1 +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) +Map Join Vectorization: +bigTableKeyExpressions: col 1:int +className: VectorMapJoinOperator +native: false +nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin
hive git commit: Missed files: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL
Repository: hive Updated Branches: refs/heads/master be1130d56 -> 37120b877 Missed files: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/37120b87 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/37120b87 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/37120b87 Branch: refs/heads/master Commit: 37120b8c40baf44040fc7778d129cd8a5824 Parents: be1130d Author: Matt McCline Authored: Sat Sep 15 18:43:44 2018 -0500 Committer: Matt McCline Committed: Sat Sep 15 18:44:22 2018 -0500 -- .../test/resources/testconfiguration.properties| 1 + .../apache/hadoop/hive/metastore/ColumnType.java | 17 +++-- 2 files changed, 16 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/37120b87/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 65ae6bb..0e071fb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -688,6 +688,7 @@ minillaplocal.query.files=\ schema_evol_text_vecrow_part_all_primitive.q,\ schema_evol_text_vecrow_table_llap_io.q,\ schema_evol_text_vecrow_table.q,\ + schema_evol_undecorated.q,\ selectDistinctStar.q,\ semijoin.q,\ semijoin6.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/37120b87/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java -- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java index d5dea4d..39d2b2f 100644 --- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hive.metastore.utils.StringUtils; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -144,6 +145,14 @@ public class ColumnType { NumericCastOrder.put(DOUBLE_TYPE_NAME, 7); } + private static final Set decoratedTypeNames = new HashSet<>(); + + static { +decoratedTypeNames.add("char"); +decoratedTypeNames.add("decimal"); +decoratedTypeNames.add("varchar"); + } + private static final Map alternateTypeNames = new HashMap<>(); static { @@ -199,6 +208,9 @@ public class ColumnType { public static String getTypeName(String typeString) { if (typeString == null) return null; String protoType = typeString.toLowerCase().split("\\W")[0]; +if (decoratedTypeNames.contains(protoType)) { + return protoType; +} String realType = alternateTypeNames.get(protoType); return realType == null ? protoType : realType; } @@ -217,8 +229,9 @@ public class ColumnType { return NumericCastOrder.get(from) < NumericCastOrder.get(to); } - // Allow string to double conversion - if (StringTypes.contains(from) && to.equals(DOUBLE_TYPE_NAME)) return true; + // Allow string to double/decimal conversion + if (StringTypes.contains(from) && + (to.equals(DOUBLE_TYPE_NAME) || to.equals(DECIMAL_TYPE_NAME))) return true; // Void can go to anything if (from.equals(VOID_TYPE_NAME)) return true;
hive git commit: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL (Matt McCline, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 2c8e67942 -> e041c9ece HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL (Matt McCline, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e041c9ec Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e041c9ec Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e041c9ec Branch: refs/heads/master Commit: e041c9ecec9d546cb790d2cf8cf3ea10eeeab202 Parents: 2c8e679 Author: Matt McCline Authored: Sat Sep 15 15:06:19 2018 -0500 Committer: Matt McCline Committed: Sat Sep 15 15:06:19 2018 -0500 -- .../clientpositive/schema_evol_undecorated.q| 14 + .../llap/schema_evol_undecorated.q.out | 64 2 files changed, 78 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e041c9ec/ql/src/test/queries/clientpositive/schema_evol_undecorated.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_undecorated.q b/ql/src/test/queries/clientpositive/schema_evol_undecorated.q new file mode 100644 index 000..1fe5c08 --- /dev/null +++ b/ql/src/test/queries/clientpositive/schema_evol_undecorated.q @@ -0,0 +1,14 @@ + +set hive.metastore.disallow.incompatible.col.type.changes=true; + +create external table new_char_decimal (c1 char(20)); +alter table new_char_decimal change c1 c1 decimal(31,0); + +create external table new_varchar_decimal (c1 varchar(25)); +alter table new_varchar_decimal change c1 c1 decimal(12,5); + +create external table new_char_double (c1 char(20)); +alter table new_char_double change c1 c1 double; + +create external table new_varchar_double (c1 varchar(25)); +alter table new_varchar_double change c1 c1 double; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/e041c9ec/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out -- diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out new file mode 100644 index 000..2cbdb4c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: create external table new_char_decimal (c1 char(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@new_char_decimal +POSTHOOK: query: create external table new_char_decimal (c1 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_char_decimal +PREHOOK: query: alter table new_char_decimal change c1 c1 decimal(31,0) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_char_decimal +PREHOOK: Output: default@new_char_decimal +POSTHOOK: query: alter table new_char_decimal change c1 c1 decimal(31,0) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_char_decimal +POSTHOOK: Output: default@new_char_decimal +PREHOOK: query: create external table new_varchar_decimal (c1 varchar(25)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@new_varchar_decimal +POSTHOOK: query: create external table new_varchar_decimal (c1 varchar(25)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_varchar_decimal +PREHOOK: query: alter table new_varchar_decimal change c1 c1 decimal(12,5) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_varchar_decimal +PREHOOK: Output: default@new_varchar_decimal +POSTHOOK: query: alter table new_varchar_decimal change c1 c1 decimal(12,5) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_varchar_decimal +POSTHOOK: Output: default@new_varchar_decimal +PREHOOK: query: create external table new_char_double (c1 char(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@new_char_double +POSTHOOK: query: create external table new_char_double (c1 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_char_double +PREHOOK: query: alter table new_char_double change c1 c1 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_char_double +PREHOOK: Output: default@new_char_double +POSTHOOK: query: alter table new_char_double change c1 c1 double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_char_double +POSTHOOK: Output: default@new_char_double +PREHOOK: query: create external table new_varchar_double (c1 varchar(25)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output:
[2/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff98a30a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff98a30a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff98a30a Branch: refs/heads/master Commit: ff98a30ab49c4eafe53974e03c9dd205c14ffee7 Parents: 494b771 Author: Matt McCline Authored: Mon Sep 10 04:24:35 2018 -0500 Committer: Matt McCline Committed: Mon Sep 10 04:24:35 2018 -0500 -- .../fast/VectorMapJoinFastBytesHashKeyRef.java | 178 ++ .../fast/VectorMapJoinFastBytesHashMap.java | 141 +++-- .../VectorMapJoinFastBytesHashMapStore.java | 559 +++ .../VectorMapJoinFastBytesHashMultiSet.java | 132 - ...VectorMapJoinFastBytesHashMultiSetStore.java | 280 ++ .../fast/VectorMapJoinFastBytesHashSet.java | 124 +++- .../VectorMapJoinFastBytesHashSetStore.java | 219 .../fast/VectorMapJoinFastBytesHashTable.java | 148 ++--- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 6 +- .../fast/TestVectorMapJoinFastBytesHashMap.java | 3 + .../fast/TestVectorMapJoinFastLongHashMap.java | 3 + .../clientpositive/bucket_map_join_tez2.q | 2 +- .../test/queries/clientpositive/tez_smb_main.q | 3 +- .../results/clientpositive/llap/orc_llap.q.out | 59 +- .../apache/hadoop/hive/serde2/WriteBuffers.java | 53 ++ 15 files changed, 1661 insertions(+), 249 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java new file mode 100644 index 000..dbfe518 --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.serde2.WriteBuffers; +// import com.google.common.base.Preconditions; + +public class VectorMapJoinFastBytesHashKeyRef { + + public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers writeBuffers, WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +writeBuffers.setReadPoint(absoluteOffset, readPos); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +if (!isKeyLengthSmall) { + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); +} + +if (actualKeyLength != keyLength) { + return false; +} + +// Our reading was positioned to the key. 
+if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + return false; +} + +return true; + } + + public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, + WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +final long keyAbsoluteOffset; +if (!isKeyLengthSmall) { + + // Position after next relative offset (fixed length) to the key. + writeBuffers.setReadPoint(absoluteOffset, readPos); + + // And, if current value is big we must
[1/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
Repository: hive Updated Branches: refs/heads/master 494b771ac -> ff98a30ab http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java index 17d4bdb..79462a0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java @@ -57,6 +57,11 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { memSize += (2 * jdm.primitive1()); return memSize; } +public void set(Position pos) { + buffer = pos.buffer; + bufferIndex = pos.bufferIndex; + offset = pos.offset; +} } Position writePos = new Position(); // Position where we'd write @@ -552,6 +557,21 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { return v; } + public long readNByteLong(int bytes, Position readPos) { +long v = 0; +if (isAllInOneReadBuffer(bytes, readPos)) { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += bytes; +} else { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return v; + } + public void writeFiveByteULong(long offset, long v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; setWritePoint(offset); @@ -574,10 +594,43 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { writePos.offset = prevOffset; } + public void writeFiveByteULong(long v) { +if (isAllInOneWriteBuffer(5)) { + writePos.buffer[writePos.offset] = (byte)(v >>> 32); + writePos.buffer[writePos.offset + 1] = (byte)(v >>> 24); + writePos.buffer[writePos.offset + 2] = (byte)(v >>> 16); + writePos.buffer[writePos.offset + 3] = (byte)(v >>> 8); + writePos.buffer[writePos.offset + 4] = (byte)(v); + writePos.offset += 5; +} else { + 
write((byte)(v >>> 32)); + write((byte)(v >>> 24)); + write((byte)(v >>> 16)); + write((byte)(v >>> 8)); + write((byte)(v)); +} + } + public int readInt(long offset) { return (int)unsafeReadNByteLong(offset, 4); } + public int readInt(long offset, Position readPos) { +setReadPoint(offset, readPos); +long v = 0; +if (isAllInOneReadBuffer(4, readPos)) { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += 4; +} else { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return (int) v; + } + @Override public void writeInt(long offset, int v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset;
hive git commit: HIVE-20496: Vectorization: Vectorized PTF IllegalStateException (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 804535275 -> a4dd84b38 HIVE-20496: Vectorization: Vectorized PTF IllegalStateException (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4dd84b3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4dd84b3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4dd84b3 Branch: refs/heads/master Commit: a4dd84b38083864edc2e09e52e208827b82e82cd Parents: 8045352 Author: Matt McCline Authored: Mon Sep 3 05:42:29 2018 -0500 Committer: Matt McCline Committed: Mon Sep 3 05:42:29 2018 -0500 -- .../org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a4dd84b3/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 70d6468..5698639 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -451,6 +451,11 @@ public class ReduceRecordSource implements RecordSource { } reducer.process(batch, tag); + // Do the non-column batch reset logic. + batch.selectedInUse = false; + batch.size = 0; + batch.endOfFile = false; + // Reset just the value columns and value buffer. for (int i = firstValueColumnOffset; i < batch.numCols; i++) { // Note that reset also resets the data buffer for bytes column vectors.
[1/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master e2142b206 -> fa36381fa http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/test/results/clientpositive/vector_case_when_1.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out index 01fc3ce..88cba90 100644 --- a/ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -516,7 +516,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 40, 42, 45, 46] - selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 
17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(co l 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 
31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:dec imal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col
[5/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fa36381f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fa36381f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fa36381f Branch: refs/heads/master Commit: fa36381faad40576f62e2ac925ef2976efecd8b6 Parents: e2142b2 Author: Matt McCline Authored: Sat Aug 25 09:21:25 2018 -0700 Committer: Matt McCline Committed: Sat Aug 25 09:21:25 2018 -0700 -- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorizationContext.java| 46 +- .../exec/vector/VectorizationContext.java.orig | 3771 -- .../expressions/CastStringGroupToString.java| 40 - .../ql/exec/vector/expressions/VectorElt.java | 168 +- .../VectorExpressionWriterFactory.java | 26 + .../ql/exec/vector/TestVectorRowObject.java |3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +- .../ql/exec/vector/VectorRandomRowSource.java | 67 +- .../hive/ql/exec/vector/VectorVerifyFast.java |6 +- .../aggregation/TestVectorAggregation.java |9 +- .../expressions/TestVectorArithmetic.java | 14 +- .../vector/expressions/TestVectorBetweenIn.java | 38 +- .../expressions/TestVectorCastStatement.java| 11 +- .../expressions/TestVectorCoalesceElt.java | 87 +- .../expressions/TestVectorDateAddSub.java | 10 +- .../vector/expressions/TestVectorDateDiff.java |9 +- .../expressions/TestVectorFilterCompare.java| 12 +- .../expressions/TestVectorIfStatement.java |3 +- .../vector/expressions/TestVectorIndex.java |5 +- .../vector/expressions/TestVectorNegative.java | 21 +- .../exec/vector/expressions/TestVectorNull.java | 14 +- .../expressions/TestVectorStringConcat.java |3 +- .../expressions/TestVectorStringUnary.java |3 +- .../expressions/TestVectorStructField.java | 370 ++ .../vector/expressions/TestVectorSubStr.java|3 +- 
.../expressions/TestVectorTimestampExtract.java |3 +- .../fast/TestVectorMapJoinFastRowHashMap.java | 101 +- .../clientpositive/query_result_fileformat.q|4 +- .../llap/vector_case_when_1.q.out |8 +- .../llap/vector_char_mapjoin1.q.out |1 - .../clientpositive/llap/vector_udf1.q.out | 18 +- .../clientpositive/llap/vectorized_casts.q.out |6 +- .../query_result_fileformat.q.out | 76 +- .../clientpositive/vector_case_when_1.q.out |8 +- .../clientpositive/vector_char_mapjoin1.q.out |2 +- .../clientpositive/vectorized_casts.q.out |6 +- .../hadoop/hive/serde2/RandomTypeUtil.java | 29 + 38 files changed, 1059 insertions(+), 4081 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index c13510e..07a6e9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -131,7 +131,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator List keyDesc = desc.getKeys().get(posBigTable); keyExpressions = vContext.getVectorExpressions(keyDesc); -keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc); +keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions); Map> exprs = desc.getExprs(); bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index b7feb1c..57f7c01 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1806,6 +1806,25 @@ public class VectorizationContext { return vectorExpression; } + public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs) + throws HiveException{ +if (vecExprs == null) { +
[2/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java new file mode 100644 index 000..5062997 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.lang.reflect.Constructor; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorStructField { + + @Test + public void testStructField() throws Exception { +Random random = new Random(7743); + +for (int i = 0; i < 5; i++) { + doStructFieldTests(random); +} + } + + public enum StructFieldTestMode { +ROW_MODE, +VECTOR_EXPRESSION; + +static final int count = values().length; + } + + private void doStructFieldTests(Random random) throws Exception { +String structTypeName = +VectorRandomRowSource.getDecoratedTypeName( +random, "struct", SupportedTypes.ALL, /* allowedTypeNameSet */ null, +/* depth */ 0, /* maxDepth */ 2); +StructTypeInfo structTypeInfo = +
[3/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java deleted file mode 100644 index 8232e67..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; - -// cast string group to string (varchar to string, etc.) 
-public class CastStringGroupToString extends StringUnaryUDFDirect { - - private static final long serialVersionUID = 1L; - - public CastStringGroupToString() { -super(); - } - - public CastStringGroupToString(int inputColumn, int outputColumnNum) { -super(inputColumn, outputColumnNum); - } - - @Override - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { -outV.setVal(i, vector[i], start[i], length[i]); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 00e529d..75e60eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -66,53 +66,157 @@ public class VectorElt extends VectorExpression { outputVector.init(); -outputVector.noNulls = false; outputVector.isRepeating = false; +final int limit = inputColumns.length; LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]]; +boolean[] inputIndexIsNull = inputIndexVector.isNull; long[] indexVector = inputIndexVector.vector; if (inputIndexVector.isRepeating) { - int index = (int)indexVector[0]; - if (index > 0 && index < inputColumns.length) { -BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; -if (cv.isRepeating) { - outputVector.setElement(0, 0, cv); - outputVector.isRepeating = true; -} else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { -int i = sel[j]; -outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]); + if (inputIndexVector.noNulls || !inputIndexIsNull[0]) { +int repeatedIndex = (int) indexVector[0]; +if (repeatedIndex > 0 && repeatedIndex < limit) { + BytesColumnVector cv = (BytesColumnVector) 
batch.cols[inputColumns[repeatedIndex]]; + if (cv.isRepeating) { +outputVector.isNull[0] = false; +outputVector.setElement(0, 0, cv); +outputVector.isRepeating = true; + } else if (cv.noNulls) { +if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { +int i = sel[j]; +outputVector.isNull[i] = false; +outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } +} else { + for (int i = 0; i != n; i++) { +outputVector.isNull[i] = false; +outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } +} + } else { +if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { +int i = sel[j]; +if (!cv.isNull[i]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); +} else { + outputVector.isNull[i] = true; +
[4/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig deleted file mode 100644 index 20cc894..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig +++ /dev/null @@ -1,3771 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.lang.reflect.Constructor; -import java.nio.charset.StandardCharsets; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.common.type.Date; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; -import org.apache.hadoop.hive.ql.exec.vector.expressions.*; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.udf.*; -import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import
hive git commit: HIVE-20339: Vectorization: Lift unneeded restriction causing some PTF with RANK not to be vectorized (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master b5578eb08 -> e2142b206 HIVE-20339: Vectorization: Lift unneeded restriction causing some PTF with RANK not to be vectorized (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2142b20 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2142b20 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2142b20 Branch: refs/heads/master Commit: e2142b20660c3582bc09c87f67c1d32c201952c3 Parents: b5578eb Author: Matt McCline Authored: Fri Aug 24 22:30:07 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 22:30:07 2018 -0700 -- .../exec/vector/ptf/VectorPTFEvaluatorBase.java | 7 + .../vector/ptf/VectorPTFEvaluatorDenseRank.java | 8 +- .../exec/vector/ptf/VectorPTFEvaluatorRank.java | 8 +- .../hive/ql/optimizer/physical/Vectorizer.java | 73 - .../hadoop/hive/ql/plan/VectorPTFDesc.java | 6 +- .../test/results/clientpositive/llap/ptf.q.out | 8 +- .../llap/vector_ptf_part_simple.q.out | 74 - .../clientpositive/llap/vector_windowing.q.out | 38 - .../llap/vector_windowing_rank.q.out| 41 - .../clientpositive/llap/vectorized_ptf.q.out| 151 +-- .../clientpositive/perf/spark/query47.q.out | 3 + .../clientpositive/perf/spark/query57.q.out | 3 + .../clientpositive/perf/tez/query47.q.out | 56 +++ .../clientpositive/perf/tez/query57.q.out | 56 +++ .../test/results/clientpositive/spark/ptf.q.out | 4 + .../clientpositive/spark/vectorized_ptf.q.out | 147 -- 16 files changed, 541 insertions(+), 142 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java index 437c319..daefdc4 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -65,6 +65,13 @@ public abstract class VectorPTFEvaluatorBase { this.outputColumnNum = outputColumnNum; } + public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, int outputColumnNum) { +this.windowFrameDef = windowFrameDef; +inputVecExpr = null; +inputColumnNum = -1; +this.outputColumnNum = outputColumnNum; + } + // Evaluate the aggregation input argument expression. public void evaluateInputExpr(VectorizedRowBatch batch) throws HiveException { if (inputVecExpr != null) { http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index cb6b586..c80b077 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; @@ -35,9 +34,8 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase { private int denseRank; - public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, - int outputColumnNum) { -super(windowFrameDef, inputVecExpr, outputColumnNum); + public 
VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, int outputColumnNum) { +super(windowFrameDef, outputColumnNum); resetEvaluator(); } @@ -45,7 +43,7 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase { public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException { -evaluateInputExpr(batch); +// We don't evaluate input columns... LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true;
hive git commit: HIVE-20352: Vectorization: Support grouping function (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master cc38bcc5a -> c7235932b HIVE-20352: Vectorization: Support grouping function (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c7235932 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c7235932 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c7235932 Branch: refs/heads/master Commit: c7235932b0011cd0336af6ecd138529c5e08c5a9 Parents: cc38bcc Author: Matt McCline Authored: Fri Aug 24 09:35:22 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 09:35:22 2018 -0700 -- .../ql/exec/vector/VectorizationContext.java| 49 ++ .../exec/vector/expressions/GroupingColumn.java | 54 +++ .../vector/expressions/GroupingColumns.java | 69 .../vector_groupby_grouping_sets_grouping.q.out | 54 +++ 4 files changed, 199 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c7235932/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d6bfa7a..b7feb1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2074,6 +2074,8 @@ public class VectorizationContext { // Elt is a special case because it can take variable number of arguments. 
ve = getEltExpression(childExpr, returnType); +} else if (udf instanceof GenericUDFGrouping) { + ve = getGroupingExpression((GenericUDFGrouping) udf, childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); @@ -2195,6 +2197,53 @@ public class VectorizationContext { return vectorElt; } + private VectorExpression getGroupingExpression(GenericUDFGrouping udf, + List childExprs, TypeInfo returnType) + throws HiveException { + +ExprNodeDesc childExpr0 = childExprs.get(0); +if (!(childExpr0 instanceof ExprNodeColumnDesc)) { + return null; +} +ExprNodeColumnDesc groupingIdColDesc = (ExprNodeColumnDesc) childExpr0; +int groupingIdColNum = getInputColumnIndex(groupingIdColDesc.getColumn()); + +final int indexCount = childExprs.size() - 1; +int[] indices = new int[indexCount]; +for (int i = 0; i < indexCount; i++) { + ExprNodeDesc indexChildExpr = childExprs.get(i + 1); + if (!(indexChildExpr instanceof ExprNodeConstantDesc)) { +return null; + } + Object scalarObject = ((ExprNodeConstantDesc) indexChildExpr).getValue(); + final int index; + if (scalarObject instanceof Integer) { +index = (int) scalarObject; + } else if (scalarObject instanceof Long) { +index = (int) ((long) scalarObject); + } else { +return null; + } + indices[i] = index; +} + +final int outputColumnNum = ocm.allocateOutputColumn(returnType); +final VectorExpression ve; +if (indices.length == 1) { + ve = new GroupingColumn(groupingIdColNum, indices[0], outputColumnNum); +} else { + ve = new GroupingColumns(groupingIdColNum, indices, outputColumnNum); +} + +ve.setInputTypeInfos(groupingIdColDesc.getTypeInfo()); +ve.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + +ve.setOutputTypeInfo(returnType); +ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + +return ve; + } + public enum InConstantType { INT_FAMILY, TIMESTAMP, 
http://git-wip-us.apache.org/repos/asf/hive/blob/c7235932/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java new file mode 100644 index 000..9bad386 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the
[2/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java new file mode 100644 index 000..78d543a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. 
+ * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorStreamingLongAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + private int nonNullGroupCount; + protected double avg; + + public VectorPTFEvaluatorStreamingLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { +super(windowFrameDef, inputVecExpr, outputColumnNum); +resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + +evaluateInputExpr(batch); + +// Sum all non-null long column values for avg; maintain isGroupResultNull; after last row of +// last group batch compute the group avg when sum is non-null. + +// We do not filter when PTF is in reducer. +Preconditions.checkState(!batch.selectedInUse); + +final int size = batch.size; +if (size == 0) { + return; +} +LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + +DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; +double[] outputVector = outputColVector.vector; + +if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + +// We have a repeated value. +isNull = false; +final double repeatedValue = longColVector.vector[0]; + +for (int i = 0; i < size; i++) { + sum += repeatedValue; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; +} + } else { +if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; +} else { + + // Continue previous AVG. + outputVector[0] = avg; +} +outputColVector.isRepeating = true; + } +} else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { +sum += vector[i]; +nonNullGroupCount++; + +avg = sum / nonNullGroupCount; + +// Output row i AVG. 
+outputVector[i] = avg; + } +} else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { +outputColVector.isNull[i] = true; +outputColVector.noNulls = false; +if (++i >= size) { + return; +} + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i++] = avg; + + for (; i < size; i++) { +if (!batchIsNull[i]) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum /
[1/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 6a282657c -> cc38bcc5a http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index 91b52e7..7b6fa66 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -113,16 +113,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 -Execution mode: llap +Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true -notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type -vectorized: false +reduceColumnNullOrder: aza +reduceColumnSortOrder: +++ +allNative: false +usesVectorUDFAdaptor: false +vectorized: true +rowBatchContext: +dataColumnCount: 3 +dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint +partitionColumnCount: 0 +scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -143,16 +155,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + 
functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col7 (type: string), _col3 (type: bigint), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -245,16 +280,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 -Execution mode: llap +Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet:
[3/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cc38bcc5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cc38bcc5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cc38bcc5 Branch: refs/heads/master Commit: cc38bcc5a993304898ba37b8496f13a15d62bf16 Parents: 6a28265 Author: Matt McCline Authored: Fri Aug 24 09:30:42 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 09:30:42 2018 -0700 -- .../exec/vector/ptf/VectorPTFEvaluatorBase.java | 17 +- .../vector/ptf/VectorPTFEvaluatorCount.java | 9 +- .../vector/ptf/VectorPTFEvaluatorCountStar.java | 9 +- .../ptf/VectorPTFEvaluatorDecimalAvg.java | 22 +- .../VectorPTFEvaluatorDecimalFirstValue.java| 4 +- .../ptf/VectorPTFEvaluatorDecimalLastValue.java | 12 +- .../ptf/VectorPTFEvaluatorDecimalMax.java | 25 +- .../ptf/VectorPTFEvaluatorDecimalMin.java | 23 +- .../ptf/VectorPTFEvaluatorDecimalSum.java | 9 +- .../vector/ptf/VectorPTFEvaluatorDenseRank.java | 11 +- .../vector/ptf/VectorPTFEvaluatorDoubleAvg.java | 18 +- .../ptf/VectorPTFEvaluatorDoubleFirstValue.java | 4 +- .../ptf/VectorPTFEvaluatorDoubleLastValue.java | 12 +- .../vector/ptf/VectorPTFEvaluatorDoubleMax.java | 13 +- .../vector/ptf/VectorPTFEvaluatorDoubleMin.java | 11 +- .../vector/ptf/VectorPTFEvaluatorDoubleSum.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongAvg.java | 18 +- .../ptf/VectorPTFEvaluatorLongFirstValue.java | 4 +- .../ptf/VectorPTFEvaluatorLongLastValue.java| 12 +- .../vector/ptf/VectorPTFEvaluatorLongMax.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongMin.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongSum.java | 9 +- .../exec/vector/ptf/VectorPTFEvaluatorRank.java | 13 +- .../vector/ptf/VectorPTFEvaluatorRowNumber.java | 5 +- .../VectorPTFEvaluatorStreamingDecimalAvg.java | 185 + .../VectorPTFEvaluatorStreamingDecimalMax.java | 163 +++ 
.../VectorPTFEvaluatorStreamingDecimalMin.java | 163 +++ .../VectorPTFEvaluatorStreamingDecimalSum.java | 154 +++ .../VectorPTFEvaluatorStreamingDoubleAvg.java | 174 .../VectorPTFEvaluatorStreamingDoubleMax.java | 164 .../VectorPTFEvaluatorStreamingDoubleMin.java | 166 .../VectorPTFEvaluatorStreamingDoubleSum.java | 152 +++ .../ptf/VectorPTFEvaluatorStreamingLongAvg.java | 168 .../ptf/VectorPTFEvaluatorStreamingLongMax.java | 164 .../ptf/VectorPTFEvaluatorStreamingLongMin.java | 166 .../ptf/VectorPTFEvaluatorStreamingLongSum.java | 154 +++ .../exec/vector/ptf/VectorPTFGroupBatches.java | 10 +- .../hive/ql/optimizer/physical/Vectorizer.java | 31 ++- .../hadoop/hive/ql/plan/VectorPTFDesc.java | 98 +-- .../test/results/clientpositive/llap/ptf.q.out | 12 +- .../llap/vector_ptf_part_simple.q.out | 119 - .../clientpositive/llap/vector_windowing.q.out | 244 +++-- .../llap/vector_windowing_expressions.q.out | 77 +- .../llap/vector_windowing_order_null.q.out | 82 +- .../llap/vector_windowing_windowspec.q.out | 82 +- .../clientpositive/llap/vectorized_ptf.q.out| 237 ++-- .../clientpositive/perf/spark/query51.q.out | 1 + .../clientpositive/perf/tez/query51.q.out | 18 +- .../test/results/clientpositive/spark/ptf.q.out | 7 + .../clientpositive/spark/vectorized_ptf.q.out | 267 +-- 50 files changed, 3342 insertions(+), 203 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java index 785725c..437c319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -73,14 +73,19 @@ public abstract class VectorPTFEvaluatorBase { } // Evaluate the aggregation over one of the group's 
batches. - public abstract void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) throws HiveException; + public abstract void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException; - // Returns true if the aggregation result will be streamed. - public boolean streamsResult() { -// Assume it is not streamjng by default. -return false;
[1/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/master 59cf159a7 -> ccdcc5e2e http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java new file mode 100644 index 000..1652728 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.wrapper; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSetInfo; +import org.apache.hive.common.util.HashCodeUtil; + +public class VectorHashKeyWrapperTwoLong extends VectorHashKeyWrapperTwoBase { + + private long longValue0; + private long longValue1; + + protected VectorHashKeyWrapperTwoLong() { +super(); +longValue0 = 0; +longValue1 = 0; + } + + @Override + public void setHashKey() { +if (isNull0 || isNull1) { + hashcode = + (isNull0 && isNull1 ? +twoNullHashcode : +(isNull0 ? 
+null0Hashcode ^ +HashCodeUtil.calculateLongHashCode(longValue1) : +HashCodeUtil.calculateLongHashCode(longValue0) ^ +null1Hashcode)); +} else { + hashcode = + HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^ + HashCodeUtil.calculateLongHashCode(longValue1); +} + } + + @Override + public boolean equals(Object that) { +if (that instanceof VectorHashKeyWrapperTwoLong) { + VectorHashKeyWrapperTwoLong keyThat = (VectorHashKeyWrapperTwoLong) that; + return + isNull0 == keyThat.isNull0 && + longValue0 == keyThat.longValue0 && + isNull1 == keyThat.isNull1 && + longValue1 == keyThat.longValue1; +} +return false; + } + + @Override + protected Object clone() { +VectorHashKeyWrapperTwoLong clone = new VectorHashKeyWrapperTwoLong(); +clone.isNull0 = isNull0; +clone.longValue0 = longValue0; +clone.isNull1 = isNull1; +clone.longValue1 = longValue1; +clone.hashcode = hashcode; +return clone; + } + + @Override + public void assignLong(int keyIndex, int index, long v) { +if (keyIndex == 0 && index == 0) { + isNull0 = false; + longValue0 = v; +} else if (keyIndex == 1 && index == 1) { + isNull1 = false; + longValue1 = v; +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + // FIXME: isNull is not updated; which might cause problems + @Deprecated + @Override + public void assignLong(int index, long v) { +if (index == 0) { + longValue0 = v; +} else if (index == 1) { + longValue1 = v; +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + @Override + public void assignNullLong(int keyIndex, int index) { +if (keyIndex == 0 && index == 0) { + isNull0 = true; + longValue0 = 0; // Assign 0 to make equals simple. +} else if (keyIndex == 1 && index == 1) { + isNull1 = true; + longValue1 = 0; // Assign 0 to make equals simple. +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + /* + * This method is mainly intended for debug display purposes. 
+ */ + @Override + public String stringifyKeys(VectorColumnSetInfo columnSetInfo) + { +StringBuilder sb = new StringBuilder(); +sb.append("longs ["); +if (!isNull0) { + sb.append(longValue0); +} else { + sb.append("null"); +} +sb.append(", "); +if (!isNull1) { + sb.append(longValue1); +} else { + sb.append("null"); +} +sb.append("]"); +return sb.toString(); + } + + @Override + public String toString() + { +StringBuilder sb = new StringBuilder(); +sb.append("longs ["); +sb.append(longValue0); +sb.append(", "); +sb.append(longValue1); +sb.append("], nulls ["); +
[2/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java new file mode 100644 index 000..8bf2ccb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.wrapper; + +import org.apache.hive.common.util.Murmur3; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.ql.exec.KeyWrapper; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSetInfo; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * A hash map key wrapper for vectorized processing. + * It stores the key values as primitives in arrays for each supported primitive type. + * This works in conjunction with + * {@link org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch} + * to hash vectorized processing units (batches). + */ +public abstract class VectorHashKeyWrapperBase extends KeyWrapper { + + public static final class HashContext { +private final Murmur3.IncrementalHash32 bytesHash = new Murmur3.IncrementalHash32(); + +public static Murmur3.IncrementalHash32 getBytesHash(HashContext ctx) { + if (ctx == null) { +return new Murmur3.IncrementalHash32(); + } + return ctx.bytesHash; +} + } + + protected int hashcode; + + protected VectorHashKeyWrapperBase() { +hashcode = 0; + } + + @Override + public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException { +throw new HiveException("Should not be called"); + } + + @Override + public void setHashKey() { +throw new RuntimeException("Not implemented"); + } + + @Override + public int hashCode() { +return hashcode; + } + + @Override + public boolean equals(Object that) { +throw new RuntimeException("Not implemented"); + } + + @Override + protected Object clone() { +throw new RuntimeException("Not implemented"); + } + + @Override + public 
KeyWrapper copyKey() { +return (KeyWrapper) clone(); + } + + @Override + public void copyKey(KeyWrapper oldWrapper) { +throw new UnsupportedOperationException(); + } + + @Override + public Object[] getKeyArray() { +throw new UnsupportedOperationException(); + } + + public void assignLong(int keyIndex, int index, long v) { +throw new RuntimeException("Not implemented"); + } + + // FIXME: isNull is not updated; which might cause problems + @Deprecated + public void assignLong(int index, long v) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullLong(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignDouble(int index, double d) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullDouble(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignString(int index, byte[] bytes, int start, int length) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullString(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignDecimal(int index, HiveDecimalWritable value) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullDecimal(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignTimestamp(int index, Timestamp value) { +throw new
[3/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccdcc5e2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccdcc5e2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccdcc5e2 Branch: refs/heads/master Commit: ccdcc5e2eb39211ff3a5510bd7866eb5f5df7eb4 Parents: 59cf159 Author: Matt McCline Authored: Fri Aug 17 08:08:48 2018 -0500 Committer: Matt McCline Committed: Fri Aug 17 08:08:48 2018 -0500 -- .../ql/exec/persistence/HashMapWrapper.java |6 +- .../persistence/HybridHashTableContainer.java |6 +- .../persistence/MapJoinBytesTableContainer.java |6 +- .../hive/ql/exec/persistence/MapJoinKey.java|6 +- .../ql/exec/persistence/MapJoinKeyObject.java |6 +- .../exec/persistence/MapJoinTableContainer.java |6 +- .../ql/exec/vector/VectorColumnSetInfo.java | 20 +- .../ql/exec/vector/VectorGroupByOperator.java | 24 +- .../ql/exec/vector/VectorHashKeyWrapper.java| 682 --- .../exec/vector/VectorHashKeyWrapperBatch.java | 1067 - .../ql/exec/vector/VectorMapJoinOperator.java |4 +- .../exec/vector/VectorSMBMapJoinOperator.java |8 +- .../wrapper/VectorHashKeyWrapperBase.java | 223 .../wrapper/VectorHashKeyWrapperBatch.java | 1076 ++ .../wrapper/VectorHashKeyWrapperEmpty.java | 81 ++ .../wrapper/VectorHashKeyWrapperFactory.java| 55 + .../wrapper/VectorHashKeyWrapperGeneral.java| 649 +++ .../wrapper/VectorHashKeyWrapperSingleBase.java | 53 + .../wrapper/VectorHashKeyWrapperSingleLong.java | 131 +++ .../wrapper/VectorHashKeyWrapperTwoBase.java| 63 + .../wrapper/VectorHashKeyWrapperTwoLong.java| 170 +++ .../vector/TestVectorHashKeyWrapperBatch.java |6 +- 22 files changed, 2554 insertions(+), 1794 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java -- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 9d35805..765a647 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,9 +32,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.ByteStream.Output; @@ -163,7 +163,7 @@ public class HashMapWrapper extends AbstractMapJoinTableContainer implements Ser } @Override -public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapper kw, +public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException { if (currentKey == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 027e39a..13f1702 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,10 +39,10 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import
[46/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query11.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out index 9a19fdf..87a0cc0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -72,7 +72,7 @@ with year_total as ( order by t_s_secyear.c_preferred_cust_flag limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -146,10 +146,6 @@ with year_total as ( order by t_s_secyear.c_preferred_cust_flag limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -180,518 +176,237 @@ STAGE PLANS: alias: web_sales filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), 
ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4, 22, 25] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int))
[43/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query19.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out index 51a403a..d2994e6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store @@ -22,7 +22,7 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, ,i_manufact limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store @@ -46,10 +46,6 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, ,i_manufact limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -66,40 +62,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_zip (type: string) outputColumnNames: _col0, _col1 - Select 
Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 25] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col7 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,220 +94,100 @@ STAGE PLANS: alias: customer filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int)) predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4]
[44/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query17.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index 87614e1..35405a7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id ,i_item_desc ,s_state @@ -42,7 +42,7 @@ select i_item_id ,s_state limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id ,i_item_desc ,s_state @@ -86,10 +86,6 @@ select i_item_id ,s_state limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -106,40 +102,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_state (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 24] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col3 (type: int) 1 _col0 (type: int) Execution 
mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -162,304 +136,138 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true -
[25/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query51.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query51.q.out b/ql/src/test/results/clientpositive/perf/spark/query51.q.out index 78d164b..c0bb72b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain WITH web_v1 as ( select ws_item_sk item_sk, d_date, @@ -42,7 +42,7 @@ order by item_sk ,d_date limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain WITH web_v1 as ( select ws_item_sk item_sk, d_date, @@ -86,10 +86,6 @@ order by item_sk ,d_date limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -107,40 +103,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: 
NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -154,40 +128,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink
[07/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query78.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index 720f654..15c7f04 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -55,7 +55,7 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -112,10 +112,6 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -144,321 +140,144 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num 
rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 -
[14/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query69.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out index aefe55a..e17832c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select cd_gender, cd_marital_status, @@ -44,7 +44,7 @@ select cd_credit_rating limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select cd_gender, cd_marital_status, @@ -90,10 +90,6 @@ select cd_credit_rating limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -112,40 +108,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColumnBetween(col 8:int, left 1, right 3), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: 
NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -159,40 +133,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColumnBetween(col 8:int, left 1, right 3), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[30/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query40.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out index 01bffec..6cdac29 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select w_state ,i_item_id @@ -25,7 +25,7 @@ select order by w_state,i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select w_state ,i_item_id @@ -52,10 +52,6 @@ select order by w_state,i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -73,40 +69,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_state (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 10] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: 
vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,40 +94,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-08 16:00:00.0, right 1998-05-07 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[22/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query57.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 53b6778..51e644a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with v1 as( select i_category, i_brand, cc_name, @@ -45,7 +45,7 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with v1 as( select i_category, i_brand, cc_name, @@ -92,10 +92,6 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -114,40 +110,18 @@ STAGE PLANS: alias: call_center filterExpr: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string)) predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6] Statistics: Num rows: 60 Data size: 122700 Basic stats: 
COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -161,40 +135,18 @@ STAGE PLANS: alias: call_center filterExpr: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string)) predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6] Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -
[10/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index b9bd5b0..54c3c69 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id @@ -67,7 +67,7 @@ WITH all_sales AS ( ORDER BY sales_cnt_diff limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id @@ -136,10 +136,6 @@ WITH all_sales AS ( ORDER BY sales_cnt_diff limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -180,692 +176,319 @@ STAGE PLANS: alias: catalog_sales filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int)) predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: 
VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 15, 17, 18, 23] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: item filterExpr: ((i_category = 'Sports') and i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 12:string, val Sports), SelectColumnIsNotNull(col 0:int),
[41/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out index 2c6d6f0..4ccc2df 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[285][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales @@ -51,7 +51,7 @@ from and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales @@ -102,10 +102,6 @@ from and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -132,260 +128,117 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int)) predicate: (ss_customer_sk is not null 
and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 10, 13] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 18 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter
[20/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query59.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out index 0393398..1224ab6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with wss as (select d_week_seq, ss_store_sk, @@ -41,7 +41,7 @@ with wss as order by s_store_name1,s_store_id1,d_week_seq1 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with wss as (select d_week_seq, ss_store_sk, @@ -84,10 +84,6 @@ with wss as order by s_store_name1,s_store_id1,d_week_seq1 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -105,40 +101,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_id is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 5] Statistics: Num rows: 1704 
Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -152,40 +126,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_id is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash
[19/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query60.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query60.q.out b/ql/src/test/results/clientpositive/perf/spark/query60.q.out index 07bb822..f4f61e2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query60.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales @@ -75,7 +75,7 @@ where i_category in ('Children')) ,total_sales limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales @@ -152,10 +152,6 @@ where i_category in ('Children')) ,total_sales limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,158 +186,72 @@ STAGE PLANS: alias: item filterExpr: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:string), SelectColumnIsNotNull(col 0:int)) predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 462000 Data size: 
663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) -Reduce Sink Vectorization: -className: VectorReduceSinkStringOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 12 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 9), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 9) and (d_year = 1999) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
[03/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query83.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out index 6a38c0d..1199d29 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with sr_items as (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty @@ -64,7 +64,7 @@ with sr_items as ,sr_item_qty limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with sr_items as (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty @@ -130,10 +130,6 @@ with sr_items as ,sr_item_qty limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,117 +166,54 @@ STAGE PLANS: alias: catalog_returns filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 0:int)) predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - 
projectedOutputColumnNums: [0, 2, 17] Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 2:string)) predicate: (d_date is not null and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
[47/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query10.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out index 7aa9099..b7faa9a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select cd_gender, cd_marital_status, @@ -56,7 +56,7 @@ select cd_dep_college_count limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select cd_gender, cd_marital_status, @@ -114,10 +114,6 @@ select cd_dep_college_count limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -136,40 +132,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), FilterLongColumnBetween(col 8:int, left 4, right 7), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column 
stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -183,40 +157,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), FilterLongColumnBetween(col 8:int, left 4, right 7), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[24/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query54.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index aa43c3d..241d6d8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -2,7 +2,7 @@ Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hd Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk , c_current_addr_sk @@ -57,7 +57,7 @@ with my_customers as ( order by segment, num_customers limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with my_customers as ( select distinct c_customer_sk , c_current_addr_sk @@ -112,10 +112,6 @@ with my_customers as ( order by segment, num_customers limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -137,32 +133,14 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3)) predicate: ((d_moy = 3) and (d_year 
= 1999)) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (d_month_seq + 3) (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [29] - selectExpressions: LongColAddLongScalar(col 3:int, val 3) -> 29:int Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator -Group By Vectorization: -className: VectorGroupByOperator -groupByMode: HASH -keyExpressions: col 29:int -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -171,108 +149,43 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: false -usesVectorUDFAdaptor: false -vectorized: true Reducer 28 Execution mode: vectorized -Reduce Vectorization: -
[12/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query72.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index ca142a7..37cf704 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq @@ -28,7 +28,7 @@ group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq @@ -58,10 +58,6 @@ group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -79,40 +75,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic 
stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -126,40 +100,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 2:string, val 1001-5000), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark
[33/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query37.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out index fa25d4c..bce0d68 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price @@ -14,7 +14,7 @@ select i_item_id order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price @@ -30,10 +30,6 @@ select i_item_id order by i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -50,40 +46,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2001-06-01 17:00:00.0, right 2001-07-31 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk 
(type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -101,107 +75,51 @@ STAGE PLANS: alias: catalog_sales filterExpr: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 15:int) predicate: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_item_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [15] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce
[23/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query56.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query56.q.out b/ql/src/test/results/clientpositive/perf/spark/query56.q.out index 40c02ec..e03574f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query56.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from @@ -65,7 +65,7 @@ where i_color in ('orchid','chiffon','lace')) order by total_sales limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from @@ -132,10 +132,6 @@ where i_color in ('orchid','chiffon','lace')) order by total_sales limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,158 +166,72 @@ STAGE PLANS: alias: item filterExpr: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:string), SelectColumnIsNotNull(col 0:int)) predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 
1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) -Reduce Sink Vectorization: -className: VectorReduceSinkStringOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 12 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterLongColEqualLongScalar(col 8:int, val 1), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
[34/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query34.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index 371d94f..88279a3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select c_last_name ,c_first_name ,c_salutation @@ -28,7 +28,7 @@ select c_last_name and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select c_last_name ,c_first_name ,c_salutation @@ -58,10 +58,6 @@ select c_last_name and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -78,40 +74,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_buy_potential) IN ('>1', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 2, values >1, unknown), FilterLongColGreaterLongScalar(col 4:int, val 0), SelectColumnIsTrue(col 11:boolean)(children: IfExprCondExprNull(col 6:boolean, col 10:boolean, null)(children: 
LongColGreaterLongScalar(col 4:int, val 0) -> 6:boolean, DoubleColGreaterDoubleScalar(col 9:double, val 1.2)(children: DoubleColDivideDoubleColumn(col 7:double, col 8:double)(children: CastLongToDouble(col 3:int) -> 7:double, CastLongToDouble(col 4:int) -> 8:double) -> 9:double) -> 10:boolean) -> 11:boolean), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_buy_potential) IN ('>1', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work Map 9 @@ -120,40 +94,18 @@ STAGE PLANS: alias: store filterExpr: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -
[51/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)" This reverts commit 470ba3e2835ef769f940d013acbe6c05d9208903. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/142367d9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/142367d9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/142367d9 Branch: refs/heads/master Commit: 142367d96d8c4400a56aec4bb9bd1bfda0e61f77 Parents: 489b37a Author: Matt McCline Authored: Wed Aug 15 19:15:00 2018 -0500 Committer: Matt McCline Committed: Wed Aug 15 19:15:00 2018 -0500 -- .../ql/exec/vector/VectorColumnSetInfo.java |8 +- .../hive/ql/exec/vector/VectorCopyRow.java | 63 +- .../ql/exec/vector/VectorGroupKeyHelper.java|5 +- .../exec/vector/VectorHashKeyWrapperBatch.java | 12 +- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorizationContext.java| 46 +- .../expressions/CastStringGroupToString.java| 40 + .../ql/exec/vector/expressions/VectorElt.java | 168 +- .../VectorExpressionWriterFactory.java | 34 - .../hive/ql/optimizer/physical/Vectorizer.java | 19 +- .../vector/TestVectorHashKeyWrapperBatch.java |6 +- .../ql/exec/vector/TestVectorRowObject.java |3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +- .../ql/exec/vector/VectorRandomRowSource.java | 67 +- .../hive/ql/exec/vector/VectorVerifyFast.java |6 +- .../aggregation/TestVectorAggregation.java |9 +- .../expressions/TestVectorArithmetic.java | 14 +- .../vector/expressions/TestVectorBetweenIn.java | 38 +- .../expressions/TestVectorCastStatement.java| 11 +- .../expressions/TestVectorCoalesceElt.java | 87 +- .../expressions/TestVectorDateAddSub.java | 10 +- .../vector/expressions/TestVectorDateDiff.java |9 +- .../expressions/TestVectorFilterCompare.java| 12 +- .../expressions/TestVectorIfStatement.java |3 +- .../vector/expressions/TestVectorIndex.java |5 +- 
.../vector/expressions/TestVectorNegative.java | 21 +- .../exec/vector/expressions/TestVectorNull.java | 14 +- .../expressions/TestVectorStringConcat.java |3 +- .../expressions/TestVectorStringUnary.java |3 +- .../expressions/TestVectorStructField.java | 370 -- .../vector/expressions/TestVectorSubStr.java|3 +- .../expressions/TestVectorTimestampExtract.java |3 +- .../fast/TestVectorMapJoinFastRowHashMap.java | 101 +- .../test/queries/clientpositive/perf/query1.q |7 +- .../test/queries/clientpositive/perf/query10.q |7 +- .../test/queries/clientpositive/perf/query11.q |7 +- .../test/queries/clientpositive/perf/query12.q |7 +- .../test/queries/clientpositive/perf/query13.q |7 +- .../test/queries/clientpositive/perf/query14.q |7 +- .../test/queries/clientpositive/perf/query15.q |7 +- .../test/queries/clientpositive/perf/query16.q |7 +- .../test/queries/clientpositive/perf/query17.q |7 +- .../test/queries/clientpositive/perf/query18.q |7 +- .../test/queries/clientpositive/perf/query19.q |7 +- .../test/queries/clientpositive/perf/query2.q |7 +- .../test/queries/clientpositive/perf/query20.q |7 +- .../test/queries/clientpositive/perf/query21.q |7 +- .../test/queries/clientpositive/perf/query22.q |7 +- .../test/queries/clientpositive/perf/query23.q |7 +- .../test/queries/clientpositive/perf/query24.q |7 +- .../test/queries/clientpositive/perf/query25.q |7 +- .../test/queries/clientpositive/perf/query26.q |7 +- .../test/queries/clientpositive/perf/query27.q |7 +- .../test/queries/clientpositive/perf/query28.q |7 +- .../test/queries/clientpositive/perf/query29.q |7 +- .../test/queries/clientpositive/perf/query3.q |7 +- .../test/queries/clientpositive/perf/query30.q |7 +- .../test/queries/clientpositive/perf/query31.q |7 +- .../test/queries/clientpositive/perf/query32.q |7 +- .../test/queries/clientpositive/perf/query33.q |7 +- .../test/queries/clientpositive/perf/query34.q |7 +- .../test/queries/clientpositive/perf/query35.q |7 +- 
.../test/queries/clientpositive/perf/query36.q |7 +- .../test/queries/clientpositive/perf/query37.q |7 +- .../test/queries/clientpositive/perf/query38.q |7 +- .../test/queries/clientpositive/perf/query39.q |7 +- .../test/queries/clientpositive/perf/query4.q |7 +- .../test/queries/clientpositive/perf/query40.q |7 +- .../test/queries/clientpositive/perf/query42.q |7 +- .../test/queries/clientpositive/perf/query43.q |7 +- .../test/queries/clientpositive/perf/query44.q |7 +-
[31/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index c49733b..3472613 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -106,7 +106,7 @@ union all order by t_s_secyear.customer_preferred_cust_flag limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -214,10 +214,6 @@ union all order by t_s_secyear.customer_preferred_cust_flag limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,774 +250,355 @@ STAGE PLANS: alias: web_sales filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price 
(type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4, 22, 23, 24, 25] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -
[49/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query39.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query39.q b/ql/src/test/queries/clientpositive/perf/query39.q index d3b981a..d3c806d 100644 --- a/ql/src/test/queries/clientpositive/perf/query39.q +++ b/ql/src/test/queries/clientpositive/perf/query39.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query39.tpl and seed 1327317894 -explain vectorization expression +-- start query 1 in stream 0 using template query39.tpl and seed 1327317894 +explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query4.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query4.q b/ql/src/test/queries/clientpositive/perf/query4.q index dbd605e..631a464 100644 --- a/ql/src/test/queries/clientpositive/perf/query4.q +++ b/ql/src/test/queries/clientpositive/perf/query4.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query4.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query4.tpl and seed 1819994127 +explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query40.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query40.q b/ql/src/test/queries/clientpositive/perf/query40.q index 8432546..61f5ad3 100644 --- a/ql/src/test/queries/clientpositive/perf/query40.q +++ 
b/ql/src/test/queries/clientpositive/perf/query40.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query40.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query40.tpl and seed 1819994127 +explain select w_state ,i_item_id http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query42.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query42.q b/ql/src/test/queries/clientpositive/perf/query42.q index b5c6f3f..6b8abe0 100644 --- a/ql/src/test/queries/clientpositive/perf/query42.q +++ b/ql/src/test/queries/clientpositive/perf/query42.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query42.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query42.tpl and seed 1819994127 +explain select dt.d_year ,item.i_category_id ,item.i_category http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query43.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query43.q b/ql/src/test/queries/clientpositive/perf/query43.q index a92e04b..ebdc69d 100644 --- a/ql/src/test/queries/clientpositive/perf/query43.q +++ b/ql/src/test/queries/clientpositive/perf/query43.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query43.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query43.tpl and seed 1819994127 +explain select s_store_name, s_store_id, sum(case when (d_day_name='Sunday') then ss_sales_price 
else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query44.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query44.q b/ql/src/test/queries/clientpositive/perf/query44.q index 0e8a999..712bbfb 100644 --- a/ql/src/test/queries/clientpositive/perf/query44.q +++ b/ql/src/test/queries/clientpositive/perf/query44.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set
[29/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query45.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out index 9c58320..cac3d05 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out @@ -1,5 +1,5 @@ Warning: Map Join MAPJOIN[67][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item where ws_bill_customer_sk = c_customer_sk @@ -18,7 +18,7 @@ select ca_zip, ca_county, sum(ws_sales_price) order by ca_zip, ca_county limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item where ws_bill_customer_sk = c_customer_sk @@ -37,10 +37,6 @@ select ca_zip, ca_county, sum(ws_sales_price) order by ca_zip, ca_county limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -59,80 +55,34 @@ STAGE PLANS: alias: item filterExpr: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterLongColumnInList(col 0:int, values [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]) predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: i_item_id (type: string) outputColumnNames: i_item_id - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(i_item_id) -Group By Vectorization: -aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 1:string) -> bigint -className: VectorGroupByOperator -groupByMode: HASH -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: false -usesVectorUDFAdaptor: false -vectorized: true Reducer 16 Execution mode: vectorized Local Work: Map Reduce Local Work -Reduce Vectorization: -
[48/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index cbc4c5d..b66fb9f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -1035,8 +1035,8 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: -bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) -bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 6:decimal(14,2) +bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 3:decimal(16,2) +bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 5:decimal(14,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -1072,7 +1072,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 partitionColumnCount: 0 -scratchColumnTypeNames: [decimal(14,0), decimal(16,2), decimal(16,2), decimal(14,2)] +scratchColumnTypeNames: [decimal(16,2), decimal(16,2), decimal(14,2), decimal(14,0)] Map 2 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out -- diff --git 
a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 17edd47..ba2d9df 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -503,7 +503,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2) className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(15,2)/DECIMAL_64, col 1:decimal(15,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -539,7 +539,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64 partitionColumnCount: 0 -scratchColumnTypeNames: [] +scratchColumnTypeNames: [decimal(15,2), decimal(15,2)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1801,7 +1801,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(7,2)/DECIMAL_64, col 1:decimal(7,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1837,7 +1837,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 -
[39/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query26.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index a3fe272..17bbc6a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, @@ -18,7 +18,7 @@ select i_item_id, order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, @@ -38,10 +38,6 @@ select i_item_id, order by i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -58,40 +54,18 @@ STAGE PLANS: alias: promotion filterExpr: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 9:string, val N), FilterStringGroupColEqualStringScalar(col 14:string, val N)), SelectColumnIsNotNull(col 0:int)) predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_promo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: 
VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -111,176 +85,79 @@ STAGE PLANS: alias: catalog_sales filterExpr: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 16:int)) predicate: (cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)),
[45/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query13.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index 8d11ecd..c9fcb88 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) @@ -48,7 +48,7 @@ select avg(ss_quantity) and ss_net_profit between 50 and 250 )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) @@ -98,10 +98,6 @@ select avg(ss_quantity) and ss_net_profit between 50 and 250 )) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -119,40 +115,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: 
VectorSparkHashTableSinkOperator -native: true keys: 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -166,40 +140,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnInList(col 3:int, values [3, 1]), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int), hd_dep_count (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator
[42/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query21.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index c3fde7b..1673061 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -27,7 +27,7 @@ select * ,i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -56,10 +56,6 @@ select * ,i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -77,40 +73,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: 
int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -124,40 +98,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-08 16:00:00.0, right 1998-05-07 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
[28/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query47.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index a2387e8..690b105 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with v1 as( select i_category, i_brand, s_store_name, s_company_name, @@ -48,7 +48,7 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with v1 as( select i_category, i_brand, s_store_name, s_company_name, @@ -98,10 +98,6 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -120,40 +116,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string)) predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string) outputColumnNames: _col0, _col1, 
_col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 17] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -167,40 +141,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string)) predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator -
[38/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query28.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out index caaca45..b437829 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out @@ -1,5 +1,5 @@ Warning: Map Join MAPJOIN[94][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -51,7 +51,7 @@ from (select avg(ss_list_price) B1_LP or ss_wholesale_cost between 42 and 42+20)) B6 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -103,10 +103,6 @@ from (select avg(ss_list_price) B1_LP or ss_wholesale_cost between 42 and 42+20)) B6 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -134,33 +130,15 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 10:int, left 16, right 20), FilterExprOrExpr(children: FilterDecimal64ColumnBetween(col 12:decimal(7,2)/DECIMAL_64, decimal64LeftVal 14200, decimalLeftVal 14200, decimal64RightVal 
15200, decimalRightVal 15200), FilterDecimal64ColumnBetween(col 19:decimal(7,2)/DECIMAL_64, decimal64LeftVal 305400, decimalLeftVal 305400, decimal64RightVal 405400, decimalRightVal 405400), FilterDecimal64ColumnBetween(col 11:decimal(7,2)/DECIMAL_64, decimal64LeftVal 8000, decimalLeftVal 8000, decimal64RightVal 1, decimalRightVal 1))) predicate: ((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) (type: boolean) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [12] Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) -Group By Vectorization: -aggregators: VectorUDAFSumDecimal64(col 12:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64, VectorUDAFCount(col 12:decimal(7,2)/DECIMAL_64) -> bigint -className: VectorGroupByOperator -groupByMode: HASH -keyExpressions: col 12:decimal(7,2)/DECIMAL_64 -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [0, 1] keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -169,55 +147,24 @@ STAGE PLANS: key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
[09/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query76.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query76.q.out b/ql/src/test/results/clientpositive/perf/spark/query76.q.out index 3adfc10..05ec505 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query76.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim @@ -21,7 +21,7 @@ GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim @@ -44,10 +44,6 @@ GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,393 +68,181 @@ STAGE PLANS: alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not 
null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_category (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 12] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 11 Map Operator Tree: TableScan alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate:
[27/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index e10a925..16cc603 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select 'web' as channel ,web.item @@ -124,7 +124,7 @@ select order by 1,4,5 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select 'web' as channel ,web.item @@ -250,10 +250,6 @@ select order by 1,4,5 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -288,306 +284,140 @@ STAGE PLANS: alias: ws filterExpr: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 33:decimal(7,2)/DECIMAL_64, val 100), FilterDecimal64ColGreaterDecimal64Scalar(col 29:decimal(7,2)/DECIMAL_64, val 0), FilterLongColGreaterLongScalar(col 18:int, val 0), SelectColumnIsNotNull(col 17:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int)) predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean) 
Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 17, 18, 29] Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and (d_moy = 12) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -
[35/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query32.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out index c4ab76c..af121c5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select sum(cs_ext_discount_amt) as `excess discount amount` from catalog_sales @@ -25,7 +25,7 @@ and cs_ext_discount_amt ) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select sum(cs_ext_discount_amt) as `excess discount amount` from catalog_sales @@ -52,10 +52,6 @@ and cs_ext_discount_amt ) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -73,40 +69,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-17 16:00:00.0, right 1998-06-15 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,40 +94,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-17 16:00:00.0, right 1998-06-15 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator
[50/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java index d367fb9..0bca490 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java @@ -54,7 +54,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.IntWritable; import junit.framework.Assert; @@ -67,11 +66,7 @@ public class TestVectorCoalesceElt { public void testCoalesce() throws Exception { Random random = new Random(5371); -// Grind through a few more index values... -int iteration = 0; -for (int i = 0; i < 10; i++) { - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ true, false); -} +doCoalesceElt(random, /* isCoalesce */ true, false); } @Test @@ -79,10 +74,9 @@ public class TestVectorCoalesceElt { Random random = new Random(5371); // Grind through a few more index values... 
-int iteration = 0; -for (int i = 0; i < 10; i++) { - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, false); - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, true); +for (int i = 0; i < 4; i++) { + doCoalesceElt(random, /* isCoalesce */ false, false); + doCoalesceElt(random, /* isCoalesce */ false, true); } } @@ -94,41 +88,39 @@ public class TestVectorCoalesceElt { static final int count = values().length; } - private int doCoalesceElt(Random random, int iteration, boolean isCoalesce, - boolean isEltIndexConst) - throws Exception { + private void doCoalesceElt(Random random, boolean isCoalesce, boolean isEltIndexConst) + throws Exception { -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ new int[] { 0 
}, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ new int[] { 1 }, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3,
[02/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 09b2a40..d1b3a2c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) @@ -81,7 +81,7 @@ order by substr(r_reason_desc,1,20) ,avg(wr_fee) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) @@ -164,10 +164,6 @@ order by substr(r_reason_desc,1,20) ,avg(wr_fee) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -184,40 +180,18 @@ STAGE PLANS: alias: web_page filterExpr: wp_web_page_sk is not null (type: boolean) Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: wp_web_page_sk is not null (type: boolean) Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: 
VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work Map 15 @@ -226,40 +200,18 @@ STAGE PLANS: alias: reason filterExpr: r_reason_sk is not null (type: boolean) Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: r_reason_sk is not null (type: boolean) Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: r_reason_sk (type: int), r_reason_desc (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col13 (type: int)
[32/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query39.q.out b/ql/src/test/results/clientpositive/perf/spark/query39.q.out index 1927d3e..cab0feb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query39.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov @@ -24,7 +24,7 @@ where inv1.i_item_sk = inv2.i_item_sk order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov @@ -50,10 +50,6 @@ where inv1.i_item_sk = inv2.i_item_sk order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -71,40 +67,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: 
string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -118,40 +92,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark