[2/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index c64adbf..b11ad87 100644 --- a/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -140,23 +140,46 @@ STAGE PLANS: TableScan alias: timestamps Statistics: Num rows: 51 Data size: 12597 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:cdate:date, 1:ctimestamp1:timestamp, 2:stimestamp1:string, 3:ctimestamp2:timestamp, 4:ROW__ID:struct] Select Operator expressions: ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE ('Unknown') END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE (null) END (type: string), CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN (null) ELSE (null) END (type: string), 
if((ctimestamp1 < TIMESTAMP'1974-10-04 17:21:03.989'), year(ctimestamp1), year(ctimestamp2)) (type: int), CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE (TIMESTAMP'2018-03-08 23:04:59') END (type: string), if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)) (type: int), if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null) (type: int), if(((UDFToDouble(ctimestamp1) % 500.0D) > 100.0D), date_add(cdate, 1), date_add(cdate, 365)) (type: date), stimestamp1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [1, 3, 9, 10, 11, 8, 12, 7, 6, 17, 2] +selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.9')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.9) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.9) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.9')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.9') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 
23:59:59.9')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.9) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.9) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.9) -> 8:boolean) ->
[1/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 857259ed0 -> 97f0513c4 http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/test/results/clientpositive/vector_case_when_1.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out index 270f5eb..9949de7 100644 --- a/ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -202,23 +202,44 @@ STAGE PLANS: TableScan alias: lineitem_test Statistics: Num rows: 101 Data size: 78500 Basic stats: COMPLETE Column stats: NONE +TableScan Vectorization: +native: true +vectorizationSchemaColumns: [0:l_orderkey:int, 1:l_partkey:int, 2:l_suppkey:int, 3:l_linenumber:int, 4:l_quantity:int, 5:l_extendedprice:double, 6:l_discount:double, 7:l_tax:decimal(10,2)/DECIMAL_64, 8:l_returnflag:char(1), 9:l_linestatus:char(1), 10:l_shipdate:date, 11:l_commitdate:date, 12:l_receiptdate:date, 13:l_shipinstruct:varchar(20), 14:l_shipmode:char(10), 15:l_comment:string, 16:ROW__ID:struct] Select Operator expressions: l_quantity (type: int), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END (type: string), CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END (type: string), if((l_shipmode = 'SHIP '), date_add(l_shipdate, 10), date_add(l_shipdate, 5)) (type: date), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0) END (type: double), CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - 
l_discount))) ELSE (0.0D) END (type: double), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), null, l_tax) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, null) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(12,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(12,2)), if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax) (type: decimal(10,2)), if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0) (type: decimal(10,2)), if((l_partkey > 30), CAST( l_receiptdate AS TIMESTAMP), CAST( l_commitdate AS TIMESTAMP)) (type: timestamp), if((l_suppkey > 1), datediff(l_receiptdate, l_commitdate), null) (type: int), if((l_suppkey > 1), null, datediff(l_receiptdate, l_commitdate)) (type: int), if(((l_suppkey % 500) > 100), DATE'2009-01-01', DATE'2009-12-31') (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 21, 22, 23, 20, 24, 25, 27, 28, 29, 30, 31, 32, 35, 37, 38, 19] + selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 
17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 22:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN (null) ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 23:string,
[3/3] hive git commit: HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi)
HIVE-20563: Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type are different (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/97f0513c Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/97f0513c Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/97f0513c Branch: refs/heads/master Commit: 97f0513c4c8ff1c251b1bdd1b84bd238557f03b0 Parents: 857259e Author: Matt McCline Authored: Thu Oct 4 14:37:21 2018 -0500 Committer: Matt McCline Committed: Thu Oct 4 14:37:21 2018 -0500 -- .../test/resources/testconfiguration.properties | 1 + .../ql/exec/vector/VectorizationContext.java| 92 ++- .../expressions/CastTimestampToString.java | 10 +- .../hive/ql/optimizer/physical/Vectorizer.java | 15 +- .../exec/vector/TestVectorizationContext.java | 31 +- .../vector_case_when_conversion.q | 136 .../llap/vector_case_when_1.q.out | 36 +- .../llap/vector_case_when_2.q.out | 45 +- .../llap/vector_case_when_conversion.q.out | 616 +++ .../llap/vector_decimal_expressions.q.out | 2 +- .../llap/vector_udf_adaptor_1.q.out | 52 +- .../clientpositive/llap/vectorized_case.q.out | 12 +- .../clientpositive/spark/vectorized_case.q.out | 12 +- .../clientpositive/vector_case_when_1.q.out | 35 +- .../clientpositive/vector_case_when_2.q.out | 39 +- .../vector_decimal_expressions.q.out| 2 +- .../clientpositive/vectorized_case.q.out| 12 +- 17 files changed, 1061 insertions(+), 87 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index fdd8ecc..d444c99 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -763,6 +763,7 @@ minillaplocal.query.files=\ vector_acid4.q,\ 
vector_annotate_stats_select.q,\ vector_auto_smb_mapjoin_14.q,\ + vector_case_when_conversion.q,\ vector_char_varchar_1.q,\ vector_complex_all.q,\ vector_complex_join.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/97f0513c/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 6ca1248..488f277 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -546,6 +546,7 @@ public class VectorizationContext { private final int initialOutputCol; private int outputColCount = 0; private boolean reuseScratchColumns = true; +private boolean dontReuseTrackedScratchColumns = false; protected OutputColumnManager(int initialOutputCol) { this.initialOutputCol = initialOutputCol; @@ -558,6 +559,7 @@ public class VectorizationContext { private String[] scratchVectorTypeNames = new String[100]; private DataTypePhysicalVariation[] scratchDataTypePhysicalVariations = new DataTypePhysicalVariation[100]; +private boolean[] scratchColumnTrackWasUsed = new boolean[100]; private final Set usedOutputColumns = new HashSet(); @@ -589,6 +591,9 @@ public class VectorizationContext { scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation)) { continue; } +if (dontReuseTrackedScratchColumns && scratchColumnTrackWasUsed[i]) { + continue; +} //Use i usedOutputColumns.add(i); return i; @@ -597,16 +602,19 @@ public class VectorizationContext { if (outputColCount < scratchVectorTypeNames.length) { int newIndex = outputColCount; scratchVectorTypeNames[outputColCount] = columnType; -scratchDataTypePhysicalVariations[outputColCount++] = dataTypePhysicalVariation; +scratchDataTypePhysicalVariations[outputColCount] = dataTypePhysicalVariation; 
+scratchColumnTrackWasUsed[outputColCount++] = true; usedOutputColumns.add(newIndex); return newIndex; } else { //Expand the array scratchVectorTypeNames = Arrays.copyOf(scratchVectorTypeNames, 2*outputColCount); scratchDataTypePhysicalVariations = Arrays.copyOf(scratchDataTypePhysicalVariations, 2*outputColCount); +
[12/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorization_0.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index fbcbd64..5e95f39 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -63,10 +63,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -119,10 +118,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -244,10 +243,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -300,10 +298,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -575,10 +572,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type:
[33/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index b075ecf..801948c 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -983,7 +983,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1153,7 +1153,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1 A masked pattern was here 12744278 500 652447 25 PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 4 
<- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +value expressions: _col1 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 5 +Map Operator Tree: +TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 12 Data size: 1128 Basic
[28/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index e33101c..0a8a8a8 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -271,8 +271,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_8: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_7: 2100 - RECORDS_OUT_OPERATOR_SEL_6: 2100 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_6: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 26 @@ -327,13 +327,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 8 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 8 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 8 - RECORDS_OUT_OPERATOR_SEL_9: 8 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1048576 ALLOCATED_USED_BYTES: 2731 @@ -367,13 +367,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 22 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 22 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 22 - RECORDS_OUT_OPERATOR_SEL_9: 22 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -405,13 +405,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 16 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 16 + 
RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 16 - RECORDS_OUT_OPERATOR_SEL_9: 16 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -443,13 +443,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 18 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 18 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 18 - RECORDS_OUT_OPERATOR_SEL_9: 18 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -487,7 +487,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1 RECORDS_OUT_OPERATOR_SEL_9: 1 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -519,13 +519,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -557,13 +557,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - 
RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -595,13 +595,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 1697 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 1697 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1697 - RECORDS_OUT_OPERATOR_SEL_9: 1697 -
[11/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index 6b25672..2f5eb26 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1_n14 POSTHOOK: Lineage: test1_n14.col_1 SCRIPT [] POSTHOOK: Lineage: test1_n14.key SCRIPT [] POSTHOOK: Lineage: test1_n14.value SCRIPT [] +col1 col2col3 PREHOOK: query: CREATE TABLE test2_n9 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2_n9 POSTHOOK: Lineage: test2_n9.col_2 SCRIPT [] POSTHOOK: Lineage: test2_n9.key SCRIPT [] POSTHOOK: Lineage: test2_n9.value SCRIPT [] -PREHOOK: query: EXPLAIN +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,9 +74,14 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map 
Join Operator condition map: @@ -77,12 +89,19 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -90,23 +109,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 +
[26/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 000..6ddcef6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1228 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum,
[38/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 000..586c850 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * An multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. 
+ */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { +random = new Random(82733); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; +random.nextBytes(key); +byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); + +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Second value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Third value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { +random = new Random(29383); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +int keyCount = 100 + random.nextInt(1000); +for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. 
+break; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); +} + +verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { +addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { +for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { +value = new byte[generateLargeCount() - 1]; + } else { +value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { +byte[] key; +while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. +
[45/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..5733688 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; @@ -55,14 +54,17 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTabl import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -124,6 +126,10 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +155,70 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + *The Big Table key columns are from the key expressions. + *The Big Table value columns are from the getExpr(posBigTable) expressions. + *Any calculations needed for those will be scratch columns. + * + *The Small Table key and value output columns are scratch columns. 
+ * + * Big Table Retain Column Map / TypeInfos: + *Any Big Table Batch columns that will be in the output result. + *0, 1, ore more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + *For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + *result but are needed for the Small Table output result, they are put in this mapping + *as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping + *For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + *output result from the Big Table key columns. The Big Table keys cannot be projected since + *on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping + *For FULL OUTER MapJoin, the mapping from any needed Small Table key
[27/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index ddcabd8..4c81131 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns:
[42/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 000..b9e86eb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Joiner; + +/** + * FULL OUTER MapJoin planning. 
+ */ +public class FullOuterMapJoinOptimization { + + FullOuterMapJoinOptimization() { + } + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { +int[][] filterMaps = mapJoinDesc.getFilterMap(); +if (filterMaps == null) { + return; +} +final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); +final int numAliases = mapJoinDesc.getExprs().size(); +List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); +for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { +int[] filterMap = filterMaps[pos]; +TableDesc tableDesc = valueFilteredTblDescs.get(pos); +Properties properties = tableDesc.getProperties(); +String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); +String columnNameDelimiter = +properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? +properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + +String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); +List columnNameList; +if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); +} else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); +} +List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); +String truncatedColumnNameProperty = +Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + +List columnTypeList; +if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); +} else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); +} +if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); +} +List truncatedColumnTypeList = +columnTypeList.subList(0, columnTypeList.size() - 1); +String truncatedColumnTypeProperty = +Joiner.on(",").join(truncatedColumnTypeList); + 
+properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); +properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } +} +mapJoinDesc.setFilterMap(null); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
[16/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_like_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 1a20a35..31b7326 100644 --- a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out index 630f3f7..8ac3a11 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out @@ -104,10 +104,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:varchar(10), 1:int, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index 45bfc6b..29c4bc1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -167,11 +167,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0] -valueColumnNums: [1] +partitionColumns: 0:int +valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -223,14 +223,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [0] -bigTableRetainedColumnNums: [0, 1] -bigTableValueColumnNums: [1] +bigTableKeyColumns: 0:int +bigTableRetainColumnNums: [1] +bigTableValueColumns: 1:string
[07/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 18e75aa..87f0ca8 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -289,74 +289,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 103val_103 val_103 103val_103 val_103 103val_103 val_103 103val_103 val_103 105val_105 val_105 +11 val_11 val_11 114val_114 val_114 116val_116 val_116 118val_118 val_118 @@ -398,6 +336,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 149val_149 val_149 149val_149 val_149 149val_149 val_149 
+15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 150val_150 val_150 152val_152 val_152 152val_152 val_152 @@ -435,6 +377,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 169val_169 val_169 169val_169 val_169 169val_169 val_169 +17 val_17 val_17 170val_170 val_170 172val_172 val_172 172val_172 val_172 @@ -461,10 +404,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 187val_187 val_187 187val_187 val_187 189val_189 val_189 +19 val_19 val_19 190val_190 val_190 192val_192 val_192 194val_194 val_194 196val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 200val_200 val_200 200val_200 val_200 200val_200 val_200 @@ -511,6 +457,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 239val_239 val_239 239val_239 val_239 239val_239 val_239 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 242val_242 val_242 242val_242 val_242 242val_242 val_242 @@ -522,6 +472,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 255val_255 val_255 255val_255 val_255 257val_257 val_257 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 260val_260 val_260 262val_262 val_262 266val_266 val_266 @@ -551,6 +505,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 277val_277 val_277 277val_277 val_277 277val_277 val_277 +28 val_28 val_28 280val_280 val_280 280val_280 val_280 280val_280 val_280 @@ -612,11 +567,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 327val_327 val_327 327val_327 val_327 327val_327 val_327 +33 val_33 val_33 332val_332 val_332 336val_336 val_336 338val_338 val_338 341val_341 val_341 345val_345 val_345 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 356val_356 val_356 365val_365 val_365 367val_367 val_367 @@ -632,6 +597,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 369val_369 val_369 369val_369 val_369 369val_369 val_369 +37 val_37 val_37 +37 val_37 val_37 +37 
val_37 val_37 +37 val_37 val_37 374val_374 val_374 378val_378 val_378 389val_389 val_389 @@ -646,6
[35/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out -- diff --git a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out index 6b85e13..2288b4b 100644 --- a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out @@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:tbl_n1 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:tbl_n1 + TableScan +alias: tbl_n1 +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: n (type: bigint), t (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 +1 + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: tbl_n1 +filterExpr: (n = 1L) (type: boolean) +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + 
predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: t (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: +0 +1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: +Map Reduce Local Work + + Stage: Stage-0 +Fetch Operator + limit: -1 + Processor Tree: +ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1 A masked pattern was here 1 one truetrue Warning: Map Join MAPJOIN[13][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from
[37/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin2.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q b/ql/src/test/queries/clientpositive/mapjoin2.q index e194bd0..014dabe 100644 --- a/ql/src/test/queries/clientpositive/mapjoin2.q +++ b/ql/src/test/queries/clientpositive/mapjoin2.q @@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string); insert into tbl_n1 values (1, 'one'); insert into tbl_n1 values(2, 'two'); +explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin46.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin46.q b/ql/src/test/queries/clientpositive/mapjoin46.q index 9de7113..81f9610 100644 --- a/ql/src/test/queries/clientpositive/mapjoin46.q +++ b/ql/src/test/queries/clientpositive/mapjoin46.q @@ -3,6 +3,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING); INSERT INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * 
+FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN
[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 12db036..b8d76ed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4 A masked pattern was here 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 - A masked pattern was here -13630578 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: 
VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: +0 +1 {true} + keys: +0 +1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet:
[06/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join33.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 13cd446..09198b0 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -393,88 +393,88 @@ POSTHOOK: query: select * from dest_j1_n7 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n7 A masked pattern was here -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 +146val_146 +146val_146 +146val_146 +146val_146 150val_150 val_150 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 
val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 +213val_213 +213val_213 +213val_213 +213val_213 +238val_238 +238val_238 +238val_238 +238val_238 +255val_255 +255val_255 +255val_255 +255val_255 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +278val_278 +278val_278 +278val_278 +278val_278 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join6.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 6075e5f..caa0849 100644 ---
[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 4b2f06f..9238bc7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc_n0 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -72,6 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -86,6 +88,12 @@ STAGE PLANS: allNative: 
true usesVectorUDFAdaptor: false vectorized: true +rowBatchContext: +dataColumnCount: 2 +includeColumns: [0] +dataColumns: key:string, value:string +partitionColumnCount: 0 +scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -94,6 +102,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -116,9 +125,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: +bigTableKeyColumns: 0:string +bigTableRetainColumnNums: [0, 1] +bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +nonOuterSmallTableKeyMapping: [] +projectedOutput: 0:string, 1:string +hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -141,6 +156,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +valueColumns: 0:bigint
[30/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index 07c4a62..95d3611 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -1633,7 +1633,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1746,7 +1746,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1857,7 +1857,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1970,7 +1970,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -2148,7 +2148,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index c0c9f95..c3b1eb7 100644 --- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -230,6 +230,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -318,6 +319,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE 
+DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -407,6 +409,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -495,6 +498,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/llap_acid.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/llap_acid.q.out b/ql/src/test/results/clientpositive/llap/llap_acid.q.out index 0d1a331..d441ab0 100644 --- a/ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [4, 0] +keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS
[47/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a37827ec Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a37827ec Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a37827ec Branch: refs/heads/master Commit: a37827ecd557c7f7d69f3b2ccdbf6535908b1461 Parents: 45163ee Author: Matt McCline Authored: Sun Sep 16 09:04:59 2018 -0500 Committer: Matt McCline Committed: Sun Sep 16 09:04:59 2018 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 10 +- data/files/fullouter_long_big_1a.txt| 11 + data/files/fullouter_long_big_1a_nonull.txt | 10 + data/files/fullouter_long_big_1b.txt| 13 + data/files/fullouter_long_big_1c.txt| 11 + data/files/fullouter_long_big_1d.txt| 12 + data/files/fullouter_long_small_1a.txt | 54 + data/files/fullouter_long_small_1a_nonull.txt | 51 + data/files/fullouter_long_small_1b.txt | 72 + data/files/fullouter_long_small_1c.txt | 81 + data/files/fullouter_long_small_1d.txt | 39 + data/files/fullouter_multikey_big_1a.txt| 13 + data/files/fullouter_multikey_big_1a_nonull.txt | 10 + data/files/fullouter_multikey_big_1b.txt| 17 + data/files/fullouter_multikey_small_1a.txt | 92 + .../fullouter_multikey_small_1a_nonull.txt | 90 + data/files/fullouter_multikey_small_1b.txt | 118 + data/files/fullouter_string_big_1a.txt | 13 + data/files/fullouter_string_big_1a_nonull.txt | 12 + data/files/fullouter_string_big_1a_old.txt | 13 + data/files/fullouter_string_small_1a.txt| 38 + data/files/fullouter_string_small_1a_nonull.txt | 35 + data/files/fullouter_string_small_1a_old.txt| 38 + .../vectorization/mapjoin/AbstractMapJoin.java | 66 +- .../mapjoin/MapJoinMultiKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneLongKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneStringKeyBenchBase.java |3 +- .../test/resources/testconfiguration.properties |5 + .../hadoop/hive/ql/exec/CommonJoinOperator.java | 11 +- 
.../apache/hadoop/hive/ql/exec/ExplainTask.java | 187 +- .../apache/hadoop/hive/ql/exec/JoinUtil.java|2 +- .../hadoop/hive/ql/exec/MapJoinOperator.java| 199 +- .../apache/hadoop/hive/ql/exec/Operator.java| 86 +- .../hadoop/hive/ql/exec/TableScanOperator.java |6 +- .../persistence/BytesBytesMultiHashMap.java | 71 +- .../ql/exec/persistence/HashMapWrapper.java | 22 + .../persistence/HybridHashTableContainer.java | 118 +- .../persistence/MapJoinBytesTableContainer.java | 199 +- .../hive/ql/exec/persistence/MapJoinKey.java| 15 + .../persistence/MapJoinObjectSerDeContext.java | 17 + .../exec/persistence/MapJoinTableContainer.java | 61 +- .../hive/ql/exec/persistence/MatchTracker.java | 154 + .../ReusableGetAdaptorDirectAccess.java |4 +- .../ql/exec/persistence/UnwrapRowContainer.java | 12 +- .../hive/ql/exec/tez/ReduceRecordProcessor.java |2 +- .../vector/VectorAppMasterEventOperator.java|2 +- .../hive/ql/exec/vector/VectorAssignRow.java| 12 + .../hive/ql/exec/vector/VectorCopyRow.java | 21 +- .../ql/exec/vector/VectorDeserializeRow.java| 32 + .../ql/exec/vector/VectorFilterOperator.java|2 +- .../ql/exec/vector/VectorGroupByOperator.java |2 +- .../ql/exec/vector/VectorLimitOperator.java |2 +- .../exec/vector/VectorMapJoinBaseOperator.java | 23 +- .../ql/exec/vector/VectorMapJoinOperator.java | 23 +- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorSelectOperator.java|4 +- .../ql/exec/vector/VectorTopNKeyOperator.java |2 +- .../mapjoin/VectorMapJoinCommonOperator.java| 409 +- .../VectorMapJoinFullOuterLongOperator.java | 68 + .../VectorMapJoinFullOuterMultiKeyOperator.java | 71 + .../VectorMapJoinFullOuterStringOperator.java | 71 + .../VectorMapJoinGenerateResultOperator.java| 163 +- ...pJoinInnerBigOnlyGenerateResultOperator.java | 18 +- .../VectorMapJoinInnerBigOnlyLongOperator.java | 63 +- ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 60 +- ...VectorMapJoinInnerBigOnlyStringOperator.java | 50 +- 
...ectorMapJoinInnerGenerateResultOperator.java | 22 +- .../mapjoin/VectorMapJoinInnerLongOperator.java | 64 +- .../VectorMapJoinInnerMultiKeyOperator.java | 58 +- .../VectorMapJoinInnerStringOperator.java | 50 +- ...orMapJoinLeftSemiGenerateResultOperator.java | 12 +- .../VectorMapJoinLeftSemiLongOperator.java | 64 +- .../VectorMapJoinLeftSemiMultiKeyOperator.java | 58 +- .../VectorMapJoinLeftSemiStringOperator.java| 50 +-
[04/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 352e74f..5954629 100644 --- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -89,10 +89,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -372,10 +371,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -424,10 +422,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -575,10 +572,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -627,10 +623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
[34/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 194fc5d..a160428 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -188,6 +188,139 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n2 A masked pattern was here 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 4 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, 
_col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Merge Join Operator +condition map: + Full Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col1 (type: int) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: sum(_col0) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) +Reducer 3 +Execution mode: vectorized, llap +Reduce Operator Tree: + Group By Operator +aggregations: sum(VALUE._col0) +mode: mergepartial +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat +
[09/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index febb6c7..b6f8b19 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -234,12 +234,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -340,12 +340,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -430,10 +430,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product @@ -528,10 +528,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -540,6 +536,10 @@ NULL NULLNoneNULLNULLNULL 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -635,11 +635,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -648,6 +643,11 @@ NULL NULLNone102 2 Del 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNone102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULLNone102 2 Del Warning:
[36/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_join30.q -- diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q index 9672a47..74c4433 100644 --- a/ql/src/test/queries/clientpositive/vector_join30.q +++ b/ql/src/test/queries/clientpositive/vector_join30.q @@ -11,7 +11,7 @@ SET hive.auto.convert.join.noconditionaltask.size=10; CREATE TABLE orcsrc_n0 STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -19,14 +19,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -34,116 +34,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT 
orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- 
(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); +
[23/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 000..52ca0fb --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,3923 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] 
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE
[31/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..c387af5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3139 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: 
fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here
[14/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6637d33..ef1e653 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -43,11 +43,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [2] -valueColumnNums: [5, 7] +partitionColumns: 2:string +valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -279,11 +279,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution 
mode: vectorized, llap @@ -339,11 +339,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -529,11 +529,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode:
[39/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..a37b5a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -25,7 +25,6 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; @@ -33,9 +32,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -48,17 +45,23 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import 
org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -69,7 +72,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; @@ -86,14 +88,13 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import
[08/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index 1eab962..a7fdfda 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE(((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -83,7 +83,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -135,12 +134,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true @@ -169,7 +162,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:_col0:timestamp, 1:_col1:float, 2:_col2:string, 3:_col3:boolean, 4:_col4:double, 5:_col5:double, 6:_col6:double, 7:_col7:double, 8:_col8:float, 9:_col9:float, 10:_col10:float, 11:_col11:float, 12:_col12:double, 13:_col13:double, 14:_col14:bigint, 15:_col15:double, 16:_col16:double, 17:_col17:double, 18:_col18:double, 19:_col19:double, 20:_col20:double, 21:_col21:double] Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: @@ -190,12 +182,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 22 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - dataColumns: _col0:timestamp, _col1:float, _col2:string, _col3:boolean, _col4:double, _col5:double, _col6:double, _col7:double, _col8:float, _col9:float, _col10:float, _col11:float, _col12:double, _col13:double, _col14:bigint, _col15:double, _col16:double, _col17:double, _col18:double, _col19:double, _col20:double, _col21:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 2d306cf..6974ee8 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cboolean1, cdouble, 
@@ -29,7 +29,7 @@ WHERE(((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN
[40/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..93fdb28 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,20 +28,23 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { +DYNAMIC_PARTITION_HASH_JOIN, +SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +86,103 @@ public class MapJoinTestDescription extends DescriptionTest { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. 
- public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. - public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector 
smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { +this( +hiveConf, +vectorMapJoinVariation, +/* bigTableColumnNames */ null, +bigTableTypeInfos, +bigTableKeyColumnNums, +smallTableValueTypeInfos, +smallTableRetainKeyColumnNums, +
[22/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 5c0d6bb..6eaf7ad 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -72,10 +72,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -209,10 +209,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -372,10 +372,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -632,11 +632,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [4] -valueColumnNums: [3] +partitionColumns: 4:double +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -692,11 +692,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization:
[15/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 5e26637..0f40378 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -353,11 +352,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -517,10 +516,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [2] +valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -576,11 +575,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -747,10 +746,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0] +keyColumns: 0:decimal(15,2) native: true
[13/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index c07f4d9..6660d73 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -62,10 +62,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1] +valueColumns: 1:bigint Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -129,12 +129,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:int, 1:bigint keyExpressions: ConstantVectorExpression(val 0) -> 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [3] -valueColumnNums: [] +partitionColumns: 3:int Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -304,10 +303,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator 
-keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2] +valueColumns: 1:string, 2:bigint Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap @@ -363,11 +362,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2] + keyColumns: 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:string + valueColumns: 0:int Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -540,10 +539,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true
[43/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..8dce5b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public class VectorMapJoinFastLongHashSet return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + +// Ignore NULL keys (HashSet not used for FULL OUTER). +adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public class VectorMapJoinFastLongHashSet optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); -long existance = findReadSlot(key, hashCode); +int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; -if (existance == -1) { +if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { +matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,13 @@ public class VectorMapJoinFastLongHashSet } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean isFullOuter, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { -super(minMaxEnabled, isOuterJoin, hashTableKeyType, +super( +isFullOuter, +minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..03ef249 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ public abstract class VectorMapJoinFastLongHashTable private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public abstract class VectorMapJoinFastLongHashTable return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, 
keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { -return; +return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public abstract class VectorMapJoinFastLongHashTable keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); +return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ public abstract class VectorMapJoinFastLongHashTable largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; -// LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot =
[46/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..114cea9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashP import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import 
org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +112,23 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + /* + * FULL OUTER MapJoin members. + */ + protected transient boolean isFullOuterMapJoin; // Are we doing a FULL OUTER MapJoin? + + protected transient int fullOuterBigTableRetainSize; + // The number of Big Table columns being + // retained in the output result for + // FULL OUTER MapJoin. + + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared among vertces, we require this non-shared object + * for our vertex (i.e. operator private) key match tracking. + */ + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. 
private transient int numBuckets = -1; @@ -177,6 +201,8 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem hybridMapJoinLeftover = false; firstSmallTable = null; +doFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +278,24 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem } } + /* + * Do initialization for FULL OUTER MapJoin. + * + * Currently, we do not support FULL OUTER MapJoin for N-way. + */ + private void doFullOuterMapJoinInit() { + +// This will be set during the first process call or during closeOp if no rows
[21/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index bbfba28..07c4eed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -158,10 +157,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -280,10 +279,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: 
className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -350,10 +348,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -499,10 +497,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No
[19/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 7c1780b..a49e8e2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1 A masked pattern was here 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 - A masked pattern was here -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: 
+Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator +Filter Vectorization: +className: VectorFilterOperator +native: true +predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Reduce Sink Vectorization: +className: VectorReduceSinkEmptyKeyOperator +native: true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true +
[10/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 8efe78d..c9b9e81 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -146,14 +146,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -217,10 +219,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -281,10 
+282,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -351,14 +351,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +
[29/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index 52eb609..d0d9c87 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -128,14 +128,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -239,12 +239,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] 
in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -344,12 +344,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -438,10 +438,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product @@ -535,18 +535,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012 Car 104 3 Fli 1012 Car 103 2 Ema +1012 Car 104 3 Fli +1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -644,19 +644,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012
[17/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 006a51a..960f5f5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,18 +128,100 @@ POSTHOOK: query: select * from t4_n19 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4_n19 A masked pattern was here -PREHOOK: query: explain vectorization only summary - +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary - +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: +0 key (type: int) +1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: +1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator +key expressions: _col0 (type: int), _col1 (type: string) +sort order: ++ +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Map 3 +Map Operator Tree: +TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator +keys: _col0 (type: int) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Select Operator +expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +
[05/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 903e74b..a445b44 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE(((cbigint > -23) OR (cfloat = cdouble ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -69,7 +69,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,10 +90,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 
(type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized @@ -107,27 +104,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 12 -includeColumns: [0, 1, 2, 3, 4, 5, 6, 8] -dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean -partitionColumnCount: 0 -scratchColumnTypeNames: [decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -reduceColumnNullOrder: zz -reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 14 -dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double -partitionColumnCount: 0 -scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type:
[24/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..da513db --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: 
Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A
[03/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union14.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out b/ql/src/test/results/clientpositive/spark/union14.q.out index 6a95e4a..49d6cb1 100644 --- a/ql/src/test/results/clientpositive/spark/union14.q.out +++ b/ql/src/test/results/clientpositive/spark/union14.q.out @@ -126,20 +126,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union7.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out b/ql/src/test/results/clientpositive/spark/union7.q.out index 549075c..8556f84 100644 --- a/ql/src/test/results/clientpositive/spark/union7.q.out +++ b/ql/src/test/results/clientpositive/spark/union7.q.out @@ -122,20 +122,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_null.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_null.q.out b/ql/src/test/results/clientpositive/spark/union_null.q.out index d37adbb..696641c 100644 --- a/ql/src/test/results/clientpositive/spark/union_null.q.out +++ b/ql/src/test/results/clientpositive/spark/union_null.q.out @@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src A masked pattern was here -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 
+val_0 +val_0 +val_10 +val_100 PREHOOK: query: select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a PREHOOK: type: QUERY PREHOOK: Input: default@src1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_view.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out b/ql/src/test/results/clientpositive/spark/union_view.q.out index 591ebfa..97a5bef 100644 --- a/ql/src/test/results/clientpositive/spark/union_view.q.out +++ b/ql/src/test/results/clientpositive/spark/union_view.q.out @@ -483,10 +483,10 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -86 val_86 2 -86 val_86 2 86 val_86 3 86 val_86 3 +86 val_86 2 +86 val_86 2 86 val_86 1 STAGE DEPENDENCIES: Stage-1 is a root stage http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index e3d815b..6e33ead 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] +valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 
8:decimal(33,14), 9:bigint Statistics: Num rows: 12289
[44/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe // @Override - public void process(Object row, int tag) throws HiveException { - -try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { -// Our one time process method initialization. -commonSetup(batch); + protected void commonSetup() throws HiveException { +super.commonSetup(); -/* - * Initialize Single-Column String members for this specialized class. - */ - -singleJoinColumn = bigTableKeyColumnMap[0]; +/* + * Initialize Single-Column String members for this specialized class. + */ -needCommonSetup = false; - } +singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { -// Setup our hash table specialization. It will be the first time the process -// method is called, or after a Hybrid Grace reload. 
+ @Override + public void hashTableSetup() throws HiveException { +super.hashTableSetup(); -/* - * Get our Single-Column String hash set information for this specialized class. - */ +/* + * Get our Single-Column String hash set information for this specialized class. + */ -hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; +hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } -needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; +try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { -if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); -} return; } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 2e5c568..61bcbf0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,13 +24,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import
[25/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 000..169d94c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: 
fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was
[01/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 45163ee4c -> a37827ecd http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out new file mode 100644 index 000..53c32ff --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out @@ -0,0 +1,2050 @@ +PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1 +POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.col_1 SCRIPT [] +POSTHOOK: Lineage: test1.key SCRIPT [] +POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2col3 +PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: 
default@test2 +POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test2 +POSTHOOK: Lineage: test2.col_2 SCRIPT [] +POSTHOOK: Lineage: test2.key SCRIPT [] +POSTHOOK: Lineage: test2.value SCRIPT [] +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:test2 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:test2 + TableScan +alias: test2 +Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: test1 +Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE +TableScan Vectorization: +native: true +Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Left 
Outer Join 0 to 1 +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) +Map Join Vectorization: +bigTableKeyExpressions: col 1:int +className: VectorMapJoinOperator +native: false +nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin
hive git commit: Missed files: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL
Repository: hive Updated Branches: refs/heads/master be1130d56 -> 37120b877 Missed files: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/37120b87 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/37120b87 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/37120b87 Branch: refs/heads/master Commit: 37120b8c40baf44040fc7778d129cd8a5824 Parents: be1130d Author: Matt McCline Authored: Sat Sep 15 18:43:44 2018 -0500 Committer: Matt McCline Committed: Sat Sep 15 18:44:22 2018 -0500 -- .../test/resources/testconfiguration.properties| 1 + .../apache/hadoop/hive/metastore/ColumnType.java | 17 +++-- 2 files changed, 16 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/37120b87/itests/src/test/resources/testconfiguration.properties -- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 65ae6bb..0e071fb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -688,6 +688,7 @@ minillaplocal.query.files=\ schema_evol_text_vecrow_part_all_primitive.q,\ schema_evol_text_vecrow_table_llap_io.q,\ schema_evol_text_vecrow_table.q,\ + schema_evol_undecorated.q,\ selectDistinctStar.q,\ semijoin.q,\ semijoin6.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/37120b87/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java -- diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java index d5dea4d..39d2b2f 100644 --- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hive.metastore.utils.StringUtils; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -144,6 +145,14 @@ public class ColumnType { NumericCastOrder.put(DOUBLE_TYPE_NAME, 7); } + private static final Set decoratedTypeNames = new HashSet<>(); + + static { +decoratedTypeNames.add("char"); +decoratedTypeNames.add("decimal"); +decoratedTypeNames.add("varchar"); + } + private static final Map alternateTypeNames = new HashMap<>(); static { @@ -199,6 +208,9 @@ public class ColumnType { public static String getTypeName(String typeString) { if (typeString == null) return null; String protoType = typeString.toLowerCase().split("\\W")[0]; +if (decoratedTypeNames.contains(protoType)) { + return protoType; +} String realType = alternateTypeNames.get(protoType); return realType == null ? protoType : realType; } @@ -217,8 +229,9 @@ public class ColumnType { return NumericCastOrder.get(from) < NumericCastOrder.get(to); } - // Allow string to double conversion - if (StringTypes.contains(from) && to.equals(DOUBLE_TYPE_NAME)) return true; + // Allow string to double/decimal conversion + if (StringTypes.contains(from) && + (to.equals(DOUBLE_TYPE_NAME) || to.equals(DECIMAL_TYPE_NAME))) return true; // Void can go to anything if (from.equals(VOID_TYPE_NAME)) return true;
hive git commit: HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL (Matt McCline, reviewed by Jason Dere)
Repository: hive Updated Branches: refs/heads/master 2c8e67942 -> e041c9ece HIVE-20524: Schema Evolution checking is broken in going from Hive version 2 to version 3 for ALTER TABLE VARCHAR to DECIMAL (Matt McCline, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e041c9ec Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e041c9ec Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e041c9ec Branch: refs/heads/master Commit: e041c9ecec9d546cb790d2cf8cf3ea10eeeab202 Parents: 2c8e679 Author: Matt McCline Authored: Sat Sep 15 15:06:19 2018 -0500 Committer: Matt McCline Committed: Sat Sep 15 15:06:19 2018 -0500 -- .../clientpositive/schema_evol_undecorated.q| 14 + .../llap/schema_evol_undecorated.q.out | 64 2 files changed, 78 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e041c9ec/ql/src/test/queries/clientpositive/schema_evol_undecorated.q -- diff --git a/ql/src/test/queries/clientpositive/schema_evol_undecorated.q b/ql/src/test/queries/clientpositive/schema_evol_undecorated.q new file mode 100644 index 000..1fe5c08 --- /dev/null +++ b/ql/src/test/queries/clientpositive/schema_evol_undecorated.q @@ -0,0 +1,14 @@ + +set hive.metastore.disallow.incompatible.col.type.changes=true; + +create external table new_char_decimal (c1 char(20)); +alter table new_char_decimal change c1 c1 decimal(31,0); + +create external table new_varchar_decimal (c1 varchar(25)); +alter table new_varchar_decimal change c1 c1 decimal(12,5); + +create external table new_char_double (c1 char(20)); +alter table new_char_double change c1 c1 double; + +create external table new_varchar_double (c1 varchar(25)); +alter table new_varchar_double change c1 c1 double; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/e041c9ec/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out -- diff --git 
a/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out new file mode 100644 index 000..2cbdb4c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/schema_evol_undecorated.q.out @@ -0,0 +1,64 @@ +PREHOOK: query: create external table new_char_decimal (c1 char(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@new_char_decimal +POSTHOOK: query: create external table new_char_decimal (c1 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_char_decimal +PREHOOK: query: alter table new_char_decimal change c1 c1 decimal(31,0) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_char_decimal +PREHOOK: Output: default@new_char_decimal +POSTHOOK: query: alter table new_char_decimal change c1 c1 decimal(31,0) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_char_decimal +POSTHOOK: Output: default@new_char_decimal +PREHOOK: query: create external table new_varchar_decimal (c1 varchar(25)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@new_varchar_decimal +POSTHOOK: query: create external table new_varchar_decimal (c1 varchar(25)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_varchar_decimal +PREHOOK: query: alter table new_varchar_decimal change c1 c1 decimal(12,5) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_varchar_decimal +PREHOOK: Output: default@new_varchar_decimal +POSTHOOK: query: alter table new_varchar_decimal change c1 c1 decimal(12,5) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_varchar_decimal +POSTHOOK: Output: default@new_varchar_decimal +PREHOOK: query: create external table new_char_double (c1 char(20)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@new_char_double +POSTHOOK: query: create external table new_char_double (c1 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@new_char_double +PREHOOK: query: alter table new_char_double change c1 c1 double +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@new_char_double +PREHOOK: Output: default@new_char_double +POSTHOOK: query: alter table new_char_double change c1 c1 double +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@new_char_double +POSTHOOK: Output: default@new_char_double +PREHOOK: query: create external table new_varchar_double (c1 varchar(25)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output:
[2/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff98a30a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff98a30a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff98a30a Branch: refs/heads/master Commit: ff98a30ab49c4eafe53974e03c9dd205c14ffee7 Parents: 494b771 Author: Matt McCline Authored: Mon Sep 10 04:24:35 2018 -0500 Committer: Matt McCline Committed: Mon Sep 10 04:24:35 2018 -0500 -- .../fast/VectorMapJoinFastBytesHashKeyRef.java | 178 ++ .../fast/VectorMapJoinFastBytesHashMap.java | 141 +++-- .../VectorMapJoinFastBytesHashMapStore.java | 559 +++ .../VectorMapJoinFastBytesHashMultiSet.java | 132 - ...VectorMapJoinFastBytesHashMultiSetStore.java | 280 ++ .../fast/VectorMapJoinFastBytesHashSet.java | 124 +++- .../VectorMapJoinFastBytesHashSetStore.java | 219 .../fast/VectorMapJoinFastBytesHashTable.java | 148 ++--- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 6 +- .../fast/TestVectorMapJoinFastBytesHashMap.java | 3 + .../fast/TestVectorMapJoinFastLongHashMap.java | 3 + .../clientpositive/bucket_map_join_tez2.q | 2 +- .../test/queries/clientpositive/tez_smb_main.q | 3 +- .../results/clientpositive/llap/orc_llap.q.out | 59 +- .../apache/hadoop/hive/serde2/WriteBuffers.java | 53 ++ 15 files changed, 1661 insertions(+), 249 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java new file mode 100644 index 000..dbfe518 --- /dev/null +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.serde2.WriteBuffers; +// import com.google.common.base.Preconditions; + +public class VectorMapJoinFastBytesHashKeyRef { + + public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers writeBuffers, WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +writeBuffers.setReadPoint(absoluteOffset, readPos); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +if (!isKeyLengthSmall) { + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); +} + +if (actualKeyLength != keyLength) { + return false; +} + +// Our reading was positioned to the key. 
+if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + return false; +} + +return true; + } + + public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, + WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +final long keyAbsoluteOffset; +if (!isKeyLengthSmall) { + + // Position after next relative offset (fixed length) to the key. + writeBuffers.setReadPoint(absoluteOffset, readPos); + + // And, if current value is big we must
[1/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
Repository: hive Updated Branches: refs/heads/master 494b771ac -> ff98a30ab http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java index 17d4bdb..79462a0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java @@ -57,6 +57,11 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { memSize += (2 * jdm.primitive1()); return memSize; } +public void set(Position pos) { + buffer = pos.buffer; + bufferIndex = pos.bufferIndex; + offset = pos.offset; +} } Position writePos = new Position(); // Position where we'd write @@ -552,6 +557,21 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { return v; } + public long readNByteLong(int bytes, Position readPos) { +long v = 0; +if (isAllInOneReadBuffer(bytes, readPos)) { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += bytes; +} else { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return v; + } + public void writeFiveByteULong(long offset, long v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; setWritePoint(offset); @@ -574,10 +594,43 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { writePos.offset = prevOffset; } + public void writeFiveByteULong(long v) { +if (isAllInOneWriteBuffer(5)) { + writePos.buffer[writePos.offset] = (byte)(v >>> 32); + writePos.buffer[writePos.offset + 1] = (byte)(v >>> 24); + writePos.buffer[writePos.offset + 2] = (byte)(v >>> 16); + writePos.buffer[writePos.offset + 3] = (byte)(v >>> 8); + writePos.buffer[writePos.offset + 4] = (byte)(v); + writePos.offset += 5; +} else { + 
write((byte)(v >>> 32)); + write((byte)(v >>> 24)); + write((byte)(v >>> 16)); + write((byte)(v >>> 8)); + write((byte)(v)); +} + } + public int readInt(long offset) { return (int)unsafeReadNByteLong(offset, 4); } + public int readInt(long offset, Position readPos) { +setReadPoint(offset, readPos); +long v = 0; +if (isAllInOneReadBuffer(4, readPos)) { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += 4; +} else { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return (int) v; + } + @Override public void writeInt(long offset, int v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset;
hive git commit: HIVE-20496: Vectorization: Vectorized PTF IllegalStateException (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 804535275 -> a4dd84b38 HIVE-20496: Vectorization: Vectorized PTF IllegalStateException (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a4dd84b3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a4dd84b3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a4dd84b3 Branch: refs/heads/master Commit: a4dd84b38083864edc2e09e52e208827b82e82cd Parents: 8045352 Author: Matt McCline Authored: Mon Sep 3 05:42:29 2018 -0500 Committer: Matt McCline Committed: Mon Sep 3 05:42:29 2018 -0500 -- .../org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/a4dd84b3/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 70d6468..5698639 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -451,6 +451,11 @@ public class ReduceRecordSource implements RecordSource { } reducer.process(batch, tag); + // Do the non-column batch reset logic. + batch.selectedInUse = false; + batch.size = 0; + batch.endOfFile = false; + // Reset just the value columns and value buffer. for (int i = firstValueColumnOffset; i < batch.numCols; i++) { // Note that reset also resets the data buffer for bytes column vectors.
[1/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master e2142b206 -> fa36381fa http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/test/results/clientpositive/vector_case_when_1.q.out -- diff --git a/ql/src/test/results/clientpositive/vector_case_when_1.q.out b/ql/src/test/results/clientpositive/vector_case_when_1.q.out index 01fc3ce..88cba90 100644 --- a/ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ b/ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -516,7 +516,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 36, 40, 42, 45, 46] - selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 21:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 21:string) -> 22:string, IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 24:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprColumnNull(col 20:boolean, col 21:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean, ConstantVectorExpression(val Many) -> 21:string) -> 23:string) -> 24:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 
17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 23:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprNullNull(null, null) -> 23:string) -> 25:string) -> 23:string) -> 25:string, IfExprLongColumnLongColumn(col 17:boolean, col 18:date, col 19:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 17:boolean, VectorUDFDateAddColScalar(co l 10:date, val 10) -> 18:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 19:date) -> 26:date, IfExprDoubleColumnLongScalar(col 17:boolean, col 28:double, val 0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 27:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 27:double) -> 28:double) -> 27:double, IfExprDoubleColumnDoubleScalar(col 17:boolean, col 29:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 17:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 28:double) -> 29:double) -> 28:double, IfExprNullColumn(col 17:boolean, null, col 48)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 17:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 48:decimal(10,2)) -> 30:decimal(10,2), IfExprColumnNull(col 18:boolean, col 49:decimal(10,2), null)(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 18:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 49:decimal(10,2)) -> 
31:decimal(10,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 32:decimal(12,2), VectorUDFAdaptor(if((CAST( l_shipinstruct AS STRING) = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualStringScalar(col 23:string, val TAKE BACK RETURN)(children: CastStringGroupToString(col 13:varchar(20)) -> 23:string) -> 19:boolean) -> 33:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 19:boolean, decimal64Val 0, decimalVal 0, col 7:dec imal(1,0)/DECIMAL_64)(children: StringGroupColEqualStringScalar(col 23:string, val DELIVER IN PERSON)(children: CastStringGroupToString(col
[5/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fa36381f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fa36381f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fa36381f Branch: refs/heads/master Commit: fa36381faad40576f62e2ac925ef2976efecd8b6 Parents: e2142b2 Author: Matt McCline Authored: Sat Aug 25 09:21:25 2018 -0700 Committer: Matt McCline Committed: Sat Aug 25 09:21:25 2018 -0700 -- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorizationContext.java| 46 +- .../exec/vector/VectorizationContext.java.orig | 3771 -- .../expressions/CastStringGroupToString.java| 40 - .../ql/exec/vector/expressions/VectorElt.java | 168 +- .../VectorExpressionWriterFactory.java | 26 + .../ql/exec/vector/TestVectorRowObject.java |3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +- .../ql/exec/vector/VectorRandomRowSource.java | 67 +- .../hive/ql/exec/vector/VectorVerifyFast.java |6 +- .../aggregation/TestVectorAggregation.java |9 +- .../expressions/TestVectorArithmetic.java | 14 +- .../vector/expressions/TestVectorBetweenIn.java | 38 +- .../expressions/TestVectorCastStatement.java| 11 +- .../expressions/TestVectorCoalesceElt.java | 87 +- .../expressions/TestVectorDateAddSub.java | 10 +- .../vector/expressions/TestVectorDateDiff.java |9 +- .../expressions/TestVectorFilterCompare.java| 12 +- .../expressions/TestVectorIfStatement.java |3 +- .../vector/expressions/TestVectorIndex.java |5 +- .../vector/expressions/TestVectorNegative.java | 21 +- .../exec/vector/expressions/TestVectorNull.java | 14 +- .../expressions/TestVectorStringConcat.java |3 +- .../expressions/TestVectorStringUnary.java |3 +- .../expressions/TestVectorStructField.java | 370 ++ .../vector/expressions/TestVectorSubStr.java|3 +- 
.../expressions/TestVectorTimestampExtract.java |3 +- .../fast/TestVectorMapJoinFastRowHashMap.java | 101 +- .../clientpositive/query_result_fileformat.q|4 +- .../llap/vector_case_when_1.q.out |8 +- .../llap/vector_char_mapjoin1.q.out |1 - .../clientpositive/llap/vector_udf1.q.out | 18 +- .../clientpositive/llap/vectorized_casts.q.out |6 +- .../query_result_fileformat.q.out | 76 +- .../clientpositive/vector_case_when_1.q.out |8 +- .../clientpositive/vector_char_mapjoin1.q.out |2 +- .../clientpositive/vectorized_casts.q.out |6 +- .../hadoop/hive/serde2/RandomTypeUtil.java | 29 + 38 files changed, 1059 insertions(+), 4081 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index c13510e..07a6e9d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -131,7 +131,7 @@ public class VectorSMBMapJoinOperator extends SMBMapJoinOperator List keyDesc = desc.getKeys().get(posBigTable); keyExpressions = vContext.getVectorExpressions(keyDesc); -keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc); +keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyExpressions); Map> exprs = desc.getExprs(); bigTableValueExpressions = vContext.getVectorExpressions(exprs.get(posBigTable)); http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index b7feb1c..57f7c01 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1806,6 +1806,25 @@ public class VectorizationContext { return vectorExpression; } + public void wrapWithDecimal64ToDecimalConversions(VectorExpression[] vecExprs) + throws HiveException{ +if (vecExprs == null) { +
[2/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java new file mode 100644 index 000..5062997 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStructField.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import java.lang.reflect.Constructor; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; + +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.GenerationSpec; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource.SupportedTypes; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDF; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNegative; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import junit.framework.Assert; + +import org.junit.Ignore; +import org.junit.Test; + +public class TestVectorStructField { + + @Test + public void testStructField() throws Exception { +Random random = new Random(7743); + +for (int i = 0; i < 5; i++) { + doStructFieldTests(random); +} + } + + public enum StructFieldTestMode { +ROW_MODE, +VECTOR_EXPRESSION; + +static final int count = values().length; + } + + private void doStructFieldTests(Random random) throws Exception { +String structTypeName = +VectorRandomRowSource.getDecoratedTypeName( +random, "struct", SupportedTypes.ALL, /* allowedTypeNameSet */ null, +/* depth */ 0, /* maxDepth */ 2); +StructTypeInfo structTypeInfo = +
[3/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java deleted file mode 100644 index 8232e67..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToString.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; - -// cast string group to string (varchar to string, etc.) 
-public class CastStringGroupToString extends StringUnaryUDFDirect { - - private static final long serialVersionUID = 1L; - - public CastStringGroupToString() { -super(); - } - - public CastStringGroupToString(int inputColumn, int outputColumnNum) { -super(inputColumn, outputColumnNum); - } - - @Override - protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { -outV.setVal(i, vector[i], start[i], length[i]); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java index 00e529d..75e60eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorElt.java @@ -66,53 +66,157 @@ public class VectorElt extends VectorExpression { outputVector.init(); -outputVector.noNulls = false; outputVector.isRepeating = false; +final int limit = inputColumns.length; LongColumnVector inputIndexVector = (LongColumnVector) batch.cols[inputColumns[0]]; +boolean[] inputIndexIsNull = inputIndexVector.isNull; long[] indexVector = inputIndexVector.vector; if (inputIndexVector.isRepeating) { - int index = (int)indexVector[0]; - if (index > 0 && index < inputColumns.length) { -BytesColumnVector cv = (BytesColumnVector) batch.cols[inputColumns[index]]; -if (cv.isRepeating) { - outputVector.setElement(0, 0, cv); - outputVector.isRepeating = true; -} else if (batch.selectedInUse) { - for (int j = 0; j != n; j++) { -int i = sel[j]; -outputVector.setVal(i, cv.vector[0], cv.start[0], cv.length[0]); + if (inputIndexVector.noNulls || !inputIndexIsNull[0]) { +int repeatedIndex = (int) indexVector[0]; +if (repeatedIndex > 0 && repeatedIndex < limit) { + BytesColumnVector cv = (BytesColumnVector) 
batch.cols[inputColumns[repeatedIndex]]; + if (cv.isRepeating) { +outputVector.isNull[0] = false; +outputVector.setElement(0, 0, cv); +outputVector.isRepeating = true; + } else if (cv.noNulls) { +if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { +int i = sel[j]; +outputVector.isNull[i] = false; +outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } +} else { + for (int i = 0; i != n; i++) { +outputVector.isNull[i] = false; +outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); + } +} + } else { +if (batch.selectedInUse) { + for (int j = 0; j != n; j++) { +int i = sel[j]; +if (!cv.isNull[i]) { + outputVector.isNull[i] = false; + outputVector.setVal(i, cv.vector[i], cv.start[i], cv.length[i]); +} else { + outputVector.isNull[i] = true; +
[4/5] hive git commit: HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/fa36381f/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig deleted file mode 100644 index 20cc894..000 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java.orig +++ /dev/null @@ -1,3771 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.lang.reflect.Constructor; -import java.nio.charset.StandardCharsets; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.regex.Pattern; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.hadoop.hive.common.type.Date; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; -import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.ArgumentType; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; -import org.apache.hadoop.hive.ql.exec.vector.expressions.*; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; -import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.SemanticException; -import 
org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; -import org.apache.hadoop.hive.ql.udf.*; -import org.apache.hadoop.hive.ql.udf.generic.*; -import org.apache.hadoop.hive.serde2.ByteStream.Output; -import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.io.DateWritableV2; -import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import
hive git commit: HIVE-20339: Vectorization: Lift unneeded restriction causing some PTF with RANK not to be vectorized (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master b5578eb08 -> e2142b206 HIVE-20339: Vectorization: Lift unneeded restriction causing some PTF with RANK not to be vectorized (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e2142b20 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e2142b20 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e2142b20 Branch: refs/heads/master Commit: e2142b20660c3582bc09c87f67c1d32c201952c3 Parents: b5578eb Author: Matt McCline Authored: Fri Aug 24 22:30:07 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 22:30:07 2018 -0700 -- .../exec/vector/ptf/VectorPTFEvaluatorBase.java | 7 + .../vector/ptf/VectorPTFEvaluatorDenseRank.java | 8 +- .../exec/vector/ptf/VectorPTFEvaluatorRank.java | 8 +- .../hive/ql/optimizer/physical/Vectorizer.java | 73 - .../hadoop/hive/ql/plan/VectorPTFDesc.java | 6 +- .../test/results/clientpositive/llap/ptf.q.out | 8 +- .../llap/vector_ptf_part_simple.q.out | 74 - .../clientpositive/llap/vector_windowing.q.out | 38 - .../llap/vector_windowing_rank.q.out| 41 - .../clientpositive/llap/vectorized_ptf.q.out| 151 +-- .../clientpositive/perf/spark/query47.q.out | 3 + .../clientpositive/perf/spark/query57.q.out | 3 + .../clientpositive/perf/tez/query47.q.out | 56 +++ .../clientpositive/perf/tez/query57.q.out | 56 +++ .../test/results/clientpositive/spark/ptf.q.out | 4 + .../clientpositive/spark/vectorized_ptf.q.out | 147 -- 16 files changed, 541 insertions(+), 142 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java index 437c319..daefdc4 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -65,6 +65,13 @@ public abstract class VectorPTFEvaluatorBase { this.outputColumnNum = outputColumnNum; } + public VectorPTFEvaluatorBase(WindowFrameDef windowFrameDef, int outputColumnNum) { +this.windowFrameDef = windowFrameDef; +inputVecExpr = null; +inputColumnNum = -1; +this.outputColumnNum = outputColumnNum; + } + // Evaluate the aggregation input argument expression. public void evaluateInputExpr(VectorizedRowBatch batch) throws HiveException { if (inputVecExpr != null) { http://git-wip-us.apache.org/repos/asf/hive/blob/e2142b20/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java index cb6b586..c80b077 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorDenseRank.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.exec.vector.ptf; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; @@ -35,9 +34,8 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase { private int denseRank; - public VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, - int outputColumnNum) { -super(windowFrameDef, inputVecExpr, outputColumnNum); + public 
VectorPTFEvaluatorDenseRank(WindowFrameDef windowFrameDef, int outputColumnNum) { +super(windowFrameDef, outputColumnNum); resetEvaluator(); } @@ -45,7 +43,7 @@ public class VectorPTFEvaluatorDenseRank extends VectorPTFEvaluatorBase { public void evaluateGroupBatch(VectorizedRowBatch batch) throws HiveException { -evaluateInputExpr(batch); +// We don't evaluate input columns... LongColumnVector longColVector = (LongColumnVector) batch.cols[outputColumnNum]; longColVector.isRepeating = true;
hive git commit: HIVE-20352: Vectorization: Support grouping function (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master cc38bcc5a -> c7235932b HIVE-20352: Vectorization: Support grouping function (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c7235932 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c7235932 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c7235932 Branch: refs/heads/master Commit: c7235932b0011cd0336af6ecd138529c5e08c5a9 Parents: cc38bcc Author: Matt McCline Authored: Fri Aug 24 09:35:22 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 09:35:22 2018 -0700 -- .../ql/exec/vector/VectorizationContext.java| 49 ++ .../exec/vector/expressions/GroupingColumn.java | 54 +++ .../vector/expressions/GroupingColumns.java | 69 .../vector_groupby_grouping_sets_grouping.q.out | 54 +++ 4 files changed, 199 insertions(+), 27 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c7235932/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index d6bfa7a..b7feb1c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2074,6 +2074,8 @@ public class VectorizationContext { // Elt is a special case because it can take variable number of arguments. 
ve = getEltExpression(childExpr, returnType); +} else if (udf instanceof GenericUDFGrouping) { + ve = getGroupingExpression((GenericUDFGrouping) udf, childExpr, returnType); } else if (udf instanceof GenericUDFBridge) { ve = getGenericUDFBridgeVectorExpression((GenericUDFBridge) udf, childExpr, mode, returnType); @@ -2195,6 +2197,53 @@ public class VectorizationContext { return vectorElt; } + private VectorExpression getGroupingExpression(GenericUDFGrouping udf, + List childExprs, TypeInfo returnType) + throws HiveException { + +ExprNodeDesc childExpr0 = childExprs.get(0); +if (!(childExpr0 instanceof ExprNodeColumnDesc)) { + return null; +} +ExprNodeColumnDesc groupingIdColDesc = (ExprNodeColumnDesc) childExpr0; +int groupingIdColNum = getInputColumnIndex(groupingIdColDesc.getColumn()); + +final int indexCount = childExprs.size() - 1; +int[] indices = new int[indexCount]; +for (int i = 0; i < indexCount; i++) { + ExprNodeDesc indexChildExpr = childExprs.get(i + 1); + if (!(indexChildExpr instanceof ExprNodeConstantDesc)) { +return null; + } + Object scalarObject = ((ExprNodeConstantDesc) indexChildExpr).getValue(); + final int index; + if (scalarObject instanceof Integer) { +index = (int) scalarObject; + } else if (scalarObject instanceof Long) { +index = (int) ((long) scalarObject); + } else { +return null; + } + indices[i] = index; +} + +final int outputColumnNum = ocm.allocateOutputColumn(returnType); +final VectorExpression ve; +if (indices.length == 1) { + ve = new GroupingColumn(groupingIdColNum, indices[0], outputColumnNum); +} else { + ve = new GroupingColumns(groupingIdColNum, indices, outputColumnNum); +} + +ve.setInputTypeInfos(groupingIdColDesc.getTypeInfo()); +ve.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + +ve.setOutputTypeInfo(returnType); +ve.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + +return ve; + } + public enum InConstantType { INT_FAMILY, TIMESTAMP, 
http://git-wip-us.apache.org/repos/asf/hive/blob/c7235932/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java new file mode 100644 index 000..9bad386 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/GroupingColumn.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the
[2/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java new file mode 100644 index 000..78d543a --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorStreamingLongAvg.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.ptf; + +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; + +import com.google.common.base.Preconditions; + +/** + * This class evaluates long avg() for a PTF group. 
+ * + * Sum up non-null column values; group result is sum / non-null count. + */ +public class VectorPTFEvaluatorStreamingLongAvg extends VectorPTFEvaluatorBase { + + protected boolean isNull; + protected long sum; + private int nonNullGroupCount; + protected double avg; + + public VectorPTFEvaluatorStreamingLongAvg(WindowFrameDef windowFrameDef, VectorExpression inputVecExpr, + int outputColumnNum) { +super(windowFrameDef, inputVecExpr, outputColumnNum); +resetEvaluator(); + } + + @Override + public void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException { + +evaluateInputExpr(batch); + +// Sum all non-null long column values for avg; maintain isGroupResultNull; after last row of +// last group batch compute the group avg when sum is non-null. + +// We do not filter when PTF is in reducer. +Preconditions.checkState(!batch.selectedInUse); + +final int size = batch.size; +if (size == 0) { + return; +} +LongColumnVector longColVector = ((LongColumnVector) batch.cols[inputColumnNum]); + +DoubleColumnVector outputColVector = (DoubleColumnVector) batch.cols[outputColumnNum]; +double[] outputVector = outputColVector.vector; + +if (longColVector.isRepeating) { + + if (longColVector.noNulls || !longColVector.isNull[0]) { + +// We have a repeated value. +isNull = false; +final double repeatedValue = longColVector.vector[0]; + +for (int i = 0; i < size; i++) { + sum += repeatedValue; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i] = avg; +} + } else { +if (isNull) { + outputColVector.isNull[0] = true; + outputColVector.noNulls = false; +} else { + + // Continue previous AVG. + outputVector[0] = avg; +} +outputColVector.isRepeating = true; + } +} else if (longColVector.noNulls) { + isNull = false; + long[] vector = longColVector.vector; + for (int i = 0; i < size; i++) { +sum += vector[i]; +nonNullGroupCount++; + +avg = sum / nonNullGroupCount; + +// Output row i AVG. 
+outputVector[i] = avg; + } +} else { + boolean[] batchIsNull = longColVector.isNull; + int i = 0; + while (batchIsNull[i]) { +outputColVector.isNull[i] = true; +outputColVector.noNulls = false; +if (++i >= size) { + return; +} + } + + isNull = false; + long[] vector = longColVector.vector; + + sum += vector[i]; + nonNullGroupCount++; + + avg = sum / nonNullGroupCount; + + // Output row i AVG. + outputVector[i++] = avg; + + for (; i < size; i++) { +if (!batchIsNull[i]) { + sum += vector[i]; + nonNullGroupCount++; + + avg = sum /
[1/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 6a282657c -> cc38bcc5a http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index 91b52e7..7b6fa66 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -113,16 +113,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 -Execution mode: llap +Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true -notVectorizedReason: PTF operator: sum UNBOUNDED end frame is not supported for ROWS window type -vectorized: false +reduceColumnNullOrder: aza +reduceColumnSortOrder: +++ +allNative: false +usesVectorUDFAdaptor: false +vectorized: true +rowBatchContext: +dataColumnCount: 3 +dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, KEY.reducesinkkey2:bigint +partitionColumnCount: 0 +scratchColumnTypeNames: [bigint] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey2 (type: bigint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col2, _col3, _col7 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [0, 2, 1] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: @@ -143,16 +155,39 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: ROWS PRECEDING(MAX)~CURRENT + PTF Vectorization: + className: VectorPTFOperator + evaluatorClasses: [VectorPTFEvaluatorStreamingLongSum] + functionInputExpressions: [col 2:bigint] + 
functionNames: [sum] + keyInputColumns: [0, 2, 1] + native: true + nonKeyInputColumns: [] + orderExpressions: [col 1:string, col 2:bigint] + outputColumns: [3, 0, 2, 1] + outputTypes: [bigint, int, bigint, string] + partitionExpressions: [col 0:int] + streamingColumns: [3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int), _col7 (type: string), _col3 (type: bigint), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 +Select Vectorization: +className: VectorSelectOperator +native: true +projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 10 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -245,16 +280,28 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 -Execution mode: llap +Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet:
[3/3] hive git commit: HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi)
HIVE-20367: Vectorization: Support streaming for PTF AVG, MAX, MIN, SUM (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cc38bcc5 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cc38bcc5 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cc38bcc5 Branch: refs/heads/master Commit: cc38bcc5a993304898ba37b8496f13a15d62bf16 Parents: 6a28265 Author: Matt McCline Authored: Fri Aug 24 09:30:42 2018 -0700 Committer: Matt McCline Committed: Fri Aug 24 09:30:42 2018 -0700 -- .../exec/vector/ptf/VectorPTFEvaluatorBase.java | 17 +- .../vector/ptf/VectorPTFEvaluatorCount.java | 9 +- .../vector/ptf/VectorPTFEvaluatorCountStar.java | 9 +- .../ptf/VectorPTFEvaluatorDecimalAvg.java | 22 +- .../VectorPTFEvaluatorDecimalFirstValue.java| 4 +- .../ptf/VectorPTFEvaluatorDecimalLastValue.java | 12 +- .../ptf/VectorPTFEvaluatorDecimalMax.java | 25 +- .../ptf/VectorPTFEvaluatorDecimalMin.java | 23 +- .../ptf/VectorPTFEvaluatorDecimalSum.java | 9 +- .../vector/ptf/VectorPTFEvaluatorDenseRank.java | 11 +- .../vector/ptf/VectorPTFEvaluatorDoubleAvg.java | 18 +- .../ptf/VectorPTFEvaluatorDoubleFirstValue.java | 4 +- .../ptf/VectorPTFEvaluatorDoubleLastValue.java | 12 +- .../vector/ptf/VectorPTFEvaluatorDoubleMax.java | 13 +- .../vector/ptf/VectorPTFEvaluatorDoubleMin.java | 11 +- .../vector/ptf/VectorPTFEvaluatorDoubleSum.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongAvg.java | 18 +- .../ptf/VectorPTFEvaluatorLongFirstValue.java | 4 +- .../ptf/VectorPTFEvaluatorLongLastValue.java| 12 +- .../vector/ptf/VectorPTFEvaluatorLongMax.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongMin.java | 9 +- .../vector/ptf/VectorPTFEvaluatorLongSum.java | 9 +- .../exec/vector/ptf/VectorPTFEvaluatorRank.java | 13 +- .../vector/ptf/VectorPTFEvaluatorRowNumber.java | 5 +- .../VectorPTFEvaluatorStreamingDecimalAvg.java | 185 + .../VectorPTFEvaluatorStreamingDecimalMax.java | 163 +++ 
.../VectorPTFEvaluatorStreamingDecimalMin.java | 163 +++ .../VectorPTFEvaluatorStreamingDecimalSum.java | 154 +++ .../VectorPTFEvaluatorStreamingDoubleAvg.java | 174 .../VectorPTFEvaluatorStreamingDoubleMax.java | 164 .../VectorPTFEvaluatorStreamingDoubleMin.java | 166 .../VectorPTFEvaluatorStreamingDoubleSum.java | 152 +++ .../ptf/VectorPTFEvaluatorStreamingLongAvg.java | 168 .../ptf/VectorPTFEvaluatorStreamingLongMax.java | 164 .../ptf/VectorPTFEvaluatorStreamingLongMin.java | 166 .../ptf/VectorPTFEvaluatorStreamingLongSum.java | 154 +++ .../exec/vector/ptf/VectorPTFGroupBatches.java | 10 +- .../hive/ql/optimizer/physical/Vectorizer.java | 31 ++- .../hadoop/hive/ql/plan/VectorPTFDesc.java | 98 +-- .../test/results/clientpositive/llap/ptf.q.out | 12 +- .../llap/vector_ptf_part_simple.q.out | 119 - .../clientpositive/llap/vector_windowing.q.out | 244 +++-- .../llap/vector_windowing_expressions.q.out | 77 +- .../llap/vector_windowing_order_null.q.out | 82 +- .../llap/vector_windowing_windowspec.q.out | 82 +- .../clientpositive/llap/vectorized_ptf.q.out| 237 ++-- .../clientpositive/perf/spark/query51.q.out | 1 + .../clientpositive/perf/tez/query51.q.out | 18 +- .../test/results/clientpositive/spark/ptf.q.out | 7 + .../clientpositive/spark/vectorized_ptf.q.out | 267 +-- 50 files changed, 3342 insertions(+), 203 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/cc38bcc5/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java index 785725c..437c319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/ptf/VectorPTFEvaluatorBase.java @@ -73,14 +73,19 @@ public abstract class VectorPTFEvaluatorBase { } // Evaluate the aggregation over one of the group's 
batches. - public abstract void evaluateGroupBatch(VectorizedRowBatch batch, boolean isLastGroupBatch) throws HiveException; + public abstract void evaluateGroupBatch(VectorizedRowBatch batch) + throws HiveException; - // Returns true if the aggregation result will be streamed. - public boolean streamsResult() { -// Assume it is not streamjng by default. -return false;
[1/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
Repository: hive Updated Branches: refs/heads/master 59cf159a7 -> ccdcc5e2e http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java new file mode 100644 index 000..1652728 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperTwoLong.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.wrapper; + +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSetInfo; +import org.apache.hive.common.util.HashCodeUtil; + +public class VectorHashKeyWrapperTwoLong extends VectorHashKeyWrapperTwoBase { + + private long longValue0; + private long longValue1; + + protected VectorHashKeyWrapperTwoLong() { +super(); +longValue0 = 0; +longValue1 = 0; + } + + @Override + public void setHashKey() { +if (isNull0 || isNull1) { + hashcode = + (isNull0 && isNull1 ? +twoNullHashcode : +(isNull0 ? 
+null0Hashcode ^ +HashCodeUtil.calculateLongHashCode(longValue1) : +HashCodeUtil.calculateLongHashCode(longValue0) ^ +null1Hashcode)); +} else { + hashcode = + HashCodeUtil.calculateLongHashCode(longValue0) >>> 16 ^ + HashCodeUtil.calculateLongHashCode(longValue1); +} + } + + @Override + public boolean equals(Object that) { +if (that instanceof VectorHashKeyWrapperTwoLong) { + VectorHashKeyWrapperTwoLong keyThat = (VectorHashKeyWrapperTwoLong) that; + return + isNull0 == keyThat.isNull0 && + longValue0 == keyThat.longValue0 && + isNull1 == keyThat.isNull1 && + longValue1 == keyThat.longValue1; +} +return false; + } + + @Override + protected Object clone() { +VectorHashKeyWrapperTwoLong clone = new VectorHashKeyWrapperTwoLong(); +clone.isNull0 = isNull0; +clone.longValue0 = longValue0; +clone.isNull1 = isNull1; +clone.longValue1 = longValue1; +clone.hashcode = hashcode; +return clone; + } + + @Override + public void assignLong(int keyIndex, int index, long v) { +if (keyIndex == 0 && index == 0) { + isNull0 = false; + longValue0 = v; +} else if (keyIndex == 1 && index == 1) { + isNull1 = false; + longValue1 = v; +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + // FIXME: isNull is not updated; which might cause problems + @Deprecated + @Override + public void assignLong(int index, long v) { +if (index == 0) { + longValue0 = v; +} else if (index == 1) { + longValue1 = v; +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + @Override + public void assignNullLong(int keyIndex, int index) { +if (keyIndex == 0 && index == 0) { + isNull0 = true; + longValue0 = 0; // Assign 0 to make equals simple. +} else if (keyIndex == 1 && index == 1) { + isNull1 = true; + longValue1 = 0; // Assign 0 to make equals simple. +} else { + throw new ArrayIndexOutOfBoundsException(); +} + } + + /* + * This method is mainly intended for debug display purposes. 
+ */ + @Override + public String stringifyKeys(VectorColumnSetInfo columnSetInfo) + { +StringBuilder sb = new StringBuilder(); +sb.append("longs ["); +if (!isNull0) { + sb.append(longValue0); +} else { + sb.append("null"); +} +sb.append(", "); +if (!isNull1) { + sb.append(longValue1); +} else { + sb.append("null"); +} +sb.append("]"); +return sb.toString(); + } + + @Override + public String toString() + { +StringBuilder sb = new StringBuilder(); +sb.append("longs ["); +sb.append(longValue0); +sb.append(", "); +sb.append(longValue1); +sb.append("], nulls ["); +
[2/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java new file mode 100644 index 000..8bf2ccb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/wrapper/VectorHashKeyWrapperBase.java @@ -0,0 +1,223 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.wrapper; + +import org.apache.hive.common.util.Murmur3; + +import java.sql.Timestamp; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.ql.exec.KeyWrapper; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSetInfo; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * A hash map key wrapper for vectorized processing. + * It stores the key values as primitives in arrays for each supported primitive type. + * This works in conjunction with + * {@link org.apache.hadoop.hive.ql.exec.VectorHashKeyWrapperBatch VectorHashKeyWrapperBatch} + * to hash vectorized processing units (batches). + */ +public abstract class VectorHashKeyWrapperBase extends KeyWrapper { + + public static final class HashContext { +private final Murmur3.IncrementalHash32 bytesHash = new Murmur3.IncrementalHash32(); + +public static Murmur3.IncrementalHash32 getBytesHash(HashContext ctx) { + if (ctx == null) { +return new Murmur3.IncrementalHash32(); + } + return ctx.bytesHash; +} + } + + protected int hashcode; + + protected VectorHashKeyWrapperBase() { +hashcode = 0; + } + + @Override + public void getNewKey(Object row, ObjectInspector rowInspector) throws HiveException { +throw new HiveException("Should not be called"); + } + + @Override + public void setHashKey() { +throw new RuntimeException("Not implemented"); + } + + @Override + public int hashCode() { +return hashcode; + } + + @Override + public boolean equals(Object that) { +throw new RuntimeException("Not implemented"); + } + + @Override + protected Object clone() { +throw new RuntimeException("Not implemented"); + } + + @Override + public 
KeyWrapper copyKey() { +return (KeyWrapper) clone(); + } + + @Override + public void copyKey(KeyWrapper oldWrapper) { +throw new UnsupportedOperationException(); + } + + @Override + public Object[] getKeyArray() { +throw new UnsupportedOperationException(); + } + + public void assignLong(int keyIndex, int index, long v) { +throw new RuntimeException("Not implemented"); + } + + // FIXME: isNull is not updated; which might cause problems + @Deprecated + public void assignLong(int index, long v) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullLong(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignDouble(int index, double d) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullDouble(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignString(int index, byte[] bytes, int start, int length) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullString(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignDecimal(int index, HiveDecimalWritable value) { +throw new RuntimeException("Not implemented"); + } + + public void assignNullDecimal(int keyIndex, int index) { +throw new RuntimeException("Not implemented"); + } + + public void assignTimestamp(int index, Timestamp value) { +throw new
[3/3] hive git commit: HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-20321: Vectorization: Cut down memory size of 1 col VectorHashKeyWrapper to <1 CacheLine (Matt McCline, reviewed by Gopal Vijayaraghavan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ccdcc5e2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ccdcc5e2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ccdcc5e2 Branch: refs/heads/master Commit: ccdcc5e2eb39211ff3a5510bd7866eb5f5df7eb4 Parents: 59cf159 Author: Matt McCline Authored: Fri Aug 17 08:08:48 2018 -0500 Committer: Matt McCline Committed: Fri Aug 17 08:08:48 2018 -0500 -- .../ql/exec/persistence/HashMapWrapper.java |6 +- .../persistence/HybridHashTableContainer.java |6 +- .../persistence/MapJoinBytesTableContainer.java |6 +- .../hive/ql/exec/persistence/MapJoinKey.java|6 +- .../ql/exec/persistence/MapJoinKeyObject.java |6 +- .../exec/persistence/MapJoinTableContainer.java |6 +- .../ql/exec/vector/VectorColumnSetInfo.java | 20 +- .../ql/exec/vector/VectorGroupByOperator.java | 24 +- .../ql/exec/vector/VectorHashKeyWrapper.java| 682 --- .../exec/vector/VectorHashKeyWrapperBatch.java | 1067 - .../ql/exec/vector/VectorMapJoinOperator.java |4 +- .../exec/vector/VectorSMBMapJoinOperator.java |8 +- .../wrapper/VectorHashKeyWrapperBase.java | 223 .../wrapper/VectorHashKeyWrapperBatch.java | 1076 ++ .../wrapper/VectorHashKeyWrapperEmpty.java | 81 ++ .../wrapper/VectorHashKeyWrapperFactory.java| 55 + .../wrapper/VectorHashKeyWrapperGeneral.java| 649 +++ .../wrapper/VectorHashKeyWrapperSingleBase.java | 53 + .../wrapper/VectorHashKeyWrapperSingleLong.java | 131 +++ .../wrapper/VectorHashKeyWrapperTwoBase.java| 63 + .../wrapper/VectorHashKeyWrapperTwoLong.java| 170 +++ .../vector/TestVectorHashKeyWrapperBatch.java |6 +- 22 files changed, 2554 insertions(+), 1794 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java -- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 9d35805..765a647 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,9 +32,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase; +import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.ByteStream.Output; @@ -163,7 +163,7 @@ public class HashMapWrapper extends AbstractMapJoinTableContainer implements Ser } @Override -public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapper kw, +public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapperBase kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException { if (currentKey == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/ccdcc5e2/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 027e39a..13f1702 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,10 +39,10 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; -import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import
[46/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query11.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query11.q.out b/ql/src/test/results/clientpositive/perf/spark/query11.q.out index 9a19fdf..87a0cc0 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query11.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -72,7 +72,7 @@ with year_total as ( order by t_s_secyear.c_preferred_cust_flag limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -146,10 +146,6 @@ with year_total as ( order by t_s_secyear.c_preferred_cust_flag limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -180,518 +176,237 @@ STAGE PLANS: alias: web_sales filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), 
ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4, 22, 25] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2001), SelectColumnIsNotNull(col 0:int))
[43/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query19.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query19.q.out b/ql/src/test/results/clientpositive/perf/spark/query19.q.out index 51a403a..d2994e6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query19.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store @@ -22,7 +22,7 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, ,i_manufact limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, sum(ss_ext_sales_price) ext_price from date_dim, store_sales, item,customer,customer_address,store @@ -46,10 +46,6 @@ select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, ,i_manufact limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -66,40 +62,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_zip (type: string) outputColumnNames: _col0, _col1 - Select 
Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 25] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col7 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,220 +94,100 @@ STAGE PLANS: alias: customer filterExpr: (c_customer_sk is not null and c_current_addr_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 4:int)) predicate: (c_current_addr_sk is not null and c_customer_sk is not null) (type: boolean) Statistics: Num rows: 8000 Data size: 68801615852 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: c_customer_sk (type: int), c_current_addr_sk (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4]
[44/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query17.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index 87614e1..35405a7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id ,i_item_desc ,s_state @@ -42,7 +42,7 @@ select i_item_id ,s_state limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id ,i_item_desc ,s_state @@ -86,10 +86,6 @@ select i_item_id ,s_state limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -106,40 +102,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_state (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 24] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col3 (type: int) 1 _col0 (type: int) Execution 
mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -162,304 +136,138 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_customer_sk is not null and ss_item_sk is not null and ss_ticket_number is not null and ss_sold_date_sk is not null and ss_store_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 9:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 7:int)) predicate: (ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_item_sk (type: int), ss_customer_sk (type: int), ss_store_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true -
[25/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query51.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query51.q.out b/ql/src/test/results/clientpositive/perf/spark/query51.q.out index 78d164b..c0bb72b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain WITH web_v1 as ( select ws_item_sk item_sk, d_date, @@ -42,7 +42,7 @@ order by item_sk ,d_date limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain WITH web_v1 as ( select ws_item_sk item_sk, d_date, @@ -86,10 +86,6 @@ order by item_sk ,d_date limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -107,40 +103,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: 
NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -154,40 +128,18 @@ STAGE PLANS: alias: date_dim filterExpr: (d_month_seq BETWEEN 1212 AND 1223 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 3:int, left 1212, right 1223), SelectColumnIsNotNull(col 0:int)) predicate: (d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink
[07/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query78.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query78.q.out b/ql/src/test/results/clientpositive/perf/spark/query78.q.out index 720f654..15c7f04 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query78.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -55,7 +55,7 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ws as (select d_year AS ws_sold_year, ws_item_sk, ws_bill_customer_sk ws_customer_sk, @@ -112,10 +112,6 @@ order by round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -144,321 +140,144 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num 
rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 -
[14/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query69.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query69.q.out b/ql/src/test/results/clientpositive/perf/spark/query69.q.out index aefe55a..e17832c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query69.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select cd_gender, cd_marital_status, @@ -44,7 +44,7 @@ select cd_credit_rating limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select cd_gender, cd_marital_status, @@ -90,10 +90,6 @@ select cd_credit_rating limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -112,40 +108,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColumnBetween(col 8:int, left 1, right 3), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: 
NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -159,40 +133,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and d_moy BETWEEN 1 AND 3 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColumnBetween(col 8:int, left 1, right 3), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 1999) and d_date_sk is not null and d_moy BETWEEN 1 AND 3) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[30/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query40.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query40.q.out b/ql/src/test/results/clientpositive/perf/spark/query40.q.out index 01bffec..6cdac29 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query40.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select w_state ,i_item_id @@ -25,7 +25,7 @@ select order by w_state,i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select w_state ,i_item_id @@ -52,10 +52,6 @@ select order by w_state,i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -73,40 +69,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_state (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 10] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: 
vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,40 +94,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-08 16:00:00.0, right 1998-05-07 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[22/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query57.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query57.q.out b/ql/src/test/results/clientpositive/perf/spark/query57.q.out index 53b6778..51e644a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query57.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query57.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with v1 as( select i_category, i_brand, cc_name, @@ -45,7 +45,7 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with v1 as( select i_category, i_brand, cc_name, @@ -92,10 +92,6 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -114,40 +110,18 @@ STAGE PLANS: alias: call_center filterExpr: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string)) predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6] Statistics: Num rows: 60 Data size: 122700 Basic stats: 
COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -161,40 +135,18 @@ STAGE PLANS: alias: call_center filterExpr: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 6:string)) predicate: (cc_call_center_sk is not null and cc_name is not null) (type: boolean) Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cc_call_center_sk (type: int), cc_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 6] Statistics: Num rows: 60 Data size: 122700 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -
[10/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query75.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query75.q.out b/ql/src/test/results/clientpositive/perf/spark/query75.q.out index b9bd5b0..54c3c69 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query75.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id @@ -67,7 +67,7 @@ WITH all_sales AS ( ORDER BY sales_cnt_diff limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain WITH all_sales AS ( SELECT d_year ,i_brand_id @@ -136,10 +136,6 @@ WITH all_sales AS ( ORDER BY sales_cnt_diff limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -180,692 +176,319 @@ STAGE PLANS: alias: catalog_sales filterExpr: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 0:int)) predicate: (cs_item_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_item_sk (type: int), cs_order_number (type: int), cs_quantity (type: int), cs_ext_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: 
VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 15, 17, 18, 23] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: item filterExpr: ((i_category = 'Sports') and i_item_sk is not null and i_brand_id is not null and i_class_id is not null and i_category_id is not null and i_manufact_id is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 12:string, val Sports), SelectColumnIsNotNull(col 0:int),
[41/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query23.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query23.q.out b/ql/src/test/results/clientpositive/perf/spark/query23.q.out index 2c6d6f0..4ccc2df 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query23.q.out @@ -1,6 +1,6 @@ Warning: Map Join MAPJOIN[285][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales @@ -51,7 +51,7 @@ from and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with frequent_ss_items as (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt from store_sales @@ -102,10 +102,6 @@ from and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer))) y limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -132,260 +128,117 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_customer_sk is not null and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int)) predicate: (ss_customer_sk is not null 
and ss_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_sold_date_sk (type: int), ss_customer_sk (type: int), ss_quantity (type: int), ss_sales_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 10, 13] Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 18 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year) IN (1999, 2000, 2001, 2002) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter
[20/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query59.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query59.q.out b/ql/src/test/results/clientpositive/perf/spark/query59.q.out index 0393398..1224ab6 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query59.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with wss as (select d_week_seq, ss_store_sk, @@ -41,7 +41,7 @@ with wss as order by s_store_name1,s_store_id1,d_week_seq1 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with wss as (select d_week_seq, ss_store_sk, @@ -84,10 +84,6 @@ with wss as order by s_store_name1,s_store_id1,d_week_seq1 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -105,40 +101,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_id is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string), s_store_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 5] Statistics: Num rows: 1704 
Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col1 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -152,40 +126,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_id is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) predicate: (s_store_id is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash
[19/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query60.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query60.q.out b/ql/src/test/results/clientpositive/perf/spark/query60.q.out index 07bb822..f4f61e2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query60.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales @@ -75,7 +75,7 @@ where i_category in ('Children')) ,total_sales limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales @@ -152,10 +152,6 @@ where i_category in ('Children')) ,total_sales limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -190,158 +186,72 @@ STAGE PLANS: alias: item filterExpr: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:string), SelectColumnIsNotNull(col 0:int)) predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1] Statistics: Num rows: 462000 Data size: 
663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) -Reduce Sink Vectorization: -className: VectorReduceSinkStringOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 12 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 1999) and (d_moy = 9) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 9), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 9) and (d_year = 1999) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
[03/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query83.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query83.q.out b/ql/src/test/results/clientpositive/perf/spark/query83.q.out index 6a38c0d..1199d29 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query83.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with sr_items as (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty @@ -64,7 +64,7 @@ with sr_items as ,sr_item_qty limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with sr_items as (select i_item_id item_id, sum(sr_return_quantity) sr_item_qty @@ -130,10 +130,6 @@ with sr_items as ,sr_item_qty limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,117 +166,54 @@ STAGE PLANS: alias: catalog_returns filterExpr: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 0:int)) predicate: (cr_item_sk is not null and cr_returned_date_sk is not null) (type: boolean) Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cr_returned_date_sk (type: int), cr_item_sk (type: int), cr_return_quantity (type: int) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - 
projectedOutputColumnNums: [0, 2, 17] Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 28798881 Data size: 3057234680 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 2:string)) predicate: (d_date is not null and d_week_seq is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
[47/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query10.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query10.q.out b/ql/src/test/results/clientpositive/perf/spark/query10.q.out index 7aa9099..b7faa9a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query10.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select cd_gender, cd_marital_status, @@ -56,7 +56,7 @@ select cd_dep_college_count limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select cd_gender, cd_marital_status, @@ -114,10 +114,6 @@ select cd_dep_college_count limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -136,40 +132,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), FilterLongColumnBetween(col 8:int, left 4, right 7), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column 
stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -183,40 +157,18 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 2002) and d_moy BETWEEN 4 AND 7 and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2002), FilterLongColumnBetween(col 8:int, left 4, right 7), SelectColumnIsNotNull(col 0:int)) predicate: ((d_year = 2002) and d_date_sk is not null and d_moy BETWEEN 4 AND 7) (type: boolean) Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4058 Data size: 4540902 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -
[24/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query54.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query54.q.out b/ql/src/test/results/clientpositive/perf/spark/query54.q.out index aa43c3d..241d6d8 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query54.q.out @@ -2,7 +2,7 @@ Warning: Shuffle Join JOIN[84][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hd Warning: Shuffle Join JOIN[115][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 5' is a cross product Warning: Map Join MAPJOIN[145][bigTable=?] in task 'Stage-1:MAPRED' is a cross product Warning: Map Join MAPJOIN[144][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with my_customers as ( select distinct c_customer_sk , c_current_addr_sk @@ -57,7 +57,7 @@ with my_customers as ( order by segment, num_customers limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with my_customers as ( select distinct c_customer_sk , c_current_addr_sk @@ -112,10 +112,6 @@ with my_customers as ( order by segment, num_customers limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -137,32 +133,14 @@ STAGE PLANS: alias: date_dim filterExpr: ((d_year = 1999) and (d_moy = 3)) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 1999), FilterLongColEqualLongScalar(col 8:int, val 3)) predicate: ((d_moy = 3) and (d_year 
= 1999)) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: (d_month_seq + 3) (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [29] - selectExpressions: LongColAddLongScalar(col 3:int, val 3) -> 29:int Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Group By Operator -Group By Vectorization: -className: VectorGroupByOperator -groupByMode: HASH -keyExpressions: col 29:int -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [] keys: _col0 (type: int) mode: hash outputColumnNames: _col0 @@ -171,108 +149,43 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: false -usesVectorUDFAdaptor: false -vectorized: true Reducer 28 Execution mode: vectorized -Reduce Vectorization: -
[12/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query72.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query72.q.out b/ql/src/test/results/clientpositive/perf/spark/query72.q.out index ca142a7..37cf704 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query72.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq @@ -28,7 +28,7 @@ group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_desc ,w_warehouse_name ,d1.d_week_seq @@ -58,10 +58,6 @@ group by i_item_desc,w_warehouse_name,d1.d_week_seq order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -79,40 +75,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic 
stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -126,40 +100,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringGroupColEqualStringScalar(col 2:string, val 1001-5000), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_buy_potential = '1001-5000') and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 3600 Data size: 385200 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark
[33/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query37.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query37.q.out b/ql/src/test/results/clientpositive/perf/spark/query37.q.out index fa25d4c..bce0d68 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query37.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price @@ -14,7 +14,7 @@ select i_item_id order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id ,i_item_desc ,i_current_price @@ -30,10 +30,6 @@ select i_item_id order by i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -50,40 +46,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 2001-06-01 17:00:00.0, right 2001-07-31 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-06-02 00:00:00' AND TIMESTAMP'2001-08-01 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk 
(type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -101,107 +75,51 @@ STAGE PLANS: alias: catalog_sales filterExpr: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 15:int) predicate: cs_item_sk is not null (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_item_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [15] Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Reduce
[23/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query56.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query56.q.out b/ql/src/test/results/clientpositive/perf/spark/query56.q.out index 40c02ec..e03574f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query56.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from @@ -65,7 +65,7 @@ where i_color in ('orchid','chiffon','lace')) order by total_sales limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with ss as ( select i_item_id,sum(ss_ext_sales_price) total_sales from @@ -132,10 +132,6 @@ where i_color in ('orchid','chiffon','lace')) order by total_sales limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -170,158 +166,72 @@ STAGE PLANS: alias: item filterExpr: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:string), SelectColumnIsNotNull(col 0:int)) predicate: (i_item_id is not null and i_item_sk is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_item_id (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 
1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) -Reduce Sink Vectorization: -className: VectorReduceSinkStringOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 12 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and (d_moy = 1) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 6:int, val 2000), FilterLongColEqualLongScalar(col 8:int, val 1), SelectColumnIsNotNull(col 0:int)) predicate: ((d_moy = 1) and (d_year = 2000) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 18262 Data size: 20435178 Basic stats: COMPLETE Column stats: NONE
[34/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query34.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query34.q.out b/ql/src/test/results/clientpositive/perf/spark/query34.q.out index 371d94f..88279a3 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query34.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select c_last_name ,c_first_name ,c_salutation @@ -28,7 +28,7 @@ select c_last_name and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select c_last_name ,c_first_name ,c_salutation @@ -58,10 +58,6 @@ select c_last_name and cnt between 15 and 20 order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -78,40 +74,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_buy_potential) IN ('>1', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterStringColumnInList(col 2, values >1, unknown), FilterLongColGreaterLongScalar(col 4:int, val 0), SelectColumnIsTrue(col 11:boolean)(children: IfExprCondExprNull(col 6:boolean, col 10:boolean, null)(children: 
LongColGreaterLongScalar(col 4:int, val 0) -> 6:boolean, DoubleColGreaterDoubleScalar(col 9:double, val 1.2)(children: DoubleColDivideDoubleColumn(col 7:double, col 8:double)(children: CastLongToDouble(col 3:int) -> 7:double, CastLongToDouble(col 4:int) -> 8:double) -> 9:double) -> 10:boolean) -> 11:boolean), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_buy_potential) IN ('>1', 'unknown') and (hd_vehicle_count > 0) and CASE WHEN ((hd_vehicle_count > 0)) THEN (((UDFToDouble(hd_dep_count) / UDFToDouble(hd_vehicle_count)) > 1.2D)) ELSE (null) END and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1200 Data size: 128400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work Map 9 @@ -120,40 +94,18 @@ STAGE PLANS: alias: store filterExpr: ((s_county) IN ('Mobile County', 'Maverick County', 'Huron County', 'Kittitas County', 'Fairfield County', 'Jackson County', 'Barrow County', 'Pennington County') and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -
[51/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)" This reverts commit 470ba3e2835ef769f940d013acbe6c05d9208903. Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/142367d9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/142367d9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/142367d9 Branch: refs/heads/master Commit: 142367d96d8c4400a56aec4bb9bd1bfda0e61f77 Parents: 489b37a Author: Matt McCline Authored: Wed Aug 15 19:15:00 2018 -0500 Committer: Matt McCline Committed: Wed Aug 15 19:15:00 2018 -0500 -- .../ql/exec/vector/VectorColumnSetInfo.java |8 +- .../hive/ql/exec/vector/VectorCopyRow.java | 63 +- .../ql/exec/vector/VectorGroupKeyHelper.java|5 +- .../exec/vector/VectorHashKeyWrapperBatch.java | 12 +- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorizationContext.java| 46 +- .../expressions/CastStringGroupToString.java| 40 + .../ql/exec/vector/expressions/VectorElt.java | 168 +- .../VectorExpressionWriterFactory.java | 34 - .../hive/ql/optimizer/physical/Vectorizer.java | 19 +- .../vector/TestVectorHashKeyWrapperBatch.java |6 +- .../ql/exec/vector/TestVectorRowObject.java |3 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 137 +- .../ql/exec/vector/VectorRandomRowSource.java | 67 +- .../hive/ql/exec/vector/VectorVerifyFast.java |6 +- .../aggregation/TestVectorAggregation.java |9 +- .../expressions/TestVectorArithmetic.java | 14 +- .../vector/expressions/TestVectorBetweenIn.java | 38 +- .../expressions/TestVectorCastStatement.java| 11 +- .../expressions/TestVectorCoalesceElt.java | 87 +- .../expressions/TestVectorDateAddSub.java | 10 +- .../vector/expressions/TestVectorDateDiff.java |9 +- .../expressions/TestVectorFilterCompare.java| 12 +- .../expressions/TestVectorIfStatement.java |3 +- .../vector/expressions/TestVectorIndex.java |5 +- 
.../vector/expressions/TestVectorNegative.java | 21 +- .../exec/vector/expressions/TestVectorNull.java | 14 +- .../expressions/TestVectorStringConcat.java |3 +- .../expressions/TestVectorStringUnary.java |3 +- .../expressions/TestVectorStructField.java | 370 -- .../vector/expressions/TestVectorSubStr.java|3 +- .../expressions/TestVectorTimestampExtract.java |3 +- .../fast/TestVectorMapJoinFastRowHashMap.java | 101 +- .../test/queries/clientpositive/perf/query1.q |7 +- .../test/queries/clientpositive/perf/query10.q |7 +- .../test/queries/clientpositive/perf/query11.q |7 +- .../test/queries/clientpositive/perf/query12.q |7 +- .../test/queries/clientpositive/perf/query13.q |7 +- .../test/queries/clientpositive/perf/query14.q |7 +- .../test/queries/clientpositive/perf/query15.q |7 +- .../test/queries/clientpositive/perf/query16.q |7 +- .../test/queries/clientpositive/perf/query17.q |7 +- .../test/queries/clientpositive/perf/query18.q |7 +- .../test/queries/clientpositive/perf/query19.q |7 +- .../test/queries/clientpositive/perf/query2.q |7 +- .../test/queries/clientpositive/perf/query20.q |7 +- .../test/queries/clientpositive/perf/query21.q |7 +- .../test/queries/clientpositive/perf/query22.q |7 +- .../test/queries/clientpositive/perf/query23.q |7 +- .../test/queries/clientpositive/perf/query24.q |7 +- .../test/queries/clientpositive/perf/query25.q |7 +- .../test/queries/clientpositive/perf/query26.q |7 +- .../test/queries/clientpositive/perf/query27.q |7 +- .../test/queries/clientpositive/perf/query28.q |7 +- .../test/queries/clientpositive/perf/query29.q |7 +- .../test/queries/clientpositive/perf/query3.q |7 +- .../test/queries/clientpositive/perf/query30.q |7 +- .../test/queries/clientpositive/perf/query31.q |7 +- .../test/queries/clientpositive/perf/query32.q |7 +- .../test/queries/clientpositive/perf/query33.q |7 +- .../test/queries/clientpositive/perf/query34.q |7 +- .../test/queries/clientpositive/perf/query35.q |7 +- 
.../test/queries/clientpositive/perf/query36.q |7 +- .../test/queries/clientpositive/perf/query37.q |7 +- .../test/queries/clientpositive/perf/query38.q |7 +- .../test/queries/clientpositive/perf/query39.q |7 +- .../test/queries/clientpositive/perf/query4.q |7 +- .../test/queries/clientpositive/perf/query40.q |7 +- .../test/queries/clientpositive/perf/query42.q |7 +- .../test/queries/clientpositive/perf/query43.q |7 +- .../test/queries/clientpositive/perf/query44.q |7 +-
[31/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query4.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query4.q.out b/ql/src/test/results/clientpositive/perf/spark/query4.q.out index c49733b..3472613 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query4.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -106,7 +106,7 @@ union all order by t_s_secyear.customer_preferred_cust_flag limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name @@ -214,10 +214,6 @@ union all order by t_s_secyear.customer_preferred_cust_flag limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -254,774 +250,355 @@ STAGE PLANS: alias: web_sales filterExpr: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int)) predicate: (ws_bill_customer_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_bill_customer_sk (type: int), ws_ext_discount_amt (type: decimal(7,2)), ws_ext_sales_price 
(type: decimal(7,2)), ws_ext_wholesale_cost (type: decimal(7,2)), ws_ext_list_price (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 4, 22, 23, 24, 25] Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 13 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2001) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -
[49/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query39.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query39.q b/ql/src/test/queries/clientpositive/perf/query39.q index d3b981a..d3c806d 100644 --- a/ql/src/test/queries/clientpositive/perf/query39.q +++ b/ql/src/test/queries/clientpositive/perf/query39.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query39.tpl and seed 1327317894 -explain vectorization expression +-- start query 1 in stream 0 using template query39.tpl and seed 1327317894 +explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query4.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query4.q b/ql/src/test/queries/clientpositive/perf/query4.q index dbd605e..631a464 100644 --- a/ql/src/test/queries/clientpositive/perf/query4.q +++ b/ql/src/test/queries/clientpositive/perf/query4.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query4.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query4.tpl and seed 1819994127 +explain with year_total as ( select c_customer_id customer_id ,c_first_name customer_first_name http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query40.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query40.q b/ql/src/test/queries/clientpositive/perf/query40.q index 8432546..61f5ad3 100644 --- a/ql/src/test/queries/clientpositive/perf/query40.q +++ 
b/ql/src/test/queries/clientpositive/perf/query40.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query40.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query40.tpl and seed 1819994127 +explain select w_state ,i_item_id http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query42.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query42.q b/ql/src/test/queries/clientpositive/perf/query42.q index b5c6f3f..6b8abe0 100644 --- a/ql/src/test/queries/clientpositive/perf/query42.q +++ b/ql/src/test/queries/clientpositive/perf/query42.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query42.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query42.tpl and seed 1819994127 +explain select dt.d_year ,item.i_category_id ,item.i_category http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query43.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query43.q b/ql/src/test/queries/clientpositive/perf/query43.q index a92e04b..ebdc69d 100644 --- a/ql/src/test/queries/clientpositive/perf/query43.q +++ b/ql/src/test/queries/clientpositive/perf/query43.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set hive.explain.user=false; -set hive.auto.convert.join=true; -set hive.fetch.task.conversion=none; --- start query 1 in stream 0 using template query43.tpl and seed 1819994127 -explain vectorization expression +-- start query 1 in stream 0 using template query43.tpl and seed 1819994127 +explain select s_store_name, s_store_id, sum(case when (d_day_name='Sunday') then ss_sales_price 
else null end) sun_sales, sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/queries/clientpositive/perf/query44.q -- diff --git a/ql/src/test/queries/clientpositive/perf/query44.q b/ql/src/test/queries/clientpositive/perf/query44.q index 0e8a999..712bbfb 100644 --- a/ql/src/test/queries/clientpositive/perf/query44.q +++ b/ql/src/test/queries/clientpositive/perf/query44.q @@ -1,9 +1,6 @@ set hive.mapred.mode=nonstrict; -set
[29/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query45.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query45.q.out b/ql/src/test/results/clientpositive/perf/spark/query45.q.out index 9c58320..cac3d05 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query45.q.out @@ -1,5 +1,5 @@ Warning: Map Join MAPJOIN[67][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item where ws_bill_customer_sk = c_customer_sk @@ -18,7 +18,7 @@ select ca_zip, ca_county, sum(ws_sales_price) order by ca_zip, ca_county limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select ca_zip, ca_county, sum(ws_sales_price) from web_sales, customer, customer_address, date_dim, item where ws_bill_customer_sk = c_customer_sk @@ -37,10 +37,6 @@ select ca_zip, ca_county, sum(ws_sales_price) order by ca_zip, ca_county limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -59,80 +55,34 @@ STAGE PLANS: alias: item filterExpr: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterLongColumnInList(col 0:int, values [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]) predicate: (i_item_sk) IN (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE 
Column stats: NONE Select Operator expressions: i_item_id (type: string) outputColumnNames: i_item_id - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(), count(i_item_id) -Group By Vectorization: -aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 1:string) -> bigint -className: VectorGroupByOperator -groupByMode: HASH -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Reduce Sink Vectorization: - className: VectorReduceSinkEmptyKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: false -usesVectorUDFAdaptor: false -vectorized: true Reducer 16 Execution mode: vectorized Local Work: Map Reduce Local Work -Reduce Vectorization: -
[48/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index cbc4c5d..b66fb9f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -1035,8 +1035,8 @@ STAGE PLANS: 0 _col0 (type: decimal(16,2)) 1 _col0 (type: decimal(16,2)) Map Join Vectorization: -bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2) -bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 5:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 6:decimal(14,2) +bigTableKeyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 3:decimal(16,2) +bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(16,2)/DECIMAL_64) -> 4:decimal(16,2), ConvertDecimal64ToDecimal(col 1:decimal(14,2)/DECIMAL_64) -> 5:decimal(14,2) className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true @@ -1072,7 +1072,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: dec:decimal(14,2)/DECIMAL_64, value_dec:decimal(14,2)/DECIMAL_64 partitionColumnCount: 0 -scratchColumnTypeNames: [decimal(14,0), decimal(16,2), decimal(16,2), decimal(14,2)] +scratchColumnTypeNames: [decimal(16,2), decimal(16,2), decimal(14,2), decimal(14,0)] Map 2 Map Operator Tree: TableScan http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out -- diff --git 
a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 17edd47..ba2d9df 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -503,7 +503,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> decimal(25,2) className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(15,2)/DECIMAL_64, col 1:decimal(15,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(15,2)/DECIMAL_64) -> 3:decimal(15,2), ConvertDecimal64ToDecimal(col 1:decimal(15,2)/DECIMAL_64) -> 4:decimal(15,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -539,7 +539,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(15,2)/DECIMAL_64, c2:decimal(15,2)/DECIMAL_64 partitionColumnCount: 0 -scratchColumnTypeNames: [] +scratchColumnTypeNames: [decimal(15,2), decimal(15,2)] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1801,7 +1801,7 @@ STAGE PLANS: aggregators: VectorUDAFSumDecimal64(col 0:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64 className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:decimal(7,2)/DECIMAL_64, col 1:decimal(7,2)/DECIMAL_64 + keyExpressions: ConvertDecimal64ToDecimal(col 0:decimal(7,2)/DECIMAL_64) -> 3:decimal(7,2), ConvertDecimal64ToDecimal(col 1:decimal(7,2)/DECIMAL_64) -> 4:decimal(7,2) native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0] @@ -1837,7 +1837,7 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: c1:decimal(7,2)/DECIMAL_64, c2:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 -
[39/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query26.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index a3fe272..17bbc6a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, @@ -18,7 +18,7 @@ select i_item_id, order by i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select i_item_id, avg(cs_quantity) agg1, avg(cs_list_price) agg2, @@ -38,10 +38,6 @@ select i_item_id, order by i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -58,40 +54,18 @@ STAGE PLANS: alias: promotion filterExpr: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringGroupColEqualStringScalar(col 9:string, val N), FilterStringGroupColEqualStringScalar(col 14:string, val N)), SelectColumnIsNotNull(col 0:int)) predicate: (((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) (type: boolean) Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_promo_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: 
VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 2300 Data size: 2713420 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col3 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -111,176 +85,79 @@ STAGE PLANS: alias: catalog_sales filterExpr: (cs_bill_cdemo_sk is not null and cs_sold_date_sk is not null and cs_item_sk is not null and cs_promo_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 4:int), SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 15:int), SelectColumnIsNotNull(col 16:int)) predicate: (cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 287989836 Data size: 38999608952 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cs_sold_date_sk (type: int), cs_bill_cdemo_sk (type: int), cs_item_sk (type: int), cs_promo_sk (type: int), cs_quantity (type: int), cs_list_price (type: decimal(7,2)),
[45/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query13.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index 8d11ecd..c9fcb88 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) @@ -48,7 +48,7 @@ select avg(ss_quantity) and ss_net_profit between 50 and 250 )) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select avg(ss_quantity) ,avg(ss_ext_sales_price) ,avg(ss_ext_wholesale_cost) @@ -98,10 +98,6 @@ select avg(ss_quantity) and ss_net_profit between 50 and 250 )) POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -119,40 +115,18 @@ STAGE PLANS: alias: store filterExpr: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: s_store_sk is not null (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: 
VectorSparkHashTableSinkOperator -native: true keys: 0 _col4 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -166,40 +140,18 @@ STAGE PLANS: alias: household_demographics filterExpr: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnInList(col 3:int, values [3, 1]), SelectColumnIsNotNull(col 0:int)) predicate: ((hd_dep_count) IN (3, 1) and hd_demo_sk is not null) (type: boolean) Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hd_demo_sk (type: int), hd_dep_count (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3] Statistics: Num rows: 7200 Data size: 770400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator
[42/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query21.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query21.q.out b/ql/src/test/results/clientpositive/perf/spark/query21.q.out index c3fde7b..1673061 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query21.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -27,7 +27,7 @@ select * ,i_item_id limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select * from(select w_warehouse_name ,i_item_id @@ -56,10 +56,6 @@ select * ,i_item_id limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -77,40 +73,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: 
int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -124,40 +98,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-08 16:00:00.0, right 1998-05-07 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: d_date_sk (type: int), d_date (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE
[28/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query47.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query47.q.out b/ql/src/test/results/clientpositive/perf/spark/query47.q.out index a2387e8..690b105 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query47.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with v1 as( select i_category, i_brand, s_store_name, s_company_name, @@ -48,7 +48,7 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with v1 as( select i_category, i_brand, s_store_name, s_company_name, @@ -98,10 +98,6 @@ with v1 as( order by sum_sales - avg_monthly_sales, 3 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -120,40 +116,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string)) predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string) outputColumnNames: _col0, _col1, 
_col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 5, 17] Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -167,40 +141,18 @@ STAGE PLANS: alias: store filterExpr: (s_store_sk is not null and s_store_name is not null and s_company_name is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 5:string), SelectColumnIsNotNull(col 17:string)) predicate: (s_company_name is not null and s_store_name is not null and s_store_sk is not null) (type: boolean) Statistics: Num rows: 1704 Data size: 3256276 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s_store_sk (type: int), s_store_name (type: string), s_company_name (type: string) outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator -
[38/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query28.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out index caaca45..b437829 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out @@ -1,5 +1,5 @@ Warning: Map Join MAPJOIN[94][bigTable=?] in task 'Stage-1:MAPRED' is a cross product -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -51,7 +51,7 @@ from (select avg(ss_list_price) B1_LP or ss_wholesale_cost between 42 and 42+20)) B6 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select * from (select avg(ss_list_price) B1_LP ,count(ss_list_price) B1_CNT @@ -103,10 +103,6 @@ from (select avg(ss_list_price) B1_LP or ss_wholesale_cost between 42 and 42+20)) B6 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -134,33 +130,15 @@ STAGE PLANS: alias: store_sales filterExpr: (ss_quantity BETWEEN 16 AND 20 and (ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100)) (type: boolean) Statistics: Num rows: 575995635 Data size: 50814502088 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterLongColumnBetween(col 10:int, left 16, right 20), FilterExprOrExpr(children: FilterDecimal64ColumnBetween(col 12:decimal(7,2)/DECIMAL_64, decimal64LeftVal 14200, decimalLeftVal 14200, decimal64RightVal 
15200, decimalRightVal 15200), FilterDecimal64ColumnBetween(col 19:decimal(7,2)/DECIMAL_64, decimal64LeftVal 305400, decimalLeftVal 305400, decimal64RightVal 405400, decimalRightVal 405400), FilterDecimal64ColumnBetween(col 11:decimal(7,2)/DECIMAL_64, decimal64LeftVal 8000, decimalLeftVal 8000, decimal64RightVal 1, decimalRightVal 1))) predicate: ((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) (type: boolean) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ss_list_price (type: decimal(7,2)) outputColumnNames: ss_list_price - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [12] Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ss_list_price), count(ss_list_price) -Group By Vectorization: -aggregators: VectorUDAFSumDecimal64(col 12:decimal(7,2)/DECIMAL_64) -> decimal(17,2)/DECIMAL_64, VectorUDAFCount(col 12:decimal(7,2)/DECIMAL_64) -> bigint -className: VectorGroupByOperator -groupByMode: HASH -keyExpressions: col 12:decimal(7,2)/DECIMAL_64 -native: false -vectorProcessingMode: HASH -projectedOutputColumnNums: [0, 1] keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -169,55 +147,24 @@ STAGE PLANS: key expressions: _col0 (type: decimal(7,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
[09/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query76.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query76.q.out b/ql/src/test/results/clientpositive/perf/spark/query76.q.out index 3adfc10..05ec505 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query76.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim @@ -21,7 +21,7 @@ GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( SELECT 'store' as channel, 'ss_addr_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price FROM store_sales, item, date_dim @@ -44,10 +44,6 @@ GROUP BY channel, col_name, d_year, d_qoy, i_category ORDER BY channel, col_name, d_year, d_qoy, i_category limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -72,393 +68,181 @@ STAGE PLANS: alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: i_item_sk is not 
null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: i_item_sk (type: int), i_category (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 12] Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 11 Map Operator Tree: TableScan alias: item filterExpr: i_item_sk is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate:
[27/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query49.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query49.q.out b/ql/src/test/results/clientpositive/perf/spark/query49.q.out index e10a925..16cc603 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query49.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select 'web' as channel ,web.item @@ -124,7 +124,7 @@ select order by 1,4,5 limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select 'web' as channel ,web.item @@ -250,10 +250,6 @@ select order by 1,4,5 limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -288,306 +284,140 @@ STAGE PLANS: alias: ws filterExpr: ((ws_net_profit > 1) and (ws_net_paid > 0) and (ws_quantity > 0) and ws_order_number is not null and ws_item_sk is not null and ws_sold_date_sk is not null) (type: boolean) Statistics: Num rows: 144002668 Data size: 19580198212 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterDecimal64ColGreaterDecimal64Scalar(col 33:decimal(7,2)/DECIMAL_64, val 100), FilterDecimal64ColGreaterDecimal64Scalar(col 29:decimal(7,2)/DECIMAL_64, val 0), FilterLongColGreaterLongScalar(col 18:int, val 0), SelectColumnIsNotNull(col 17:int), SelectColumnIsNotNull(col 3:int), SelectColumnIsNotNull(col 0:int)) predicate: ((ws_net_paid > 0) and (ws_net_profit > 1) and (ws_quantity > 0) and ws_item_sk is not null and ws_order_number is not null and ws_sold_date_sk is not null) (type: boolean) 
Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ws_sold_date_sk (type: int), ws_item_sk (type: int), ws_order_number (type: int), ws_quantity (type: int), ws_net_paid (type: decimal(7,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 3, 17, 18, 29] Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) -Reduce Sink Vectorization: -className: VectorReduceSinkLongOperator -native: true -nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 5333432 Data size: 725192506 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Map 10 Map Operator Tree: TableScan alias: date_dim filterExpr: ((d_year = 2000) and (d_moy = 12) and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -
[35/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query32.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out index c4ab76c..af121c5 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select sum(cs_ext_discount_amt) as `excess discount amount` from catalog_sales @@ -25,7 +25,7 @@ and cs_ext_discount_amt ) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select sum(cs_ext_discount_amt) as `excess discount amount` from catalog_sales @@ -52,10 +52,6 @@ and cs_ext_discount_amt ) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -73,40 +69,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-17 16:00:00.0, right 1998-06-15 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE 
Select Operator expressions: d_date_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col0 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -120,40 +94,18 @@ STAGE PLANS: alias: date_dim filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 29:timestamp, left 1998-03-17 16:00:00.0, right 1998-06-15 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 29:timestamp), SelectColumnIsNotNull(col 0:int)) predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' and d_date_sk is not null) (type: boolean) Statistics: Num rows: 8116 Data size: 9081804 Basic stats: COMPLETE Column stats: NONE Select Operator
[50/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java index d367fb9..0bca490 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorCoalesceElt.java @@ -54,7 +54,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.IntWritable; import junit.framework.Assert; @@ -67,11 +66,7 @@ public class TestVectorCoalesceElt { public void testCoalesce() throws Exception { Random random = new Random(5371); -// Grind through a few more index values... -int iteration = 0; -for (int i = 0; i < 10; i++) { - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ true, false); -} +doCoalesceElt(random, /* isCoalesce */ true, false); } @Test @@ -79,10 +74,9 @@ public class TestVectorCoalesceElt { Random random = new Random(5371); // Grind through a few more index values... 
-int iteration = 0; -for (int i = 0; i < 10; i++) { - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, false); - iteration = doCoalesceElt(random, iteration, /* isCoalesce */ false, true); +for (int i = 0; i < 4; i++) { + doCoalesceElt(random, /* isCoalesce */ false, false); + doCoalesceElt(random, /* isCoalesce */ false, true); } } @@ -94,41 +88,39 @@ public class TestVectorCoalesceElt { static final int count = values().length; } - private int doCoalesceElt(Random random, int iteration, boolean isCoalesce, - boolean isEltIndexConst) - throws Exception { + private void doCoalesceElt(Random random, boolean isCoalesce, boolean isEltIndexConst) + throws Exception { -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 2, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 2, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ false); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, /* constantColumns */ null, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0 }, /* nullConstantColumns */ new int[] { 0 
}, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 1 }, /* nullConstantColumns */ new int[] { 1 }, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ null, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3, +doCoalesceOnRandomDataType(random, isCoalesce, isEltIndexConst, /* columnCount */ 3, new int[] { 0, 2 }, /* nullConstantColumns */ new int[] { 0 }, /* allowNulls */ true); -doCoalesceOnRandomDataType(random, iteration++, isCoalesce, isEltIndexConst, /* columnCount */ 3,
[02/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query85.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index 09b2a40..d1b3a2c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain select substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) @@ -81,7 +81,7 @@ order by substr(r_reason_desc,1,20) ,avg(wr_fee) limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain select substr(r_reason_desc,1,20) ,avg(ws_quantity) ,avg(wr_refunded_cash) @@ -164,10 +164,6 @@ order by substr(r_reason_desc,1,20) ,avg(wr_fee) limit 100 POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 @@ -184,40 +180,18 @@ STAGE PLANS: alias: web_page filterExpr: wp_web_page_sk is not null (type: boolean) Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: wp_web_page_sk is not null (type: boolean) Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: wp_web_page_sk (type: int) outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] Statistics: Num rows: 4602 Data size: 2696178 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: 
VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work Map 15 @@ -226,40 +200,18 @@ STAGE PLANS: alias: reason filterExpr: r_reason_sk is not null (type: boolean) Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: r_reason_sk is not null (type: boolean) Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: r_reason_sk (type: int), r_reason_desc (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 72 Data size: 14400 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col13 (type: int)
[32/51] [partial] hive git commit: Revert "HIVE-20315: Vectorization: Fix more NULL / Wrong Results issues and avoid unnecessary casts/conversions (Matt McCline, reviewed by Teddy Choi)"
http://git-wip-us.apache.org/repos/asf/hive/blob/142367d9/ql/src/test/results/clientpositive/perf/spark/query39.q.out -- diff --git a/ql/src/test/results/clientpositive/perf/spark/query39.q.out b/ql/src/test/results/clientpositive/perf/spark/query39.q.out index 1927d3e..cab0feb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query39.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov @@ -24,7 +24,7 @@ where inv1.i_item_sk = inv2.i_item_sk order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy ,stdev,mean, case mean when 0 then null else stdev/mean end cov @@ -50,10 +50,6 @@ where inv1.i_item_sk = inv2.i_item_sk order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov ,inv2.d_moy,inv2.mean, inv2.cov POSTHOOK: type: QUERY -PLAN VECTORIZATION: - enabled: true - enabledConditionsMet: [hive.vectorized.execution.enabled IS true] - STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 @@ -71,40 +67,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: 
string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator -Spark Hash Table Sink Vectorization: -className: VectorSparkHashTableSinkOperator -native: true keys: 0 _col2 (type: int) 1 _col0 (type: int) Execution mode: vectorized -Map Vectorization: -enabled: true -enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true -inputFormatFeatureSupport: [DECIMAL_64] -featureSupportInUse: [DECIMAL_64] -inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat -allNative: true -usesVectorUDFAdaptor: false -vectorized: true Local Work: Map Reduce Local Work @@ -118,40 +92,18 @@ STAGE PLANS: alias: warehouse filterExpr: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true Filter Operator -Filter Vectorization: -className: VectorFilterOperator -native: true -predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: w_warehouse_sk is not null (type: boolean) Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: w_warehouse_sk (type: int), w_warehouse_name (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 2] Statistics: Num rows: 27 Data size: 27802 Basic stats: COMPLETE Column stats: NONE Spark