Repository: hive Updated Branches: refs/heads/master 299672cfc -> ef32c73ac
HIVE-14045: (Vectorization) Add missing case for BINARY in VectorizationContext.getNormalizedName method (Matt McCline, reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ef32c73a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ef32c73a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ef32c73a Branch: refs/heads/master Commit: ef32c73ace9756572bc2620e2a55be01c3772b43 Parents: 299672c Author: Matt McCline <[email protected]> Authored: Wed Jun 22 06:47:44 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Wed Jun 22 06:47:44 2016 -0700 ---------------------------------------------------------------------- .../exec/vector/VectorExpressionDescriptor.java | 6 +- .../ql/exec/vector/VectorizationContext.java | 4 + .../clientpositive/vector_binary_join_groupby.q | 8 +- .../tez/vector_binary_join_groupby.q.out | 92 +++++++++++++++++++- .../vector_binary_join_groupby.q.out | 90 ++++++++++++++++++- 5 files changed, 190 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ef32c73a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index 7b3f781..217af3f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -75,6 +75,7 @@ public class VectorExpressionDescriptor { TIMESTAMP (0x080), INTERVAL_YEAR_MONTH (0x100), INTERVAL_DAY_TIME (0x200), + BINARY (0x400), DATETIME_FAMILY (DATE.value | TIMESTAMP.value), INTERVAL_FAMILY (INTERVAL_YEAR_MONTH.value | INTERVAL_DAY_TIME.value), INT_INTERVAL_YEAR_MONTH (INT_FAMILY.value | INTERVAL_YEAR_MONTH.value), @@ -109,6 +110,8 @@ public class VectorExpressionDescriptor { return CHAR; } else if (VectorizationContext.varcharTypePattern.matcher(lower).matches()) { return VARCHAR; + } else if (lower.equals("binary")) { + return BINARY; } else if (VectorizationContext.decimalTypePattern.matcher(lower).matches()) { return DECIMAL; } else if (lower.equals("timestamp")) { @@ -163,7 +166,8 @@ public class VectorExpressionDescriptor { return "Decimal"; } else if (argType == STRING || argType == CHAR || - argType == VARCHAR) { + argType == VARCHAR || + argType == BINARY) { return "String"; } else { return "None"; http://git-wip-us.apache.org/repos/asf/hive/blob/ef32c73a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index c4f47e1..9de1833 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2298,6 +2298,8 @@ public class VectorizationContext { case VARCHAR: //Return the VARCHAR type as is, it includes maximum length. return hiveTypeName; + case BINARY: + return "Binary"; case DATE: return "Date"; case TIMESTAMP: @@ -2325,6 +2327,8 @@ public class VectorizationContext { return "Char"; case VARCHAR: return "VarChar"; + case BINARY: + return "Binary"; case DATE: return "Date"; case TIMESTAMP: http://git-wip-us.apache.org/repos/asf/hive/blob/ef32c73a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q index 1a9d280..1d99e34 100644 --- a/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q +++ b/ql/src/test/queries/clientpositive/vector_binary_join_groupby.q @@ -45,7 +45,7 @@ SELECT sum(hash(*)) FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin; +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin; EXPLAIN SELECT count(*), bin @@ -55,3 +55,9 @@ GROUP BY bin; SELECT count(*), bin FROM hundredorc GROUP BY bin; + +-- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/ef32c73a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out index 8cbb4b1..6fbbf91 100644 --- a/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_binary_join_groupby.q.out @@ -194,18 +194,17 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY PREHOOK: Input: default@hundredorc #### A masked pattern was here #### POSTHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### --107801098240 +-27832781952 PREHOOK: query: EXPLAIN SELECT count(*), bin FROM hundredorc @@ -315,3 +314,88 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies +PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 1 Map 2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map 2 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: binary) + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/ef32c73a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index d9c027a..dc1fcd7 100644 --- a/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -190,18 +190,17 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Map Join MAPJOIN[18][bigTable=?] in task 'Stage-2:MAPRED' is a cross product PREHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin PREHOOK: type: QUERY PREHOOK: Input: default@hundredorc #### A masked pattern was here #### POSTHOOK: query: SELECT sum(hash(*)) -FROM hundredorc t1 JOIN hundredorc t2 ON t2.bin = t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.bin = t2.bin POSTHOOK: type: QUERY POSTHOOK: Input: default@hundredorc #### A masked pattern was here #### --107801098240 +-27832781952 PREHOOK: query: EXPLAIN SELECT count(*), bin FROM hundredorc @@ -303,3 +302,86 @@ POSTHOOK: Input: default@hundredorc 3 xylophone band 2 yard duty 3 zync studies +PREHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-14045: Involve a binary vector scratch column for small table result (Native Vector MapJoin). + +EXPLAIN +SELECT t1.i, t1.bin, t2.bin +FROM hundredorc t1 JOIN hundredorc t2 ON t1.i = t2.i +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:t1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:t1 + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: i is not null (type: boolean) + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: i (type: int), bin (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +
