hive git commit: HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/master 37120b877 -> 45163ee4c HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/45163ee4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/45163ee4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/45163ee4 Branch: refs/heads/master Commit: 45163ee4cad0162b8cecf7e8e597fb618c87884f Parents: 37120b8 Author: Prasanth Jayachandran Authored: Sun Sep 16 00:11:23 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Sep 16 00:11:23 2018 -0700 -- bin/hive| 1 + .../apache/hive/common/util/ProcessUtils.java | 66 .../java/org/apache/hive/http/HttpServer.java | 19 + .../apache/hive/http/ProfileOutputServlet.java | 51 +++ .../org/apache/hive/http/ProfileServlet.java| 359 +++ 5 files changed, 496 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/bin/hive -- diff --git a/bin/hive b/bin/hive index 1ade51e..a7ae2f5 100755 --- a/bin/hive +++ b/bin/hive @@ -356,6 +356,7 @@ fi # include the log4j jar that is used for hive into the classpath CLASSPATH="${CLASSPATH}:${LOG_JAR_CLASSPATH}" export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${LOG_JAR_CLASSPATH}" +export JVM_PID="$$" if [ "$TORUN" = "" ] ; then echo "Service $SERVICE not found" http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/common/src/java/org/apache/hive/common/util/ProcessUtils.java -- diff --git a/common/src/java/org/apache/hive/common/util/ProcessUtils.java b/common/src/java/org/apache/hive/common/util/ProcessUtils.java new file mode 100644 index 000..409384f --- /dev/null +++ b/common/src/java/org/apache/hive/common/util/ProcessUtils.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Process related utilities. + */ +public class ProcessUtils { + private static Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); + + public static Integer getPid() { +// JVM_PID is exported by bin/hive +String pidStr = System.getenv("JVM_PID"); + +// in case if it is not set correctly used fallback from mxbean which is implementation specific +if (pidStr == null || pidStr.trim().isEmpty()) { + String name = ManagementFactory.getRuntimeMXBean().getName(); + if (name != null) { +int idx = name.indexOf("@"); +if (idx != -1) { + pidStr = name.substring(0, name.indexOf("@")); +} + } +} +try { + if (pidStr != null) { +return Integer.valueOf(pidStr); + } +} catch (NumberFormatException nfe) { + // ignore +} +return null; + } + + public static Process runCmdAsync(List cmd) { +try { + LOG.info("Running command async: " + cmd); + return new ProcessBuilder(cmd).inheritIO().start(); +} catch (IOException ex) { + throw new IllegalStateException(ex); +} + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/common/src/java/org/apache/hive/http/HttpServer.java -- diff --git a/common/src/java/org/apache/hive/http/HttpServer.java b/common/src/java/org/apache/hive/http/HttpServer.java index 3cb7a33..24c5422 100644 --- a/common/src/java/org/apache/hive/http/HttpServer.java +++ b/common/src/java/org/apache/hive/http/HttpServer.java @@ -21,6 +21,9 @@ package org.apache.hive.http; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import
hive git commit: HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan)
Repository: hive Updated Branches: refs/heads/branch-3 b4e552df1 -> 550cc6199 HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/550cc619 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/550cc619 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/550cc619 Branch: refs/heads/branch-3 Commit: 550cc61998964a66fd80f72441f262ecf87256e4 Parents: b4e552d Author: Prasanth Jayachandran Authored: Sun Sep 16 00:11:23 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Sep 16 00:12:18 2018 -0700 -- bin/hive| 1 + .../apache/hive/common/util/ProcessUtils.java | 66 .../java/org/apache/hive/http/HttpServer.java | 19 + .../apache/hive/http/ProfileOutputServlet.java | 51 +++ .../org/apache/hive/http/ProfileServlet.java| 359 +++ 5 files changed, 496 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/bin/hive -- diff --git a/bin/hive b/bin/hive index 1ade51e..a7ae2f5 100755 --- a/bin/hive +++ b/bin/hive @@ -356,6 +356,7 @@ fi # include the log4j jar that is used for hive into the classpath CLASSPATH="${CLASSPATH}:${LOG_JAR_CLASSPATH}" export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${LOG_JAR_CLASSPATH}" +export JVM_PID="$$" if [ "$TORUN" = "" ] ; then echo "Service $SERVICE not found" http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/common/src/java/org/apache/hive/common/util/ProcessUtils.java -- diff --git a/common/src/java/org/apache/hive/common/util/ProcessUtils.java b/common/src/java/org/apache/hive/common/util/ProcessUtils.java new file mode 100644 index 000..409384f --- /dev/null +++ b/common/src/java/org/apache/hive/common/util/ProcessUtils.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.common.util; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Process related utilities. + */ +public class ProcessUtils { + private static Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); + + public static Integer getPid() { +// JVM_PID is exported by bin/hive +String pidStr = System.getenv("JVM_PID"); + +// in case if it is not set correctly used fallback from mxbean which is implementation specific +if (pidStr == null || pidStr.trim().isEmpty()) { + String name = ManagementFactory.getRuntimeMXBean().getName(); + if (name != null) { +int idx = name.indexOf("@"); +if (idx != -1) { + pidStr = name.substring(0, name.indexOf("@")); +} + } +} +try { + if (pidStr != null) { +return Integer.valueOf(pidStr); + } +} catch (NumberFormatException nfe) { + // ignore +} +return null; + } + + public static Process runCmdAsync(List cmd) { +try { + LOG.info("Running command async: " + cmd); + return new ProcessBuilder(cmd).inheritIO().start(); +} catch (IOException ex) { + throw new IllegalStateException(ex); +} + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/common/src/java/org/apache/hive/http/HttpServer.java -- diff --git a/common/src/java/org/apache/hive/http/HttpServer.java b/common/src/java/org/apache/hive/http/HttpServer.java index 3cb7a33..24c5422 100644 --- a/common/src/java/org/apache/hive/http/HttpServer.java +++ b/common/src/java/org/apache/hive/http/HttpServer.java @@ -21,6 +21,9 @@ package org.apache.hive.http; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import
[33/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out index b075ecf..801948c 100644 --- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out +++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out @@ -983,7 +983,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1153,7 +1153,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1 A masked pattern was here 12744278 500 652447 25 PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) +Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Map 5 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: x + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE +value expressions: _col1 (type: bigint) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 5 +Map Operator Tree: +TableScan + alias: y + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator +expressions: key (type: string), value (type: string) +outputColumnNames: key, value +Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE +Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator +key expressions: _col0 (type: string) +sort order: + +Map-reduce partition columns: _col0 (type: string) +Statistics: Num rows: 12 Data size: 1128 Basic
[28/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out index e33101c..0a8a8a8 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out @@ -271,8 +271,8 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_GBY_8: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_7: 2100 - RECORDS_OUT_OPERATOR_SEL_6: 2100 - RECORDS_OUT_OPERATOR_TS_0: 2100 + RECORDS_OUT_OPERATOR_SEL_6: 3 + RECORDS_OUT_OPERATOR_TS_0: 3 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 262144 ALLOCATED_USED_BYTES: 26 @@ -327,13 +327,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 8 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 8 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 8 - RECORDS_OUT_OPERATOR_SEL_9: 8 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: ALLOCATED_BYTES: 1048576 ALLOCATED_USED_BYTES: 2731 @@ -367,13 +367,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 22 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 22 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 22 - RECORDS_OUT_OPERATOR_SEL_9: 22 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -405,13 +405,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 16 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 16 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 16 - RECORDS_OUT_OPERATOR_SEL_9: 16 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -443,13 +443,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 18 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 18 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 18 - RECORDS_OUT_OPERATOR_SEL_9: 18 - RECORDS_OUT_OPERATOR_TS_0: 2000 + RECORDS_OUT_OPERATOR_SEL_9: 2 + RECORDS_OUT_OPERATOR_TS_0: 2 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -487,7 +487,7 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1 RECORDS_OUT_OPERATOR_SEL_9: 1 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -519,13 +519,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -557,13 +557,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 32 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 32 + RECORDS_OUT_OPERATOR_FIL_8: 1 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 32 - RECORDS_OUT_OPERATOR_SEL_9: 32 - RECORDS_OUT_OPERATOR_TS_0: 1000 + RECORDS_OUT_OPERATOR_SEL_9: 1 + RECORDS_OUT_OPERATOR_TS_0: 1 Stage-1 LLAP IO COUNTERS: CACHE_HIT_BYTES: 1071 CACHE_MISS_BYTES: 0 @@ -595,13 +595,13 @@ Stage-1 HIVE COUNTERS: RECORDS_OUT_0: 1 RECORDS_OUT_INTERMEDIATE_Map_1: 1697 RECORDS_OUT_INTERMEDIATE_Reducer_2: 0 - RECORDS_OUT_OPERATOR_FIL_8: 1697 + RECORDS_OUT_OPERATOR_FIL_8: 2 RECORDS_OUT_OPERATOR_FS_12: 1 RECORDS_OUT_OPERATOR_GBY_11: 1 RECORDS_OUT_OPERATOR_MAP_0: 0 RECORDS_OUT_OPERATOR_RS_10: 1697 - RECORDS_OUT_OPERATOR_SEL_9: 1697 -
[11/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out index 6b25672..2f5eb26 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out @@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1_n14 POSTHOOK: Lineage: test1_n14.col_1 SCRIPT [] POSTHOOK: Lineage: test1_n14.key SCRIPT [] POSTHOOK: Lineage: test1_n14.value SCRIPT [] +col1 col2col3 PREHOOK: query: CREATE TABLE test2_n9 (key INT, value INT, col_2 STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2_n9 POSTHOOK: Lineage: test2_n9.col_2 SCRIPT [] POSTHOOK: Lineage: test2_n9.key SCRIPT [] POSTHOOK: Lineage: test2_n9.value SCRIPT [] -PREHOOK: query: EXPLAIN +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR SELECT * FROM test1_n14 LEFT OUTER JOIN test2_n9 ON (test1_n14.value=test2_n9.value) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -67,9 +74,14 @@ STAGE PLANS: TableScan alias: test1_n14 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 +Select Vectorization: +className: VectorSelectOperator +native: true Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -77,12 +89,19 @@ STAGE PLANS: keys: 0 _col1 (type: int) 1 _col1 (type: int) + Map Join Vectorization: + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false +File Sink Vectorization: +className: VectorFileSinkOperator +native: false Statistics: Num rows: 8 Data size: 859 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -90,23 +109,50 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.mapred.TextInputFormat +allNative: false +usesVectorUDFAdaptor: false +vectorized: true Map 2 Map Operator Tree: TableScan alias: test2_n9 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 +
[05/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out index 903e74b..a445b44 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE(((cbigint > -23) OR (cfloat = cdouble ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cstring1, cint, @@ -69,7 +69,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -91,10 +90,8 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized @@ -107,27 +104,14 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 12 -includeColumns: [0, 1, 2, 3, 4, 5, 6, 8] -dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean -partitionColumnCount: 0 -scratchColumnTypeNames: [decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double] Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true -reduceColumnNullOrder: zz -reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true -rowBatchContext: -dataColumnCount: 14 -dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double -partitionColumnCount: 0 -scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type:
[24/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..da513db --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A
[40/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..93fdb28 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,20 +28,23 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { +DYNAMIC_PARTITION_HASH_JOIN, +SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +86,103 @@ public class MapJoinTestDescription extends DescriptionTest { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. - public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. - public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { +this( +hiveConf, +vectorMapJoinVariation, +/* bigTableColumnNames */ null, +bigTableTypeInfos, +bigTableKeyColumnNums, +smallTableValueTypeInfos, +smallTableRetainKeyColumnNums, +
[22/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out index 5c0d6bb..6eaf7ad 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out @@ -72,10 +72,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -209,10 +209,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -372,10 +372,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -632,11 +632,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [4] -valueColumnNums: [3] +partitionColumns: 4:double +valueColumns: 3:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -692,11 +692,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization:
[15/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 5e26637..0f40378 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -353,11 +352,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -517,10 +516,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [2] +valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -576,11 +575,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -747,10 +746,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0] +keyColumns: 0:decimal(15,2) native: true
[13/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index c07f4d9..6660d73 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -62,10 +62,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1] +valueColumns: 1:bigint Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -129,12 +129,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:int, 1:bigint keyExpressions: ConstantVectorExpression(val 0) -> 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [3] -valueColumnNums: [] +partitionColumns: 3:int Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -304,10 +303,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2] +valueColumns: 1:string, 2:bigint Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap @@ -363,11 +362,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2] + keyColumns: 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:string + valueColumns: 0:int Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -540,10 +539,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true
[43/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..8dce5b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public class VectorMapJoinFastLongHashSet return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + +// Ignore NULL keys (HashSet not used for FULL OUTER). +adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public class VectorMapJoinFastLongHashSet optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); -long existance = findReadSlot(key, hashCode); +int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; -if (existance == -1) { +if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. + + if (matchTracker != null) { +matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,13 @@ public class VectorMapJoinFastLongHashSet } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean isFullOuter, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { -super(minMaxEnabled, isOuterJoin, hashTableKeyType, +super( +isFullOuter, +minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..03ef249 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ public abstract class VectorMapJoinFastLongHashTable private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public abstract class VectorMapJoinFastLongHashTable return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { -return; +return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public abstract class VectorMapJoinFastLongHashTable keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); +return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ public abstract class VectorMapJoinFastLongHashTable largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; -// LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot =
[46/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..114cea9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashP import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. @@ -105,6 +112,23 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem protected HybridHashTableContainer firstSmallTable; // The first small table; // Only this table has spilled big table rows + /* + * FULL OUTER MapJoin members. + */ + protected transient boolean isFullOuterMapJoin; // Are we doing a FULL OUTER MapJoin? + + protected transient int fullOuterBigTableRetainSize; + // The number of Big Table columns being + // retained in the output result for + // FULL OUTER MapJoin. + + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared among vertces, we require this non-shared object + * for our vertex (i.e. operator private) key match tracking. + */ + protected transient MatchTracker matchTracker; + protected transient boolean isTestingNoHashTableLoad; // Only used in bucket map join. private transient int numBuckets = -1; @@ -177,6 +201,8 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem hybridMapJoinLeftover = false; firstSmallTable = null; +doFullOuterMapJoinInit(); + generateMapMetaData(); isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf, @@ -252,6 +278,24 @@ public class MapJoinOperator extends AbstractMapJoinOperator implem } } + /* + * Do initialization for FULL OUTER MapJoin. + * + * Currently, we do not support FULL OUTER MapJoin for N-way. + */ + private void doFullOuterMapJoinInit() { + +// This will be set during the first process call or during closeOp if no rows
[21/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index bbfba28..07c4eed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -158,10 +157,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -280,10 +279,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -350,10 +348,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [3] +valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -499,10 +497,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator -keyColumnNums: [0, 1] +keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No
[19/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 7c1780b..a49e8e2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1 A masked pattern was here 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n1 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n1 - A masked pattern was here -3080335 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: ((key > 40) and (value > 50) and (key = value)) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Filter Operator +Filter Vectorization: +className: VectorFilterOperator +native: true +predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +sort order: +Reduce Sink Vectorization: +className: VectorReduceSinkEmptyKeyOperator +native: true +nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true +
[10/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 8efe78d..c9b9e81 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -146,14 +146,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -217,10 +219,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:int Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -281,10 +282,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -351,14 +351,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [1] -bigTableRetainedColumnNums: [3] -bigTableValueColumnNums: [3] +bigTableKeyColumns: 1:int +bigTableRetainColumnNums: [3] +bigTableValueColumns: 3:decimal(8,1) bigTableValueExpressions: ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true -projectedOutputColumnNums: [3] +nonOuterSmallTableKeyMapping: [] +projectedOutput: 3:decimal(8,1) +
[23/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 000..52ca0fb --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,3923 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE
[31/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 000..c387af5 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,3139 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here
[14/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 6637d33..ef1e653 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -43,11 +43,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [2, 1] +keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [2] -valueColumnNums: [5, 7] +partitionColumns: 2:string +valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -279,11 +279,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -339,11 +339,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3] + partitionColumns: 1:string + valueColumns: 2:int, 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double) Reducer 3 @@ -529,11 +529,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0, 1, 2] +keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0, 1, 2] -valueColumnNums: [3] +partitionColumns: 0:string, 1:string, 2:int +valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode:
[03/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union14.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out b/ql/src/test/results/clientpositive/spark/union14.q.out index 6a95e4a..49d6cb1 100644 --- a/ql/src/test/results/clientpositive/spark/union14.q.out +++ b/ql/src/test/results/clientpositive/spark/union14.q.out @@ -126,20 +126,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union7.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out b/ql/src/test/results/clientpositive/spark/union7.q.out index 549075c..8556f84 100644 --- a/ql/src/test/results/clientpositive/spark/union7.q.out +++ b/ql/src/test/results/clientpositive/spark/union7.q.out @@ -122,20 +122,20 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 A masked pattern was here -2781 -2731 + 10 1281 -2551 -tst1 1 1461 -3691 +1501 2131 -3111 2241 2381 -1501 - 10 +2551 +2731 +2781 +3111 +3691 +4011 4061 66 1 -4011 98 1 +tst1 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_null.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_null.q.out b/ql/src/test/results/clientpositive/spark/union_null.q.out index d37adbb..696641c 100644 --- a/ql/src/test/results/clientpositive/spark/union_null.q.out +++ b/ql/src/test/results/clientpositive/spark/union_null.q.out @@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value as x from src order POSTHOOK: type: QUERY POSTHOOK: Input: default@src A masked pattern was here -val_0 -val_0 -val_0 -val_10 -val_100 NULL NULL NULL NULL NULL +val_0 +val_0 +val_0 +val_10 +val_100 PREHOOK: query: select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a PREHOOK: type: QUERY PREHOOK: Input: default@src1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_view.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out b/ql/src/test/results/clientpositive/spark/union_view.q.out index 591ebfa..97a5bef 100644 --- a/ql/src/test/results/clientpositive/spark/union_view.q.out +++ b/ql/src/test/results/clientpositive/spark/union_view.q.out @@ -483,10 +483,10 @@ STAGE PLANS: 86 val_86 2 86 val_86 3 86 val_86 3 -86 val_86 2 -86 val_86 2 86 val_86 3 86 val_86 3 +86 val_86 2 +86 val_86 2 86 val_86 1 STAGE DEPENDENCIES: Stage-1 is a root stage http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index e3d815b..6e33ead 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] +valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289
[29/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index 52eb609..d0d9c87 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -128,14 +128,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -239,12 +239,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -344,12 +344,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -438,10 +438,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product @@ -535,18 +535,18 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012 Car 104 3 Fli 1012 Car 103 2 Ema +1012 Car 104 3 Fli +1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -644,19 +644,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del -1001 Bob 105 NULLNone -1001 Bob 104 3 Fli 1001 Bob 103 2 Ema +1001 Bob 104 3 Fli +1001 Bob 105 NULLNone 1012 Car 102 2 Del -1012 Car 105 NULLNone -1012
[17/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 006a51a..960f5f5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -128,18 +128,100 @@ POSTHOOK: query: select * from t4_n19 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4_n19 A masked pattern was here -PREHOOK: query: explain vectorization only summary - +PREHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only summary - +POSTHOOK: query: explain vectorization expression select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 1 <- Map 3 (BROADCAST_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: +0 key (type: int) +1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: +1 Map 3 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator +key expressions: _col0 (type: int), _col1 (type: string) +sort order: ++ +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Map 3 +Map Operator Tree: +TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator +predicate: key is not null (type: boolean) +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator +keys: _col0 (type: int) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE +Execution mode: llap +LLAP IO: all inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Select Operator +expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat +
[39/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..a37b5a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -25,7 +25,6 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; @@ -33,9 +32,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -48,17 +45,23 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -69,7 +72,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; @@ -86,14 +88,13 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import
[44/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe // @Override - public void process(Object row, int tag) throws HiveException { - -try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { -// Our one time process method initialization. -commonSetup(batch); + protected void commonSetup() throws HiveException { +super.commonSetup(); -/* - * Initialize Single-Column String members for this specialized class. - */ - -singleJoinColumn = bigTableKeyColumnMap[0]; +/* + * Initialize Single-Column String members for this specialized class. + */ -needCommonSetup = false; - } +singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { -// Setup our hash table specialization. It will be the first time the process -// method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { +super.hashTableSetup(); -/* - * Get our Single-Column String hash set information for this specialized class. - */ +/* + * Get our Single-Column String hash set information for this specialized class. + */ -hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; +hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } -needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; +try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public class VectorMapJoinLeftSemiStringOperator extends VectorMapJoinLeftSemiGe } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { -if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); -} return; } http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 2e5c568..61bcbf0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,13 +24,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import
[08/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index 1eab962..a7fdfda 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE(((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT ctimestamp1, cfloat, cstring1, @@ -83,7 +83,6 @@ STAGE PLANS: Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -135,12 +134,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 12 - includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] - dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean - partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -169,7 +162,6 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true -vectorizationSchemaColumns: [0:_col0:timestamp, 1:_col1:float, 2:_col2:string, 3:_col3:boolean, 4:_col4:double, 5:_col5:double, 6:_col6:double, 7:_col7:double, 8:_col8:float, 9:_col9:float, 10:_col10:float, 11:_col11:float, 12:_col12:double, 13:_col13:double, 14:_col14:bigint, 15:_col15:double, 16:_col16:double, 17:_col17:double, 18:_col18:double, 19:_col19:double, 20:_col20:double, 21:_col21:double] Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: @@ -190,12 +182,6 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - rowBatchContext: - dataColumnCount: 22 - includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - dataColumns: _col0:timestamp, _col1:float, _col2:string, _col3:boolean, _col4:double, _col5:double, _col6:double, _col7:double, _col8:float, _col9:float, _col10:float, _col11:float, _col12:double, _col13:double, _col14:bigint, _col15:double, _col16:double, _col17:double, _col18:double, _col19:double, _col20:double, _col21:double - partitionColumnCount: 0 - scratchColumnTypeNames: [] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out -- diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 2d306cf..6974ee8 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cfloat, cboolean1, cdouble, @@ -29,7 +29,7 @@ WHERE(((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +POSTHOOK: query: EXPLAIN
[25/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 000..169d94c --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,3945 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD + A masked pattern was
[30/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join46.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index 07c4a62..95d3611 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -1633,7 +1633,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1746,7 +1746,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1857,7 +1857,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1970,7 +1970,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -2148,7 +2148,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index c0c9f95..c3b1eb7 100644 --- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -230,6 +230,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -318,6 +319,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -407,6 +409,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -495,6 +498,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 791 Data size: 140798 Basic stats: COMPLETE Column stats: COMPLETE +DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/llap_acid.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/llap_acid.q.out b/ql/src/test/results/clientpositive/llap/llap_acid.q.out index 0d1a331..d441ab0 100644 --- a/ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [4, 0] +keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS
[47/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a37827ec Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a37827ec Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a37827ec Branch: refs/heads/master Commit: a37827ecd557c7f7d69f3b2ccdbf6535908b1461 Parents: 45163ee Author: Matt McCline Authored: Sun Sep 16 09:04:59 2018 -0500 Committer: Matt McCline Committed: Sun Sep 16 09:04:59 2018 -0500 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 10 +- data/files/fullouter_long_big_1a.txt| 11 + data/files/fullouter_long_big_1a_nonull.txt | 10 + data/files/fullouter_long_big_1b.txt| 13 + data/files/fullouter_long_big_1c.txt| 11 + data/files/fullouter_long_big_1d.txt| 12 + data/files/fullouter_long_small_1a.txt | 54 + data/files/fullouter_long_small_1a_nonull.txt | 51 + data/files/fullouter_long_small_1b.txt | 72 + data/files/fullouter_long_small_1c.txt | 81 + data/files/fullouter_long_small_1d.txt | 39 + data/files/fullouter_multikey_big_1a.txt| 13 + data/files/fullouter_multikey_big_1a_nonull.txt | 10 + data/files/fullouter_multikey_big_1b.txt| 17 + data/files/fullouter_multikey_small_1a.txt | 92 + .../fullouter_multikey_small_1a_nonull.txt | 90 + data/files/fullouter_multikey_small_1b.txt | 118 + data/files/fullouter_string_big_1a.txt | 13 + data/files/fullouter_string_big_1a_nonull.txt | 12 + data/files/fullouter_string_big_1a_old.txt | 13 + data/files/fullouter_string_small_1a.txt| 38 + data/files/fullouter_string_small_1a_nonull.txt | 35 + data/files/fullouter_string_small_1a_old.txt| 38 + .../vectorization/mapjoin/AbstractMapJoin.java | 66 +- .../mapjoin/MapJoinMultiKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneLongKeyBenchBase.java |3 +- .../mapjoin/MapJoinOneStringKeyBenchBase.java |3 +- .../test/resources/testconfiguration.properties |5 + .../hadoop/hive/ql/exec/CommonJoinOperator.java | 11 +- .../apache/hadoop/hive/ql/exec/ExplainTask.java | 187 +- .../apache/hadoop/hive/ql/exec/JoinUtil.java|2 +- .../hadoop/hive/ql/exec/MapJoinOperator.java| 199 +- .../apache/hadoop/hive/ql/exec/Operator.java| 86 +- .../hadoop/hive/ql/exec/TableScanOperator.java |6 +- .../persistence/BytesBytesMultiHashMap.java | 71 +- .../ql/exec/persistence/HashMapWrapper.java | 22 + .../persistence/HybridHashTableContainer.java | 118 +- .../persistence/MapJoinBytesTableContainer.java | 199 +- .../hive/ql/exec/persistence/MapJoinKey.java| 15 + .../persistence/MapJoinObjectSerDeContext.java | 17 + .../exec/persistence/MapJoinTableContainer.java | 61 +- .../hive/ql/exec/persistence/MatchTracker.java | 154 + .../ReusableGetAdaptorDirectAccess.java |4 +- .../ql/exec/persistence/UnwrapRowContainer.java | 12 +- .../hive/ql/exec/tez/ReduceRecordProcessor.java |2 +- .../vector/VectorAppMasterEventOperator.java|2 +- .../hive/ql/exec/vector/VectorAssignRow.java| 12 + .../hive/ql/exec/vector/VectorCopyRow.java | 21 +- .../ql/exec/vector/VectorDeserializeRow.java| 32 + .../ql/exec/vector/VectorFilterOperator.java|2 +- .../ql/exec/vector/VectorGroupByOperator.java |2 +- .../ql/exec/vector/VectorLimitOperator.java |2 +- .../exec/vector/VectorMapJoinBaseOperator.java | 23 +- .../ql/exec/vector/VectorMapJoinOperator.java | 23 +- .../exec/vector/VectorSMBMapJoinOperator.java |2 +- .../ql/exec/vector/VectorSelectOperator.java|4 +- .../ql/exec/vector/VectorTopNKeyOperator.java |2 +- .../mapjoin/VectorMapJoinCommonOperator.java| 409 +- .../VectorMapJoinFullOuterLongOperator.java | 68 + .../VectorMapJoinFullOuterMultiKeyOperator.java | 71 + .../VectorMapJoinFullOuterStringOperator.java | 71 + .../VectorMapJoinGenerateResultOperator.java| 163 +- ...pJoinInnerBigOnlyGenerateResultOperator.java | 18 +- .../VectorMapJoinInnerBigOnlyLongOperator.java | 63 +- ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java | 60 +- ...VectorMapJoinInnerBigOnlyStringOperator.java | 50 +- ...ectorMapJoinInnerGenerateResultOperator.java | 22 +- .../mapjoin/VectorMapJoinInnerLongOperator.java | 64 +- .../VectorMapJoinInnerMultiKeyOperator.java | 58 +- .../VectorMapJoinInnerStringOperator.java | 50 +- ...orMapJoinLeftSemiGenerateResultOperator.java | 12 +- .../VectorMapJoinLeftSemiLongOperator.java | 64 +- .../VectorMapJoinLeftSemiMultiKeyOperator.java | 58 +- .../VectorMapJoinLeftSemiStringOperator.java| 50 +-
[04/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out index 352e74f..5954629 100644 --- a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out +++ b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out @@ -89,10 +89,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -372,10 +371,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -424,10 +422,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -575,10 +572,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -627,10 +623,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator -keyColumnNums: [0] +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [] Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
[01/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
Repository: hive Updated Branches: refs/heads/master 45163ee4c -> a37827ecd http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out -- diff --git a/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out new file mode 100644 index 000..53c32ff --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out @@ -0,0 +1,2050 @@ +PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test1 +POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test1 +PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test1 +POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), +(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test1 +POSTHOOK: Lineage: test1.col_1 SCRIPT [] +POSTHOOK: Lineage: test1.key SCRIPT [] +POSTHOOK: Lineage: test1.value SCRIPT [] +col1 col2col3 +PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test2 +POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test2 +PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test2 +POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), +(104, 3, 'Fli'), (105, NULL, 'None') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test2 +POSTHOOK: Lineage: test2.col_2 SCRIPT [] +POSTHOOK: Lineage: test2.key SCRIPT [] +POSTHOOK: Lineage: test2.value SCRIPT [] +col1 col2col3 +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:test2 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:test2 + TableScan +alias: test2 +Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: test1 +Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE +TableScan Vectorization: +native: true +Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator +condition map: + Left Outer Join 0 to 1 +keys: + 0 _col1 (type: int) + 1 _col1 (type: int) +Map Join Vectorization: +bigTableKeyExpressions: col 1:int +className: VectorMapJoinOperator +native: false +nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin
[34/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out index 194fc5d..a160428 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out @@ -188,6 +188,139 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1_n2 A masked pattern was here 4543526 +PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Map 4 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) +Execution mode: vectorized, llap +LLAP IO: no inputs +Reducer 2 +Execution mode: llap +Reduce Operator Tree: + Merge Join Operator +condition map: + Full Outer Join 0 to 1 +keys: + 0 _col0 (type: int) + 1 _col1 (type: int) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator +aggregations: sum(_col0) +mode: hash +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) +Reducer 3 +Execution mode: vectorized, llap +Reduce Operator Tree: + Group By Operator +aggregations: sum(VALUE._col0) +mode: mergepartial +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat +
[26/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 000..6ddcef6 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1228 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD + A masked pattern was here +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD + A masked pattern was here +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum,
[38/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java -- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java new file mode 100644 index 000..586c850 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * An multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { +random = new Random(82733); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; +random.nextBytes(key); +byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); + +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Second value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +// Third value. +value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; +random.nextBytes(value); +map.testPutRow(key, value); +verifyTable.add(key, value); + +verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { +random = new Random(29383); + +VectorMapJoinFastMultiKeyHashMap map = +new VectorMapJoinFastMultiKeyHashMap( +false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + +VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + +int keyCount = 100 + random.nextInt(1000); +for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. +break; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); +} + +verifyTable.verifyNonMatched(map, random); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { +addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable, + int maxKeyLength, int fixedValueLength) + throws HiveException, IOException { +for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { +value = new byte[generateLargeCount() - 1]; + } else { +value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { +byte[] key; +while (true) { + key = new byte[random.nextInt(maxKeyLength)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { +// Unique keys for this test. +
[45/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..5733688 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; @@ -55,14 +54,17 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTabl import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.BaseWork; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -124,6 +126,10 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +155,70 @@ private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + *The Big Table key columns are from the key expressions. + *The Big Table value columns are from the getExpr(posBigTable) expressions. + *Any calculations needed for those will be scratch columns. + * + *The Small Table key and value output columns are scratch columns. + * + * Big Table Retain Column Map / TypeInfos: + *Any Big Table Batch columns that will be in the output result. + *0, 1, ore more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + *For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + *result but are needed for the Small Table output result, they are put in this mapping + *as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping + *For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + *output result from the Big Table key columns. The Big Table keys cannot be projected since + *on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping + *For FULL OUTER MapJoin, the mapping from any needed Small Table key
[27/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index ddcabd8..4c81131 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns:
[42/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 000..b9e86eb --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Properties; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Joiner; + +/** + * FULL OUTER MapJoin planning. + */ +public class FullOuterMapJoinOptimization { + + FullOuterMapJoinOptimization() { + } + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { +int[][] filterMaps = mapJoinDesc.getFilterMap(); +if (filterMaps == null) { + return; +} +final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); +final int numAliases = mapJoinDesc.getExprs().size(); +List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); +for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { +int[] filterMap = filterMaps[pos]; +TableDesc tableDesc = valueFilteredTblDescs.get(pos); +Properties properties = tableDesc.getProperties(); +String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); +String columnNameDelimiter = +properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? +properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + +String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); +List columnNameList; +if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); +} else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); +} +List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); +String truncatedColumnNameProperty = +Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + +List columnTypeList; +if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); +} else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); +} +if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); +} +List truncatedColumnTypeList = +columnTypeList.subList(0, columnTypeList.size() - 1); +String truncatedColumnTypeProperty = +Joiner.on(",").join(truncatedColumnTypeList); + +properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); +properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } +} +mapJoinDesc.setFilterMap(null); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
[16/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_like_2.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 1a20a35..31b7326 100644 --- a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out index 630f3f7..8ac3a11 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out @@ -104,10 +104,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:varchar(10), 1:int, 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2820 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index 45bfc6b..29c4bc1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -167,11 +167,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator -keyColumnNums: [0] +keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -partitionColumnNums: [0] -valueColumnNums: [1] +partitionColumns: 0:int +valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -223,14 +223,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: -bigTableKeyColumnNums: [0] -bigTableRetainedColumnNums: [0, 1] -bigTableValueColumnNums: [1] +bigTableKeyColumns: 0:int +bigTableRetainColumnNums: [1] +bigTableValueColumns: 1:string
[07/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 18e75aa..87f0ca8 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -289,74 +289,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 0 val_0 val_0 0 val_0 val_0 0 val_0 val_0 -2 val_2 val_2 -4 val_4 val_4 -8 val_8 val_8 -11 val_11 val_11 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -15 val_15 val_15 -17 val_17 val_17 -19 val_19 val_19 -20 val_20 val_20 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -24 val_24 val_24 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -26 val_26 val_26 -28 val_28 val_28 -33 val_33 val_33 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -35 val_35 val_35 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -37 val_37 val_37 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -42 val_42 val_42 -44 val_44 val_44 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -51 val_51 val_51 -53 val_53 val_53 -57 val_57 val_57 -64 val_64 val_64 -66 val_66 val_66 -77 val_77 val_77 -80 val_80 val_80 -82 val_82 val_82 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -84 val_84 val_84 -86 val_86 val_86 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -95 val_95 val_95 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 -97 val_97 val_97 103val_103 val_103 103val_103 val_103 103val_103 val_103 103val_103 val_103 105val_105 val_105 +11 val_11 val_11 114val_114 val_114 116val_116 val_116 118val_118 val_118 @@ -398,6 +336,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 149val_149 val_149 149val_149 val_149 149val_149 val_149 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 +15 val_15 val_15 150val_150 val_150 152val_152 val_152 152val_152 val_152 @@ -435,6 +377,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 169val_169 val_169 169val_169 val_169 169val_169 val_169 +17 val_17 val_17 170val_170 val_170 172val_172 val_172 172val_172 val_172 @@ -461,10 +404,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 187val_187 val_187 187val_187 val_187 189val_189 val_189 +19 val_19 val_19 190val_190 val_190 192val_192 val_192 194val_194 val_194 196val_196 val_196 +2 val_2 val_2 +20 val_20 val_20 200val_200 val_200 200val_200 val_200 200val_200 val_200 @@ -511,6 +457,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 239val_239 val_239 239val_239 val_239 239val_239 val_239 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 +24 val_24 val_24 242val_242 val_242 242val_242 val_242 242val_242 val_242 @@ -522,6 +472,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 255val_255 val_255 255val_255 val_255 257val_257 val_257 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 +26 val_26 val_26 260val_260 val_260 262val_262 val_262 266val_266 val_266 @@ -551,6 +505,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 277val_277 val_277 277val_277 val_277 277val_277 val_277 +28 val_28 val_28 280val_280 val_280 280val_280 val_280 280val_280 val_280 @@ -612,11 +567,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 327val_327 val_327 327val_327 val_327 327val_327 val_327 +33 val_33 val_33 332val_332 val_332 336val_336 val_336 338val_338 val_338 341val_341 val_341 345val_345 val_345 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 +35 val_35 val_35 356val_356 val_356 365val_365 val_365 367val_367 val_367 @@ -632,6 +597,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08 369val_369 val_369 369val_369 val_369 369val_369 val_369 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 +37 val_37 val_37 374val_374 val_374 378val_378 val_378 389val_389 val_389 @@ -646,6
[35/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out -- diff --git a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out index 6b85e13..2288b4b 100644 --- a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out +++ b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out @@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1 POSTHOOK: Lineage: tbl_n1.n SCRIPT [] POSTHOOK: Lineage: tbl_n1.t SCRIPT [] Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 +Map Reduce Local Work + Alias -> Map Local Tables: +$hdt$_1:tbl_n1 + Fetch Operator +limit: -1 + Alias -> Map Local Operator Tree: +$hdt$_1:tbl_n1 + TableScan +alias: tbl_n1 +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: n (type: bigint), t (type: string) +outputColumnNames: _col0, _col1 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +HashTable Sink Operator + keys: +0 +1 + + Stage: Stage-3 +Map Reduce + Map Operator Tree: + TableScan +alias: tbl_n1 +filterExpr: (n = 1L) (type: boolean) +Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE +Filter Operator + predicate: (n = 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: t (type: string) +outputColumnNames: _col0 +Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: +0 +1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Select Operator +expressions: 1L (type: bigint), _col0 (type: string), _col1 is null (type: boolean), _col2 is null (type: boolean) +outputColumnNames: _col0, _col1, _col2, _col3 +Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE +File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Local Work: +Map Reduce Local Work + + Stage: Stage-0 +Fetch Operator + limit: -1 + Processor Tree: +ListSink + +Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n PREHOOK: type: QUERY PREHOOK: Input: default@tbl_n1 @@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1 A masked pattern was here 1 one truetrue Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n +PREHOOK: type: QUERY +POSTHOOK: query: explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from
[37/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin2.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q b/ql/src/test/queries/clientpositive/mapjoin2.q index e194bd0..014dabe 100644 --- a/ql/src/test/queries/clientpositive/mapjoin2.q +++ b/ql/src/test/queries/clientpositive/mapjoin2.q @@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string); insert into tbl_n1 values (1, 'one'); insert into tbl_n1 values(2, 'two'); +explain +select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a left outer join (select * from tbl_n1 where 1 = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 1) a right outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 1) a full outer join (select * from tbl_n1 where n = 2) b on a.n = b.n; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; +explain +select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin46.q -- diff --git a/ql/src/test/queries/clientpositive/mapjoin46.q b/ql/src/test/queries/clientpositive/mapjoin46.q index 9de7113..81f9610 100644 --- a/ql/src/test/queries/clientpositive/mapjoin46.q +++ b/ql/src/test/queries/clientpositive/mapjoin46.q @@ -3,6 +3,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING); INSERT INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value OR test2_n2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SELECT * +FROM test1_n4 FULL OUTER JOIN test2_n2 +ON (test1_n4.value=test2_n2.value + OR test1_n4.key between 100 and 102 + OR test2_n2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1_n4 FULL OUTER JOIN test2_n2 @@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2 ON (test1_n4.value=test2_n2.value OR test1_n4.key between 100 and 102 OR test2_n2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN
[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 12db036..b8d76ed 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4 A masked pattern was here 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1_n4 - A masked pattern was here -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b +POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1_n4 - A masked pattern was here -13630578 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 +Tez + A masked pattern was here + Edges: +Map 2 <- Map 1 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + A masked pattern was here + Vertices: +Map 1 +Map Operator Tree: +TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) +Execution mode: vectorized, llap +LLAP IO: all inputs +Map Vectorization: +enabled: true +enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true +inputFormatFeatureSupport: [DECIMAL_64] +featureSupportInUse: [DECIMAL_64] +inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +allNative: true +usesVectorUDFAdaptor: false +vectorized: true +Map 2 +Map Operator Tree: +TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator +expressions: key (type: int), value (type: int) +outputColumnNames: _col0, _col1 +Select Vectorization: +className: VectorSelectOperator +native: true +Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE +Map Join Operator + condition map: + Right Outer Join 0 to 1 + filter predicates: +0 +1 {true} + keys: +0 +1 + Map Join Vectorization: + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet:
[06/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join33.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index 13cd446..09198b0 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -393,88 +393,88 @@ POSTHOOK: query: select * from dest_j1_n7 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1_n7 A masked pattern was here -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 -146val_146 val_146 +146val_146 +146val_146 +146val_146 +146val_146 150val_150 val_150 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -213val_213 val_213 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -238val_238 val_238 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -255val_255 val_255 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -273val_273 val_273 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -278val_278 val_278 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -311val_311 val_311 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -401val_401 val_401 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 -406val_406 val_406 +213val_213 +213val_213 +213val_213 +213val_213 +238val_238 +238val_238 +238val_238 +238val_238 +255val_255 +255val_255 +255val_255 +255val_255 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +273val_273 +278val_278 +278val_278 +278val_278 +278val_278 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +311val_311 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +401val_401 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 +406val_406 66 val_66 val_66 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 -98 val_98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join6.q.out -- diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 6075e5f..caa0849 100644 ---
[12/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorization_0.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index fbcbd64..5e95f39 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -63,10 +63,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap @@ -119,10 +118,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -244,10 +243,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0] +valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -300,10 +298,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -575,10 +572,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator -keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true -valueColumnNums: [0, 1, 2, 3] +valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type:
[09/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin46.q.out -- diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out b/ql/src/test/results/clientpositive/mapjoin46.q.out index febb6c7..b6f8b19 100644 --- a/ql/src/test/results/clientpositive/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del +1012 Car 103 2 Ema 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -1001 Bob NULLNULLNULL -1012 Car 102 2 Del -1012 Car 103 2 Ema +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 LEFT OUTER JOIN test2_n2 @@ -234,12 +234,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob NULLNULLNULL +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob NULLNULLNULL -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -340,12 +340,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL +1001 Bob 102 2 Del +1012 Car 102 2 Del 98 NULLNoneNULLNULLNULL 99 0 Alice NULLNULLNULL 99 2 Mat NULLNULLNULL -1001 Bob 102 2 Del -1012 Car 102 2 Del +NULL NULLNoneNULLNULLNULL PREHOOK: query: EXPLAIN SELECT * FROM test1_n4 RIGHT OUTER JOIN test2_n2 @@ -430,10 +430,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -99 2 Mat 102 2 Del 1012 Car 102 2 Del -99 2 Mat 103 2 Ema 1012 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULLNULL104 3 Fli NULL NULLNULL105 NULLNone Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product @@ -528,10 +528,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNoneNULLNULLNULL -98 NULLNoneNULLNULLNULL -99 0 Alice NULLNULLNULL -99 2 Mat NULLNULLNULL 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -540,6 +536,10 @@ NULL NULLNoneNULLNULLNULL 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNoneNULLNULLNULL +99 0 Alice NULLNULLNULL +99 2 Mat NULLNULLNULL +NULL NULLNoneNULLNULLNULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -635,11 +635,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1_n4 POSTHOOK: Input: default@test2_n2 A masked pattern was here -NULL NULLNone102 2 Del -98 NULLNone102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 1001 Bob 102 2 Del 1001 Bob 103 2 Ema 1001 Bob 104 3 Fli @@ -648,6 +643,11 @@ NULL NULLNone102 2 Del 1012 Car 103 2 Ema 1012 Car 104 3 Fli 1012 Car 105 NULLNone +98 NULLNone102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULLNone102 2 Del Warning:
[36/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_join30.q -- diff --git a/ql/src/test/queries/clientpositive/vector_join30.q b/ql/src/test/queries/clientpositive/vector_join30.q index 9672a47..74c4433 100644 --- a/ql/src/test/queries/clientpositive/vector_join30.q +++ b/ql/src/test/queries/clientpositive/vector_join30.q @@ -11,7 +11,7 @@ SET hive.auto.convert.join.noconditionaltask.size=10; CREATE TABLE orcsrc_n0 STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -19,14 +19,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN -(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x LEFT OUTER JOIN @@ -34,116 +34,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail +FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); +
[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out b/ql/src/test/results/clientpositive/llap/vector_join30.q.out index 4b2f06f..9238bc7 100644 --- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc_n0 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail FROM (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x JOIN @@ -51,6 +51,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -72,6 +73,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator +keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE @@ -86,6 +88,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true +rowBatchContext: +dataColumnCount: 2 +includeColumns: [0] +dataColumns: key:string, value:string +partitionColumnCount: 0 +scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -94,6 +102,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -116,9 +125,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: +bigTableKeyColumns: 0:string +bigTableRetainColumnNums: [0, 1] +bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true +nonOuterSmallTableKeyMapping: [] +projectedOutput: 0:string, 1:string +hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -141,6 +156,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true +valueColumns: 0:bigint