date:20180916

hive git commit: HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan)

2018-09-16 Thread prasanthj

Repository: hive
Updated Branches:
  refs/heads/master 37120b877 -> 45163ee4c


HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran 
reviewed by Gopal V, Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/45163ee4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/45163ee4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/45163ee4

Branch: refs/heads/master
Commit: 45163ee4cad0162b8cecf7e8e597fb618c87884f
Parents: 37120b8
Author: Prasanth Jayachandran 
Authored: Sun Sep 16 00:11:23 2018 -0700
Committer: Prasanth Jayachandran 
Committed: Sun Sep 16 00:11:23 2018 -0700

--
 bin/hive|   1 +
 .../apache/hive/common/util/ProcessUtils.java   |  66 
 .../java/org/apache/hive/http/HttpServer.java   |  19 +
 .../apache/hive/http/ProfileOutputServlet.java  |  51 +++
 .../org/apache/hive/http/ProfileServlet.java| 359 +++
 5 files changed, 496 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/bin/hive
--
diff --git a/bin/hive b/bin/hive
index 1ade51e..a7ae2f5 100755
--- a/bin/hive
+++ b/bin/hive
@@ -356,6 +356,7 @@ fi
 # include the log4j jar that is used for hive into the classpath
 CLASSPATH="${CLASSPATH}:${LOG_JAR_CLASSPATH}"
 export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${LOG_JAR_CLASSPATH}"
+export JVM_PID="$$"
 
 if [ "$TORUN" = "" ] ; then
   echo "Service $SERVICE not found"

http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/common/src/java/org/apache/hive/common/util/ProcessUtils.java
--
diff --git a/common/src/java/org/apache/hive/common/util/ProcessUtils.java 
b/common/src/java/org/apache/hive/common/util/ProcessUtils.java
new file mode 100644
index 000..409384f
--- /dev/null
+++ b/common/src/java/org/apache/hive/common/util/ProcessUtils.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Process related utilities.
+ */
+public class ProcessUtils {
+  private static Logger LOG = LoggerFactory.getLogger(ProcessUtils.class);
+
+  public static Integer getPid() {
+// JVM_PID is exported by bin/hive
+String pidStr = System.getenv("JVM_PID");
+
+// in case if it is not set correctly used fallback from mxbean which is 
implementation specific
+if (pidStr == null || pidStr.trim().isEmpty()) {
+  String name = ManagementFactory.getRuntimeMXBean().getName();
+  if (name != null) {
+int idx = name.indexOf("@");
+if (idx != -1) {
+  pidStr = name.substring(0, name.indexOf("@"));
+}
+  }
+}
+try {
+  if (pidStr != null) {
+return Integer.valueOf(pidStr);
+  }
+} catch (NumberFormatException nfe) {
+  // ignore
+}
+return null;
+  }
+
+  public static Process runCmdAsync(List cmd) {
+try {
+  LOG.info("Running command async: " + cmd);
+  return new ProcessBuilder(cmd).inheritIO().start();
+} catch (IOException ex) {
+  throw new IllegalStateException(ex);
+}
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/45163ee4/common/src/java/org/apache/hive/http/HttpServer.java
--
diff --git a/common/src/java/org/apache/hive/http/HttpServer.java 
b/common/src/java/org/apache/hive/http/HttpServer.java
index 3cb7a33..24c5422 100644
--- a/common/src/java/org/apache/hive/http/HttpServer.java
+++ b/common/src/java/org/apache/hive/http/HttpServer.java
@@ -21,6 +21,9 @@ package org.apache.hive.http;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import

hive git commit: HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran reviewed by Gopal V, Ashutosh Chauhan)

2018-09-16 Thread prasanthj

Repository: hive
Updated Branches:
  refs/heads/branch-3 b4e552df1 -> 550cc6199


HIVE-20202: Add profiler endpoint to HS2 and LLAP (Prasanth Jayachandran 
reviewed by Gopal V, Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/550cc619
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/550cc619
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/550cc619

Branch: refs/heads/branch-3
Commit: 550cc61998964a66fd80f72441f262ecf87256e4
Parents: b4e552d
Author: Prasanth Jayachandran 
Authored: Sun Sep 16 00:11:23 2018 -0700
Committer: Prasanth Jayachandran 
Committed: Sun Sep 16 00:12:18 2018 -0700

--
 bin/hive|   1 +
 .../apache/hive/common/util/ProcessUtils.java   |  66 
 .../java/org/apache/hive/http/HttpServer.java   |  19 +
 .../apache/hive/http/ProfileOutputServlet.java  |  51 +++
 .../org/apache/hive/http/ProfileServlet.java| 359 +++
 5 files changed, 496 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/bin/hive
--
diff --git a/bin/hive b/bin/hive
index 1ade51e..a7ae2f5 100755
--- a/bin/hive
+++ b/bin/hive
@@ -356,6 +356,7 @@ fi
 # include the log4j jar that is used for hive into the classpath
 CLASSPATH="${CLASSPATH}:${LOG_JAR_CLASSPATH}"
 export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${LOG_JAR_CLASSPATH}"
+export JVM_PID="$$"
 
 if [ "$TORUN" = "" ] ; then
   echo "Service $SERVICE not found"

http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/common/src/java/org/apache/hive/common/util/ProcessUtils.java
--
diff --git a/common/src/java/org/apache/hive/common/util/ProcessUtils.java 
b/common/src/java/org/apache/hive/common/util/ProcessUtils.java
new file mode 100644
index 000..409384f
--- /dev/null
+++ b/common/src/java/org/apache/hive/common/util/ProcessUtils.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hive.common.util;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Process related utilities.
+ */
+public class ProcessUtils {
+  private static Logger LOG = LoggerFactory.getLogger(ProcessUtils.class);
+
+  public static Integer getPid() {
+// JVM_PID is exported by bin/hive
+String pidStr = System.getenv("JVM_PID");
+
+// in case if it is not set correctly used fallback from mxbean which is 
implementation specific
+if (pidStr == null || pidStr.trim().isEmpty()) {
+  String name = ManagementFactory.getRuntimeMXBean().getName();
+  if (name != null) {
+int idx = name.indexOf("@");
+if (idx != -1) {
+  pidStr = name.substring(0, name.indexOf("@"));
+}
+  }
+}
+try {
+  if (pidStr != null) {
+return Integer.valueOf(pidStr);
+  }
+} catch (NumberFormatException nfe) {
+  // ignore
+}
+return null;
+  }
+
+  public static Process runCmdAsync(List cmd) {
+try {
+  LOG.info("Running command async: " + cmd);
+  return new ProcessBuilder(cmd).inheritIO().start();
+} catch (IOException ex) {
+  throw new IllegalStateException(ex);
+}
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/550cc619/common/src/java/org/apache/hive/http/HttpServer.java
--
diff --git a/common/src/java/org/apache/hive/http/HttpServer.java 
b/common/src/java/org/apache/hive/http/HttpServer.java
index 3cb7a33..24c5422 100644
--- a/common/src/java/org/apache/hive/http/HttpServer.java
+++ b/common/src/java/org/apache/hive/http/HttpServer.java
@@ -21,6 +21,9 @@ package org.apache.hive.http;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import

[33/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out 
b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
index b075ecf..801948c 100644
--- a/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
+++ b/ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
@@ -983,7 +983,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 _col0 (type: string)
   1 _col0 (type: string)
@@ -1153,7 +1153,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 _col0 (type: string)
   1 _col0 (type: string)
@@ -1229,6 +1229,910 @@ POSTHOOK: Input: default@src1
  A masked pattern was here 
 12744278   500 652447  25
 PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+  FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by 
x.key) a
+  FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y 
group by y.key) b
+  ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+  FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by 
x.key) a
+  FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y 
group by y.key) b
+  ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Map 5 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: x
+  Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: key, value
+Statistics: Num rows: 500 Data size: 89000 Basic stats: 
COMPLETE Column stats: COMPLETE
+Group By Operator
+  aggregations: count(value)
+  keys: key (type: string)
+  mode: hash
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 250 Data size: 23750 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 250 Data size: 23750 Basic 
stats: COMPLETE Column stats: COMPLETE
+value expressions: _col1 (type: bigint)
+Execution mode: vectorized, llap
+LLAP IO: no inputs
+Map 5 
+Map Operator Tree:
+TableScan
+  alias: y
+  Statistics: Num rows: 25 Data size: 4375 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Select Operator
+expressions: key (type: string), value (type: string)
+outputColumnNames: key, value
+Statistics: Num rows: 25 Data size: 4375 Basic stats: 
COMPLETE Column stats: COMPLETE
+Group By Operator
+  aggregations: count(value)
+  keys: key (type: string)
+  mode: hash
+  outputColumnNames: _col0, _col1
+  Statistics: Num rows: 12 Data size: 1128 Basic stats: 
COMPLETE Column stats: COMPLETE
+  Reduce Output Operator
+key expressions: _col0 (type: string)
+sort order: +
+Map-reduce partition columns: _col0 (type: string)
+Statistics: Num rows: 12 Data size: 1128 Basic

[28/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out 
b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
index e33101c..0a8a8a8 100644
--- a/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_llap_counters.q.out
@@ -271,8 +271,8 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_GBY_8: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_7: 2100
-   RECORDS_OUT_OPERATOR_SEL_6: 2100
-   RECORDS_OUT_OPERATOR_TS_0: 2100
+   RECORDS_OUT_OPERATOR_SEL_6: 3
+   RECORDS_OUT_OPERATOR_TS_0: 3
 Stage-1 LLAP IO COUNTERS:
ALLOCATED_BYTES: 262144
ALLOCATED_USED_BYTES: 26
@@ -327,13 +327,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 8
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 8
+   RECORDS_OUT_OPERATOR_FIL_8: 1
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 8
-   RECORDS_OUT_OPERATOR_SEL_9: 8
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_SEL_9: 1
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
ALLOCATED_BYTES: 1048576
ALLOCATED_USED_BYTES: 2731
@@ -367,13 +367,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 22
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 22
+   RECORDS_OUT_OPERATOR_FIL_8: 1
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 22
-   RECORDS_OUT_OPERATOR_SEL_9: 22
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_SEL_9: 1
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -405,13 +405,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 16
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 16
+   RECORDS_OUT_OPERATOR_FIL_8: 1
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 16
-   RECORDS_OUT_OPERATOR_SEL_9: 16
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_SEL_9: 1
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -443,13 +443,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 18
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 18
+   RECORDS_OUT_OPERATOR_FIL_8: 2
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 18
-   RECORDS_OUT_OPERATOR_SEL_9: 18
-   RECORDS_OUT_OPERATOR_TS_0: 2000
+   RECORDS_OUT_OPERATOR_SEL_9: 2
+   RECORDS_OUT_OPERATOR_TS_0: 2
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -487,7 +487,7 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 1
RECORDS_OUT_OPERATOR_SEL_9: 1
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -519,13 +519,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 32
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 32
+   RECORDS_OUT_OPERATOR_FIL_8: 1
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 32
-   RECORDS_OUT_OPERATOR_SEL_9: 32
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_SEL_9: 1
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -557,13 +557,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 32
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 32
+   RECORDS_OUT_OPERATOR_FIL_8: 1
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 32
-   RECORDS_OUT_OPERATOR_SEL_9: 32
-   RECORDS_OUT_OPERATOR_TS_0: 1000
+   RECORDS_OUT_OPERATOR_SEL_9: 1
+   RECORDS_OUT_OPERATOR_TS_0: 1
 Stage-1 LLAP IO COUNTERS:
CACHE_HIT_BYTES: 1071
CACHE_MISS_BYTES: 0
@@ -595,13 +595,13 @@ Stage-1 HIVE COUNTERS:
RECORDS_OUT_0: 1
RECORDS_OUT_INTERMEDIATE_Map_1: 1697
RECORDS_OUT_INTERMEDIATE_Reducer_2: 0
-   RECORDS_OUT_OPERATOR_FIL_8: 1697
+   RECORDS_OUT_OPERATOR_FIL_8: 2
RECORDS_OUT_OPERATOR_FS_12: 1
RECORDS_OUT_OPERATOR_GBY_11: 1
RECORDS_OUT_OPERATOR_MAP_0: 0
RECORDS_OUT_OPERATOR_RS_10: 1697
-   RECORDS_OUT_OPERATOR_SEL_9: 1697
-

[11/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
index 6b25672..2f5eb26 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
@@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1_n14
 POSTHOOK: Lineage: test1_n14.col_1 SCRIPT []
 POSTHOOK: Lineage: test1_n14.key SCRIPT []
 POSTHOOK: Lineage: test1_n14.value SCRIPT []
+col1   col2col3
 PREHOOK: query: CREATE TABLE test2_n9 (key INT, value INT, col_2 STRING)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2_n9
 POSTHOOK: Lineage: test2_n9.col_2 SCRIPT []
 POSTHOOK: Lineage: test2_n9.key SCRIPT []
 POSTHOOK: Lineage: test2_n9.value SCRIPT []
-PREHOOK: query: EXPLAIN
+col1   col2col3
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
 SELECT *
 FROM test1_n14 LEFT OUTER JOIN test2_n9
 ON (test1_n14.value=test2_n9.value)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
 SELECT *
 FROM test1_n14 LEFT OUTER JOIN test2_n9
 ON (test1_n14.value=test2_n9.value)
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
@@ -67,9 +74,14 @@ STAGE PLANS:
 TableScan
   alias: test1_n14
   Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
   Select Operator
 expressions: key (type: int), value (type: int), col_1 
(type: string)
 outputColumnNames: _col0, _col1, _col2
+Select Vectorization:
+className: VectorSelectOperator
+native: true
 Statistics: Num rows: 6 Data size: 572 Basic stats: 
COMPLETE Column stats: COMPLETE
 Map Join Operator
   condition map:
@@ -77,12 +89,19 @@ STAGE PLANS:
   keys:
 0 _col1 (type: int)
 1 _col1 (type: int)
+  Map Join Vectorization:
+  className: VectorMapJoinOuterLongOperator
+  native: true
+  nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized 
Table and Supports Key Types IS true
   outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5
   input vertices:
 1 Map 2
   Statistics: Num rows: 8 Data size: 859 Basic stats: 
COMPLETE Column stats: COMPLETE
   File Output Operator
 compressed: false
+File Sink Vectorization:
+className: VectorFileSinkOperator
+native: false
 Statistics: Num rows: 8 Data size: 859 Basic stats: 
COMPLETE Column stats: COMPLETE
 table:
 input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -90,23 +109,50 @@ STAGE PLANS:
 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vector.serde.deserialize IS true
+inputFormatFeatureSupport: [DECIMAL_64]
+featureSupportInUse: [DECIMAL_64]
+inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+allNative: false
+usesVectorUDFAdaptor: false
+vectorized: true
 Map 2 
 Map Operator Tree:
 TableScan
   alias: test2_n9
   Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE 
Column stats: COMPLETE
+  TableScan Vectorization:
+  native: true
   Select Operator
 expressions: key (type: int), value (type: int), col_2 
(type: string)
 outputColumnNames: _col0, _col1, _col2
+

[05/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
index 903e74b..a445b44 100644
--- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cfloat,
  cstring1,
  cint,
@@ -22,7 +22,7 @@ WHERE(((cbigint > -23)
   OR (cfloat = cdouble
 ORDER BY cbigint, cfloat
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cfloat,
  cstring1,
  cint,
@@ -69,7 +69,6 @@ STAGE PLANS:
   Statistics: Num rows: 12288 Data size: 147456 Basic stats: 
COMPLETE Column stats: NONE
   TableScan Vectorization:
   native: true
-  vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct]
   Filter Operator
 Filter Vectorization:
 className: VectorFilterOperator
@@ -91,10 +90,8 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [3, 4]
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 
20, 22, 18]
 Statistics: Num rows: 4096 Data size: 49152 Basic 
stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: string), _col2 (type: 
int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), 
_col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: 
double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: 
double)
 Execution mode: vectorized
@@ -107,27 +104,14 @@ STAGE PLANS:
 allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
-rowBatchContext:
-dataColumnCount: 12
-includeColumns: [0, 1, 2, 3, 4, 5, 6, 8]
-dataColumns: ctinyint:tinyint, csmallint:smallint, 
cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, 
cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean
-partitionColumnCount: 0
-scratchColumnTypeNames: [decimal(13,3), double, double, 
bigint, double, double, double, double, decimal(19,0), decimal(11,4), double]
 Reducer 2 
 Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled 
IS true, hive.execution.engine spark IN [tez, spark] IS true
-reduceColumnNullOrder: zz
-reduceColumnSortOrder: ++
 allNative: false
 usesVectorUDFAdaptor: false
 vectorized: true
-rowBatchContext:
-dataColumnCount: 14
-dataColumns: KEY.reducesinkkey0:bigint, 
KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, 
VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, 
VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, 
VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double
-partitionColumnCount: 0
-scratchColumnTypeNames: []
 Reduce Operator Tree:
   Select Operator
 expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 
(type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), 
VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 
(type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), 
VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: 
double), VALUE._col10 (type:

[24/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out
 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out
new file mode 100644
index 000..da513db
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out
@@ -0,0 +1,3945 @@
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE 
[(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, 
type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE 
[(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key,
 type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+POSTHOOK: type: LOAD
+ A

[40/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java
index bde4424..93fdb28 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
@@ -27,20 +28,23 @@ import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 public class MapJoinTestDescription extends DescriptionTest {
 
+  public static enum MapJoinPlanVariation {
+DYNAMIC_PARTITION_HASH_JOIN,
+SHARED_SMALL_TABLE
+  }
+
   public static class SmallTableGenerationParameters {
 
 public static enum ValueOption {
   NO_RESTRICTION,
   ONLY_ONE,
-  ONLY_TWO,
-  AT_LEAST_TWO
+  NO_REGULAR_SMALL_KEYS
 }
 
 private ValueOption valueOption;
@@ -82,70 +86,103 @@ public class MapJoinTestDescription extends 
DescriptionTest {
   final VectorMapJoinVariation vectorMapJoinVariation;
 
   // Adjustable.
-  public String[] bigTableColumnNames;
+  public String[] bigTableKeyColumnNames;
   public TypeInfo[] bigTableTypeInfos;
+
   public int[] bigTableKeyColumnNums;
-  public String[] smallTableValueColumnNames;
+
   public TypeInfo[] smallTableValueTypeInfos;
-  public int[] bigTableRetainColumnNums;
+
   public int[] smallTableRetainKeyColumnNums;
-  public int[] smallTableRetainValueColumnNums;
 
   public SmallTableGenerationParameters smallTableGenerationParameters;
 
   // Derived.
-  public List bigTableColumnNamesList;
-  public String[] bigTableKeyColumnNames;
-  public TypeInfo[] bigTableKeyTypeInfos;
-  public List smallTableValueColumnNamesList;
+
+  public int[] bigTableColumnNums;
+  public String[] bigTableColumnNames;
+  public List bigTableColumnNameList;
   public ObjectInspector[] bigTableObjectInspectors;
-  public List bigTableObjectInspectorsList;
+  public List bigTableObjectInspectorList;
+
+  public TypeInfo[] bigTableKeyTypeInfos;
+
+  public List smallTableKeyColumnNameList;
+  public String[] smallTableKeyColumnNames;
+  public TypeInfo[] smallTableKeyTypeInfos;
+  public ObjectInspector[] smallTableKeyObjectInspectors;
+  public List smallTableKeyObjectInspectorList;
+
+  public List smallTableValueColumnNameList;
+  public String[] smallTableValueColumnNames;
+  public ObjectInspector[] smallTableValueObjectInspectors;
+  public List smallTableValueObjectInspectorList;
+
+  public int[] bigTableRetainColumnNums;
+  public int[] smallTableRetainValueColumnNums;
+
+  public String[] smallTableColumnNames;
+  public List smallTableColumnNameList;
+  public TypeInfo[] smallTableTypeInfos;
+  public List smallTableObjectInspectorList;
+
   public StandardStructObjectInspector bigTableStandardObjectInspector;
-  public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos;
-  public ObjectInspector[] smallTableObjectInspectors;
-  public PrimitiveCategory[] smallTablePrimitiveCategories;
-  public List smallTableObjectInspectorsList;
   public StandardStructObjectInspector smallTableStandardObjectInspector;
   public ObjectInspector[] inputObjectInspectors;
+
   public String[] outputColumnNames;
   public TypeInfo[] outputTypeInfos;
   public ObjectInspector[] outputObjectInspectors;
 
+  final MapJoinPlanVariation mapJoinPlanVariation;
+
+  public MapJoinTestDescription (
+  HiveConf hiveConf,
+  VectorMapJoinVariation vectorMapJoinVariation,
+  TypeInfo[] bigTableTypeInfos,
+  int[] bigTableKeyColumnNums,
+  TypeInfo[] smallTableValueTypeInfos,
+  int[] smallTableRetainKeyColumnNums,
+  SmallTableGenerationParameters smallTableGenerationParameters,
+  MapJoinPlanVariation mapJoinPlanVariation) {
+this(
+hiveConf,
+vectorMapJoinVariation,
+/* bigTableColumnNames */ null,
+bigTableTypeInfos,
+bigTableKeyColumnNums,
+smallTableValueTypeInfos,
+smallTableRetainKeyColumnNums,
+

[22/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index 5c0d6bb..6eaf7ad 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -72,10 +72,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -209,10 +209,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -372,10 +372,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -632,11 +632,11 @@ STAGE PLANS:
 Map-reduce partition columns: rand() (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [4]
-valueColumnNums: [3]
+partitionColumns: 4:double
+valueColumns: 3:bigint
 Statistics: Num rows: 4 Data size: 1472 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -692,11 +692,11 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: string), _col1 
(type: string)
   Reduce Sink Vectorization:

[15/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
index 5e26637..0f40378 100644
--- 
a/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out
@@ -296,10 +296,9 @@ STAGE PLANS:
 sort order: 
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
-keyColumnNums: []
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:decimal(25,2)
 Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: decimal(25,2))
 Execution mode: vectorized, llap
@@ -353,11 +352,11 @@ STAGE PLANS:
   Map-reduce partition columns: 0 (type: int)
   Reduce Sink Vectorization:
   className: VectorReduceSinkLongOperator
-  keyColumnNums: [1]
+  keyColumns: 1:int
   keyExpressions: ConstantVectorExpression(val 0) -> 1:int
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: [0]
+  valueColumns: 0:decimal(25,2)
   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE 
Column stats: COMPLETE
   value expressions: _col0 (type: decimal(25,2))
 Reducer 3 
@@ -517,10 +516,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: 
decimal(15,2)), _col1 (type: decimal(15,2))
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:decimal(15,2), 1:decimal(15,2)
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [2]
+valueColumns: 2:decimal(25,2)
 Statistics: Num rows: 2 Data size: 672 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col2 (type: decimal(25,2))
 Execution mode: vectorized, llap
@@ -576,11 +575,11 @@ STAGE PLANS:
   Map-reduce partition columns: _col1 (type: decimal(15,2))
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [1, 0]
+  keyColumns: 1:decimal(15,2), 0:decimal(15,2)
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  partitionColumnNums: [1]
-  valueColumnNums: [2]
+  partitionColumns: 1:decimal(15,2)
+  valueColumns: 2:decimal(25,2)
   Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE 
Column stats: COMPLETE
   value expressions: _col2 (type: decimal(25,2))
 Reducer 3 
@@ -747,10 +746,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: 
decimal(15,2))
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0]
+keyColumns: 0:decimal(15,2)
 native: true

[13/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
index c07f4d9..6660d73 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out
@@ -62,10 +62,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1]
+valueColumns: 1:bigint
 Statistics: Num rows: 7 Data size: 651 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint)
 Execution mode: vectorized, llap
@@ -129,12 +129,11 @@ STAGE PLANS:
 Map-reduce partition columns: 0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [2, 1]
+keyColumns: 2:int, 1:bigint
 keyExpressions: ConstantVectorExpression(val 0) -> 
2:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [3]
-valueColumnNums: []
+partitionColumns: 3:int
 Statistics: Num rows: 7 Data size: 651 Basic stats: 
COMPLETE Column stats: COMPLETE
 Reducer 3 
 Execution mode: vectorized, llap
@@ -304,10 +303,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2]
+valueColumns: 1:string, 2:bigint
 Statistics: Num rows: 6 Data size: 1176 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string), _col2 (type: 
bigint)
 Execution mode: vectorized, llap
@@ -363,11 +362,11 @@ STAGE PLANS:
   Map-reduce partition columns: _col1 (type: string)
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [1, 2]
+  keyColumns: 1:string, 2:bigint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  partitionColumnNums: [1]
-  valueColumnNums: [0]
+  partitionColumns: 1:string
+  valueColumns: 0:int
   Statistics: Num rows: 6 Data size: 1176 Basic stats: 
COMPLETE Column stats: COMPLETE
   value expressions: _col0 (type: int)
 Reducer 3 
@@ -540,10 +539,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true

[43/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
index 4c049cb..8dce5b8 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java
@@ -47,6 +47,14 @@ public class VectorMapJoinFastLongHashSet
 return new VectorMapJoinFastHashSet.HashSetResult();
   }
 
+  @Override
+  public void putRow(BytesWritable currentKey, BytesWritable currentValue)
+  throws HiveException, IOException {
+
+// Ignore NULL keys (HashSet not used for FULL OUTER).
+adaptPutRow(currentKey, currentValue);
+  }
+
   /*
* A Unit Test convenience method for putting the key into the hash table 
using the
* actual type.
@@ -76,11 +84,18 @@ public class VectorMapJoinFastLongHashSet
 optimizedHashSetResult.forget();
 
 long hashCode = HashCodeUtil.calculateLongHashCode(key);
-long existance = findReadSlot(key, hashCode);
+int pairIndex = findReadSlot(key, hashCode);
 JoinUtil.JoinResult joinResult;
-if (existance == -1) {
+if (pairIndex == -1) {
   joinResult = JoinUtil.JoinResult.NOMATCH;
 } else {
+  /*
+   * NOTE: Support for trackMatched not needed yet for Set.
+
+  if (matchTracker != null) {
+matchTracker.trackMatch(pairIndex / 2);
+  }
+  */
   joinResult = JoinUtil.JoinResult.MATCH;
 }
 
@@ -91,9 +106,13 @@ public class VectorMapJoinFastLongHashSet
   }
 
   public VectorMapJoinFastLongHashSet(
-  boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType 
hashTableKeyType,
+  boolean isFullOuter,
+  boolean minMaxEnabled,
+  HashTableKeyType hashTableKeyType,
   int initialCapacity, float loadFactor, int writeBuffersSize, long 
estimatedKeyCount) {
-super(minMaxEnabled, isOuterJoin, hashTableKeyType,
+super(
+isFullOuter,
+minMaxEnabled, hashTableKeyType,
 initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount);
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index c9c3e80..03ef249 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -49,8 +49,6 @@ public abstract class VectorMapJoinFastLongHashTable
 
   private final HashTableKeyType hashTableKeyType;
 
-  private final boolean isOuterJoin;
-
   private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead;
 
   private final boolean useMinMax;
@@ -72,14 +70,13 @@ public abstract class VectorMapJoinFastLongHashTable
 return max;
   }
 
-  @Override
-  public void putRow(BytesWritable currentKey, BytesWritable currentValue) 
throws HiveException, IOException {
+  public boolean adaptPutRow(BytesWritable currentKey, BytesWritable 
currentValue) throws HiveException, IOException {
 byte[] keyBytes = currentKey.getBytes();
 int keyLength = currentKey.getLength();
 keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength);
 try {
   if (!keyBinarySortableDeserializeRead.readNextField()) {
-return;
+return false;
   }
 } catch (Exception e) {
   throw new HiveException(
@@ -92,6 +89,7 @@ public abstract class VectorMapJoinFastLongHashTable
 keyBinarySortableDeserializeRead, 
hashTableKeyType);
 
 add(key, currentValue);
+return true;
   }
 
   protected abstract void assignSlot(int slot, long key, boolean isNewKey, 
BytesWritable currentValue);
@@ -215,10 +213,9 @@ public abstract class VectorMapJoinFastLongHashTable
 largestNumberOfSteps = newLargestNumberOfSteps;
 resizeThreshold = (int)(logicalHashBucketCount * loadFactor);
 metricExpands++;
-// LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new 
logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + 
resizeThreshold + " metricExpands " + metricExpands);
   }
 
-  protected long findReadSlot(long key, long hashCode) {
+  protected int findReadSlot(long key, long hashCode) {
 
 int intHashCode = (int) hashCode;
 int slot =

[46/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
index f45a012..114cea9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock;
 
 import org.apache.commons.lang3.tuple.ImmutablePair;
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.Constants;
@@ -41,12 +42,16 @@ import 
org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashP
 import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper;
+import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import 
org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess;
+import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator;
 import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
+import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
 import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer;
 import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
@@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Writable;
 import org.apache.hive.common.util.ReflectionUtil;
@@ -74,8 +81,8 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.annotations.VisibleForTesting;
-
 import com.esotericsoftware.kryo.KryoException;
+import com.google.common.base.Preconditions;
 
 /**
  * Map side Join operator implementation.
@@ -105,6 +112,23 @@ public class MapJoinOperator extends 
AbstractMapJoinOperator implem
   protected HybridHashTableContainer firstSmallTable; // The first small table;
   // Only this table has 
spilled big table rows
 
+  /*
+   * FULL OUTER MapJoin members.
+   */
+  protected transient boolean isFullOuterMapJoin; // Are we doing a FULL 
OUTER MapJoin?
+
+  protected transient int fullOuterBigTableRetainSize;
+  // The number of Big 
Table columns being
+  // retained in the 
output result for
+  // FULL OUTER MapJoin.
+
+  /*
+   * Small Table key match tracking used for FULL OUTER MapJoin.  Otherwise, 
null.
+   * Since the Small Table hash table can be shared among vertices, we require 
this non-shared object
+   * for our vertex (i.e. operator private) key match tracking.
+   */
+  protected transient MatchTracker matchTracker;
+
   protected transient boolean isTestingNoHashTableLoad;
   // Only used in bucket map join.
   private transient int numBuckets = -1;
@@ -177,6 +201,8 @@ public class MapJoinOperator extends 
AbstractMapJoinOperator implem
 hybridMapJoinLeftover = false;
 firstSmallTable = null;
 
+doFullOuterMapJoinInit();
+
 generateMapMetaData();
 
 isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf,
@@ -252,6 +278,24 @@ public class MapJoinOperator extends 
AbstractMapJoinOperator implem
 }
   }
 
+  /*
+   * Do initialization for FULL OUTER MapJoin.
+   *
+   * Currently, we do not support FULL OUTER MapJoin for N-way.
+   */
+  private void doFullOuterMapJoinInit() {
+
+// This will be set during the first process call or during closeOp if no 
rows

[21/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
index bbfba28..07c4eed 100644
--- 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out
@@ -88,10 +88,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -158,10 +157,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3 
@@ -280,10 +279,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 6 Data size: 2208 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -350,10 +348,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 
(type: string), _col2 (type: bigint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:bigint
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [3]
+valueColumns: 3:bigint
 Statistics: Num rows: 12 Data size: 4416 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
 Reducer 3 
@@ -499,10 +497,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
-keyColumnNums: [0, 1]
+keyColumns: 0:string, 1:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No

[19/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
index 7c1780b..a49e8e2 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_filters.q.out
@@ -47,15 +47,174 @@ POSTHOOK: Input: default@myinput1_n1
  A masked pattern was here 
 4937935
 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM 
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND 
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1_n1 a RIGHT OUTER 
JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key 
> 40 AND b.value > 50 AND b.key = b.value
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n1
- A masked pattern was here 
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM 
myinput1_n1 a RIGHT OUTER JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND 
a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value))  FROM myinput1_n1 a RIGHT OUTER 
JOIN myinput1_n1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key 
> 40 AND b.value > 50 AND b.key = b.value
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n1
- A masked pattern was here 
-3080335
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 2 <- Map 1 (BROADCAST_EDGE)
+Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: a
+  filterExpr: ((key > 40) and (value > 50) and (key = value)) 
(type: boolean)
+  Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  Filter Operator
+Filter Vectorization:
+className: VectorFilterOperator
+native: true
+predicate: ((key = value) and (key > 40) and (value > 50)) 
(type: boolean)
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Select Operator
+  expressions: key (type: int), value (type: int)
+  outputColumnNames: _col0, _col1
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+sort order: 
+Reduce Sink Vectorization:
+className: VectorReduceSinkEmptyKeyOperator
+native: true
+nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+value expressions: _col0 (type: int), _col1 (type: int)
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFormatFeatureSupport: [DECIMAL_64]
+featureSupportInUse: [DECIMAL_64]
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+Map 2 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+

[10/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out 
b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
index 8efe78d..c9b9e81 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out
@@ -146,14 +146,16 @@ STAGE PLANS:
   0 _col1 (type: int)
   1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [1]
-bigTableRetainedColumnNums: [3]
-bigTableValueColumnNums: [3]
+bigTableKeyColumns: 1:int
+bigTableRetainColumnNums: [3]
+bigTableValueColumns: 3:decimal(8,1)
 bigTableValueExpressions: 
ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1)
 className: VectorMapJoinInnerBigOnlyLongOperator
 native: true
 nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
-projectedOutputColumnNums: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 3:decimal(8,1)
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0
 input vertices:
   1 Reducer 3
@@ -217,10 +219,9 @@ STAGE PLANS:
 sort order: 
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
-keyColumnNums: []
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:int
 Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
 value expressions: _col0 (type: int)
 Execution mode: vectorized, llap
@@ -281,10 +282,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
 
   Stage: Stage-0
@@ -351,14 +351,16 @@ STAGE PLANS:
   0 _col1 (type: int)
   1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [1]
-bigTableRetainedColumnNums: [3]
-bigTableValueColumnNums: [3]
+bigTableKeyColumns: 1:int
+bigTableRetainColumnNums: [3]
+bigTableValueColumns: 3:decimal(8,1)
 bigTableValueExpressions: 
ConvertDecimal64ToDecimal(col 0:decimal(8,1)/DECIMAL_64) -> 3:decimal(8,1)
 className: VectorMapJoinInnerBigOnlyLongOperator
 native: true
 nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
-projectedOutputColumnNums: [3]
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 3:decimal(8,1)
+

[23/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out
 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out
new file mode 100644
index 000..52ca0fb
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out
@@ -0,0 +1,3923 @@
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE 
[(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, 
type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE 
[(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key,
 type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE

[31/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out 
b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out
new file mode 100644
index 000..c387af5
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out
@@ -0,0 +1,3139 @@
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE 
[(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, 
type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE 
[(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key,
 type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+POSTHOOK: type: LOAD
+ A masked pattern was here

[14/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out 
b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index 6637d33..ef1e653 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -43,11 +43,11 @@ STAGE PLANS:
 Map-reduce partition columns: p_mfgr (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [2, 1]
+keyColumns: 2:string, 1:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [2]
-valueColumnNums: [5, 7]
+partitionColumns: 2:string
+valueColumns: 5:int, 7:double
 Statistics: Num rows: 26 Data size: 6006 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: p_size (type: int), p_retailprice 
(type: double)
 Execution mode: vectorized, llap
@@ -279,11 +279,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0, 1, 2]
-valueColumnNums: [3]
+partitionColumns: 0:string, 1:string, 2:int
+valueColumns: 3:double
 Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: double)
 Execution mode: vectorized, llap
@@ -339,11 +339,11 @@ STAGE PLANS:
   Map-reduce partition columns: _col1 (type: string)
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [1, 0]
+  keyColumns: 1:string, 0:string
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  partitionColumnNums: [1]
-  valueColumnNums: [2, 3]
+  partitionColumns: 1:string
+  valueColumns: 2:int, 3:double
   Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
   value expressions: _col2 (type: int), _col3 (type: double)
 Reducer 3 
@@ -529,11 +529,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string), _col2 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0, 1, 2]
+keyColumns: 0:string, 1:string, 2:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0, 1, 2]
-valueColumnNums: [3]
+partitionColumns: 0:string, 1:string, 2:int
+valueColumns: 3:double
 Statistics: Num rows: 13 Data size: 3003 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: double)
 Execution mode:

[03/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union14.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union14.q.out 
b/ql/src/test/results/clientpositive/spark/union14.q.out
index 6a95e4a..49d6cb1 100644
--- a/ql/src/test/results/clientpositive/spark/union14.q.out
+++ b/ql/src/test/results/clientpositive/spark/union14.q.out
@@ -126,20 +126,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
  A masked pattern was here 
-2781
-2731
+   10
 1281
-2551
-tst1   1
 1461
-3691
+1501
 2131
-3111
 2241
 2381
-1501
-   10
+2551
+2731
+2781
+3111
+3691
+4011
 4061
 66 1
-4011
 98 1
+tst1   1

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union7.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union7.q.out 
b/ql/src/test/results/clientpositive/spark/union7.q.out
index 549075c..8556f84 100644
--- a/ql/src/test/results/clientpositive/spark/union7.q.out
+++ b/ql/src/test/results/clientpositive/spark/union7.q.out
@@ -122,20 +122,20 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
  A masked pattern was here 
-2781
-2731
+   10
 1281
-2551
-tst1   1
 1461
-3691
+1501
 2131
-3111
 2241
 2381
-1501
-   10
+2551
+2731
+2781
+3111
+3691
+4011
 4061
 66 1
-4011
 98 1
+tst1   1

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_null.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union_null.q.out 
b/ql/src/test/results/clientpositive/spark/union_null.q.out
index d37adbb..696641c 100644
--- a/ql/src/test/results/clientpositive/spark/union_null.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_null.q.out
@@ -24,16 +24,16 @@ POSTHOOK: query: select x from (select * from (select value 
as x from src order
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
  A masked pattern was here 
-val_0
-val_0
-val_0
-val_10
-val_100
 NULL
 NULL
 NULL
 NULL
 NULL
+val_0
+val_0
+val_0
+val_10
+val_100
 PREHOOK: query: select * from (select * from (select cast(null as string) as N 
from src1 group by key)a UNION ALL select * from (select cast(null as string) 
as N from src1 group by key)b ) a
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src1

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/union_view.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/union_view.q.out 
b/ql/src/test/results/clientpositive/spark/union_view.q.out
index 591ebfa..97a5bef 100644
--- a/ql/src/test/results/clientpositive/spark/union_view.q.out
+++ b/ql/src/test/results/clientpositive/spark/union_view.q.out
@@ -483,10 +483,10 @@ STAGE PLANS:
 86 val_86  2
 86 val_86  3
 86 val_86  3
-86 val_86  2
-86 val_86  2
 86 val_86  3
 86 val_86  3
+86 val_86  2
+86 val_86  2
 86 val_86  1
 STAGE DEPENDENCIES:
   Stage-1 is a root stage

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out 
b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
index e3d815b..6e33ead 100644
--- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out
@@ -99,10 +99,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkLongOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9]
+valueColumns: 1:bigint, 2:decimal(20,10), 
3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 
7:decimal(23,14), 8:decimal(33,14), 9:bigint
 Statistics: Num rows: 12289

[29/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out 
b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
index 52eb609..d0d9c87 100644
--- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
+++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out
@@ -128,14 +128,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob NULLNULLNULL
+1012   Car 102 2   Del
+1012   Car 103 2   Ema
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat 102 2   Del
 99 2   Mat 103 2   Ema
-1001   Bob NULLNULLNULL
-1012   Car 102 2   Del
-1012   Car 103 2   Ema
+NULL   NULLNoneNULLNULLNULL
 PREHOOK: query: EXPLAIN
 SELECT *
 FROM test1_n4 LEFT OUTER JOIN test2_n2
@@ -239,12 +239,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob NULLNULLNULL
+1012   Car 102 2   Del
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat NULLNULLNULL
-1001   Bob NULLNULLNULL
-1012   Car 102 2   Del
+NULL   NULLNoneNULLNULLNULL
 Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT *
@@ -344,12 +344,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob 102 2   Del
+1012   Car 102 2   Del
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat NULLNULLNULL
-1001   Bob 102 2   Del
-1012   Car 102 2   Del
+NULL   NULLNoneNULLNULLNULL
 PREHOOK: query: EXPLAIN
 SELECT *
 FROM test1_n4 RIGHT OUTER JOIN test2_n2
@@ -438,10 +438,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-99 2   Mat 102 2   Del
 1012   Car 102 2   Del
-99 2   Mat 103 2   Ema
 1012   Car 103 2   Ema
+99 2   Mat 102 2   Del
+99 2   Mat 103 2   Ema
 NULL   NULLNULL104 3   Fli
 NULL   NULLNULL105 NULLNone
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
@@ -535,18 +535,18 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
-98 NULLNoneNULLNULLNULL
-99 0   Alice   NULLNULLNULL
-99 2   Mat NULLNULLNULL
 1001   Bob 102 2   Del
-1001   Bob 105 NULLNone
-1001   Bob 104 3   Fli
 1001   Bob 103 2   Ema
+1001   Bob 104 3   Fli
+1001   Bob 105 NULLNone
 1012   Car 102 2   Del
-1012   Car 105 NULLNone
-1012   Car 104 3   Fli
 1012   Car 103 2   Ema
+1012   Car 104 3   Fli
+1012   Car 105 NULLNone
+98 NULLNoneNULLNULLNULL
+99 0   Alice   NULLNULLNULL
+99 2   Mat NULLNULLNULL
+NULL   NULLNoneNULLNULLNULL
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
 PREHOOK: query: EXPLAIN
 SELECT *
@@ -644,19 +644,19 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNone102 2   Del
-98 NULLNone102 2   Del
-99 0   Alice   102 2   Del
-99 2   Mat 102 2   Del
-99 2   Mat 103 2   Ema
 1001   Bob 102 2   Del
-1001   Bob 105 NULLNone
-1001   Bob 104 3   Fli
 1001   Bob 103 2   Ema
+1001   Bob 104 3   Fli
+1001   Bob 105 NULLNone
 1012   Car 102 2   Del
-1012   Car 105 NULLNone
-1012

[17/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out 
b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
index 006a51a..960f5f5 100644
--- a/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out
@@ -128,18 +128,100 @@ POSTHOOK: query: select * from t4_n19
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t4_n19
  A masked pattern was here 
-PREHOOK: query: explain vectorization only summary
-
+PREHOOK: query: explain vectorization expression
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, 
a.value
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization only summary
-
+POSTHOOK: query: explain vectorization expression
 select * from t1_n148 a left semi join t2_n87 b on a.key=b.key sort by a.key, 
a.value
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: false
   enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
 
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 1 <- Map 3 (BROADCAST_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: a
+  filterExpr: key is not null (type: boolean)
+  Statistics: Num rows: 11 Data size: 2068 Basic stats: 
COMPLETE Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 11 Data size: 2068 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Semi Join 0 to 1
+  keys:
+0 key (type: int)
+1 _col0 (type: int)
+  outputColumnNames: _col0, _col1
+  input vertices:
+1 Map 3
+  Statistics: Num rows: 12 Data size: 2274 Basic stats: 
COMPLETE Column stats: NONE
+  Reduce Output Operator
+key expressions: _col0 (type: int), _col1 (type: 
string)
+sort order: ++
+Statistics: Num rows: 12 Data size: 2274 Basic stats: 
COMPLETE Column stats: NONE
+Execution mode: llap
+LLAP IO: all inputs
+Map 3 
+Map Operator Tree:
+TableScan
+  alias: b
+  filterExpr: key is not null (type: boolean)
+  Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE 
Column stats: NONE
+  Filter Operator
+predicate: key is not null (type: boolean)
+Statistics: Num rows: 11 Data size: 44 Basic stats: 
COMPLETE Column stats: NONE
+Select Operator
+  expressions: key (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 11 Data size: 44 Basic stats: 
COMPLETE Column stats: NONE
+  Group By Operator
+keys: _col0 (type: int)
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 11 Data size: 44 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 11 Data size: 44 Basic stats: 
COMPLETE Column stats: NONE
+Execution mode: llap
+LLAP IO: all inputs
+Reducer 2 
+Execution mode: llap
+Reduce Operator Tree:
+  Select Operator
+expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 12 Data size: 2274 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+

[39/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
index 4c41f9c..a37b5a0 100644
--- 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
@@ -25,7 +25,6 @@ import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
@@ -33,9 +32,7 @@ import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
 import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator;
 import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator;
 import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator;
-import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator;
 import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase;
-import 
org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator;
 import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects;
 import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
@@ -48,17 +45,23 @@ import 
org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator;
 import 
org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import 
org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream;
 import 
org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator;
 import 
org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType;
 import 
org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow;
@@ -69,7 +72,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
@@ -86,14 +88,13 @@ import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import 
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import

[44/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
index 6785bce..df900a1 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java
@@ -31,7 +31,6 @@ import 
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
 // Single-Column String hash table import.
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet;
@@ -98,40 +97,31 @@ public class VectorMapJoinLeftSemiStringOperator extends 
VectorMapJoinLeftSemiGe
   //
 
   @Override
-  public void process(Object row, int tag) throws HiveException {
-
-try {
-  VectorizedRowBatch batch = (VectorizedRowBatch) row;
-
-  alias = (byte) tag;
-
-  if (needCommonSetup) {
-// Our one time process method initialization.
-commonSetup(batch);
+  protected void commonSetup() throws HiveException {
+super.commonSetup();
 
-/*
- * Initialize Single-Column String members for this specialized class.
- */
-
-singleJoinColumn = bigTableKeyColumnMap[0];
+/*
+ * Initialize Single-Column String members for this specialized class.
+ */
 
-needCommonSetup = false;
-  }
+singleJoinColumn = bigTableKeyColumnMap[0];
+  }
 
-  if (needHashTableSetup) {
-// Setup our hash table specialization.  It will be the first time the 
process
-// method is called, or after a Hybrid Grace reload.
+  @Override
+  public void hashTableSetup() throws HiveException {
+super.hashTableSetup();
 
-/*
- * Get our Single-Column String hash set information for this 
specialized class.
- */
+/*
+ * Get our Single-Column String hash set information for this specialized 
class.
+ */
 
-hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
+hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable;
+  }
 
-needHashTableSetup = false;
-  }
+  @Override
+  public void processBatch(VectorizedRowBatch batch) throws HiveException {
 
-  batchCounter++;
+try {
 
   // Do the per-batch setup for an left semi join.
 
@@ -144,11 +134,7 @@ public class VectorMapJoinLeftSemiStringOperator extends 
VectorMapJoinLeftSemiGe
   }
 
   final int inputLogicalSize = batch.size;
-
   if (inputLogicalSize == 0) {
-if (LOG.isDebugEnabled()) {
-  LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
-}
 return;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index 2e5c568..61bcbf0 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -24,13 +24,19 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult;
+import

[08/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out
--
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out 
b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out
index 1eab962..a7fdfda 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT   ctimestamp1,
  cfloat,
  cstring1,
@@ -31,7 +31,7 @@ WHERE(((ctinyint <= cbigint)
 GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
 ORDER BY cstring1, cfloat, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT   ctimestamp1,
  cfloat,
  cstring1,
@@ -83,7 +83,6 @@ STAGE PLANS:
 Statistics: Num rows: 12288 Data size: 147456 Basic stats: 
COMPLETE Column stats: NONE
 TableScan Vectorization:
 native: true
-vectorizationSchemaColumns: [0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 
5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 
8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 
12:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
 Filter Operator
   Filter Vectorization:
   className: VectorFilterOperator
@@ -135,12 +134,6 @@ STAGE PLANS:
   allNative: false
   usesVectorUDFAdaptor: false
   vectorized: true
-  rowBatchContext:
-  dataColumnCount: 12
-  includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10]
-  dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, 
cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, 
ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, 
cboolean2:boolean
-  partitionColumnCount: 0
-  scratchColumnTypeNames: [double, double, double, double]
   Reduce Vectorization:
   enabled: false
   enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -169,7 +162,6 @@ STAGE PLANS:
   TableScan
 TableScan Vectorization:
 native: true
-vectorizationSchemaColumns: [0:_col0:timestamp, 1:_col1:float, 
2:_col2:string, 3:_col3:boolean, 4:_col4:double, 5:_col5:double, 
6:_col6:double, 7:_col7:double, 8:_col8:float, 9:_col9:float, 10:_col10:float, 
11:_col11:float, 12:_col12:double, 13:_col13:double, 14:_col14:bigint, 
15:_col15:double, 16:_col16:double, 17:_col17:double, 18:_col18:double, 
19:_col19:double, 20:_col20:double, 21:_col21:double]
 Reduce Output Operator
   key expressions: _col2 (type: string), _col1 (type: float), 
_col4 (type: double), _col0 (type: timestamp)
   sort order: ++++
@@ -190,12 +182,6 @@ STAGE PLANS:
   allNative: false
   usesVectorUDFAdaptor: false
   vectorized: true
-  rowBatchContext:
-  dataColumnCount: 22
-  includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15, 16, 17, 18, 19, 20, 21]
-  dataColumns: _col0:timestamp, _col1:float, _col2:string, 
_col3:boolean, _col4:double, _col5:double, _col6:double, _col7:double, 
_col8:float, _col9:float, _col10:float, _col11:float, _col12:double, 
_col13:double, _col14:bigint, _col15:double, _col16:double, _col17:double, 
_col18:double, _col19:double, _col20:double, _col21:double
-  partitionColumnCount: 0
-  scratchColumnTypeNames: []
   Reduce Vectorization:
   enabled: false
   enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out
--
diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out 
b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out
index 2d306cf..6974ee8 100644
--- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT   cfloat,
  cboolean1,
  cdouble,
@@ -29,7 +29,7 @@ WHERE(((cstring2 LIKE '%ss%')
 GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+POSTHOOK: query: EXPLAIN

[25/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out
new file mode 100644
index 0000000..169d94c
--- /dev/null
+++ 
b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out
@@ -0,0 +1,3945 @@
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_big_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_big_1a_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * 
FROM fullouter_long_big_1a_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a
+POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE 
[(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, 
type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE 
fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS 
SELECT * FROM fullouter_long_big_1a_nonull_txt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_big_1a_nonull
+POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE 
[(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key,
 type:bigint, comment:null), ]
+PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date 
date)
+row format delimited fields terminated by ','
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@fullouter_long_small_1a_txt
+PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@fullouter_long_small_1a_txt
+POSTHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE 
fullouter_long_small_1a_txt
+POSTHOOK: type: LOAD
+ A masked pattern was

[30/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join46.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out 
b/ql/src/test/results/clientpositive/llap/join46.q.out
index 07c4a62..95d3611 100644
--- a/ql/src/test/results/clientpositive/llap/join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/join46.q.out
@@ -1633,7 +1633,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 
   1 
@@ -1746,7 +1746,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 
   1 
@@ -1857,7 +1857,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 
   1 
@@ -1970,7 +1970,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 _col1 (type: int)
   1 _col1 (type: int)
@@ -2148,7 +2148,7 @@ STAGE PLANS:
 Reduce Operator Tree:
   Merge Join Operator
 condition map:
- Outer Join 0 to 1
+ Full Outer Join 0 to 1
 keys:
   0 
   1 

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out 
b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
index c0c9f95..c3b1eb7 100644
--- a/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
+++ b/ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out
@@ -230,6 +230,7 @@ STAGE PLANS:
 input vertices:
   1 Map 3
 Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+DynamicPartitionHashJoin: true
 HybridGraceHashJoin: true
 File Output Operator
   compressed: false
@@ -318,6 +319,7 @@ STAGE PLANS:
 input vertices:
   1 Map 3
 Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+DynamicPartitionHashJoin: true
 HybridGraceHashJoin: true
 File Output Operator
   compressed: false
@@ -407,6 +409,7 @@ STAGE PLANS:
 input vertices:
   1 Map 3
 Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+DynamicPartitionHashJoin: true
 HybridGraceHashJoin: true
 File Output Operator
   compressed: false
@@ -495,6 +498,7 @@ STAGE PLANS:
 input vertices:
   1 Map 3
 Statistics: Num rows: 791 Data size: 140798 Basic stats: 
COMPLETE Column stats: COMPLETE
+DynamicPartitionHashJoin: true
 HybridGraceHashJoin: true
 File Output Operator
   compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/llap_acid.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/llap_acid.q.out 
b/ql/src/test/results/clientpositive/llap/llap_acid.q.out
index 0d1a331..d441ab0 100644
--- a/ql/src/test/results/clientpositive/llap/llap_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_acid.q.out
@@ -115,10 +115,10 @@ STAGE PLANS:
   projectedOutputColumnNums: [0, 4, 1]
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [4, 0]
+keyColumns: 4:smallint, 0:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS

[47/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a37827ec
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a37827ec
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a37827ec

Branch: refs/heads/master
Commit: a37827ecd557c7f7d69f3b2ccdbf6535908b1461
Parents: 45163ee
Author: Matt McCline 
Authored: Sun Sep 16 09:04:59 2018 -0500
Committer: Matt McCline 
Committed: Sun Sep 16 09:04:59 2018 -0500

--
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   10 +-
 data/files/fullouter_long_big_1a.txt|   11 +
 data/files/fullouter_long_big_1a_nonull.txt |   10 +
 data/files/fullouter_long_big_1b.txt|   13 +
 data/files/fullouter_long_big_1c.txt|   11 +
 data/files/fullouter_long_big_1d.txt|   12 +
 data/files/fullouter_long_small_1a.txt  |   54 +
 data/files/fullouter_long_small_1a_nonull.txt   |   51 +
 data/files/fullouter_long_small_1b.txt  |   72 +
 data/files/fullouter_long_small_1c.txt  |   81 +
 data/files/fullouter_long_small_1d.txt  |   39 +
 data/files/fullouter_multikey_big_1a.txt|   13 +
 data/files/fullouter_multikey_big_1a_nonull.txt |   10 +
 data/files/fullouter_multikey_big_1b.txt|   17 +
 data/files/fullouter_multikey_small_1a.txt  |   92 +
 .../fullouter_multikey_small_1a_nonull.txt  |   90 +
 data/files/fullouter_multikey_small_1b.txt  |  118 +
 data/files/fullouter_string_big_1a.txt  |   13 +
 data/files/fullouter_string_big_1a_nonull.txt   |   12 +
 data/files/fullouter_string_big_1a_old.txt  |   13 +
 data/files/fullouter_string_small_1a.txt|   38 +
 data/files/fullouter_string_small_1a_nonull.txt |   35 +
 data/files/fullouter_string_small_1a_old.txt|   38 +
 .../vectorization/mapjoin/AbstractMapJoin.java  |   66 +-
 .../mapjoin/MapJoinMultiKeyBenchBase.java   |3 +-
 .../mapjoin/MapJoinOneLongKeyBenchBase.java |3 +-
 .../mapjoin/MapJoinOneStringKeyBenchBase.java   |3 +-
 .../test/resources/testconfiguration.properties |5 +
 .../hadoop/hive/ql/exec/CommonJoinOperator.java |   11 +-
 .../apache/hadoop/hive/ql/exec/ExplainTask.java |  187 +-
 .../apache/hadoop/hive/ql/exec/JoinUtil.java|2 +-
 .../hadoop/hive/ql/exec/MapJoinOperator.java|  199 +-
 .../apache/hadoop/hive/ql/exec/Operator.java|   86 +-
 .../hadoop/hive/ql/exec/TableScanOperator.java  |6 +-
 .../persistence/BytesBytesMultiHashMap.java |   71 +-
 .../ql/exec/persistence/HashMapWrapper.java |   22 +
 .../persistence/HybridHashTableContainer.java   |  118 +-
 .../persistence/MapJoinBytesTableContainer.java |  199 +-
 .../hive/ql/exec/persistence/MapJoinKey.java|   15 +
 .../persistence/MapJoinObjectSerDeContext.java  |   17 +
 .../exec/persistence/MapJoinTableContainer.java |   61 +-
 .../hive/ql/exec/persistence/MatchTracker.java  |  154 +
 .../ReusableGetAdaptorDirectAccess.java |4 +-
 .../ql/exec/persistence/UnwrapRowContainer.java |   12 +-
 .../hive/ql/exec/tez/ReduceRecordProcessor.java |2 +-
 .../vector/VectorAppMasterEventOperator.java|2 +-
 .../hive/ql/exec/vector/VectorAssignRow.java|   12 +
 .../hive/ql/exec/vector/VectorCopyRow.java  |   21 +-
 .../ql/exec/vector/VectorDeserializeRow.java|   32 +
 .../ql/exec/vector/VectorFilterOperator.java|2 +-
 .../ql/exec/vector/VectorGroupByOperator.java   |2 +-
 .../ql/exec/vector/VectorLimitOperator.java |2 +-
 .../exec/vector/VectorMapJoinBaseOperator.java  |   23 +-
 .../ql/exec/vector/VectorMapJoinOperator.java   |   23 +-
 .../exec/vector/VectorSMBMapJoinOperator.java   |2 +-
 .../ql/exec/vector/VectorSelectOperator.java|4 +-
 .../ql/exec/vector/VectorTopNKeyOperator.java   |2 +-
 .../mapjoin/VectorMapJoinCommonOperator.java|  409 +-
 .../VectorMapJoinFullOuterLongOperator.java |   68 +
 .../VectorMapJoinFullOuterMultiKeyOperator.java |   71 +
 .../VectorMapJoinFullOuterStringOperator.java   |   71 +
 .../VectorMapJoinGenerateResultOperator.java|  163 +-
 ...pJoinInnerBigOnlyGenerateResultOperator.java |   18 +-
 .../VectorMapJoinInnerBigOnlyLongOperator.java  |   63 +-
 ...ctorMapJoinInnerBigOnlyMultiKeyOperator.java |   60 +-
 ...VectorMapJoinInnerBigOnlyStringOperator.java |   50 +-
 ...ectorMapJoinInnerGenerateResultOperator.java |   22 +-
 .../mapjoin/VectorMapJoinInnerLongOperator.java |   64 +-
 .../VectorMapJoinInnerMultiKeyOperator.java |   58 +-
 .../VectorMapJoinInnerStringOperator.java   |   50 +-
 ...orMapJoinLeftSemiGenerateResultOperator.java |   12 +-
 .../VectorMapJoinLeftSemiLongOperator.java  |   64 +-
 .../VectorMapJoinLeftSemiMultiKeyOperator.java  |   58 +-
 .../VectorMapJoinLeftSemiStringOperator.java|   50 +-

[04/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
 
b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
index 352e74f..5954629 100644
--- 
a/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
+++ 
b/ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
@@ -89,10 +89,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 2000 Data size: 21248 Basic 
stats: COMPLETE Column stats: NONE
 Execution mode: vectorized
 Map Vectorization:
@@ -372,10 +371,9 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: string)
   Reduce Sink Vectorization:
   className: VectorReduceSinkStringOperator
-  keyColumnNums: [2]
+  keyColumns: 2:string
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized
 Map Vectorization:
@@ -424,10 +422,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized
 Map Vectorization:
@@ -575,10 +572,9 @@ STAGE PLANS:
   Map-reduce partition columns: _col0 (type: string)
   Reduce Sink Vectorization:
   className: VectorReduceSinkStringOperator
-  keyColumnNums: [2]
+  keyColumns: 2:string
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2000 Data size: 21248 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized
 Map Vectorization:
@@ -627,10 +623,9 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
-keyColumnNums: [0]
+keyColumns: 0:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: []
 Statistics: Num rows: 1 Data size: 188 Basic stats: 
COMPLETE Column stats: NONE

[01/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

Repository: hive
Updated Branches:
  refs/heads/master 45163ee4c -> a37827ecd


http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out
--
diff --git a/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out 
b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out
new file mode 100644
index 0000000..53c32ff
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorized_join46_mr.q.out
@@ -0,0 +1,2050 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 
'None'),
+(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 
'None'),
+(99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SCRIPT []
+POSTHOOK: Lineage: test1.key SCRIPT []
+POSTHOOK: Lineage: test1.value SCRIPT []
+col1   col2col3
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+(104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+(104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SCRIPT []
+POSTHOOK: Lineage: test2.key SCRIPT []
+POSTHOOK: Lineage: test2.value SCRIPT []
+col1   col2col3
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value)
+POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+Map Reduce Local Work
+  Alias -> Map Local Tables:
+$hdt$_1:test2 
+  Fetch Operator
+limit: -1
+  Alias -> Map Local Operator Tree:
+$hdt$_1:test2 
+  TableScan
+alias: test2
+Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column 
stats: NONE
+Select Operator
+  expressions: key (type: int), value (type: int), col_2 (type: 
string)
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE 
Column stats: NONE
+  HashTable Sink Operator
+keys:
+  0 _col1 (type: int)
+  1 _col1 (type: int)
+
+  Stage: Stage-3
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: test1
+Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column 
stats: NONE
+TableScan Vectorization:
+native: true
+Select Operator
+  expressions: key (type: int), value (type: int), col_1 (type: 
string)
+  outputColumnNames: _col0, _col1, _col2
+  Select Vectorization:
+  className: VectorSelectOperator
+  native: true
+  Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE
+  Map Join Operator
+condition map:
+ Left Outer Join 0 to 1
+keys:
+  0 _col1 (type: int)
+  1 _col1 (type: int)
+Map Join Vectorization:
+bigTableKeyExpressions: col 1:int
+className: VectorMapJoinOperator
+native: false
+nativeConditionsMet: hive.mapjoin.optimized.hashtable IS 
true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin

[34/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out 
b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
index 194fc5d..a160428 100644
--- a/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
@@ -188,6 +188,139 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1_n2
  A masked pattern was here 
 4543526
+PREHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col0 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col0 (type: int)
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col1 (type: int)
+Execution mode: vectorized, llap
+LLAP IO: no inputs
+Map 4 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  key expressions: _col1 (type: int)
+  sort order: +
+  Map-reduce partition columns: _col1 (type: int)
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: int)
+Execution mode: vectorized, llap
+LLAP IO: no inputs
+Reducer 2 
+Execution mode: llap
+Reduce Operator Tree:
+  Merge Join Operator
+condition map:
+ Full Outer Join 0 to 1
+keys:
+  0 _col0 (type: int)
+  1 _col1 (type: int)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Select Operator
+  expressions: hash(_col0,_col1,_col2,_col3) (type: int)
+  outputColumnNames: _col0
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  Group By Operator
+aggregations: sum(_col0)
+mode: hash
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: bigint)
+Reducer 3 
+Execution mode: vectorized, llap
+Reduce Operator Tree:
+  Group By Operator
+aggregations: sum(VALUE._col0)
+mode: mergepartial
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+

[26/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out 
b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out
new file mode 100644
index 000..6ddcef6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out
@@ -0,0 +1,1228 @@
+PREHOOK: query: drop table if exists TJOIN1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TJOIN1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists TJOIN2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TJOIN2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) 
STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN1
+POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) 
STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN1
+PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 
char(2)) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN2
+POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 
char(2)) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN2
+PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 
char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN1STAGE
+POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 
char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN1STAGE
+PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 
char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TJOIN2STAGE
+POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 
char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' 
STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TJOIN2STAGE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE 
INTO TABLE TJOIN1STAGE
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@tjoin1stage
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' 
OVERWRITE INTO TABLE TJOIN1STAGE
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@tjoin1stage
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE 
INTO TABLE TJOIN2STAGE
+PREHOOK: type: LOAD
+ A masked pattern was here 
+PREHOOK: Output: default@tjoin2stage
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' 
OVERWRITE INTO TABLE TJOIN2STAGE
+POSTHOOK: type: LOAD
+ A masked pattern was here 
+POSTHOOK: Output: default@tjoin2stage
+PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tjoin1stage
+PREHOOK: Output: default@tjoin1
+POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tjoin1stage
+POSTHOOK: Output: default@tjoin1
+POSTHOOK: Lineage: tjoin1.c1 SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: tjoin1.c2 EXPRESSION 
[(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ]
+POSTHOOK: Lineage: tjoin1.rnum SIMPLE 
[(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ]
+_col0  _col1   _col2
+PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tjoin2stage
+PREHOOK: Output: default@tjoin2
+POSTHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tjoin2stage
+POSTHOOK: Output: default@tjoin2
+POSTHOOK: Lineage: tjoin2.c1 SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ]
+POSTHOOK: Lineage: tjoin2.c2 SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ]
+POSTHOOK: Lineage: tjoin2.rnum SIMPLE 
[(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ]
+tjoin2stage.rnum   tjoin2stage.c1  tjoin2stage.c2
+PREHOOK: query: explain vectorization detail
+select tjoin1.rnum,

[38/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
--
diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
new file mode 100644
index 000..586c850
--- /dev/null
+++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.Random;
+
+import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+/*
+ * A multi-key value hash map optimized for vector map join.
+ *
+ * The key is uninterpreted bytes.
+ */
+public class TestVectorMapJoinFastBytesHashMapNonMatched extends 
CommonFastHashTable {
+
+  /**
+   * Puts three random values under a single random key, mirroring every row
+   * into the verification table, and then runs the non-matched verification.
+   */
+  @Test
+  public void testOneKey() throws Exception {
+    random = new Random(82733);
+
+    VectorMapJoinFastMultiKeyHashMap hashMap =
+        new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+    VerifyFastBytesHashMap expected = new VerifyFastBytesHashMap();
+
+    byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+    random.nextBytes(key);
+
+    // First, second and third value: each one is a fresh random-length array
+    // stored under the same key.
+    final int valueCount = 3;
+    for (int n = 0; n < valueCount; n++) {
+      byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+      random.nextBytes(value);
+      hashMap.testPutRow(key, value);
+      expected.add(key, value);
+    }
+
+    expected.verifyNonMatched(hashMap, random);
+  }
+
+  /**
+   * Adds many distinct random keys, each with exactly one random value, to both
+   * the hash map and the verification table, then runs the non-matched
+   * verification.
+   */
+  @Test
+  public void testMultipleKeysSingleValue() throws Exception {
+    random = new Random(29383);
+
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100 + random.nextInt(1000);
+    for (int i = 0; i < keyCount; i++) {
+      byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+      random.nextBytes(key);
+      if (verifyTable.contains(key)) {
+        // Unique keys for this test: skip a key that was already added.
+        // (Breaking out when the key was NOT present ended the loop on the
+        // first iteration, so no row was ever inserted.)
+        continue;
+      }
+      byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+      random.nextBytes(value);
+
+      map.testPutRow(key, value);
+      verifyTable.add(key, value);
+    }
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  /**
+   * Convenience overload: delegates to the five-argument variant using
+   * MAX_KEY_LENGTH as the key length bound and -1 as the fixed value length.
+   */
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+      VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable)
+      throws HiveException, IOException {
+    // -1 is the value the five-argument variant tests for before choosing a
+    // generated value size instead of a fixed one.
+    final int noFixedValueLength = -1;
+    addAndVerifyMultipleKeyMultipleValue(
+        keyCount, map, verifyTable, MAX_KEY_LENGTH, noFixedValueLength);
+  }
+
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+  VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable,
+  int maxKeyLength, int fixedValueLength)
+  throws HiveException, IOException {
+for (int i = 0; i < keyCount; i++) {
+  byte[] value;
+  if (fixedValueLength == -1) {
+value = new byte[generateLargeCount() - 1];
+  } else {
+value = new byte[fixedValueLength];
+  }
+  random.nextBytes(value);
+
+  // Add a new key or add a value to an existing key?
+  if (random.nextBoolean() || verifyTable.getCount() == 0) {
+byte[] key;
+while (true) {
+  key = new byte[random.nextInt(maxKeyLength)];
+  random.nextBytes(key);
+  if (!verifyTable.contains(key)) {
+// Unique keys for this test.
+

[45/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index c832cdb..5733688 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -21,7 +21,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.Map;
+import java.util.Map.Entry;
 
 import org.apache.commons.lang.ArrayUtils;
 import org.slf4j.Logger;
@@ -41,7 +41,6 @@ import 
org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping;
 import org.apache.hadoop.hive.ql.exec.vector.VectorCopyRow;
 import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator;
@@ -55,14 +54,17 @@ import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTabl
 import 
org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastHashTableLoader;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
 import 
org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation;
 import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import 
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
 import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -124,6 +126,10 @@ private static final Logger LOG = 
LoggerFactory.getLogger(CLASS_NAME);
   // a mixture of input big table columns and new scratch columns.
   protected VectorizationContext vOutContext;
 
+  protected VectorMapJoinVariation vectorMapJoinVariation;
+  protected HashTableKind hashTableKind;
+  protected HashTableKeyType hashTableKeyType;
+
   // The output column projection of the vectorized row batch.  And, the type 
infos of the output
   // columns.
   protected int[] outputProjection;
@@ -149,28 +155,70 @@ private static final Logger LOG = 
LoggerFactory.getLogger(CLASS_NAME);
   protected String[] bigTableValueColumnNames;
   protected TypeInfo[] bigTableValueTypeInfos;
 
-  // This is a mapping of which big table columns (input and key/value 
expressions) will be
-  // part of the big table portion of the join output result.
-  protected VectorColumnOutputMapping bigTableRetainedMapping;
+  /*
+   * NOTE:
+   *The Big Table key columns are from the key expressions.
+   *The Big Table value columns are from the getExpr(posBigTable) 
expressions.
+   *Any calculations needed for those will be scratch columns.
+   *
+   *The Small Table key and value output columns are scratch columns.
+   *
+   * Big Table Retain Column Map / TypeInfos:
+   *Any Big Table Batch columns that will be in the output result.
+   *0, 1, or more Column Nums and TypeInfos
+   *
+   * Non Outer Small Table Key Mapping:
+   *For non-[FULL] OUTER MapJoin, when Big Table key columns are not 
retained for the output
+   *result but are needed for the Small Table output result, they are put 
in this mapping
+   *as they are required for copying rows to the overflow batch.
+   *
+   * Outer Small Table Key Mapping
+   *For [FULL] OUTER MapJoin, the mapping for any Small Table key columns 
needed for the
+   *output result from the Big Table key columns.  The Big Table keys 
cannot be projected since
+   *on NOMATCH there must be a physical column present to hold the 
non-match NULL.
+   *
+   * Full Outer Small Table Key Mapping
+   *For FULL OUTER MapJoin, the mapping from any needed Small Table key

[27/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
index ddcabd8..4c81131 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out
@@ -87,10 +87,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns: 5:boolean
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -204,10 +203,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns: 5:tinyint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -321,10 +319,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns: 5:smallint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -438,10 +435,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns: 5:int
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -555,10 +551,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns: 5:bigint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 2 Data size: 16 Basic stats: 
COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: all inputs
@@ -672,10 +667,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [5]
+  keyColumns:

[42/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java
new file mode 100644
index 000..b9e86eb
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import com.google.common.base.Joiner;
+
+/**
+ * FULL OUTER MapJoin planning.
+ */
+public class FullOuterMapJoinOptimization {
+
+  FullOuterMapJoinOptimization() {
+  }
+
+  /**
+   * Removes the filter map from a MapJoin descriptor.
+   *
+   * For every alias position other than the big table, the last entry of the
+   * column-name list and of the column-type list stored in the properties of
+   * the matching value-filtered table descriptor is dropped, and the shortened
+   * lists are written back to those properties. The dropped type must be
+   * smallint (the filter tag). Finally the descriptor's filter map is set to
+   * null. Nothing is changed when the descriptor has no filter map.
+   *
+   * @param mapJoinDesc the MapJoin descriptor to update in place
+   * @throws SemanticException if the last column type of a small table's
+   *           value-filtered table descriptor is not smallint
+   */
+  public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException {
+    int[][] filterMaps = mapJoinDesc.getFilterMap();
+    if (filterMaps == null) {
+      return;
+    }
+    final byte posBigTable = (byte) mapJoinDesc.getPosBigTable();
+    final int numAliases = mapJoinDesc.getExprs().size();
+    List<TableDesc> valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs();
+    for (byte pos = 0; pos < numAliases; pos++) {
+      if (pos != posBigTable) {
+        TableDesc tableDesc = valueFilteredTblDescs.get(pos);
+        Properties properties = tableDesc.getProperties();
+        String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS);
+        // Fall back to a comma when the table does not declare its own delimiter.
+        String columnNameDelimiter =
+            properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ?
+                properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) :
+                  String.valueOf(SerDeUtils.COMMA);
+
+        String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+        List<String> columnNameList;
+        if (columnNameProperty.length() == 0) {
+          columnNameList = new ArrayList<String>();
+        } else {
+          columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
+        }
+        // Drop the trailing column name.
+        List<String> truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1);
+        String truncatedColumnNameProperty =
+            Joiner.on(columnNameDelimiter).join(truncatedColumnNameList);
+
+        List<TypeInfo> columnTypeList;
+        if (columnTypeProperty.length() == 0) {
+          columnTypeList = new ArrayList<TypeInfo>();
+        } else {
+          columnTypeList = TypeInfoUtils
+              .getTypeInfosFromTypeString(columnTypeProperty);
+        }
+        if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) {
+          throw new SemanticException("Expecting filterTag smallint as last column type");
+        }
+        // Drop the trailing filter tag type checked above.
+        List<TypeInfo> truncatedColumnTypeList =
+            columnTypeList.subList(0, columnTypeList.size() - 1);
+        String truncatedColumnTypeProperty =
+            Joiner.on(",").join(truncatedColumnTypeList);
+
+        properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty);
+        properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty);
+      }
+    }
+    mapJoinDesc.setFilterMap(null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
--
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java

[16/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_like_2.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out 
b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out
index 1a20a35..31b7326 100644
--- a/ql/src/test/results/clientpositive/llap/vector_like_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_like_2.q.out
@@ -63,10 +63,10 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [0]
+  keyColumns: 0:string
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: [2]
+  valueColumns: 2:boolean
   Statistics: Num rows: 3 Data size: 552 Basic stats: 
COMPLETE Column stats: NONE
   value expressions: _col1 (type: boolean)
 Execution mode: vectorized, llap

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out 
b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
index 630f3f7..8ac3a11 100644
--- 
a/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
+++ 
b/ql/src/test/results/clientpositive/llap/vector_llap_io_data_conversion.q.out
@@ -104,10 +104,9 @@ STAGE PLANS:
   sort order: +++
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [0, 1, 2]
+  keyColumns: 0:varchar(10), 1:int, 2:string
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 10 Data size: 2820 Basic stats: 
COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: all inputs

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out
index 45bfc6b..29c4bc1 100644
--- a/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out
@@ -167,11 +167,11 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
-keyColumnNums: [0]
+keyColumns: 0:int
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-partitionColumnNums: [0]
-valueColumnNums: [1]
+partitionColumns: 0:int
+valueColumns: 1:string
 Statistics: Num rows: 242 Data size: 22990 Basic 
stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string)
 Execution mode: vectorized, llap
@@ -223,14 +223,16 @@ STAGE PLANS:
   0 _col0 (type: int)
   1 _col0 (type: int)
 Map Join Vectorization:
-bigTableKeyColumnNums: [0]
-bigTableRetainedColumnNums: [0, 1]
-bigTableValueColumnNums: [1]
+bigTableKeyColumns: 0:int
+bigTableRetainColumnNums: [1]
+bigTableValueColumns: 1:string

[07/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
--
diff --git 
a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out 
b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
index 18e75aa..87f0ca8 100644
--- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
+++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out
@@ -289,74 +289,12 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 0  val_0   val_0
 0  val_0   val_0
 0  val_0   val_0
-2  val_2   val_2
-4  val_4   val_4
-8  val_8   val_8
-11 val_11  val_11
-15 val_15  val_15
-15 val_15  val_15
-15 val_15  val_15
-15 val_15  val_15
-17 val_17  val_17
-19 val_19  val_19
-20 val_20  val_20
-24 val_24  val_24
-24 val_24  val_24
-24 val_24  val_24
-24 val_24  val_24
-26 val_26  val_26
-26 val_26  val_26
-26 val_26  val_26
-26 val_26  val_26
-28 val_28  val_28
-33 val_33  val_33
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-35 val_35  val_35
-37 val_37  val_37
-37 val_37  val_37
-37 val_37  val_37
-37 val_37  val_37
-42 val_42  val_42
-42 val_42  val_42
-42 val_42  val_42
-42 val_42  val_42
-44 val_44  val_44
-51 val_51  val_51
-51 val_51  val_51
-51 val_51  val_51
-51 val_51  val_51
-53 val_53  val_53
-57 val_57  val_57
-64 val_64  val_64
-66 val_66  val_66
-77 val_77  val_77
-80 val_80  val_80
-82 val_82  val_82
-84 val_84  val_84
-84 val_84  val_84
-84 val_84  val_84
-84 val_84  val_84
-86 val_86  val_86
-95 val_95  val_95
-95 val_95  val_95
-95 val_95  val_95
-95 val_95  val_95
-97 val_97  val_97
-97 val_97  val_97
-97 val_97  val_97
-97 val_97  val_97
 103val_103 val_103
 103val_103 val_103
 103val_103 val_103
 103val_103 val_103
 105val_105 val_105
+11 val_11  val_11
 114val_114 val_114
 116val_116 val_116
 118val_118 val_118
@@ -398,6 +336,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 149val_149 val_149
 149val_149 val_149
 149val_149 val_149
+15 val_15  val_15
+15 val_15  val_15
+15 val_15  val_15
+15 val_15  val_15
 150val_150 val_150
 152val_152 val_152
 152val_152 val_152
@@ -435,6 +377,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 169val_169 val_169
 169val_169 val_169
 169val_169 val_169
+17 val_17  val_17
 170val_170 val_170
 172val_172 val_172
 172val_172 val_172
@@ -461,10 +404,13 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 187val_187 val_187
 187val_187 val_187
 189val_189 val_189
+19 val_19  val_19
 190val_190 val_190
 192val_192 val_192
 194val_194 val_194
 196val_196 val_196
+2  val_2   val_2
+20 val_20  val_20
 200val_200 val_200
 200val_200 val_200
 200val_200 val_200
@@ -511,6 +457,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 239val_239 val_239
 239val_239 val_239
 239val_239 val_239
+24 val_24  val_24
+24 val_24  val_24
+24 val_24  val_24
+24 val_24  val_24
 242val_242 val_242
 242val_242 val_242
 242val_242 val_242
@@ -522,6 +472,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 255val_255 val_255
 255val_255 val_255
 257val_257 val_257
+26 val_26  val_26
+26 val_26  val_26
+26 val_26  val_26
+26 val_26  val_26
 260val_260 val_260
 262val_262 val_262
 266val_266 val_266
@@ -551,6 +505,7 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 277val_277 val_277
 277val_277 val_277
 277val_277 val_277
+28 val_28  val_28
 280val_280 val_280
 280val_280 val_280
 280val_280 val_280
@@ -612,11 +567,21 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 327val_327 val_327
 327val_327 val_327
 327val_327 val_327
+33 val_33  val_33
 332val_332 val_332
 336val_336 val_336
 338val_338 val_338
 341val_341 val_341
 345val_345 val_345
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
+35 val_35  val_35
 356val_356 val_356
 365val_365 val_365
 367val_367 val_367
@@ -632,6 +597,10 @@ POSTHOOK: Input: default@tab_part_n9@ds=2008-04-08
 369val_369 val_369
 369val_369 val_369
 369val_369 val_369
+37 val_37  val_37
+37 val_37  val_37
+37 val_37  val_37
+37 val_37  val_37
 374val_374 val_374
 378val_378 val_378
 389val_389 val_389
@@ -646,6

[35/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
--
diff --git a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out 
b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
index 6b85e13..2288b4b 100644
--- a/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/beeline/mapjoin2.q.out
@@ -27,6 +27,85 @@ POSTHOOK: Output: default@tbl_n1
 POSTHOOK: Lineage: tbl_n1.n SCRIPT []
 POSTHOOK: Lineage: tbl_n1.t SCRIPT []
 Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
+PREHOOK: query: explain
+select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 
1) a  left outer join  (select * from tbl_n1 where 1 = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 
1) a  left outer join  (select * from tbl_n1 where 1 = 2) b on a.n = b.n
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+Map Reduce Local Work
+  Alias -> Map Local Tables:
+$hdt$_1:tbl_n1 
+  Fetch Operator
+limit: -1
+  Alias -> Map Local Operator Tree:
+$hdt$_1:tbl_n1 
+  TableScan
+alias: tbl_n1
+Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column 
stats: NONE
+Filter Operator
+  predicate: false (type: boolean)
+  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: n (type: bigint), t (type: string)
+outputColumnNames: _col0, _col1
+Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+HashTable Sink Operator
+  keys:
+0 
+1 
+
+  Stage: Stage-3
+Map Reduce
+  Map Operator Tree:
+  TableScan
+alias: tbl_n1
+filterExpr: (n = 1L) (type: boolean)
+Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column 
stats: NONE
+Filter Operator
+  predicate: (n = 1L) (type: boolean)
+  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: t (type: string)
+outputColumnNames: _col0
+Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE 
Column stats: NONE
+Map Join Operator
+  condition map:
+   Left Outer Join 0 to 1
+  keys:
+0 
+1 
+  outputColumnNames: _col0, _col1, _col2
+  Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE 
Column stats: NONE
+  Select Operator
+expressions: 1L (type: bigint), _col0 (type: string), 
_col1 is null (type: boolean), _col2 is null (type: boolean)
+outputColumnNames: _col0, _col1, _col2, _col3
+Statistics: Num rows: 1 Data size: 11 Basic stats: 
COMPLETE Column stats: NONE
+File Output Operator
+  compressed: false
+  Statistics: Num rows: 1 Data size: 11 Basic stats: 
COMPLETE Column stats: NONE
+  table:
+  input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+  output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+  serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+  Execution mode: vectorized
+  Local Work:
+Map Reduce Local Work
+
+  Stage: Stage-0
+Fetch Operator
+  limit: -1
+  Processor Tree:
+ListSink
+
+Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
 PREHOOK: query: select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from 
tbl_n1 where n = 1) a  left outer join  (select * from tbl_n1 where 1 = 2) b on 
a.n = b.n
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tbl_n1
@@ -37,6 +116,91 @@ POSTHOOK: Input: default@tbl_n1
  A masked pattern was here 
 1  one truetrue
 Warning: Map Join MAPJOIN[13][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
+PREHOOK: query: explain
+select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 
1) a  right outer join  (select * from tbl_n1 where n = 2) b on a.n = b.n
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 
1) a  right outer join  (select * from

[37/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin2.q
--
diff --git a/ql/src/test/queries/clientpositive/mapjoin2.q 
b/ql/src/test/queries/clientpositive/mapjoin2.q
index e194bd0..014dabe 100644
--- a/ql/src/test/queries/clientpositive/mapjoin2.q
+++ b/ql/src/test/queries/clientpositive/mapjoin2.q
@@ -6,16 +6,30 @@ create table tbl_n1 (n bigint, t string);
 insert into tbl_n1 values (1, 'one'); 
 insert into tbl_n1 values(2, 'two');
 
+explain
+select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 
1) a  left outer join  (select * from tbl_n1 where 1 = 2) b on a.n = b.n;
 select a.n, a.t, isnull(b.n), isnull(b.t) from (select * from tbl_n1 where n = 
1) a  left outer join  (select * from tbl_n1 where 1 = 2) b on a.n = b.n;
 
+explain
+select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 
1) a  right outer join  (select * from tbl_n1 where n = 2) b on a.n = b.n;
 select isnull(a.n), isnull(a.t), b.n, b.t from (select * from tbl_n1 where 2 = 
1) a  right outer join  (select * from tbl_n1 where n = 2) b on a.n = b.n;
 
+explain
+select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from 
tbl_n1 where n = 1) a  full outer join  (select * from tbl_n1 where n = 2) b on 
a.n = b.n;
 select isnull(a.n), isnull(a.t), isnull(b.n), isnull(b.t) from (select * from 
tbl_n1 where n = 1) a  full outer join  (select * from tbl_n1 where n = 2) b on 
a.n = b.n;
 
+explain
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 
b_one, 0 b_zero ) b on a.key = b.key;
 select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a join ( SELECT 11 key, 0 confuse_you, 1 
b_one, 0 b_zero ) b on a.key = b.key;
 
+explain
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
 select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a left outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
 
+explain
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
 select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
 
+explain
+select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a right outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;
 select a.key, a.a_one, b.b_one, a.a_zero, b.b_zero from ( SELECT 11 key, 0 
confuse_you, 1 a_one, 0 a_zero ) a full outer join ( SELECT 11 key, 0 
confuse_you, 1 b_one, 0 b_zero ) b on a.key = b.key;

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/mapjoin46.q
--
diff --git a/ql/src/test/queries/clientpositive/mapjoin46.q 
b/ql/src/test/queries/clientpositive/mapjoin46.q
index 9de7113..81f9610 100644
--- a/ql/src/test/queries/clientpositive/mapjoin46.q
+++ b/ql/src/test/queries/clientpositive/mapjoin46.q
@@ -3,6 +3,8 @@ set hive.auto.convert.join=true;
 set hive.strict.checks.cartesian.product=false;
 set hive.join.emit.interval=2;
 
+-- SORT_QUERY_RESULTS
+
 CREATE TABLE test1_n4 (key INT, value INT, col_1 STRING);
 INSERT INTO test1_n4 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
 (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car');
@@ -173,6 +175,22 @@ ON (test1_n4.value=test2_n2.value
 OR test2_n2.key between 100 and 102));
 
 -- Disjunction with pred on multiple inputs and single inputs (full outer join)
+SET hive.mapjoin.full.outer=false;
+EXPLAIN
+SELECT *
+FROM test1_n4 FULL OUTER JOIN test2_n2
+ON (test1_n4.value=test2_n2.value
+  OR test1_n4.key between 100 and 102
+  OR test2_n2.key between 100 and 102);
+
+SELECT *
+FROM test1_n4 FULL OUTER JOIN test2_n2
+ON (test1_n4.value=test2_n2.value
+  OR test1_n4.key between 100 and 102
+  OR test2_n2.key between 100 and 102);
+
+SET hive.mapjoin.full.outer=true;
+SET hive.merge.nway.joins=false;
 EXPLAIN
 SELECT *
 FROM test1_n4 FULL OUTER JOIN test2_n2
@@ -185,8 +203,23 @@ FROM test1_n4 FULL OUTER JOIN test2_n2
 ON (test1_n4.value=test2_n2.value
   OR test1_n4.key between 100 and 102
   OR test2_n2.key between 100 and 102);
+SET hive.merge.nway.joins=true;
 
 -- Disjunction with pred on multiple inputs and left input (full outer join)
+SET hive.mapjoin.full.outer=false;
+EXPLAIN

[18/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
index 12db036..b8d76ed 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out
@@ -47,15 +47,167 @@ POSTHOOK: Input: default@myinput1_n4
  A masked pattern was here 
 13630578
 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product
-PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 
a RIGHT OUTER JOIN myinput1_n4 b
+PREHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b
 PREHOOK: type: QUERY
-PREHOOK: Input: default@myinput1_n4
- A masked pattern was here 
-POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM 
myinput1_n4 a RIGHT OUTER JOIN myinput1_n4 b
+POSTHOOK: query: EXPLAIN VECTORIZATION OPERATOR
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n4 a RIGHT OUTER 
JOIN myinput1_n4 b
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@myinput1_n4
- A masked pattern was here 
-13630578
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+Tez
+ A masked pattern was here 
+  Edges:
+Map 2 <- Map 1 (BROADCAST_EDGE)
+Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+ A masked pattern was here 
+  Vertices:
+Map 1 
+Map Operator Tree:
+TableScan
+  alias: a
+  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+Reduce Output Operator
+  sort order: 
+  Reduce Sink Vectorization:
+  className: VectorReduceSinkEmptyKeyOperator
+  native: true
+  nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+  Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+  value expressions: _col0 (type: int), _col1 (type: int)
+Execution mode: vectorized, llap
+LLAP IO: all inputs
+Map Vectorization:
+enabled: true
+enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true
+inputFormatFeatureSupport: [DECIMAL_64]
+featureSupportInUse: [DECIMAL_64]
+inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+allNative: true
+usesVectorUDFAdaptor: false
+vectorized: true
+Map 2 
+Map Operator Tree:
+TableScan
+  alias: b
+  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE
+  TableScan Vectorization:
+  native: true
+  Select Operator
+expressions: key (type: int), value (type: int)
+outputColumnNames: _col0, _col1
+Select Vectorization:
+className: VectorSelectOperator
+native: true
+Statistics: Num rows: 3 Data size: 24 Basic stats: 
COMPLETE Column stats: NONE
+Map Join Operator
+  condition map:
+   Right Outer Join 0 to 1
+  filter predicates:
+0 
+1 {true}
+  keys:
+0 
+1 
+  Map Join Vectorization:
+  className: VectorMapJoinOuterFilteredOperator
+  native: false
+  nativeConditionsMet:

[06/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join33.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out 
b/ql/src/test/results/clientpositive/spark/join33.q.out
index 13cd446..09198b0 100644
--- a/ql/src/test/results/clientpositive/spark/join33.q.out
+++ b/ql/src/test/results/clientpositive/spark/join33.q.out
@@ -393,88 +393,88 @@ POSTHOOK: query: select * from dest_j1_n7
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1_n7
  A masked pattern was here 
-146val_146 val_146
-146val_146 val_146
-146val_146 val_146
-146val_146 val_146
+146val_146
+146val_146
+146val_146
+146val_146
 150val_150 val_150
-213val_213 val_213
-213val_213 val_213
-213val_213 val_213
-213val_213 val_213
-238val_238 val_238
-238val_238 val_238
-238val_238 val_238
-238val_238 val_238
-255val_255 val_255
-255val_255 val_255
-255val_255 val_255
-255val_255 val_255
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-273val_273 val_273
-278val_278 val_278
-278val_278 val_278
-278val_278 val_278
-278val_278 val_278
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-311val_311 val_311
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-401val_401 val_401
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
-406val_406 val_406
+213val_213
+213val_213
+213val_213
+213val_213
+238val_238
+238val_238
+238val_238
+238val_238
+255val_255
+255val_255
+255val_255
+255val_255
+273val_273
+273val_273
+273val_273
+273val_273
+273val_273
+273val_273
+273val_273
+273val_273
+273val_273
+278val_278
+278val_278
+278val_278
+278val_278
+311val_311
+311val_311
+311val_311
+311val_311
+311val_311
+311val_311
+311val_311
+311val_311
+311val_311
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+401val_401
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
+406val_406
 66 val_66  val_66
-98 val_98  val_98
-98 val_98  val_98
-98 val_98  val_98
-98 val_98  val_98
+98 val_98
+98 val_98
+98 val_98
+98 val_98

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/spark/join6.q.out
--
diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out 
b/ql/src/test/results/clientpositive/spark/join6.q.out
index 6075e5f..caa0849 100644
---

[12/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out 
b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index fbcbd64..5e95f39 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -63,10 +63,9 @@ STAGE PLANS:
 sort order: 
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
-keyColumnNums: []
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0, 1, 2, 3]
+valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 
3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: tinyint), _col1 (type: 
tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -119,10 +118,10 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [0]
+  keyColumns: 0:tinyint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: [1, 2, 3]
+  valueColumns: 1:tinyint, 2:bigint, 3:bigint
   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE 
Column stats: COMPLETE
   value expressions: _col1 (type: tinyint), _col2 (type: 
bigint), _col3 (type: bigint)
 Reducer 3 
@@ -244,10 +243,9 @@ STAGE PLANS:
 sort order: 
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
-keyColumnNums: []
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0]
+valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
@@ -300,10 +298,9 @@ STAGE PLANS:
   sort order: +
   Reduce Sink Vectorization:
   className: VectorReduceSinkObjectHashOperator
-  keyColumnNums: [0]
+  keyColumns: 0:bigint
   native: true
   nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-  valueColumnNums: []
   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE
 Reducer 3 
 Execution mode: vectorized, llap
@@ -575,10 +572,9 @@ STAGE PLANS:
 sort order: 
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
-keyColumnNums: []
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-valueColumnNums: [0, 1, 2, 3]
+valueColumns: 0:bigint, 1:bigint, 2:bigint, 
3:bigint
 Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type:

[09/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/mapjoin46.q.out
--
diff --git a/ql/src/test/results/clientpositive/mapjoin46.q.out 
b/ql/src/test/results/clientpositive/mapjoin46.q.out
index febb6c7..b6f8b19 100644
--- a/ql/src/test/results/clientpositive/mapjoin46.q.out
+++ b/ql/src/test/results/clientpositive/mapjoin46.q.out
@@ -124,14 +124,14 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob NULLNULLNULL
+1012   Car 102 2   Del
+1012   Car 103 2   Ema
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat 102 2   Del
 99 2   Mat 103 2   Ema
-1001   Bob NULLNULLNULL
-1012   Car 102 2   Del
-1012   Car 103 2   Ema
+NULL   NULLNoneNULLNULLNULL
 PREHOOK: query: EXPLAIN
 SELECT *
 FROM test1_n4 LEFT OUTER JOIN test2_n2
@@ -234,12 +234,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob NULLNULLNULL
+1012   Car 102 2   Del
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat NULLNULLNULL
-1001   Bob NULLNULLNULL
-1012   Car 102 2   Del
+NULL   NULLNoneNULLNULLNULL
 Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
 PREHOOK: query: EXPLAIN
 SELECT *
@@ -340,12 +340,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
+1001   Bob 102 2   Del
+1012   Car 102 2   Del
 98 NULLNoneNULLNULLNULL
 99 0   Alice   NULLNULLNULL
 99 2   Mat NULLNULLNULL
-1001   Bob 102 2   Del
-1012   Car 102 2   Del
+NULL   NULLNoneNULLNULLNULL
 PREHOOK: query: EXPLAIN
 SELECT *
 FROM test1_n4 RIGHT OUTER JOIN test2_n2
@@ -430,10 +430,10 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-99 2   Mat 102 2   Del
 1012   Car 102 2   Del
-99 2   Mat 103 2   Ema
 1012   Car 103 2   Ema
+99 2   Mat 102 2   Del
+99 2   Mat 103 2   Ema
 NULL   NULLNULL104 3   Fli
 NULL   NULLNULL105 NULLNone
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
@@ -528,10 +528,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNoneNULLNULLNULL
-98 NULLNoneNULLNULLNULL
-99 0   Alice   NULLNULLNULL
-99 2   Mat NULLNULLNULL
 1001   Bob 102 2   Del
 1001   Bob 103 2   Ema
 1001   Bob 104 3   Fli
@@ -540,6 +536,10 @@ NULL   NULLNoneNULLNULLNULL
 1012   Car 103 2   Ema
 1012   Car 104 3   Fli
 1012   Car 105 NULLNone
+98 NULLNoneNULLNULLNULL
+99 0   Alice   NULLNULLNULL
+99 2   Mat NULLNULLNULL
+NULL   NULLNoneNULLNULLNULL
 Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
 PREHOOK: query: EXPLAIN
 SELECT *
@@ -635,11 +635,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1_n4
 POSTHOOK: Input: default@test2_n2
  A masked pattern was here 
-NULL   NULLNone102 2   Del
-98 NULLNone102 2   Del
-99 0   Alice   102 2   Del
-99 2   Mat 102 2   Del
-99 2   Mat 103 2   Ema
 1001   Bob 102 2   Del
 1001   Bob 103 2   Ema
 1001   Bob 104 3   Fli
@@ -648,6 +643,11 @@ NULL   NULLNone102 2   Del
 1012   Car 103 2   Ema
 1012   Car 104 3   Fli
 1012   Car 105 NULLNone
+98 NULLNone102 2   Del
+99 0   Alice   102 2   Del
+99 2   Mat 102 2   Del
+99 2   Mat 103 2   Ema
+NULL   NULLNone102 2   Del
 Warning:

[36/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/vector_join30.q
--
diff --git a/ql/src/test/queries/clientpositive/vector_join30.q 
b/ql/src/test/queries/clientpositive/vector_join30.q
index 9672a47..74c4433 100644
--- a/ql/src/test/queries/clientpositive/vector_join30.q
+++ b/ql/src/test/queries/clientpositive/vector_join30.q
@@ -11,7 +11,7 @@ SET hive.auto.convert.join.noconditionaltask.size=10;
 
 CREATE TABLE orcsrc_n0 STORED AS ORC AS SELECT * FROM src;
 
-explain vectorization expression
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -19,14 +19,14 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
-FROM 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN 
-(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
-ON (x.key = Y.key)
-select sum(hash(Y.key,Y.value));
+-- FROM 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- JOIN 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- select sum(hash(Y.key,Y.value));
 
-explain vectorization expression
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 LEFT OUTER JOIN
@@ -34,116 +34,238 @@ LEFT OUTER JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
+-- FROM 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- LEFT OUTER JOIN 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-LEFT OUTER JOIN 
+RIGHT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
-explain vectorization expression
+-- FROM 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- RIGHT OUTER JOIN 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN
+FULL OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value));
 
+-- FROM 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- FULL OUTER JOIN 
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-RIGHT OUTER JOIN 
+JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
+JOIN
+(SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain vectorization expression
+-- FROM
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+-- ON (x.key = Z.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
-JOIN
+LEFT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-FROM
+-- FROM
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- LEFT OUTER JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+-- ON (x.key = Z.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
+FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
+LEFT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
-JOIN
+LEFT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-explain vectorization expression
+-- FROM
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- LEFT OUTER JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- LEFT OUTER JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+-- ON (x.key = Z.key)
+-- select sum(hash(Y.key,Y.value));
+
+explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
-JOIN
+LEFT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
 ON (x.key = Y.key)
-LEFT OUTER JOIN
+RIGHT OUTER JOIN
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
 ON (x.key = Z.key)
 select sum(hash(Y.key,Y.value));
 
-FROM
+-- FROM
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
+-- LEFT OUTER JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Y
+-- ON (x.key = Y.key)
+-- RIGHT OUTER JOIN
+-- (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by value) Z
+-- ON (x.key = Z.key)
+-- select sum(hash(Y.key,Y.value));
+

[20/47] hive git commit: HIVE-18908: FULL OUTER JOIN to MapJoin (Matt McCline, reviewed by Teddy Choi)

2018-09-16 Thread mmccline

http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/results/clientpositive/llap/vector_join30.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/vector_join30.q.out 
b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
index 4b2f06f..9238bc7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_join30.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_join30.q.out
@@ -10,7 +10,7 @@ POSTHOOK: Output: database:default
 POSTHOOK: Output: default@orcsrc_n0
 POSTHOOK: Lineage: orcsrc_n0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
 POSTHOOK: Lineage: orcsrc_n0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -18,7 +18,7 @@ JOIN
 ON (x.key = Y.key)
 select sum(hash(Y.key,Y.value))
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 FROM 
 (SELECT orcsrc_n0.* FROM orcsrc_n0 sort by key) x
 JOIN
@@ -51,6 +51,7 @@ STAGE PLANS:
   Statistics: Num rows: 500 Data size: 87584 Basic stats: 
COMPLETE Column stats: NONE
   TableScan Vectorization:
   native: true
+  vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct]
   Filter Operator
 Filter Vectorization:
 className: VectorFilterOperator
@@ -72,6 +73,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkStringOperator
+keyColumns: 0:string
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 475 Data size: 83204 Basic 
stats: COMPLETE Column stats: NONE
@@ -86,6 +88,12 @@ STAGE PLANS:
 allNative: true
 usesVectorUDFAdaptor: false
 vectorized: true
+rowBatchContext:
+dataColumnCount: 2
+includeColumns: [0]
+dataColumns: key:string, value:string
+partitionColumnCount: 0
+scratchColumnTypeNames: []
 Map 2 
 Map Operator Tree:
 TableScan
@@ -94,6 +102,7 @@ STAGE PLANS:
   Statistics: Num rows: 500 Data size: 175168 Basic stats: 
COMPLETE Column stats: NONE
   TableScan Vectorization:
   native: true
+  vectorizationSchemaColumns: [0:key:string, 
1:value:string, 2:ROW__ID:struct]
   Filter Operator
 Filter Vectorization:
 className: VectorFilterOperator
@@ -116,9 +125,15 @@ STAGE PLANS:
   0 _col0 (type: string)
   1 _col0 (type: string)
 Map Join Vectorization:
+bigTableKeyColumns: 0:string
+bigTableRetainColumnNums: [0, 1]
+bigTableValueColumns: 0:string, 1:string
 className: VectorMapJoinInnerBigOnlyStringOperator
 native: true
 nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS 
true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS 
true
+nonOuterSmallTableKeyMapping: []
+projectedOutput: 0:string, 1:string
+hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col2, _col3
 input vertices:
   0 Map 1
@@ -141,6 +156,7 @@ STAGE PLANS:
 className: VectorReduceSinkEmptyKeyOperator
 native: true
 nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+valueColumns: 0:bigint

46 matches

Mail list logo