hive git commit: HIVE-17921 Aggregation with struct in LLAP produces wrong result (Saurabh Seth via Eugene Koifman)

2018-09-10 Thread ekoifman
Repository: hive
Updated Branches:
  refs/heads/master 346c0ce44 -> ad93cfd75


HIVE-17921 Aggregation with struct in LLAP produces wrong result (Saurabh Seth via Eugene Koifman)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad93cfd7
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad93cfd7
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad93cfd7

Branch: refs/heads/master
Commit: ad93cfd753276ade6ec346470a83b7b05c60339b
Parents: 346c0ce
Author: Eugene Koifman 
Authored: Mon Sep 10 13:38:00 2018 -0700
Committer: Eugene Koifman 
Committed: Mon Sep 10 13:38:27 2018 -0700

--
 ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java| 3 ++-
 .../results/clientpositive/llap/acid_vectorization_original.q.out | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ad93cfd7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
index 64428f0..bce7977 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.ColumnarSplit;
@@ -243,7 +244,7 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit
   public boolean canUseLlapIo(Configuration conf) {
     final boolean hasDelta = deltas != null && !deltas.isEmpty();
     final boolean isAcidRead = AcidUtils.isFullAcidScan(conf);
-    final boolean isVectorized = HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ENABLED);
+    final boolean isVectorized = Utilities.getIsVectorized(conf);
     Boolean isSplitUpdate = null;
     if (isAcidRead) {
       final AcidUtils.AcidOperationalProperties acidOperationalProperties
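For context: the one-line change above replaces a session-wide flag check with a per-task check. A minimal sketch of the distinction, using only the two calls that appear in this diff (the comments paraphrase intent; this is not quoted Hive code):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
    import org.apache.hadoop.hive.ql.exec.Utilities;

    class LlapIoCheckSketch {
      // Before: trusts the session-wide switch, which can stay true even when
      // the compiler has fallen back to row mode for this particular task.
      static boolean oldCheck(Configuration conf) {
        return HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ENABLED);
      }

      // After: asks whether the task that owns this split actually compiled
      // to a vectorized plan.
      static boolean newCheck(Configuration conf) {
        return Utilities.getIsVectorized(conf);
      }
    }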

http://git-wip-us.apache.org/repos/asf/hive/blob/ad93cfd7/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
--
diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
index 57ff575..00be86c 100644
--- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
+++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out
@@ -718,7 +718,6 @@ POSTHOOK: query: select ROW__ID, count(*) from over10k_orc_bucketed group by ROW
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over10k_orc_bucketed
 #### A masked pattern was here ####
-NULL	6
 PREHOOK: query: select ROW__ID, * from over10k_orc_bucketed where ROW__ID is null
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over10k_orc_bucketed



hive git commit: HIVE-20506 - HOS times out when cluster is full while Hive-on-MR waits

2018-09-10 Thread brock
Repository: hive
Updated Branches:
  refs/heads/master afb61aebf -> 346c0ce44


HIVE-20506 - HOS times out when cluster is full while Hive-on-MR waits

(Brock Noland reviewed by Sahil Takiar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/346c0ce4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/346c0ce4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/346c0ce4

Branch: refs/heads/master
Commit: 346c0ce44d89ab030400e017f3e8ec6000344a6e
Parents: afb61ae
Author: Brock Noland 
Authored: Mon Sep 10 15:31:22 2018 -0500
Committer: Brock Noland 
Committed: Mon Sep 10 15:31:57 2018 -0500

--
 .../spark/session/SparkSessionManagerImpl.java  |   2 +-
 .../hive/spark/client/SparkClientFactory.java   |   4 +-
 .../spark/client/SparkSubmitSparkClient.java|  27 -
 .../apache/hive/spark/client/rpc/RpcServer.java | 116 ++-
 .../hive/spark/client/TestSparkClient.java  |   2 +-
 .../apache/hive/spark/client/rpc/TestRpc.java   |  85 ++
 6 files changed, 207 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java
index 79a56bd..8dae54d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java
@@ -96,7 +96,7 @@ public class SparkSessionManagerImpl implements SparkSessionManager {
       startTimeoutThread();
       Map<String, String> sparkConf = HiveSparkClientFactory.initiateSparkConf(hiveConf, null);
       try {
-        SparkClientFactory.initialize(sparkConf);
+        SparkClientFactory.initialize(sparkConf, hiveConf);
         inited = true;
       } catch (IOException e) {
         throw new HiveException("Error initializing SparkClientFactory", e);

http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java
--
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java
index 1974e88..54ecdf0 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java
@@ -47,12 +47,12 @@ public final class SparkClientFactory {
    *
    * @param conf Map containing configuration parameters for the client library.
    */
-  public static void initialize(Map<String, String> conf) throws IOException {
+  public static void initialize(Map<String, String> conf, HiveConf hiveConf) throws IOException {
     if (server == null) {
       synchronized (serverLock) {
         if (server == null) {
           try {
-            server = new RpcServer(conf);
+            server = new RpcServer(conf, hiveConf);
           } catch (InterruptedException ie) {
             throw Throwables.propagate(ie);
           }
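The initialize() body above is the classic double-checked locking idiom. A self-contained sketch of the pattern with hypothetical names (note the volatile field, which the idiom requires for safe publication; whether the real server field is declared volatile is not visible in this excerpt):

    final class LazyServerHolder {
      private static volatile Object server;               // volatile makes the lock-free read safe
      private static final Object serverLock = new Object();

      static Object getOrCreate() {
        if (server == null) {                              // first check, no lock taken
          synchronized (serverLock) {
            if (server == null) {                          // second check, under the lock
              server = new Object();                       // stand-in for new RpcServer(conf, hiveConf)
            }
          }
        }
        return server;
      }
    }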

http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java
--
diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java
index 7a6e77b..1879829 100644
--- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java
+++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java
@@ -31,6 +31,8 @@ import java.util.Map;
 import java.util.concurrent.Callable;
 import java.util.concurrent.Future;
 import java.util.concurrent.FutureTask;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 
 import org.apache.commons.lang3.StringUtils;
 
@@ -51,6 +53,7 @@ class SparkSubmitSparkClient extends AbstractSparkClient {
 
   private static final Logger LOG = LoggerFactory.getLogger(SparkSubmitSparkClient.class);
 
+  private static final Pattern YARN_APPLICATION_ID_REGEX = Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)");
   private static final String SPARK_HOME_ENV = "SPARK_HOME";
   private static final String SPARK_HOME_KEY = "spark.home";
 
@@ -191,17 +194,37 @@ class SparkSubmitSparkClient extends AbstractSparkClient {
 final Process child = pb.start();
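The new YARN_APPLICATION_ID_REGEX constant above suggests the client scrapes the YARN application id out of spark-submit output. A quick self-contained demo of the same pattern (the log line is invented for illustration):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    class YarnAppIdProbe {
      // Same pattern as the new constant in SparkSubmitSparkClient.
      static final Pattern YARN_APPLICATION_ID_REGEX =
          Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)");

      public static void main(String[] args) {
        String line = "INFO Client: Submitted application application_1536612345678_0042 to ResourceManager";
        Matcher m = YARN_APPLICATION_ID_REGEX.matcher(line);
        if (m.find()) {
          System.out.println(m.group(1));                  // application_1536612345678_0042
        }
      }
    }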
 

hive git commit: HIVE-20020: Hive contrib jar should not be in lib (Alice Fan, reviewed by Aihua Xu)

2018-09-10 Thread aihuaxu
Repository: hive
Updated Branches:
  refs/heads/master b60b13f97 -> afb61aebf


HIVE-20020: Hive contrib jar should not be in lib (Alice Fan, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/afb61aeb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/afb61aeb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/afb61aeb

Branch: refs/heads/master
Commit: afb61aebfd475e2cb4f7935f94f765ae53bb174f
Parents: b60b13f
Author: Aihua Xu 
Authored: Mon Sep 10 11:31:16 2018 -0700
Committer: Aihua Xu 
Committed: Mon Sep 10 11:35:14 2018 -0700

--
 packaging/src/main/assembly/bin.xml | 10 ++
 1 file changed, 10 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/afb61aeb/packaging/src/main/assembly/bin.xml
--
diff --git a/packaging/src/main/assembly/bin.xml b/packaging/src/main/assembly/bin.xml
index eeed3ec..fceb1be 100644
--- a/packaging/src/main/assembly/bin.xml
+++ b/packaging/src/main/assembly/bin.xml
@@ -47,6 +47,7 @@
         <exclude>co.cask.tephra:*</exclude>
         <exclude>commons-configuration:commons-configuration</exclude>
         <exclude>org.apache.hive:hive-jdbc:*:standalone</exclude>
+        <exclude>org.apache.hive:hive-contrib</exclude>
       </excludes>
     </dependencySet>
 
@@ -59,6 +60,15 @@
       </includes>
     </dependencySet>
     <dependencySet>
+      <outputDirectory>contrib/</outputDirectory>
+      <useProjectArtifact>false</useProjectArtifact>
+      <unpack>false</unpack>
+      <useTransitiveFiltering>true</useTransitiveFiltering>
+      <includes>
+        <include>org.apache.hive:hive-contrib</include>
+      </includes>
+    </dependencySet>
+    <dependencySet>
       <outputDirectory>lib</outputDirectory>
       <useProjectArtifact>false</useProjectArtifact>
       <unpack>false</unpack>



hive git commit: HIVE-18724: Improve error handling for subqueries referencing columns(correlated) of its grand-parent query (Igor Kryvenko, reviewed by Vineet Garg)

2018-09-10 Thread vgarg
Repository: hive
Updated Branches:
  refs/heads/master c7d5606b5 -> b60b13f97


HIVE-18724: Improve error handling for subqueries referencing columns(correlated) of its grand-parent query (Igor Kryvenko, reviewed by Vineet Garg)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b60b13f9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b60b13f9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b60b13f9

Branch: refs/heads/master
Commit: b60b13f9714395ee527593f288475e7507209e15
Parents: c7d5606
Author: Igor Kryvenko 
Authored: Mon Sep 10 11:32:27 2018 -0700
Committer: Vineet Garg 
Committed: Mon Sep 10 11:33:05 2018 -0700

--
 .../queries/clientnegative/subquery_correlated_grand_parent.q   | 5 +
 .../clientnegative/subquery_correlated_grand_parent.q.out   | 1 +
 2 files changed, 6 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/b60b13f9/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q
--
diff --git a/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q b/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q
new file mode 100644
index 0000000..0aabff1
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q
@@ -0,0 +1,5 @@
+--! qt:dataset:part
+
+select t1.p_name from part t1 where t1.p_name IN (select t2.p_name from part t2 where t2.p_name IN
+(select max(t3.p_name) from part t3, part t4 where t3.p_name=t2.p_name and t3.p_name=t1.p_name))
+

http://git-wip-us.apache.org/repos/asf/hive/blob/b60b13f9/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out
--
diff --git a/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out b/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out
new file mode 100644
index 0000000..f93d84d
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:89 Invalid table alias or column reference 't1': (possible column names are: t3.p_partkey, t3.p_name, t3.p_mfgr, t3.p_brand, t3.p_type, t3.p_size, t3.p_container, t3.p_retailprice, t3.p_comment, t4.p_partkey, t4.p_name, t4.p_mfgr, t4.p_brand, t4.p_type, t4.p_size, t4.p_container, t4.p_retailprice, t4.p_comment)



hive git commit: HIVE-20481: Add the Kafka Key record as part of the row (Slim Bouguerra, reviewed by Vineet Garg)

2018-09-10 Thread vgarg
Repository: hive
Updated Branches:
  refs/heads/master 116d2393f -> c7d5606b5


HIVE-20481: Add the Kafka Key record as part of the row (Slim Bouguerra, reviewed by Vineet Garg)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c7d5606b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c7d5606b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c7d5606b

Branch: refs/heads/master
Commit: c7d5606b53f0570b1101c08930da627331ac89fe
Parents: 116d239
Author: Slim Bouguerra 
Authored: Mon Sep 10 11:20:59 2018 -0700
Committer: Vineet Garg 
Committed: Mon Sep 10 11:22:30 2018 -0700

--
 .../hive/kafka/SingleNodeKafkaCluster.java  |   7 +-
 .../hadoop/hive/kafka/GenericKafkaSerDe.java|  44 +-
 .../hadoop/hive/kafka/KafkaJsonSerDe.java   |  19 +--
 .../hive/kafka/KafkaPullerRecordReader.java |   8 +-
 .../hadoop/hive/kafka/KafkaRecordWritable.java  |  53 ++--
 .../hadoop/hive/kafka/KafkaScanTrimmer.java |  13 +-
 .../hadoop/hive/kafka/KafkaStorageHandler.java  |  73 +++---
 .../hive/kafka/KafkaStorageHandlerInfo.java |  71 ++
 .../hadoop/hive/kafka/KafkaStreamingUtils.java  | 136 +++
 .../hive/kafka/KafkaPullerInputSplitTest.java   |   4 +-
 .../hive/kafka/KafkaRecordIteratorTest.java |  38 +++---
 .../hive/kafka/KafkaRecordWritableTest.java |  34 -
 .../hadoop/hive/kafka/KafkaScanTrimmerTest.java |  22 +--
 .../hive/kafka/KafkaStreamingUtilsTest.java |  55 +++-
 .../hive/ql/metadata/StorageHandlerInfo.java|   5 -
 .../clientpositive/kafka_storage_handler.q  |  20 ++-
 .../druid/kafka_storage_handler.q.out   | 124 -
 .../ptest2/conf/deployed/master-mr2.properties  |   2 +-
 18 files changed, 534 insertions(+), 194 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c7d5606b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java
--
diff --git a/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java
index c9339b5..3f2c9a7 100644
--- a/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java
+++ b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java
@@ -10,6 +10,7 @@ import kafka.utils.ZkUtils;
 import org.apache.hadoop.service.AbstractService;
 import org.apache.kafka.clients.producer.KafkaProducer;
 import org.apache.kafka.clients.producer.ProducerRecord;
+import org.apache.kafka.clients.producer.RecordMetadata;
 import org.apache.kafka.common.serialization.ByteArraySerializer;
 import org.apache.kafka.common.serialization.StringSerializer;
 
@@ -26,6 +27,10 @@ import java.io.IOException;
 import java.nio.charset.Charset;
 import java.util.List;
 import java.util.Properties;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.stream.IntStream;
 
 /**
@@ -100,7 +105,7 @@ public class SingleNodeKafkaCluster extends AbstractService {
     )){
       List<String> events = Files.readLines(datafile, Charset.forName("UTF-8"));
       for(String event : events){
-        producer.send(new ProducerRecord<>(topicName, event));
+        producer.send(new ProducerRecord<>(topicName, "key", event));
       }
     } catch (IOException e) {
       Throwables.propagate(e);
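The test cluster now attaches a key to every record it produces. For reference, a minimal standalone producer doing the same thing with the stock Kafka client (the broker address and topic below are placeholders, not values from this commit):

    import java.util.Properties;

    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    import org.apache.kafka.common.serialization.StringSerializer;

    class KeyedSendSketch {
      public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");  // placeholder broker
        props.put("key.serializer", StringSerializer.class.getName());
        props.put("value.serializer", StringSerializer.class.getName());
        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
          // Three-argument form: topic, key, value, as in the change above.
          producer.send(new ProducerRecord<>("test-topic", "key", "some event payload"));
        }
      }
    }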

http://git-wip-us.apache.org/repos/asf/hive/blob/c7d5606b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java
--
diff --git a/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java
index e7ea53f..a0c79b3 100644
--- a/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java
+++ b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.kafka;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
-import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericRecord;
@@ -40,11 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import 

hive git commit: HIVE-20489: Recursive calls to intern path strings causes parse to hang (Janaki Lahorani, reviewed by Yongzhi Chen and Naveen Gangam)

2018-09-10 Thread ngangam
Repository: hive
Updated Branches:
  refs/heads/master de9aaf607 -> c30dcbb4b


HIVE-20489: Recursive calls to intern path strings causes parse to hang (Janaki Lahorani, reviewed by Yongzhi Chen and Naveen Gangam)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c30dcbb4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c30dcbb4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c30dcbb4

Branch: refs/heads/master
Commit: c30dcbb4bdaedb9c59b1078ece63e4a61c7a3dbe
Parents: de9aaf6
Author: Naveen Gangam 
Authored: Mon Sep 10 09:40:01 2018 -0400
Committer: Naveen Gangam 
Committed: Mon Sep 10 09:40:21 2018 -0400

--
 .../hadoop/hive/ql/exec/util/DAGTraversal.java  |   5 +
 .../hive/ql/optimizer/GenMapRedUtils.java   | 105 ++-
 .../hadoop/hive/ql/parse/TaskCompiler.java  |   9 +-
 3 files changed, 64 insertions(+), 55 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/c30dcbb4/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java
index 6dce835..cb5dc2e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.exec.util;
 
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 
 import java.io.Serializable;
@@ -38,6 +39,10 @@ public class DAGTraversal {
         if (function.skipProcessing(task)) {
           continue;
         }
+        // Add list tasks from conditional tasks
+        if (task instanceof ConditionalTask) {
+          children.addAll(((ConditionalTask) task).getListTasks());
+        }
         if (task.getDependentTasks() != null) {
           children.addAll(task.getDependentTasks());
         }
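The fix teaches the walk that ConditionalTask hides its children behind getListTasks() rather than getDependentTasks(). A self-contained sketch of the traversal shape (Task and ConditionalTask here are stand-ins, not the real Hive signatures, and the real walk also de-duplicates visits via skipProcessing, omitted here):

    import java.util.ArrayDeque;
    import java.util.Deque;
    import java.util.List;

    class DagWalkSketch {
      interface Task { List<Task> getDependentTasks(); }
      interface ConditionalTask extends Task { List<Task> getListTasks(); }

      static void traverse(List<Task> roots) {
        Deque<Task> pending = new ArrayDeque<>(roots);
        while (!pending.isEmpty()) {
          Task task = pending.poll();
          // ... process task ...
          if (task instanceof ConditionalTask) {
            // Without this branch, anything reachable only through a
            // conditional task's list tasks is silently skipped.
            pending.addAll(((ConditionalTask) task).getListTasks());
          }
          if (task.getDependentTasks() != null) {
            pending.addAll(task.getDependentTasks());
          }
        }
      }
    }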

http://git-wip-us.apache.org/repos/asf/hive/blob/c30dcbb4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index d887124..e3ae0bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
+import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.merge.MergeFileWork;
@@ -894,64 +895,66 @@ public final class GenMapRedUtils {
   }
 
   /**
-   * Called at the end of TaskCompiler::compile to derive final
-   * explain attributes based on previous compilation.
+   * Called at the end of TaskCompiler::compile
+   * This currently does the following for each map work
+   *   1.  Intern the table descriptors of the partitions
+   *   2.  derive final explain attributes based on previous compilation.
+   *
+   * The original implementation had 2 functions internTableDesc and deriveFinalExplainAttributes,
+   * respectively implementing 1 and 2 mentioned above.  This was done using recursion over the
+   * task graph.  The recursion was inefficient in a couple of ways.
+   *   - For large graphs the recursion was filling up the stack
+   *   - Instead of finding the mapworks, it was walking all possible paths from root
+   * causing a huge performance problem.
+   *
+   * This implementation combines internTableDesc and deriveFinalExplainAttributes into 1 call.
+   * This can be done because each refers to information within Map Work and performs a specific
+   * action.
+   *
+   * The revised implementation generates all the map works from all MapReduce tasks (getMRTasks),
+   * Spark Tasks (getSparkTasks) and Tez tasks (getTezTasks).  Then for each of those map works
+   * invokes the respective call.  getMRTasks, getSparkTasks and getTezTasks iteratively walks
+   * the task graph to find the respective map works.
+   *
+   * The iterative implementation of these functions was done as part of HIVE-17195.  Before
+   * HIVE-17195, these functions were recursive and had the same issue.  So, picking this patch
+   * for an older release will also require picking HIVE-17195 at the least.
    */
-  public static void 
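The hunk is cut off above, but the javadoc fully describes the new shape: gather every MapWork once, iteratively, then apply both per-work actions in a single pass. A sketch of that shape with stand-in types (not the real Hive signatures):

    import java.util.List;

    class SinglePassSketch {
      interface MapWork { }                                // stand-in for the Hive class

      static void finalizeCompile(List<MapWork> allMapWorks) {
        // allMapWorks is assumed to come from iterative helpers in the spirit
        // of the getMRTasks/getSparkTasks/getTezTasks named in the javadoc.
        for (MapWork w : allMapWorks) {
          internTableDescs(w);                             // action 1
          deriveFinalExplainAttributes(w);                 // action 2, same single walk
        }
      }

      static void internTableDescs(MapWork w) { /* intern partition table descriptors */ }
      static void deriveFinalExplainAttributes(MapWork w) { /* set explain attributes */ }
    }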

hive git commit: HIVE-20502: Fix NPE while running skewjoin_mapjoin10.q when column stats is used. (Daniel Voros via Zoltan Haindrich)

2018-09-10 Thread kgyrtkirk
Repository: hive
Updated Branches:
  refs/heads/master ff98a30ab -> de9aaf607


HIVE-20502: Fix NPE while running skewjoin_mapjoin10.q when column stats is used. (Daniel Voros via Zoltan Haindrich)

Signed-off-by: Zoltan Haindrich 


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de9aaf60
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de9aaf60
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de9aaf60

Branch: refs/heads/master
Commit: de9aaf6072424263c7691b9aa1bf61ea1ed2b2d3
Parents: ff98a30
Author: Daniel Voros 
Authored: Mon Sep 10 15:26:41 2018 +0200
Committer: Zoltan Haindrich 
Committed: Mon Sep 10 15:26:41 2018 +0200

--
 .../java/org/apache/hadoop/hive/ql/plan/JoinDesc.java   |  7 +++
 ql/src/test/results/clientpositive/quotedid_skew.q.out  |  3 +++
 .../test/results/clientpositive/skewjoin_mapjoin2.q.out |  3 +++
 .../clientpositive/skewjoin_union_remove_1.q.out| 12 
 .../clientpositive/skewjoin_union_remove_2.q.out|  4 
 ql/src/test/results/clientpositive/skewjoinopt1.q.out   | 12 
 ql/src/test/results/clientpositive/skewjoinopt10.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt11.q.out  |  6 ++
 ql/src/test/results/clientpositive/skewjoinopt12.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt14.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt16.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt17.q.out  |  6 ++
 ql/src/test/results/clientpositive/skewjoinopt19.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt2.q.out   | 12 
 ql/src/test/results/clientpositive/skewjoinopt20.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt21.q.out  |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt3.q.out   |  6 ++
 ql/src/test/results/clientpositive/skewjoinopt4.q.out   |  6 ++
 ql/src/test/results/clientpositive/skewjoinopt5.q.out   |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt6.q.out   |  3 +++
 ql/src/test/results/clientpositive/skewjoinopt7.q.out   |  4 
 ql/src/test/results/clientpositive/skewjoinopt8.q.out   |  4 
 22 files changed, 112 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
index b5ffcd9..4313a6b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java
@@ -690,6 +690,7 @@ public class JoinDesc extends AbstractOperatorDesc {
     aliasToOpInfo = joinDesc.aliasToOpInfo;
     leftInputJoin = joinDesc.leftInputJoin;
     streamAliases = joinDesc.streamAliases;
+    joinKeys = joinDesc.joinKeys;
   }
 
   public void setQBJoinTreeProps(QBJoinTree joinTree) {
@@ -716,6 +717,12 @@ public class JoinDesc extends AbstractOperatorDesc {
     aliasToOpInfo = new HashMap<String, Operator<? extends OperatorDesc>>(joinDesc.aliasToOpInfo);
     leftInputJoin = joinDesc.leftInputJoin;
     streamAliases = joinDesc.streamAliases == null ? null : new ArrayList<String>(joinDesc.streamAliases);
+    if (joinDesc.joinKeys != null) {
+      joinKeys = new ExprNodeDesc[joinDesc.joinKeys.length][];
+      for(int i = 0; i < joinDesc.joinKeys.length; i++) {
+        joinKeys[i] = joinDesc.joinKeys[i].clone();
+      }
+    }
   }
 
   public MemoryMonitorInfo getMemoryMonitorInfo() {
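The second hunk deep-copies joinKeys row by row. A tiny standalone illustration of the difference between a shallow and a per-row clone of a 2-D array (String stands in for ExprNodeDesc):

    class DeepCopySketch {
      public static void main(String[] args) {
        String[][] joinKeys = { { "k0" }, { "k1" } };

        String[][] shallow = joinKeys.clone();             // new outer array, rows still shared
        String[][] deep = new String[joinKeys.length][];
        for (int i = 0; i < joinKeys.length; i++) {
          deep[i] = joinKeys[i].clone();                   // per-row clone, as in the patch
        }

        shallow[0][0] = "mutated";
        System.out.println(joinKeys[0][0]);                // mutated: the row was shared
        System.out.println(deep[0][0]);                    // k0: the deep copy is isolated
      }
    }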

http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/test/results/clientpositive/quotedid_skew.q.out
--
diff --git a/ql/src/test/results/clientpositive/quotedid_skew.q.out b/ql/src/test/results/clientpositive/quotedid_skew.q.out
index 902be6b..a682644 100644
--- a/ql/src/test/results/clientpositive/quotedid_skew.q.out
+++ b/ql/src/test/results/clientpositive/quotedid_skew.q.out
@@ -163,6 +163,9 @@ STAGE PLANS:
         Join Operator
           condition map:
                Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string)
+            1 _col0 (type: string)
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE
           File Output Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out
--
diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out
index 78f20cc..fa0f615 100644
--- 

[2/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)

2018-09-10 Thread mmccline
HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff98a30a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff98a30a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff98a30a

Branch: refs/heads/master
Commit: ff98a30ab49c4eafe53974e03c9dd205c14ffee7
Parents: 494b771
Author: Matt McCline 
Authored: Mon Sep 10 04:24:35 2018 -0500
Committer: Matt McCline 
Committed: Mon Sep 10 04:24:35 2018 -0500

--
 .../fast/VectorMapJoinFastBytesHashKeyRef.java  | 178 ++
 .../fast/VectorMapJoinFastBytesHashMap.java | 141 +++--
 .../VectorMapJoinFastBytesHashMapStore.java | 559 +++
 .../VectorMapJoinFastBytesHashMultiSet.java | 132 -
 ...VectorMapJoinFastBytesHashMultiSetStore.java | 280 ++
 .../fast/VectorMapJoinFastBytesHashSet.java | 124 +++-
 .../VectorMapJoinFastBytesHashSetStore.java | 219 
 .../fast/VectorMapJoinFastBytesHashTable.java   | 148 ++---
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |   6 +-
 .../fast/TestVectorMapJoinFastBytesHashMap.java |   3 +
 .../fast/TestVectorMapJoinFastLongHashMap.java  |   3 +
 .../clientpositive/bucket_map_join_tez2.q   |   2 +-
 .../test/queries/clientpositive/tez_smb_main.q  |   3 +-
 .../results/clientpositive/llap/orc_llap.q.out  |  59 +-
 .../apache/hadoop/hive/serde2/WriteBuffers.java |  53 ++
 15 files changed, 1661 insertions(+), 249 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java
--
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java
new file mode 100644
index 0000000..dbfe518
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java
@@ -0,0 +1,178 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import org.apache.hadoop.hive.serde2.WriteBuffers;
+// import com.google.common.base.Preconditions;
+
+public class VectorMapJoinFastBytesHashKeyRef {
+
+  public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength,
+      WriteBuffers writeBuffers, WriteBuffers.Position readPos) {
+
+    // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0);
+
+    final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord);
+
+    writeBuffers.setReadPoint(absoluteOffset, readPos);
+
+    int actualKeyLength = KeyRef.getSmallKeyLength(refWord);
+    boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn);
+    if (!isKeyLengthSmall) {
+
+      // And, if current value is big we must read it.
+      actualKeyLength = writeBuffers.readVInt(readPos);
+    }
+
+    if (actualKeyLength != keyLength) {
+      return false;
+    }
+
+    // Our reading was positioned to the key.
+    if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  public static int calculateHashCode(long refWord, WriteBuffers writeBuffers,
+      WriteBuffers.Position readPos) {
+
+    // Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0);
+
+    final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord);
+
+    int actualKeyLength = KeyRef.getSmallKeyLength(refWord);
+    boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn);
+    final long keyAbsoluteOffset;
+    if (!isKeyLengthSmall) {
+
+      // Position after next relative offset (fixed length) to the key.
+      writeBuffers.setReadPoint(absoluteOffset, readPos);
+
+      // And, if current value is big we must 
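KeyRef itself is not part of this excerpt. For orientation only, a toy version of the idea the new class leans on: pack a byte offset and a "small" key length into one long, where an all-bits-on length field means "too long to inline, read a VInt at the data instead". The field widths below are invented, not the real layout:

    class KeyRefSketch {
      static final int SMALL_KEY_BITS = 8;                          // invented width
      static final long SMALL_KEY_ALL_BITS_ON = (1L << SMALL_KEY_BITS) - 1;

      static long pack(long absoluteOffset, int keyLength) {
        long lenField = keyLength < SMALL_KEY_ALL_BITS_ON ? keyLength : SMALL_KEY_ALL_BITS_ON;
        return (lenField << 48) | absoluteOffset;                   // offset assumed < 2^48
      }

      static int smallKeyLength(long refWord) {
        return (int) ((refWord >>> 48) & SMALL_KEY_ALL_BITS_ON);
      }

      public static void main(String[] args) {
        long ref = pack(1234L, 300);                                // 300 exceeds the inline max
        boolean mustReadVInt = smallKeyLength(ref) == SMALL_KEY_ALL_BITS_ON;
        System.out.println(mustReadVInt);                           // true
      }
    }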

[1/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)

2018-09-10 Thread mmccline
Repository: hive
Updated Branches:
  refs/heads/master 494b771ac -> ff98a30ab


http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
--
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
index 17d4bdb..79462a0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java
@@ -57,6 +57,11 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate {
       memSize += (2 * jdm.primitive1());
       return memSize;
     }
+    public void set(Position pos) {
+      buffer = pos.buffer;
+      bufferIndex = pos.bufferIndex;
+      offset = pos.offset;
+    }
   }
 
   Position writePos = new Position(); // Position where we'd write
@@ -552,6 +557,21 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate {
     return v;
   }
 
+  public long readNByteLong(int bytes, Position readPos) {
+    long v = 0;
+    if (isAllInOneReadBuffer(bytes, readPos)) {
+      for (int i = 0; i < bytes; ++i) {
+        v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff);
+      }
+      readPos.offset += bytes;
+    } else {
+      for (int i = 0; i < bytes; ++i) {
+        v = (v << 8) + (readNextByte(readPos) & 0xff);
+      }
+    }
+    return v;
+  }
+
   public void writeFiveByteULong(long offset, long v) {
 int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset;
 setWritePoint(offset);
@@ -574,10 +594,43 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate {
     writePos.offset = prevOffset;
   }
 
+  public void writeFiveByteULong(long v) {
+    if (isAllInOneWriteBuffer(5)) {
+      writePos.buffer[writePos.offset] = (byte)(v >>> 32);
+      writePos.buffer[writePos.offset + 1] = (byte)(v >>> 24);
+      writePos.buffer[writePos.offset + 2] = (byte)(v >>> 16);
+      writePos.buffer[writePos.offset + 3] = (byte)(v >>> 8);
+      writePos.buffer[writePos.offset + 4] = (byte)(v);
+      writePos.offset += 5;
+    } else {
+      write((byte)(v >>> 32));
+      write((byte)(v >>> 24));
+      write((byte)(v >>> 16));
+      write((byte)(v >>> 8));
+      write((byte)(v));
+    }
+  }
+
   public int readInt(long offset) {
     return (int)unsafeReadNByteLong(offset, 4);
   }
 
+  public int readInt(long offset, Position readPos) {
+    setReadPoint(offset, readPos);
+    long v = 0;
+    if (isAllInOneReadBuffer(4, readPos)) {
+      for (int i = 0; i < 4; ++i) {
+        v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff);
+      }
+      readPos.offset += 4;
+    } else {
+      for (int i = 0; i < 4; ++i) {
+        v = (v << 8) + (readNextByte(readPos) & 0xff);
+      }
+    }
+    return (int) v;
+  }
+
   @Override
   public void writeInt(long offset, int v) {
     int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset;
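The write side above emits the value big-endian, high byte first, and readNByteLong(5, readPos) reassembles it with the same shift-and-or loop. A standalone round-trip check of that byte layout, outside WriteBuffers (valid for any value below 2^40):

    class FiveByteULongSketch {
      static byte[] write(long v) {
        return new byte[] {
            (byte) (v >>> 32), (byte) (v >>> 24), (byte) (v >>> 16), (byte) (v >>> 8), (byte) v };
      }

      static long read(byte[] buf) {
        long v = 0;
        for (int i = 0; i < 5; i++) {
          v = (v << 8) + (buf[i] & 0xff);                  // same accumulation as readNByteLong
        }
        return v;
      }

      public static void main(String[] args) {
        long original = (1L << 39) + 123456789L;           // fits in five bytes
        System.out.println(read(write(original)) == original);   // true
      }
    }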