hive git commit: HIVE-17921 Aggregation with struct in LLAP produces wrong result (Saurabh Seth via Eugene Koifman)
Repository: hive Updated Branches: refs/heads/master 346c0ce44 -> ad93cfd75 HIVE-17921 Aggregation with struct in LLAP produces wrong result (Saurabh Seth via Eugene Koifman) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ad93cfd7 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ad93cfd7 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ad93cfd7 Branch: refs/heads/master Commit: ad93cfd753276ade6ec346470a83b7b05c60339b Parents: 346c0ce Author: Eugene Koifman Authored: Mon Sep 10 13:38:00 2018 -0700 Committer: Eugene Koifman Committed: Mon Sep 10 13:38:27 2018 -0700 -- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java| 3 ++- .../results/clientpositive/llap/acid_vectorization_original.q.out | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ad93cfd7/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java index 64428f0..bce7977 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSplit.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.AcidInputFormat; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.io.ColumnarSplit; @@ -243,7 +244,7 @@ public class OrcSplit extends FileSplit implements ColumnarSplit, LlapAwareSplit public boolean canUseLlapIo(Configuration conf) { final boolean hasDelta = deltas != null && !deltas.isEmpty(); final boolean isAcidRead = AcidUtils.isFullAcidScan(conf); -final boolean isVectorized = HiveConf.getBoolVar(conf, ConfVars.HIVE_VECTORIZATION_ENABLED); +final boolean isVectorized = Utilities.getIsVectorized(conf); Boolean isSplitUpdate = null; if (isAcidRead) { final AcidUtils.AcidOperationalProperties acidOperationalProperties http://git-wip-us.apache.org/repos/asf/hive/blob/ad93cfd7/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out -- diff --git a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out index 57ff575..00be86c 100644 --- a/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_vectorization_original.q.out @@ -718,7 +718,6 @@ POSTHOOK: query: select ROW__ID, count(*) from over10k_orc_bucketed group by ROW POSTHOOK: type: QUERY POSTHOOK: Input: default@over10k_orc_bucketed A masked pattern was here -NULL 6 PREHOOK: query: select ROW__ID, * from over10k_orc_bucketed where ROW__ID is null PREHOOK: type: QUERY PREHOOK: Input: default@over10k_orc_bucketed
hive git commit: HIVE-20506 - HOS times out when cluster is full while Hive-on-MR waits
Repository: hive Updated Branches: refs/heads/master afb61aebf -> 346c0ce44 HIVE-20506 - HOS times out when cluster is full while Hive-on-MR waits (Brock Noland reviewed by Sahil Takiar) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/346c0ce4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/346c0ce4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/346c0ce4 Branch: refs/heads/master Commit: 346c0ce44d89ab030400e017f3e8ec6000344a6e Parents: afb61ae Author: Brock Noland Authored: Mon Sep 10 15:31:22 2018 -0500 Committer: Brock Noland Committed: Mon Sep 10 15:31:57 2018 -0500 -- .../spark/session/SparkSessionManagerImpl.java | 2 +- .../hive/spark/client/SparkClientFactory.java | 4 +- .../spark/client/SparkSubmitSparkClient.java| 27 - .../apache/hive/spark/client/rpc/RpcServer.java | 116 ++- .../hive/spark/client/TestSparkClient.java | 2 +- .../apache/hive/spark/client/rpc/TestRpc.java | 85 ++ 6 files changed, 207 insertions(+), 29 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java index 79a56bd..8dae54d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/session/SparkSessionManagerImpl.java @@ -96,7 +96,7 @@ public class SparkSessionManagerImpl implements SparkSessionManager { startTimeoutThread(); Map sparkConf = HiveSparkClientFactory.initiateSparkConf(hiveConf, null); try { -SparkClientFactory.initialize(sparkConf); +SparkClientFactory.initialize(sparkConf, hiveConf); inited = true; } catch (IOException e) { throw new HiveException("Error initializing SparkClientFactory", e); http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java index 1974e88..54ecdf0 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientFactory.java @@ -47,12 +47,12 @@ public final class SparkClientFactory { * * @param conf Map containing configuration parameters for the client library. */ - public static void initialize(Map conf) throws IOException { + public static void initialize(Map conf, HiveConf hiveConf) throws IOException { if (server == null) { synchronized (serverLock) { if (server == null) { try { -server = new RpcServer(conf); +server = new RpcServer(conf, hiveConf); } catch (InterruptedException ie) { throw Throwables.propagate(ie); } http://git-wip-us.apache.org/repos/asf/hive/blob/346c0ce4/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java -- diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java index 7a6e77b..1879829 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkSubmitSparkClient.java @@ -31,6 +31,8 @@ import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; +import java.util.regex.Pattern; +import java.util.regex.Matcher; import org.apache.commons.lang3.StringUtils; @@ -51,6 +53,7 @@ class SparkSubmitSparkClient extends AbstractSparkClient { private static final Logger LOG = LoggerFactory.getLogger(SparkSubmitSparkClient.class); + private static final Pattern YARN_APPLICATION_ID_REGEX = Pattern.compile("\\s(application_[0-9]+_[0-9]+)(\\s|$)"); private static final String SPARK_HOME_ENV = "SPARK_HOME"; private static final String SPARK_HOME_KEY = "spark.home"; @@ -191,17 +194,37 @@ class SparkSubmitSparkClient extends AbstractSparkClient { final Process child = pb.start();
hive git commit: HIVE-20020: Hive contrib jar should not be in lib (Alice Fan, reviewed by Aihua Xu)
Repository: hive Updated Branches: refs/heads/master b60b13f97 -> afb61aebf HIVE-20020: Hive contrib jar should not be in lib (Alice Fan, reviewed by Aihua Xu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/afb61aeb Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/afb61aeb Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/afb61aeb Branch: refs/heads/master Commit: afb61aebfd475e2cb4f7935f94f765ae53bb174f Parents: b60b13f Author: Aihua Xu Authored: Mon Sep 10 11:31:16 2018 -0700 Committer: Aihua Xu Committed: Mon Sep 10 11:35:14 2018 -0700 -- packaging/src/main/assembly/bin.xml | 10 ++ 1 file changed, 10 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/afb61aeb/packaging/src/main/assembly/bin.xml -- diff --git a/packaging/src/main/assembly/bin.xml b/packaging/src/main/assembly/bin.xml index eeed3ec..fceb1be 100644 --- a/packaging/src/main/assembly/bin.xml +++ b/packaging/src/main/assembly/bin.xml @@ -47,6 +47,7 @@ co.cask.tephra:* commons-configuration:commons-configuration org.apache.hive:hive-jdbc:*:standalone +org.apache.hive:hive-contrib @@ -59,6 +60,15 @@ + contrib/ + false + false + true + +org.apache.hive:hive-contrib + + + lib false false
hive git commit: HIVE-18724: Improve error handling for subqueries referencing columns(correlated) of its grand-parent query (Igor Kryvenko, reviewed by Vineet Garg)
Repository: hive Updated Branches: refs/heads/master c7d5606b5 -> b60b13f97 HIVE-18724: Improve error handling for subqueries referencing columns(correlated) of its grand-parent query (Igor Kryvenko, reviewed by Vineet Garg) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b60b13f9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b60b13f9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b60b13f9 Branch: refs/heads/master Commit: b60b13f9714395ee527593f288475e7507209e15 Parents: c7d5606 Author: Igor Kryvenko Authored: Mon Sep 10 11:32:27 2018 -0700 Committer: Vineet Garg Committed: Mon Sep 10 11:33:05 2018 -0700 -- .../queries/clientnegative/subquery_correlated_grand_parent.q | 5 + .../clientnegative/subquery_correlated_grand_parent.q.out | 1 + 2 files changed, 6 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/b60b13f9/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q -- diff --git a/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q b/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q new file mode 100644 index 000..0aabff1 --- /dev/null +++ b/ql/src/test/queries/clientnegative/subquery_correlated_grand_parent.q @@ -0,0 +1,5 @@ +--! qt:dataset:part + +select t1.p_name from part t1 where t1.p_name IN (select t2.p_name from part t2 where t2.p_name IN +(select max(t3.p_name) from part t3, part t4 where t3.p_name=t2.p_name and t3.p_name=t1.p_name)) + http://git-wip-us.apache.org/repos/asf/hive/blob/b60b13f9/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out -- diff --git a/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out b/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out new file mode 100644 index 000..f93d84d --- /dev/null +++ b/ql/src/test/results/clientnegative/subquery_correlated_grand_parent.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10004]: org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException: Line 4:89 Invalid table alias or column reference 't1': (possible column names are: t3.p_partkey, t3.p_name, t3.p_mfgr, t3.p_brand, t3.p_type, t3.p_size, t3.p_container, t3.p_retailprice, t3.p_comment, t4.p_partkey, t4.p_name, t4.p_mfgr, t4.p_brand, t4.p_type, t4.p_size, t4.p_container, t4.p_retailprice, t4.p_comment)
hive git commit: HIVE-20481: Add the Kafka Key record as part of the row (Slim Bouguerra, reviewed by Vineet Garg)
Repository: hive Updated Branches: refs/heads/master 116d2393f -> c7d5606b5 HIVE-20481: Add the Kafka Key record as part of the row (Slim Bouguerra, reviewed by Vineet Garg) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c7d5606b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c7d5606b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c7d5606b Branch: refs/heads/master Commit: c7d5606b53f0570b1101c08930da627331ac89fe Parents: 116d239 Author: Slim Bouguerra Authored: Mon Sep 10 11:20:59 2018 -0700 Committer: Vineet Garg Committed: Mon Sep 10 11:22:30 2018 -0700 -- .../hive/kafka/SingleNodeKafkaCluster.java | 7 +- .../hadoop/hive/kafka/GenericKafkaSerDe.java| 44 +- .../hadoop/hive/kafka/KafkaJsonSerDe.java | 19 +-- .../hive/kafka/KafkaPullerRecordReader.java | 8 +- .../hadoop/hive/kafka/KafkaRecordWritable.java | 53 ++-- .../hadoop/hive/kafka/KafkaScanTrimmer.java | 13 +- .../hadoop/hive/kafka/KafkaStorageHandler.java | 73 +++--- .../hive/kafka/KafkaStorageHandlerInfo.java | 71 ++ .../hadoop/hive/kafka/KafkaStreamingUtils.java | 136 +++ .../hive/kafka/KafkaPullerInputSplitTest.java | 4 +- .../hive/kafka/KafkaRecordIteratorTest.java | 38 +++--- .../hive/kafka/KafkaRecordWritableTest.java | 34 - .../hadoop/hive/kafka/KafkaScanTrimmerTest.java | 22 +-- .../hive/kafka/KafkaStreamingUtilsTest.java | 55 +++- .../hive/ql/metadata/StorageHandlerInfo.java| 5 - .../clientpositive/kafka_storage_handler.q | 20 ++- .../druid/kafka_storage_handler.q.out | 124 - .../ptest2/conf/deployed/master-mr2.properties | 2 +- 18 files changed, 534 insertions(+), 194 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c7d5606b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java -- diff --git a/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java index c9339b5..3f2c9a7 100644 --- a/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java +++ b/itests/qtest-druid/src/main/java/org/apache/hive/kafka/SingleNodeKafkaCluster.java @@ -10,6 +10,7 @@ import kafka.utils.ZkUtils; import org.apache.hadoop.service.AbstractService; import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.clients.producer.RecordMetadata; import org.apache.kafka.common.serialization.ByteArraySerializer; import org.apache.kafka.common.serialization.StringSerializer; @@ -26,6 +27,10 @@ import java.io.IOException; import java.nio.charset.Charset; import java.util.List; import java.util.Properties; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.stream.IntStream; /** @@ -100,7 +105,7 @@ public class SingleNodeKafkaCluster extends AbstractService { )){ List events = Files.readLines(datafile, Charset.forName("UTF-8")); for(String event : events){ -producer.send(new ProducerRecord<>(topicName, event)); +producer.send(new ProducerRecord<>(topicName, "key", event)); } } catch (IOException e) { Throwables.propagate(e); http://git-wip-us.apache.org/repos/asf/hive/blob/c7d5606b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java -- diff --git a/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java index e7ea53f..a0c79b3 100644 --- a/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java +++ b/kafka-handler/src/java/org/apache/hadoop/hive/kafka/GenericKafkaSerDe.java @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.kafka; import com.google.common.base.Preconditions; import com.google.common.base.Supplier; import com.google.common.base.Suppliers; -import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -40,11 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import
hive git commit: HIVE-20489: Recursive calls to intern path strings causes parse to hang (Janaki Lahorani, reviewed by Yongzhi Chen and Naveen Gangam)
Repository: hive Updated Branches: refs/heads/master de9aaf607 -> c30dcbb4b HIVE-20489: Recursive calls to intern path strings causes parse to hang (Janaki Lahorani, reviewed by Yongzhi Chen and Naveen Gangam) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/c30dcbb4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/c30dcbb4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/c30dcbb4 Branch: refs/heads/master Commit: c30dcbb4bdaedb9c59b1078ece63e4a61c7a3dbe Parents: de9aaf6 Author: Naveen Gangam Authored: Mon Sep 10 09:40:01 2018 -0400 Committer: Naveen Gangam Committed: Mon Sep 10 09:40:21 2018 -0400 -- .../hadoop/hive/ql/exec/util/DAGTraversal.java | 5 + .../hive/ql/optimizer/GenMapRedUtils.java | 105 ++- .../hadoop/hive/ql/parse/TaskCompiler.java | 9 +- 3 files changed, 64 insertions(+), 55 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/c30dcbb4/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java index 6dce835..cb5dc2e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/util/DAGTraversal.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.util; +import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.Task; import java.io.Serializable; @@ -38,6 +39,10 @@ public class DAGTraversal { if (function.skipProcessing(task)) { continue; } +// Add list tasks from conditional tasks +if (task instanceof ConditionalTask) { + children.addAll(((ConditionalTask) task).getListTasks()); +} if (task.getDependentTasks() != null) { children.addAll(task.getDependentTasks()); } http://git-wip-us.apache.org/repos/asf/hive/blob/c30dcbb4/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index d887124..e3ae0bf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -64,6 +64,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; import org.apache.hadoop.hive.ql.exec.spark.SparkTask; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.merge.MergeFileWork; @@ -894,64 +895,66 @@ public final class GenMapRedUtils { } /** - * Called at the end of TaskCompiler::compile to derive final - * explain attributes based on previous compilation. + * Called at the end of TaskCompiler::compile + * This currently does the following for each map work + * 1. Intern the table descriptors of the partitions + * 2. derive final explain attributes based on previous compilation. + * + * The original implementation had 2 functions internTableDesc and deriveFinalExplainAttributes, + * respectively implementing 1 and 2 mentioned above. This was done using recursion over the + * task graph. The recursion was inefficient in a couple of ways. + * - For large graphs the recursion was filling up the stack + * - Instead of finding the mapworks, it was walking all possible paths from root + * causing a huge performance problem. + * + * This implementation combines internTableDesc and deriveFinalExplainAttributes into 1 call. + * This can be done because each refers to information within Map Work and performs a specific + * action. + * + * The revised implementation generates all the map works from all MapReduce tasks (getMRTasks), + * Spark Tasks (getSparkTasks) and Tez tasks (getTezTasks). Then for each of those map works + * invokes the respective call. getMRTasks, getSparkTasks and getTezTasks iteratively walks + * the task graph to find the respective map works. + * + * The iterative implementation of these functions was done as part of HIVE-17195. Before + * HIVE-17195, these functions were recursive and had the same issue. So, picking this patch + * for an older release will also require picking HIVE-17195 at the least. */ - public static void
hive git commit: HIVE-20502: Fix NPE while running skewjoin_mapjoin10.q when column stats is used. (Daniel Voros via Zoltan Haindrich)
Repository: hive Updated Branches: refs/heads/master ff98a30ab -> de9aaf607 HIVE-20502: Fix NPE while running skewjoin_mapjoin10.q when column stats is used. (Daniel Voros via Zoltan Haindrich) Signed-off-by: Zoltan Haindrich Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/de9aaf60 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/de9aaf60 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/de9aaf60 Branch: refs/heads/master Commit: de9aaf6072424263c7691b9aa1bf61ea1ed2b2d3 Parents: ff98a30 Author: Daniel Voros Authored: Mon Sep 10 15:26:41 2018 +0200 Committer: Zoltan Haindrich Committed: Mon Sep 10 15:26:41 2018 +0200 -- .../java/org/apache/hadoop/hive/ql/plan/JoinDesc.java | 7 +++ ql/src/test/results/clientpositive/quotedid_skew.q.out | 3 +++ .../test/results/clientpositive/skewjoin_mapjoin2.q.out | 3 +++ .../clientpositive/skewjoin_union_remove_1.q.out| 12 .../clientpositive/skewjoin_union_remove_2.q.out| 4 ql/src/test/results/clientpositive/skewjoinopt1.q.out | 12 ql/src/test/results/clientpositive/skewjoinopt10.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt11.q.out | 6 ++ ql/src/test/results/clientpositive/skewjoinopt12.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt14.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt16.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt17.q.out | 6 ++ ql/src/test/results/clientpositive/skewjoinopt19.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt2.q.out | 12 ql/src/test/results/clientpositive/skewjoinopt20.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt21.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out | 6 ++ ql/src/test/results/clientpositive/skewjoinopt4.q.out | 6 ++ ql/src/test/results/clientpositive/skewjoinopt5.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt6.q.out | 3 +++ ql/src/test/results/clientpositive/skewjoinopt7.q.out | 4 ql/src/test/results/clientpositive/skewjoinopt8.q.out | 4 22 files changed, 112 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index b5ffcd9..4313a6b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -690,6 +690,7 @@ public class JoinDesc extends AbstractOperatorDesc { aliasToOpInfo = joinDesc.aliasToOpInfo; leftInputJoin = joinDesc.leftInputJoin; streamAliases = joinDesc.streamAliases; +joinKeys = joinDesc.joinKeys; } public void setQBJoinTreeProps(QBJoinTree joinTree) { @@ -716,6 +717,12 @@ public class JoinDesc extends AbstractOperatorDesc { aliasToOpInfo = new HashMap>(joinDesc.aliasToOpInfo); leftInputJoin = joinDesc.leftInputJoin; streamAliases = joinDesc.streamAliases == null ? null : new ArrayList(joinDesc.streamAliases); +if (joinDesc.joinKeys != null) { + joinKeys = new ExprNodeDesc[joinDesc.joinKeys.length][]; + for(int i = 0; i < joinDesc.joinKeys.length; i++) { +joinKeys[i] = joinDesc.joinKeys[i].clone(); + } +} } public MemoryMonitorInfo getMemoryMonitorInfo() { http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/test/results/clientpositive/quotedid_skew.q.out -- diff --git a/ql/src/test/results/clientpositive/quotedid_skew.q.out b/ql/src/test/results/clientpositive/quotedid_skew.q.out index 902be6b..a682644 100644 --- a/ql/src/test/results/clientpositive/quotedid_skew.q.out +++ b/ql/src/test/results/clientpositive/quotedid_skew.q.out @@ -163,6 +163,9 @@ STAGE PLANS: Join Operator condition map: Inner Join 0 to 1 + keys: +0 _col0 (type: string) +1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator http://git-wip-us.apache.org/repos/asf/hive/blob/de9aaf60/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out -- diff --git a/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out b/ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index 78f20cc..fa0f615 100644 ---
[2/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff98a30a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff98a30a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff98a30a Branch: refs/heads/master Commit: ff98a30ab49c4eafe53974e03c9dd205c14ffee7 Parents: 494b771 Author: Matt McCline Authored: Mon Sep 10 04:24:35 2018 -0500 Committer: Matt McCline Committed: Mon Sep 10 04:24:35 2018 -0500 -- .../fast/VectorMapJoinFastBytesHashKeyRef.java | 178 ++ .../fast/VectorMapJoinFastBytesHashMap.java | 141 +++-- .../VectorMapJoinFastBytesHashMapStore.java | 559 +++ .../VectorMapJoinFastBytesHashMultiSet.java | 132 - ...VectorMapJoinFastBytesHashMultiSetStore.java | 280 ++ .../fast/VectorMapJoinFastBytesHashSet.java | 124 +++- .../VectorMapJoinFastBytesHashSetStore.java | 219 .../fast/VectorMapJoinFastBytesHashTable.java | 148 ++--- .../hive/ql/optimizer/ConvertJoinMapJoin.java | 6 +- .../fast/TestVectorMapJoinFastBytesHashMap.java | 3 + .../fast/TestVectorMapJoinFastLongHashMap.java | 3 + .../clientpositive/bucket_map_join_tez2.q | 2 +- .../test/queries/clientpositive/tez_smb_main.q | 3 +- .../results/clientpositive/llap/orc_llap.q.out | 59 +- .../apache/hadoop/hive/serde2/WriteBuffers.java | 53 ++ 15 files changed, 1661 insertions(+), 249 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java new file mode 100644 index 000..dbfe518 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashKeyRef.java @@ -0,0 +1,178 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.serde2.WriteBuffers; +// import com.google.common.base.Preconditions; + +public class VectorMapJoinFastBytesHashKeyRef { + + public static boolean equalKey(long refWord, byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers writeBuffers, WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +writeBuffers.setReadPoint(absoluteOffset, readPos); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +if (!isKeyLengthSmall) { + + // And, if current value is big we must read it. + actualKeyLength = writeBuffers.readVInt(readPos); +} + +if (actualKeyLength != keyLength) { + return false; +} + +// Our reading was positioned to the key. +if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { + return false; +} + +return true; + } + + public static int calculateHashCode(long refWord, WriteBuffers writeBuffers, + WriteBuffers.Position readPos) { + +// Preconditions.checkState((refWord & KeyRef.IsInvalidFlag.flagOnMask) == 0); + +final long absoluteOffset = KeyRef.getAbsoluteOffset(refWord); + +int actualKeyLength = KeyRef.getSmallKeyLength(refWord); +boolean isKeyLengthSmall = (actualKeyLength != KeyRef.SmallKeyLength.allBitsOn); +final long keyAbsoluteOffset; +if (!isKeyLengthSmall) { + + // Position after next relative offset (fixed length) to the key. + writeBuffers.setReadPoint(absoluteOffset, readPos); + + // And, if current value is big we must
[1/2] hive git commit: HIVE-20513: Vectorization: Improve Fast Vector MapJoin Bytes Hash Tables (Matt McCline, reviewed by Zoltan Haindrich)
Repository: hive Updated Branches: refs/heads/master 494b771ac -> ff98a30ab http://git-wip-us.apache.org/repos/asf/hive/blob/ff98a30a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java -- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java index 17d4bdb..79462a0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/WriteBuffers.java @@ -57,6 +57,11 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { memSize += (2 * jdm.primitive1()); return memSize; } +public void set(Position pos) { + buffer = pos.buffer; + bufferIndex = pos.bufferIndex; + offset = pos.offset; +} } Position writePos = new Position(); // Position where we'd write @@ -552,6 +557,21 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { return v; } + public long readNByteLong(int bytes, Position readPos) { +long v = 0; +if (isAllInOneReadBuffer(bytes, readPos)) { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += bytes; +} else { + for (int i = 0; i < bytes; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return v; + } + public void writeFiveByteULong(long offset, long v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset; setWritePoint(offset); @@ -574,10 +594,43 @@ public final class WriteBuffers implements RandomAccessOutput, MemoryEstimate { writePos.offset = prevOffset; } + public void writeFiveByteULong(long v) { +if (isAllInOneWriteBuffer(5)) { + writePos.buffer[writePos.offset] = (byte)(v >>> 32); + writePos.buffer[writePos.offset + 1] = (byte)(v >>> 24); + writePos.buffer[writePos.offset + 2] = (byte)(v >>> 16); + writePos.buffer[writePos.offset + 3] = (byte)(v >>> 8); + writePos.buffer[writePos.offset + 4] = (byte)(v); + writePos.offset += 5; +} else { + write((byte)(v >>> 32)); + write((byte)(v >>> 24)); + write((byte)(v >>> 16)); + write((byte)(v >>> 8)); + write((byte)(v)); +} + } + public int readInt(long offset) { return (int)unsafeReadNByteLong(offset, 4); } + public int readInt(long offset, Position readPos) { +setReadPoint(offset, readPos); +long v = 0; +if (isAllInOneReadBuffer(4, readPos)) { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readPos.buffer[readPos.offset + i] & 0xff); + } + readPos.offset += 4; +} else { + for (int i = 0; i < 4; ++i) { +v = (v << 8) + (readNextByte(readPos) & 0xff); + } +} +return (int) v; + } + @Override public void writeInt(long offset, int v) { int prevIndex = writePos.bufferIndex, prevOffset = writePos.offset;