This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 70f34e27349 HIVE-27269: Create multiple MatchTracker when using VectorFastHashMapContainer (Seonggon Namgung, reviewed by Denys Kuzmenko)
70f34e27349 is described below
commit 70f34e27349dccf5fabbfc6c63e63c7be0785360
Author: seonggon <[email protected]>
AuthorDate: Thu Nov 30 21:36:18 2023 +0900
HIVE-27269: Create multiple MatchTracker when using VectorFastHashMapContainer (Seonggon Namgung, reviewed by Denys Kuzmenko)
Closes #4857
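
The gist of the fix: each HT-loading thread owns its own hash table, and every per-thread map reports bucket indices local to that map, so a single flat MatchTracker sized by the summed bucket counts could not attribute matches to the right bucket during FULL OUTER map joins. The patch introduces a parent tracker with one child tracker per partition. A minimal sketch of the pattern, using a simplified BitSet-based tracker rather than Hive's actual MatchTracker internals:

    import java.util.BitSet;

    // Illustrative stand-in for MatchTracker.createPartitioned() /
    // addPartition() / getPartition(): one child per per-thread hash table,
    // indexed by bucket numbers that are local to that table.
    final class PartitionedTrackerSketch {
      private final BitSet[] partitions;

      PartitionedTrackerSketch(int numPartitions) {
        partitions = new BitSet[numPartitions];
      }

      void addPartition(int i, int bucketCount) {
        partitions[i] = new BitSet(bucketCount);  // sized for one child table
      }

      BitSet getPartition(int i) {
        return partitions[i];  // handed to that partition's lookup/iterator
      }
    }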
---
.../fast/VectorMapJoinFastHashTableLoader.java | 19 +-
.../VectorMapJoinFastLongHashMapContainer.java | 21 +-
.../VectorMapJoinFastMultiKeyHashMapContainer.java | 24 +-
.../VectorMapJoinFastStringHashMapContainer.java | 24 +-
...ctorMapJoinFastStringHashMultiSetContainer.java | 2 +-
...ectorMapJoinFastHashMapContainerNonMatched.java | 317 +++++++++++++++++
...r_fullouter_mapjoin_multithread_fast_htloader.q | 60 ++++
...llouter_mapjoin_multithread_fast_htloader.q.out | 394 +++++++++++++++++++++
8 files changed, 831 insertions(+), 30 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
index 0a5a4f308d5..460f05b0e17 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java
@@ -27,6 +27,7 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAccumulator;
+import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.hadoop.hive.common.Pool;
import org.apache.hadoop.hive.llap.LlapDaemonInfo;
@@ -106,9 +107,21 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive.
}
private void initHTLoadingService(long estKeyCount) {
- // Avoid many small HTs that will rehash multiple times causing GCs
- this.numLoadThreads = (estKeyCount < VectorMapJoinFastHashTable.FIRST_SIZE_UP) ? 1 :
- HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINPARALELHASHTABLETHREADS);
+ if (estKeyCount < VectorMapJoinFastHashTable.FIRST_SIZE_UP) {
+ // Avoid many small HTs that will rehash multiple times causing GCs
+ this.numLoadThreads = 1;
+ } else {
+ int initialValue = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINPARALELHASHTABLETHREADS);
+ Preconditions.checkArgument(initialValue > 0, "The number of HT-loading-threads should be positive.");
+
+ int adjustedValue = Integer.highestOneBit(initialValue);
+ if (initialValue != adjustedValue) {
+ LOG.info("Adjust the number of HT-loading-threads to {}. (Previous value: {})",
+ adjustedValue, initialValue);
+ }
+
+ this.numLoadThreads = adjustedValue;
+ }
this.totalEntries = new LongAccumulator(Long::sum, 0L);
this.loadExecService = Executors.newFixedThreadPool(numLoadThreads,
new ThreadFactoryBuilder()
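
The Preconditions check and Integer.highestOneBit rounding above exist because the containers route each key with a bitmask, partition = (numThreads - 1) & hashCode, which only behaves like hashCode % numThreads when numThreads is a power of two. A short sketch with hypothetical values (variable names below are illustrative, not Hive's):

    public class LoadThreadRoundingSketch {
      public static void main(String[] args) {
        int configured = 6;  // say, hive.mapjoin.hashtable.load.threads = 6
        int numLoadThreads = Integer.highestOneBit(configured);  // 6 -> 4
        long hashCode = 0x9E3779B97F4A7C15L;  // any key hash
        int partition = (int) ((numLoadThreads - 1) & hashCode);  // uniform over 0..3
        // Had 6 been used directly, the mask (6 - 1) = 0b101 could only yield
        // partitions 0, 1, 4 and 5 -- hash tables 2 and 3 would never be chosen.
        System.out.println(numLoadThreads + ", " + partition);
      }
    }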
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
index 266fdd823b8..6ef9b64cba9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
@@ -109,10 +109,11 @@ public class VectorMapJoinFastLongHashMapContainer extends VectorMapJoinFastHash
private NonMatchedLongHashMapIterator(MatchTracker matchTracker,
VectorMapJoinFastLongHashMap[] vectorMapJoinFastLongHashMaps, int numThreads) {
super(matchTracker);
+
hashMapIterators = new VectorMapJoinFastLongHashMap.NonMatchedLongHashMapIterator[numThreads];
for (int i = 0; i < numThreads; ++i) {
- hashMapIterators[i] = new VectorMapJoinFastLongHashMap.NonMatchedLongHashMapIterator(matchTracker,
- vectorMapJoinFastLongHashMaps[i]);
+ hashMapIterators[i] = new VectorMapJoinFastLongHashMap.NonMatchedLongHashMapIterator(
+ matchTracker.getPartition(i), vectorMapJoinFastLongHashMaps[i]);
}
index = 0;
this.numThreads = numThreads;
@@ -186,7 +187,11 @@ public class VectorMapJoinFastLongHashMapContainer extends VectorMapJoinFastHash
public JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult,
MatchTracker matchTracker) {
long hashCode = HashCodeUtil.calculateLongHashCode(key);
- return vectorMapJoinFastLongHashMaps[(int) ((numThreads - 1) & hashCode)].lookup(key, hashMapResult, matchTracker);
+ int partition = (int) ((numThreads - 1) & hashCode);
+ MatchTracker childMatchTracker = matchTracker != null ? matchTracker.getPartition(partition) : null;
+
+ return vectorMapJoinFastLongHashMaps[partition].lookup(key, hashMapResult,
+ childMatchTracker);
}
public long getEstimatedMemorySize() {
@@ -208,11 +213,13 @@ public class VectorMapJoinFastLongHashMapContainer extends VectorMapJoinFastHash
@Override
public MatchTracker createMatchTracker() {
- int count = 0;
- for (int i = 0; i < numThreads; ++i) {
- count += vectorMapJoinFastLongHashMaps[i].logicalHashBucketCount;
+ MatchTracker parentMatchTracker = MatchTracker.createPartitioned(numThreads);
+ for (int i = 0; i < numThreads; i++) {
+ int childSize = vectorMapJoinFastLongHashMaps[i].logicalHashBucketCount;
+ parentMatchTracker.addPartition(i, childSize);
}
- return MatchTracker.create(count);
+
+ return parentMatchTracker;
}
@Override
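
The multi-key and string containers below repeat the same two-step pattern added here: mask the hash to pick one of the numThreads child maps, then pass that child's tracker down (the null guard is needed because matchTracker is null for joins that do not track non-matches). A condensed sketch with simplified stand-in types, not the actual Hive signatures:

    // Condensed form of the per-partition routing in each container's lookup().
    enum JoinResultSketch { MATCH, NOMATCH }

    interface TrackerSketch {
      TrackerSketch getPartition(int i);
    }

    interface ChildMapSketch {
      JoinResultSketch lookup(long key, TrackerSketch tracker);
    }

    final class ContainerLookupSketch {
      private final ChildMapSketch[] maps;
      private final int numThreads;  // a power of two, see the loader change above

      ContainerLookupSketch(ChildMapSketch[] maps) {
        this.maps = maps;
        this.numThreads = maps.length;
      }

      JoinResultSketch lookup(long key, long hashCode, TrackerSketch matchTracker) {
        int partition = (int) ((numThreads - 1) & hashCode);
        // Only outer joins supply a tracker; guard before dereferencing.
        TrackerSketch child = (matchTracker != null) ? matchTracker.getPartition(partition) : null;
        return maps[partition].lookup(key, child);
      }
    }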
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMapContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMapContainer.java
index bb3bba50470..7a36d37c153 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMapContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMapContainer.java
@@ -43,7 +43,6 @@ public class VectorMapJoinFastMultiKeyHashMapContainer
private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastMultiKeyHashMapContainer.class);
private final VectorMapJoinFastMultiKeyHashMap[] vectorMapJoinFastMultiKeyHashMaps;
- private BytesWritable testKeyBytesWritable;
private final int numThreads;
public VectorMapJoinFastMultiKeyHashMapContainer(
@@ -68,10 +67,10 @@ public class VectorMapJoinFastMultiKeyHashMapContainer
NonMatchedBytesHashMapParallelIterator(MatchTracker matchTracker,
VectorMapJoinFastBytesHashMap[] hashMaps, int numThreads) {
super(matchTracker);
- hashMapIterators = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator[4];
+ hashMapIterators = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator[numThreads];
for (int i = 0; i < numThreads; ++i) {
- hashMapIterators[i] = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator(matchTracker,
- hashMaps[i]);
+ hashMapIterators[i] = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator(
+ matchTracker.getPartition(i), hashMaps[i]);
}
index = 0;
this.numThreads = numThreads;
@@ -154,11 +153,13 @@ public class VectorMapJoinFastMultiKeyHashMapContainer
@Override
public MatchTracker createMatchTracker() {
- int count = 0;
- for (int i = 0; i < numThreads; ++i) {
- count += vectorMapJoinFastMultiKeyHashMaps[i].logicalHashBucketCount;
+ MatchTracker parentMatchTracker = MatchTracker.createPartitioned(numThreads);
+ for (int i = 0; i < numThreads; i++) {
+ int childSize = vectorMapJoinFastMultiKeyHashMaps[i].logicalHashBucketCount;
+ parentMatchTracker.addPartition(i, childSize);
}
- return MatchTracker.create(count);
+
+ return parentMatchTracker;
}
@Override
@@ -182,8 +183,11 @@ public class VectorMapJoinFastMultiKeyHashMapContainer
public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength,
VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException {
long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
- return vectorMapJoinFastMultiKeyHashMaps[(int) ((numThreads - 1) & hashCode)].lookup(keyBytes, keyStart, keyLength, hashMapResult,
- matchTracker);
+ int partition = (int) ((numThreads - 1) & hashCode);
+ MatchTracker childMatchTracker = matchTracker != null ? matchTracker.getPartition(partition) : null;
+
+ return vectorMapJoinFastMultiKeyHashMaps[partition].lookup(keyBytes, keyStart, keyLength, hashMapResult,
+ childMatchTracker);
}
@Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMapContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMapContainer.java
index ab7e3bf42ab..e55b9aa236b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMapContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMapContainer.java
@@ -74,10 +74,11 @@ public class VectorMapJoinFastStringHashMapContainer extends VectorMapJoinFastHa
NonMatchedBytesHashMapIterator(MatchTracker matchTracker,
VectorMapJoinFastStringHashMap[] hashMaps, int numThreads) {
super(matchTracker);
- hashMapIterators = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator[4];
+
+ hashMapIterators = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator[numThreads];
for (int i = 0; i < numThreads; ++i) {
- hashMapIterators[i] = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator(matchTracker,
- hashMaps[i]);
+ hashMapIterators[i] = new VectorMapJoinFastBytesHashMap.NonMatchedBytesHashMapIterator(
+ matchTracker.getPartition(i), hashMaps[i]);
}
index = 0;
this.numThreads = numThreads;
@@ -172,11 +173,13 @@ public class VectorMapJoinFastStringHashMapContainer extends VectorMapJoinFastHa
@Override
public MatchTracker createMatchTracker() {
- int count = 0;
- for (int i = 0; i < numThreads; ++i) {
- count += vectorMapJoinFastStringHashMaps[i].logicalHashBucketCount;
+ MatchTracker parentMatchTracker = MatchTracker.createPartitioned(numThreads);
+ for (int i = 0; i < numThreads; i++) {
+ int childSize = vectorMapJoinFastStringHashMaps[i].logicalHashBucketCount;
+ parentMatchTracker.addPartition(i, childSize);
}
- return MatchTracker.create(count);
+
+ return parentMatchTracker;
}
@Override
@@ -200,8 +203,11 @@ public class VectorMapJoinFastStringHashMapContainer extends VectorMapJoinFastHa
public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength,
VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException {
long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength);
- return vectorMapJoinFastStringHashMaps[(int) ((numThreads - 1) & hashCode)].lookup(keyBytes, keyStart, keyLength, hashMapResult,
- matchTracker);
+ int partition = (int) ((numThreads - 1) & hashCode);
+ MatchTracker childMatchTracker = matchTracker != null ? matchTracker.getPartition(partition) : null;
+
+ return vectorMapJoinFastStringHashMaps[partition].lookup(keyBytes, keyStart, keyLength, hashMapResult,
+ childMatchTracker);
}
@Override
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSetContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSetContainer.java
index df4ac1eb721..1755bc7c472 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSetContainer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSetContainer.java
@@ -53,7 +53,7 @@ public class VectorMapJoinFastStringHashMultiSetContainer extends VectorMapJoinF
boolean isFullOuter,
int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount, TableDesc tableDesc,
int numHTs) {
- vectorMapJoinFastStringHashMultiSets = new VectorMapJoinFastStringHashMultiSet[4];
+ vectorMapJoinFastStringHashMultiSets = new VectorMapJoinFastStringHashMultiSet[numHTs];
LOG.info("Initializing {} HT Containers ", numHTs);
for (int i = 0; i < numHTs; ++i) {
vectorMapJoinFastStringHashMultiSets[i] = new VectorMapJoinFastStringHashMultiSet(
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastHashMapContainerNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastHashMapContainerNonMatched.java
new file mode 100644
index 00000000000..e857cafe234
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastHashMapContainerNonMatched.java
@@ -0,0 +1,317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
+import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.HashCodeUtil;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Properties;
+import java.util.Random;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+public class TestVectorMapJoinFastHashMapContainerNonMatched {
+ private static final int numHashTable = 2;
+ private static final int initialCapacity = 8;
+ private static final float loadFactor = 0.9f;
+ private static final int writeBufferSize = 1024 * 1024;
+ private static final int estimatedKeyCount = -1;
+
+ private BytesWritable serializeLong(long value, Properties properties) throws Exception {
+ BinarySortableSerializeWrite serializeWrite = BinarySortableSerializeWrite.with(properties, 1);
+ ByteStream.Output output = new ByteStream.Output();
+ serializeWrite.set(output);
+ serializeWrite.writeLong(value);
+
+ BytesWritable writable = new BytesWritable();
+ writable.set(output.getData(), 0, output.getLength());
+
+ return writable;
+ }
+
+ private void addToHashMap(VectorMapJoinFastLongHashMapContainer hashMap, long value, Properties properties)
+ throws Exception {
+ BytesWritable keyWritable = serializeLong(value, properties);
+ BytesWritable valueWritable = new BytesWritable(keyWritable.copyBytes());
+ hashMap.putRow(HashCodeUtil.calculateLongHashCode(value), keyWritable, valueWritable);
+ }
+
+ private long getHashCode(String key) {
+ Text keyWritable = new Text(key);
+ return HashCodeUtil.murmurHash(keyWritable.getBytes(), 0, keyWritable.getLength());
+ }
+
+ private BytesWritable serializeString(String value, Properties properties) throws Exception {
+ BinarySortableSerializeWrite serializeWrite = BinarySortableSerializeWrite.with(properties, 1);
+ ByteStream.Output output = new ByteStream.Output();
+ serializeWrite.set(output);
+
+ Text text = new Text(value);
+ serializeWrite.writeString(text.getBytes(), 0, text.getLength());
+
+ BytesWritable writable = new BytesWritable();
+ writable.set(output.getData(), 0, output.getLength());
+
+ return writable;
+ }
+
+ private void addToHashMap(
+ VectorMapJoinFastStringHashMapContainer hashMap, String value, Properties properties) throws Exception {
+ BytesWritable keyWritable = serializeString(value, properties);
+ BytesWritable valueWritable = new BytesWritable(keyWritable.copyBytes());
+ hashMap.putRow(getHashCode(value), keyWritable, valueWritable);
+ }
+
+ private BytesWritable createRandomMultiKey(Random random, BinarySortableSerializeWrite serializeWrite)
+ throws Exception {
+ ByteStream.Output output = new ByteStream.Output();
+ serializeWrite.set(output);
+
+ serializeWrite.writeLong(random.nextLong());
+ serializeWrite.writeLong(random.nextLong());
+
+ BytesWritable writable = new BytesWritable();
+ writable.set(output.getData(), 0, output.getLength());
+
+ return writable;
+ }
+
+ private long getHashCode(BytesWritable key) {
+ return HashCodeUtil.murmurHash(key.getBytes(), 0, key.getLength());
+ }
+
+ private void addToHashMap(
+ VectorMapJoinFastMultiKeyHashMapContainer hashMap, BytesWritable key) throws Exception {
+ BytesWritable value = new BytesWritable(key.copyBytes());
+ hashMap.putRow(getHashCode(key), key, value);
+ }
+
+ @Test
+ public void testLongHashMapContainer() throws Exception {
+ Random random = new Random();
+ long keyA = random.nextLong();
+ while ((HashCodeUtil.calculateLongHashCode(keyA) & (initialCapacity - 1)) != 0) {
+ keyA = random.nextLong();
+ }
+
+ long keyB = random.nextLong();
+ while ((HashCodeUtil.calculateLongHashCode(keyB) & (initialCapacity - 1)) != 0 || keyB == keyA) {
+ keyB = random.nextLong();
+ }
+
+ long keyC = random.nextLong();
+ while ((HashCodeUtil.calculateLongHashCode(keyC) & (initialCapacity - 1)) != 1) {
+ keyC = random.nextLong();
+ }
+
+ TableDesc tableDesc = new TableDesc();
+ Properties properties = new Properties();
+ tableDesc.setProperties(properties);
+
+ VectorMapJoinFastLongHashMapContainer hashMapContainer =
+ new VectorMapJoinFastLongHashMapContainer(
+ true, /* isFullOuter */
+ false, /* minMaxEnabled */
+ VectorMapJoinDesc.HashTableKeyType.LONG,
+ initialCapacity,
+ loadFactor,
+ writeBufferSize,
+ estimatedKeyCount,
+ tableDesc,
+ numHashTable);
+
+ addToHashMap(hashMapContainer, keyA, properties);
+ addToHashMap(hashMapContainer, keyB, properties);
+ addToHashMap(hashMapContainer, keyC, properties);
+
+ MatchTracker matchTracker = hashMapContainer.createMatchTracker();
+ VectorMapJoinHashMapResult hashMapResult = hashMapContainer.createHashMapResult();
+
+ hashMapContainer.lookup(keyB, hashMapResult, matchTracker);
+
+ VectorMapJoinNonMatchedIterator nonMatchedIterator =
+ hashMapContainer.createNonMatchedIterator(matchTracker);
+ nonMatchedIterator.init();
+
+ ArrayList<Long> nonMatchedList = new ArrayList<Long>();
+ while (nonMatchedIterator.findNextNonMatched()) {
+ boolean isNull = !nonMatchedIterator.readNonMatchedLongKey();
+ assertFalse(isNull);
+
+ long key = nonMatchedIterator.getNonMatchedLongKey();
+ nonMatchedList.add(key);
+ }
+
+ assertEquals(2, nonMatchedList.size());
+ assertTrue(nonMatchedList.contains(keyA));
+ assertTrue(nonMatchedList.contains(keyC));
+ }
+
+ @Test
+ public void testStringHashMapContainer() throws Exception {
+ Random random = new Random();
+
+ String keyA = VectorRandomRowSource.getRandString(random, 5, false);
+ while ((getHashCode(keyA) & (initialCapacity - 1)) != 0) {
+ keyA = VectorRandomRowSource.getRandString(random, 5, false);
+ }
+
+ String keyB = VectorRandomRowSource.getRandString(random, 5, false);
+ while ((getHashCode(keyB) & (initialCapacity - 1)) != 0 || keyB.equals(keyA)) {
+ keyB = VectorRandomRowSource.getRandString(random, 5, false);
+ }
+
+ String keyC = VectorRandomRowSource.getRandString(random, 5, false);
+ while ((getHashCode(keyC) & (initialCapacity - 1)) != 1) {
+ keyC = VectorRandomRowSource.getRandString(random, 5, false);
+ }
+
+ TableDesc tableDesc = new TableDesc();
+ Properties properties = new Properties();
+ tableDesc.setProperties(properties);
+
+ VectorMapJoinFastStringHashMapContainer hashMapContainer =
+ new VectorMapJoinFastStringHashMapContainer(
+ true, /* isFullOuter */
+ initialCapacity,
+ loadFactor,
+ writeBufferSize,
+ estimatedKeyCount,
+ tableDesc,
+ numHashTable);
+
+ addToHashMap(hashMapContainer, keyA, properties);
+ addToHashMap(hashMapContainer, keyB, properties);
+ addToHashMap(hashMapContainer, keyC, properties);
+
+ MatchTracker matchTracker = hashMapContainer.createMatchTracker();
+ VectorMapJoinHashMapResult hashMapResult = hashMapContainer.createHashMapResult();
+
+ Text keyTextB = new Text(keyB);
+ hashMapContainer.lookup(keyTextB.getBytes(), 0, keyTextB.getLength(), hashMapResult, matchTracker);
+
+ VectorMapJoinNonMatchedIterator nonMatchedIterator =
+ hashMapContainer.createNonMatchedIterator(matchTracker);
+ nonMatchedIterator.init();
+
+ ArrayList<String> nonMatchedList = new ArrayList<String>();
+ while (nonMatchedIterator.findNextNonMatched()) {
+ boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey();
+ assertFalse(isNull);
+
+ byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes();
+ int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset();
+ int keyLength = nonMatchedIterator.getNonMatchedBytesLength();
+
+ byte[] array = new byte[keyLength];
+ System.arraycopy(keyBytes, keyOffset, array, 0, keyLength);
+ Text key = new Text(array);
+
+ nonMatchedList.add(key.toString());
+ }
+
+ assertEquals(2, nonMatchedList.size());
+ assertTrue(nonMatchedList.contains(keyA));
+ assertTrue(nonMatchedList.contains(keyC));
+ }
+
+ @Test
+ public void testMultiKeyHashMapContainer() throws Exception {
+ Random random = new Random();
+ BinarySortableSerializeWrite serializeWrite =
+ BinarySortableSerializeWrite.with(new Properties(), 2);
+
+ BytesWritable keyA = createRandomMultiKey(random, serializeWrite);
+ while ((getHashCode(keyA) & (initialCapacity - 1)) != 0) {
+ keyA = createRandomMultiKey(random, serializeWrite);
+ }
+
+ BytesWritable keyB = createRandomMultiKey(random, serializeWrite);
+ while ((getHashCode(keyB) & (initialCapacity - 1)) != 0 || keyB == keyA) {
+ keyB = createRandomMultiKey(random, serializeWrite);
+ }
+
+ BytesWritable keyC = createRandomMultiKey(random, serializeWrite);
+ while ((getHashCode(keyC) & (initialCapacity - 1)) != 1) {
+ keyC = createRandomMultiKey(random, serializeWrite);
+ }
+
+ VectorMapJoinFastMultiKeyHashMapContainer hashMapContainer =
+ new VectorMapJoinFastMultiKeyHashMapContainer(
+ true, /* isFullOuter */
+ initialCapacity,
+ loadFactor,
+ writeBufferSize,
+ estimatedKeyCount,
+ numHashTable);
+
+ addToHashMap(hashMapContainer, keyA);
+ addToHashMap(hashMapContainer, keyB);
+ addToHashMap(hashMapContainer, keyC);
+
+ MatchTracker matchTracker = hashMapContainer.createMatchTracker();
+ VectorMapJoinHashMapResult hashMapResult = hashMapContainer.createHashMapResult();
+
+ hashMapContainer.lookup(keyB.getBytes(), 0, keyB.getLength(), hashMapResult, matchTracker);
+
+ VectorMapJoinNonMatchedIterator nonMatchedIterator =
+ hashMapContainer.createNonMatchedIterator(matchTracker);
+ nonMatchedIterator.init();
+
+ ArrayList<byte[]> nonMatchedList = new ArrayList<byte[]>();
+ while (nonMatchedIterator.findNextNonMatched()) {
+ boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey();
+ assertFalse(isNull);
+
+ byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes();
+ int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset();
+ int keyLength = nonMatchedIterator.getNonMatchedBytesLength();
+
+ byte[] array = new byte[keyLength];
+ System.arraycopy(keyBytes, keyOffset, array, 0, keyLength);
+
+ nonMatchedList.add(array);
+ }
+
+ final BytesWritable finalKeyA = keyA;
+ final BytesWritable finalKeyC = keyC;
+
+ assertEquals(2, nonMatchedList.size());
+ assertTrue(nonMatchedList.stream().anyMatch(arr -> {
+ return Arrays.equals(arr, finalKeyA.copyBytes());
+ }));
+ assertTrue(nonMatchedList.stream().anyMatch(arr -> {
+ return Arrays.equals(arr, finalKeyC.copyBytes());
+ }));
+ }
+}
+
diff --git a/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_multithread_fast_htloader.q b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_multithread_fast_htloader.q
new file mode 100644
index 00000000000..b7b5745d302
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_multithread_fast_htloader.q
@@ -0,0 +1,60 @@
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join.noconditionaltask.size=100000000000;
+set hive.exec.reducers.max=1;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
+set hive.mapjoin.hashtable.load.threads=2;
+
+-- SORT_QUERY_RESULTS
+
+--
+-- test Long key
+--
+
+-- This table should be broadcasted and stored in HashTable.
+create table small_long_table (key bigint, value string);
+insert into small_long_table values (7610878409923211200, "a"); -- key hash % 2097152 == 0
+insert into small_long_table values (-371494529663898262, "b"); -- key hash % 2097152 == 0
+insert into small_long_table values (-2307888158465848362, "c"); -- key hash % 2097152 == 1
+
+create table big_long_table (key bigint, value string);
+insert into big_long_table values (-2307888158465848362, "c"); -- key hash % 2097152 == 1
+
+-- small table size should be larger than VectorMapJoinFastHashTable.FIRST_SIZE_UP.
+-- If not, only a single thread loads the entire hash table.
+alter table big_long_table update statistics set ('numRows'='90000000'); -- should be larger than small table
+alter table small_long_table update statistics set ('numRows'='2097152'); -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+-- query plan must include vectorized fullouter mapjoin.
+explain
+select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key);
+
+select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key);
+
+--
+-- test String key
+--
+
+-- This table should be broadcasted and stored in HashTable.
+create table small_string_table (key string, value string);
+insert into small_string_table values ("affzk", "a"); -- key hash % 2097152 == 0
+insert into small_string_table values ("hbkpa", "b"); -- key hash % 2097152 == 0
+insert into small_string_table values ("kykzm", "c"); -- key hash % 2097152 ==
1
+
+create table big_string_table (key string, value string);
+insert into big_string_table values ("kykzm", "c"); -- key hash % 2097152 == 1
+
+-- small table size should be larger than VectorMapJoinFastHashTable.FIRST_SIZE_UP.
+-- If not, only a single thread loads the entire hash table.
+alter table big_string_table update statistics set ('numRows'='90000000'); -- should be larger than small table
+alter table small_string_table update statistics set ('numRows'='2097152'); -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+-- query plan must include vectorized fullouter mapjoin.
+explain
+select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key);
+
+select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key);
+
+-- To test multikey HashTable, one may use the following configuration.
+-- set hive.vectorized.execution.mapjoin.native.multikey.only.enabled=true;
+
diff --git a/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_multithread_fast_htloader.q.out b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_multithread_fast_htloader.q.out
new file mode 100644
index 00000000000..a9e5e419ff9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_multithread_fast_htloader.q.out
@@ -0,0 +1,394 @@
+PREHOOK: query: create table small_long_table (key bigint, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_long_table
+POSTHOOK: query: create table small_long_table (key bigint, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_long_table
+PREHOOK: query: insert into small_long_table values (7610878409923211200, "a")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_long_table
+POSTHOOK: query: insert into small_long_table values (7610878409923211200, "a")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_long_table
+POSTHOOK: Lineage: small_long_table.key SCRIPT []
+POSTHOOK: Lineage: small_long_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 0
+insert into small_long_table values (-371494529663898262, "b")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_long_table
+POSTHOOK: query: -- key hash % 2097152 == 0
+insert into small_long_table values (-371494529663898262, "b")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_long_table
+POSTHOOK: Lineage: small_long_table.key SCRIPT []
+POSTHOOK: Lineage: small_long_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 0
+insert into small_long_table values (-2307888158465848362, "c")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_long_table
+POSTHOOK: query: -- key hash % 2097152 == 0
+insert into small_long_table values (-2307888158465848362, "c")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_long_table
+POSTHOOK: Lineage: small_long_table.key SCRIPT []
+POSTHOOK: Lineage: small_long_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 1
+
+create table big_long_table (key bigint, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@big_long_table
+POSTHOOK: query: -- key hash % 2097152 == 1
+
+create table big_long_table (key bigint, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@big_long_table
+PREHOOK: query: insert into big_long_table values (-2307888158465848362, "c")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@big_long_table
+POSTHOOK: query: insert into big_long_table values (-2307888158465848362, "c")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@big_long_table
+POSTHOOK: Lineage: big_long_table.key SCRIPT []
+POSTHOOK: Lineage: big_long_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 1
+
+
+
+alter table big_long_table update statistics set ('numRows'='90000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@big_long_table
+PREHOOK: Output: default@big_long_table
+POSTHOOK: query: -- key hash % 2097152 == 1
+
+
+
+alter table big_long_table update statistics set ('numRows'='90000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@big_long_table
+POSTHOOK: Output: default@big_long_table
+PREHOOK: query: -- should be larger than small table
+alter table small_long_table update statistics set ('numRows'='2097152')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@small_long_table
+PREHOOK: Output: default@small_long_table
+POSTHOOK: query: -- should be larger than small table
+alter table small_long_table update statistics set ('numRows'='2097152')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@small_long_table
+POSTHOOK: Output: default@small_long_table
+PREHOOK: query: -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+
+explain
+select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big_long_table
+PREHOOK: Input: default@small_long_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+
+explain
+select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big_long_table
+POSTHOOK: Input: default@small_long_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: small_long_table
+ Statistics: Num rows: 2097152 Data size: 195035136 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: bigint), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2097152 Data size: 195035136 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 2097152 Data size: 195035136 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), 0S (type: smallint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: big_long_table
+ Statistics: Num rows: 90000000 Data size: 8370000000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: bigint), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 90000000 Data size: 8370000000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Statistics: Num rows: 90000000 Data size: 8370000000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 62914652097152 Data size: 11702108160000186 Basic stats: COMPLETE Column stats: COMPLETE
+ DynamicPartitionHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 62914652097152 Data size: 11702108160000186 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big_long_table
+PREHOOK: Input: default@small_long_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from small_long_table full outer join big_long_table on (small_long_table.key = big_long_table.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big_long_table
+POSTHOOK: Input: default@small_long_table
+#### A masked pattern was here ####
+-2307888158465848362 c -2307888158465848362 c
+-371494529663898262 b NULL NULL
+7610878409923211200 a NULL NULL
+PREHOOK: query: create table small_string_table (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@small_string_table
+POSTHOOK: query: create table small_string_table (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@small_string_table
+PREHOOK: query: insert into small_string_table values ("affzk", "a")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_string_table
+POSTHOOK: query: insert into small_string_table values ("affzk", "a")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_string_table
+POSTHOOK: Lineage: small_string_table.key SCRIPT []
+POSTHOOK: Lineage: small_string_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 0
+insert into small_string_table values ("hbkpa", "b")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_string_table
+POSTHOOK: query: -- key hash % 2097152 == 0
+insert into small_string_table values ("hbkpa", "b")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_string_table
+POSTHOOK: Lineage: small_string_table.key SCRIPT []
+POSTHOOK: Lineage: small_string_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 0
+insert into small_string_table values ("kykzm", "c")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@small_string_table
+POSTHOOK: query: -- key hash % 2097152 == 0
+insert into small_string_table values ("kykzm", "c")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@small_string_table
+POSTHOOK: Lineage: small_string_table.key SCRIPT []
+POSTHOOK: Lineage: small_string_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 1
+
+create table big_string_table (key string, value string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@big_string_table
+POSTHOOK: query: -- key hash % 2097152 == 1
+
+create table big_string_table (key string, value string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@big_string_table
+PREHOOK: query: insert into big_string_table values ("kykzm", "c")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@big_string_table
+POSTHOOK: query: insert into big_string_table values ("kykzm", "c")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@big_string_table
+POSTHOOK: Lineage: big_string_table.key SCRIPT []
+POSTHOOK: Lineage: big_string_table.value SCRIPT []
+PREHOOK: query: -- key hash % 2097152 == 1
+
+
+
+alter table big_string_table update statistics set ('numRows'='90000000')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@big_string_table
+PREHOOK: Output: default@big_string_table
+POSTHOOK: query: -- key hash % 2097152 == 1
+
+
+
+alter table big_string_table update statistics set ('numRows'='90000000')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@big_string_table
+POSTHOOK: Output: default@big_string_table
+PREHOOK: query: -- should be larger than small table
+alter table small_string_table update statistics set ('numRows'='2097152')
+PREHOOK: type: ALTERTABLE_UPDATETABLESTATS
+PREHOOK: Input: default@small_string_table
+PREHOOK: Output: default@small_string_table
+POSTHOOK: query: -- should be larger than small table
+alter table small_string_table update statistics set ('numRows'='2097152')
+POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS
+POSTHOOK: Input: default@small_string_table
+POSTHOOK: Output: default@small_string_table
+PREHOOK: query: -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+
+explain
+select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big_string_table
+PREHOOK: Input: default@small_string_table
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2 * VectorMapJoinFastHashTable.FIRST_SIZE_UP
+
+
+explain
+select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big_string_table
+POSTHOOK: Input: default@small_string_table
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: small_string_table
+ Statistics: Num rows: 2097152 Data size: 364904448 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 2097152 Data size: 364904448 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 2097152 Data size: 364904448 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), 0S (type: smallint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: big_string_table
+ Statistics: Num rows: 90000000 Data size: 15660000000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 90000000 Data size: 15660000000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 90000000 Data size: 15660000000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: string)
+ 1 KEY.reducesinkkey0 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 0 Map 1
+ Statistics: Num rows: 62914652097152 Data size: 21894266880000348 Basic stats: COMPLETE Column stats: COMPLETE
+ DynamicPartitionHashJoin: true
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 62914652097152 Data size: 21894266880000348 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@big_string_table
+PREHOOK: Input: default@small_string_table
+#### A masked pattern was here ####
+POSTHOOK: query: select * from small_string_table full outer join big_string_table on (small_string_table.key = big_string_table.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@big_string_table
+POSTHOOK: Input: default@small_string_table
+#### A masked pattern was here ####
+affzk a NULL NULL
+hbkpa b NULL NULL
+kykzm c kykzm c