Repository: hive Updated Branches: refs/heads/branch-2.0 80ce0449e -> e83dd4163 refs/heads/master 19b508ecc -> eb4a16448
HIVE-12905 : Issue with mapjoin in tez under certain conditions (Vikram Dixit K, reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/eb4a1644 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/eb4a1644 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/eb4a1644 Branch: refs/heads/master Commit: eb4a1644874a7fd49e58a4bf5104f565902dda9d Parents: 19b508e Author: Sergey Shelukhin <[email protected]> Authored: Mon Jan 25 20:50:35 2016 -0800 Committer: Sergey Shelukhin <[email protected]> Committed: Mon Jan 25 20:50:35 2016 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../ql/exec/persistence/HashMapWrapper.java | 16 ++- .../persistence/HybridHashTableContainer.java | 54 +++---- .../persistence/MapJoinBytesTableContainer.java | 22 +-- .../hive/ql/exec/persistence/MapJoinKey.java | 6 - .../exec/persistence/MapJoinTableContainer.java | 6 +- .../persistence/MapJoinTableContainerSerDe.java | 6 +- .../hive/ql/exec/spark/HashTableLoader.java | 1 + .../hive/ql/exec/tez/HashTableLoader.java | 5 +- .../fast/VectorMapJoinFastHashTableLoader.java | 6 +- .../fast/VectorMapJoinFastTableContainer.java | 30 ++-- ql/src/test/queries/clientpositive/empty_join.q | 23 +++ .../results/clientpositive/empty_join.q.out | 142 +++++++++++++++++++ .../results/clientpositive/tez/empty_join.q.out | 115 +++++++++++++++ 14 files changed, 369 insertions(+), 64 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index a2ccfe0..8318c3a 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -69,6 +69,7 @@ disabled.query.files=ql_rewrite_gbtoidx.q,\ smb_mapjoin_8.q minitez.query.files.shared=acid_globallimit.q,\ + empty_join.q,\ alter_merge_2_orc.q,\ alter_merge_orc.q,\ alter_merge_stats_orc.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 2ca5c00..a3bccc6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -58,8 +58,10 @@ public class HashMapWrapper extends AbstractMapJoinTableContainer implements Ser private static final int THRESHOLD = 1000000; private static final float LOADFACTOR = 0.75f; private final HashMap<MapJoinKey, MapJoinRowContainer> mHash; // main memory HashMap - private MapJoinKey lastKey = null; - private Output output = new Output(0); // Reusable output for serialization + private final MapJoinKey lastKey = null; + private final Output output = new Output(0); // Reusable output for serialization + private MapJoinObjectSerDeContext keyContext; + private MapJoinObjectSerDeContext valueContext; public HashMapWrapper(Map<String, String> metaData) { super(metaData); @@ -121,8 +123,7 @@ public class HashMapWrapper extends AbstractMapJoinTableContainer implements Ser } @Override - public MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, Writable currentKey, - MapJoinObjectSerDeContext valueContext, Writable currentValue) + public MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException { MapJoinKey key = MapJoinKey.read(output, keyContext, currentKey); FlatRowContainer values = (FlatRowContainer)get(key); @@ -248,4 +249,11 @@ public class HashMapWrapper extends AbstractMapJoinTableContainer implements Ser public boolean hasSpill() { return false; } + + @Override + public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) + throws SerDeException { + this.keyContext = keyCtx; + this.valueContext = valCtx; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index f2f3c09..fdc1dff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -86,21 +86,21 @@ public class HybridHashTableContainer private int numPartitionsSpilled; // number of spilled partitions private boolean lastPartitionInMem; // only one (last one) partition is left in memory private final int memoryCheckFrequency; // how often (# of rows apart) to check if memory is full - private HybridHashTableConf nwayConf; // configuration for n-way join + private final HybridHashTableConf nwayConf; // configuration for n-way join /** The OI used to deserialize values. We never deserialize keys. */ private LazyBinaryStructObjectInspector internalValueOi; private boolean[] sortableSortOrders; private MapJoinBytesTableContainer.KeyValueHelper writeHelper; - private MapJoinBytesTableContainer.DirectKeyValueWriter directWriteHelper; + private final MapJoinBytesTableContainer.DirectKeyValueWriter directWriteHelper; /* * this is not a real bloom filter, but is a cheap version of the 1-memory * access bloom filters - * + * * In several cases, we'll have map-join spills because the value columns are * a few hundred columns of Text each, while there are very few keys in total * (a few thousand). - * + * * This is a cheap exit option to prevent spilling the big-table in such a * scenario. */ @@ -424,27 +424,8 @@ public class HybridHashTableContainer */ @SuppressWarnings("deprecation") @Override - public MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, Writable currentKey, - MapJoinObjectSerDeContext valueContext, Writable currentValue) + public MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException, IOException { - SerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); - - if (writeHelper == null) { - LOG.info("Initializing container with " - + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); - - // We assume this hashtable is loaded only when tez is enabled - LazyBinaryStructObjectInspector valSoi = - (LazyBinaryStructObjectInspector) valSerde.getObjectInspector(); - writeHelper = new MapJoinBytesTableContainer.LazyBinaryKvWriter(keySerde, valSoi, - valueContext.hasFilterTag()); - if (internalValueOi == null) { - internalValueOi = valSoi; - } - if (sortableSortOrders == null) { - sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders(); - } - } writeHelper.setKeyValue(currentKey, currentValue); return internalPutRow(writeHelper, currentKey, currentValue); } @@ -793,7 +774,7 @@ public class HybridHashTableContainer private class ReusableRowContainer implements MapJoinRowContainer, AbstractRowContainer.RowIterator<List<Object>> { private byte aliasFilter; - private BytesBytesMultiHashMap.Result hashMapResult; + private final BytesBytesMultiHashMap.Result hashMapResult; /** * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row. @@ -1062,4 +1043,27 @@ public class HybridHashTableContainer } return totalSize; } + + @Override + public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) + throws SerDeException { + SerDe keySerde = keyCtx.getSerDe(), valSerde = valCtx.getSerDe(); + + if (writeHelper == null) { + LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + + valSerde.getClass().getName()); + + // We assume this hashtable is loaded only when tez is enabled + LazyBinaryStructObjectInspector valSoi = + (LazyBinaryStructObjectInspector) valSerde.getObjectInspector(); + writeHelper = new MapJoinBytesTableContainer.LazyBinaryKvWriter(keySerde, valSoi, + valCtx.hasFilterTag()); + if (internalValueOi == null) { + internalValueOi = valSoi; + } + if (sortableSortOrders == null) { + sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders(); + } + } + } } http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index 3dddee7..5c2ff92 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec.persistence; import java.io.ObjectOutputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -28,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.debug.Utils; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; @@ -345,26 +347,30 @@ public class MapJoinBytesTableContainer } } - @SuppressWarnings("deprecation") @Override - public MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, Writable currentKey, - MapJoinObjectSerDeContext valueContext, Writable currentValue) throws SerDeException { + public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) + throws SerDeException { SerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); if (writeHelper == null) { - LOG.info("Initializing container with " - + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); + LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + + valSerde.getClass().getName()); if (keySerde instanceof BinarySortableSerDe && valSerde instanceof LazyBinarySerDe) { LazyBinaryStructObjectInspector valSoi = - (LazyBinaryStructObjectInspector)valSerde.getObjectInspector(); + (LazyBinaryStructObjectInspector) valSerde.getObjectInspector(); writeHelper = new LazyBinaryKvWriter(keySerde, valSoi, valueContext.hasFilterTag()); internalValueOi = valSoi; - sortableSortOrders = ((BinarySortableSerDe)keySerde).getSortOrders(); + sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders(); } else { writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag()); internalValueOi = createInternalOi(valueContext); sortableSortOrders = null; } } + } + + @SuppressWarnings("deprecation") + @Override + public MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException { writeHelper.setKeyValue(currentKey, currentValue); hashMap.put(writeHelper, -1); return null; // there's no key to return @@ -538,7 +544,7 @@ public class MapJoinBytesTableContainer private byte aliasFilter; /** Hash table wrapper specific to the container. */ - private BytesBytesMultiHashMap.Result hashMapResult; + private final BytesBytesMultiHashMap.Result hashMapResult; /** * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row. http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index 86cc9bd..cfb9abc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -21,12 +21,9 @@ package org.apache.hadoop.hive.ql.exec.persistence; import java.io.IOException; import java.io.ObjectOutputStream; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashSet; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -40,8 +37,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -53,7 +48,6 @@ import org.apache.hadoop.io.Writable; */ public abstract class MapJoinKey { private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; - public abstract void write(MapJoinObjectSerDeContext context, ObjectOutputStream out) throws IOException, SerDeException; http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java index 869aefd..6d71fef 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java @@ -77,8 +77,7 @@ public interface MapJoinTableContainer { /** * Adds row from input to the table. */ - MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, Writable currentKey, - MapJoinObjectSerDeContext valueContext, Writable currentValue) + MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException, IOException; /** @@ -111,4 +110,7 @@ public interface MapJoinTableContainer { * Return the size of the hash table */ int size(); + + void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) + throws SerDeException; } http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java index d6deabe..7a36b53 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainerSerDe.java @@ -143,6 +143,7 @@ public class MapJoinTableContainerSerDe { new MapJoinBytesTableContainer(hconf, valueContext, -1, 0) : create(name, metaData); } + tableContainer.setSerde(keyContext, valueContext); if (useOptimizedContainer) { loadOptimized((MapJoinBytesTableContainer) tableContainer, in, keyContainer, valueContainer); @@ -192,7 +193,7 @@ public class MapJoinTableContainerSerDe { long numRows = in.readLong(); for (long rowIndex = 0L; rowIndex < numRows; rowIndex++) { value.readFields(in); - container.putRow(keyContext, key, valueContext, value); + container.putRow(key, value); } } } @@ -224,6 +225,7 @@ public class MapJoinTableContainerSerDe { VectorMapJoinFastTableContainer tableContainer = new VectorMapJoinFastTableContainer(mapJoinDesc, hconf, -1); + tableContainer.setSerde(keyContext, valueContext); for (FileStatus fileStatus : fileStatuses) { Path filePath = fileStatus.getPath(); @@ -244,7 +246,7 @@ public class MapJoinTableContainerSerDe { long numRows = in.readLong(); for (long rowIndex = 0L; rowIndex < numRows; rowIndex++) { value.readFields(in); - tableContainer.putRow(null, key, null, value); + tableContainer.putRow(key, value); } } } finally { http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java index 1634f42..64474e6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HashTableLoader.java @@ -161,6 +161,7 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable } MapJoinTableContainer mapJoinTable = SmallTableCache.get(path); if (mapJoinTable == null) { + // TODO#: HERE? synchronized (path.toString().intern()) { mapJoinTable = SmallTableCache.get(path); if (mapJoinTable == null) { http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java index ff79110..a742458 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HashTableLoader.java @@ -206,9 +206,10 @@ public class HashTableLoader implements org.apache.hadoop.hive.ql.exec.HashTable LOG.info("Using tableContainer " + tableContainer.getClass().getSimpleName()); + tableContainer.setSerde(keyCtx, valCtx); while (kvReader.next()) { - tableContainer.putRow(keyCtx, (Writable)kvReader.getCurrentKey(), - valCtx, (Writable)kvReader.getCurrentValue()); + tableContainer.putRow( + (Writable)kvReader.getCurrentKey(), (Writable)kvReader.getCurrentValue()); } tableContainer.seal(); mapJoinTables[pos] = tableContainer; http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java index 09a1ffc..49ecdd1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTableLoader.java @@ -93,10 +93,10 @@ public class VectorMapJoinFastHashTableLoader implements org.apache.hadoop.hive. VectorMapJoinFastTableContainer vectorMapJoinFastTableContainer = new VectorMapJoinFastTableContainer(desc, hconf, keyCount); + vectorMapJoinFastTableContainer.setSerde(null, null); // No SerDes here. while (kvReader.next()) { - vectorMapJoinFastTableContainer.putRow( - null, (BytesWritable) kvReader.getCurrentKey(), - null, (BytesWritable) kvReader.getCurrentValue()); + vectorMapJoinFastTableContainer.putRow((BytesWritable)kvReader.getCurrentKey(), + (BytesWritable)kvReader.getCurrentValue()); } vectorMapJoinFastTableContainer.seal(); http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index bd4a595..3b73f7d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -48,17 +48,17 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai private static final Logger LOG = LoggerFactory.getLogger(HashTableLoader.class.getName()); - private MapJoinDesc desc; - private Configuration hconf; + private final MapJoinDesc desc; + private final Configuration hconf; - private float keyCountAdj; - private int threshold; - private float loadFactor; - private int wbSize; - private long keyCount; + private final float keyCountAdj; + private final int threshold; + private final float loadFactor; + private final int wbSize; + private final long keyCount; - private VectorMapJoinFastHashTable VectorMapJoinFastHashTable; + private final VectorMapJoinFastHashTable VectorMapJoinFastHashTable; public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, long keyCount) throws SerDeException { @@ -88,7 +88,7 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai @Override public VectorMapJoinHashTable vectorMapJoinHashTable() { - return (VectorMapJoinHashTable) VectorMapJoinFastHashTable; + return VectorMapJoinFastHashTable; } private VectorMapJoinFastHashTable createHashTable(int newThreshold) { @@ -174,9 +174,8 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai } @Override - public MapJoinKey putRow(MapJoinObjectSerDeContext keyContext, - Writable currentKey, MapJoinObjectSerDeContext valueContext, - Writable currentValue) throws SerDeException, HiveException, IOException { + public MapJoinKey putRow(Writable currentKey, Writable currentValue) + throws SerDeException, HiveException, IOException { // We are not using the key and value contexts, nor do we support a MapJoinKey. VectorMapJoinFastHashTable.putRow((BytesWritable) currentKey, (BytesWritable) currentValue); @@ -218,6 +217,13 @@ public class VectorMapJoinFastTableContainer implements VectorMapJoinTableContai return VectorMapJoinFastHashTable.size(); } + @Override + public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) + throws SerDeException { + // Do nothing in this case. + + } + /* @Override public com.esotericsoftware.kryo.io.Output getHybridBigTableSpillOutput(int partitionId) { http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/test/queries/clientpositive/empty_join.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/empty_join.q b/ql/src/test/queries/clientpositive/empty_join.q new file mode 100644 index 0000000..0c372a0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/empty_join.q @@ -0,0 +1,23 @@ +set hive.auto.convert.join=true; +set hive.mapjoin.hybridgrace.hashtable=false; + +DROP TABLE IF EXISTS test_1; +CREATE TABLE test_1 AS SELECT 1 AS id; + +DROP TABLE IF EXISTS test_2; +CREATE TABLE test_2 (id INT); + +DROP TABLE IF EXISTS test_3; +CREATE TABLE test_3 AS SELECT 1 AS id; + +explain +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id; + +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +; http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/test/results/clientpositive/empty_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/empty_join.q.out b/ql/src/test/results/clientpositive/empty_join.q.out new file mode 100644 index 0000000..3f8aedf --- /dev/null +++ b/ql/src/test/results/clientpositive/empty_join.q.out @@ -0,0 +1,142 @@ +PREHOOK: query: DROP TABLE IF EXISTS test_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_1 AS SELECT 1 AS id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 AS SELECT 1 AS id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: DROP TABLE IF EXISTS test_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_2 (id INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (id INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: DROP TABLE IF EXISTS test_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_3 AS SELECT 1 AS id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@test_3 +POSTHOOK: query: CREATE TABLE test_3 AS SELECT 1 AS id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_3 +PREHOOK: query: explain +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + t2 + Fetch Operator + limit: -1 + t3 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + t2 + TableScan + alias: t2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + HashTable Sink Operator + keys: + 0 id (type: int) + 1 id (type: int) + 2 id (type: int) + t3 + TableScan + alias: t3 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 id (type: int) + 1 id (type: int) + 2 id (type: int) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + Inner Join 0 to 2 + keys: + 0 id (type: int) + 1 id (type: int) + 2 id (type: int) + outputColumnNames: _col0, _col4, _col8 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col4 (type: int), _col8 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +PREHOOK: Input: default@test_2 +PREHOOK: Input: default@test_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +POSTHOOK: Input: default@test_2 +POSTHOOK: Input: default@test_3 +#### A masked pattern was here #### +1 NULL 1 http://git-wip-us.apache.org/repos/asf/hive/blob/eb4a1644/ql/src/test/results/clientpositive/tez/empty_join.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/empty_join.q.out b/ql/src/test/results/clientpositive/tez/empty_join.q.out new file mode 100644 index 0000000..ca351ec --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/empty_join.q.out @@ -0,0 +1,115 @@ +PREHOOK: query: DROP TABLE IF EXISTS test_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_1 AS SELECT 1 AS id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@test_1 +POSTHOOK: query: CREATE TABLE test_1 AS SELECT 1 AS id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_1 +PREHOOK: query: DROP TABLE IF EXISTS test_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_2 (id INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_2 +POSTHOOK: query: CREATE TABLE test_2 (id INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_2 +PREHOOK: query: DROP TABLE IF EXISTS test_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS test_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE test_3 AS SELECT 1 AS id +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@test_3 +POSTHOOK: query: CREATE TABLE test_3 AS SELECT 1 AS id +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_3 +PREHOOK: query: explain +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +POSTHOOK: type: QUERY +Plan not optimized by CBO. + +Vertex dependency in root stage +Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Map 1 + File Output Operator [FS_8] + compressed:false + Statistics:Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [SEL_7] + outputColumnNames:["_col0","_col1","_col2"] + Statistics:Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_16] + | condition map:[{"":"Left Outer Join0 to 1"},{"":"Inner Join 0 to 2"}] + | keys:{"Map 1":"id (type: int)","Map 2":"id (type: int)","Map 3":"id (type: int)"} + | outputColumnNames:["_col0","_col4","_col8"] + | Statistics:Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + |<-Map 2 [BROADCAST_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:id (type: int) + | Map-reduce partition columns:id (type: int) + | sort order:+ + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + | TableScan [TS_1] + | alias:t2 + | Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + |<-Map 3 [BROADCAST_EDGE] + | Reduce Output Operator [RS_5] + | key expressions:id (type: int) + | Map-reduce partition columns:id (type: int) + | sort order:+ + | Statistics:Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_2] + | alias:t3 + | Statistics:Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + |<-TableScan [TS_0] + alias:t1 + Statistics:Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + +PREHOOK: query: SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +PREHOOK: type: QUERY +PREHOOK: Input: default@test_1 +PREHOOK: Input: default@test_2 +PREHOOK: Input: default@test_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.id, t2.id, t3.id +FROM test_1 t1 +LEFT JOIN test_2 t2 ON t1.id = t2.id +INNER JOIN test_3 t3 ON t1.id = t3.id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_1 +POSTHOOK: Input: default@test_2 +POSTHOOK: Input: default@test_3 +#### A masked pattern was here #### +1 NULL 1
