HIVE-13682: EOFException with fast hashtable (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4533d21b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4533d21b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4533d21b Branch: refs/heads/master Commit: 4533d21b0be487e1f11fcc95578a2ba103e72a64 Parents: fbeee62 Author: Matt McCline <[email protected]> Authored: Sat May 14 20:44:27 2016 -0700 Committer: Matt McCline <[email protected]> Committed: Sat May 14 20:44:27 2016 -0700 ---------------------------------------------------------------------- .../fast/VectorMapJoinFastBytesHashMap.java | 12 +- .../VectorMapJoinFastBytesHashMultiSet.java | 10 +- .../fast/VectorMapJoinFastBytesHashSet.java | 5 +- .../fast/VectorMapJoinFastBytesHashTable.java | 14 +- .../fast/VectorMapJoinFastLongHashMap.java | 22 +- .../fast/VectorMapJoinFastLongHashMultiSet.java | 13 +- .../fast/VectorMapJoinFastLongHashSet.java | 16 +- .../fast/VectorMapJoinFastLongHashTable.java | 13 - .../fast/VectorMapJoinFastMultiKeyHashMap.java | 21 +- .../VectorMapJoinFastMultiKeyHashMultiSet.java | 25 +- .../fast/VectorMapJoinFastMultiKeyHashSet.java | 26 +- .../fast/VectorMapJoinFastStringHashMap.java | 4 +- .../VectorMapJoinFastStringHashMultiSet.java | 4 +- .../fast/VectorMapJoinFastStringHashSet.java | 4 +- .../fast/VectorMapJoinFastValueStore.java | 187 ++++- .../ql/exec/vector/RandomRowObjectSource.java | 388 ---------- .../ql/exec/vector/TestVectorRowObject.java | 34 +- .../hive/ql/exec/vector/TestVectorSerDeRow.java | 8 +- .../vector/mapjoin/fast/CheckFastHashTable.java | 721 +++++++++++++++++++ .../mapjoin/fast/CommonFastHashTable.java | 62 +- .../fast/TestVectorMapJoinFastBytesHashMap.java | 272 +++++++ .../TestVectorMapJoinFastBytesHashMultiSet.java | 253 +++++++ .../fast/TestVectorMapJoinFastBytesHashSet.java | 252 +++++++ .../fast/TestVectorMapJoinFastLongHashMap.java | 303 ++++---- .../TestVectorMapJoinFastLongHashMultiSet.java | 252 +++++++ 
.../fast/TestVectorMapJoinFastLongHashSet.java | 250 +++++++ .../TestVectorMapJoinFastMultiKeyHashMap.java | 231 ------ .../hive/serde2/fast/RandomRowObjectSource.java | 423 +++++++++++ .../fast/LazyBinaryDeserializeRead.java | 2 +- .../apache/hadoop/hive/serde2/VerifyFast.java | 123 ++-- .../hive/serde2/binarysortable/MyTestClass.java | 86 +++ .../binarysortable/TestBinarySortableFast.java | 384 +++++++--- .../hive/serde2/lazy/TestLazySimpleFast.java | 270 ++++--- .../serde2/lazybinary/TestLazyBinaryFast.java | 285 ++++++-- .../hadoop/hive/common/type/RandomTypeUtil.java | 95 +++ 35 files changed, 3852 insertions(+), 1218 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 0ff98bd..a4bc188 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* 
- * An single byte array value hash map optimized for vector map join. + * An bytes key hash map optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash map implementations. */ public abstract class VectorMapJoinFastBytesHashMap extends VectorMapJoinFastBytesHashTable @@ -37,6 +44,8 @@ public abstract class VectorMapJoinFastBytesHashMap private VectorMapJoinFastValueStore valueStore; + protected BytesWritable testValueBytesWritable; + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); @@ -56,7 +65,6 @@ public abstract class VectorMapJoinFastBytesHashMap slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); - keysAssigned++; } else { // Add another value. 
// LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5d8ed2d..aaf3497 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single byte array value hash multi-set optimized for vector map join. + * An bytes key hash multi-set optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash multi-set implementations. 
*/ public abstract class VectorMapJoinFastBytesHashMultiSet extends VectorMapJoinFastBytesHashTable @@ -51,7 +58,6 @@ public abstract class VectorMapJoinFastBytesHashMultiSet slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); - keysAssigned++; } else { // Add another value. // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 990a2e5..841183e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -27,7 +27,9 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; /* - * An single byte array value hash multi-set optimized for vector map join. + * An bytes key hash set optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash set implementations. 
*/ public abstract class VectorMapJoinFastBytesHashSet extends VectorMapJoinFastBytesHashTable @@ -50,7 +52,6 @@ public abstract class VectorMapJoinFastBytesHashSet slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Existence - keysAssigned++; } } http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 6b536f0..d6e107b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -42,8 +42,7 @@ public abstract class VectorMapJoinFastBytesHashTable protected VectorMapJoinFastKeyStore keyStore; - private BytesWritable testKeyBytesWritable; - private BytesWritable testValueBytesWritable; + protected BytesWritable testKeyBytesWritable; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { @@ -53,17 +52,6 @@ public abstract class VectorMapJoinFastBytesHashTable add(keyBytes, 0, keyLength, currentValue); } - @VisibleForTesting - public void putRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException { - if (testKeyBytesWritable == null) { - testKeyBytesWritable = new BytesWritable(); - testValueBytesWritable = new BytesWritable(); - } - testKeyBytesWritable.set(currentKey, 0, currentKey.length); - testValueBytesWritable.set(currentValue, 0, currentValue.length); - putRow(testKeyBytesWritable, testValueBytesWritable); - } - protected abstract void 
assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue); http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index 1384fc9..cd51d0d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -18,17 +18,22 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value map optimized for vector map join. + * An single LONG key hash map optimized for vector map join. 
*/ public class VectorMapJoinFastLongHashMap extends VectorMapJoinFastLongHashTable @@ -38,11 +43,26 @@ public class VectorMapJoinFastLongHashMap protected VectorMapJoinFastValueStore valueStore; + private BytesWritable testValueBytesWritable; + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + /* + * A Unit Test convenience method for putting key and value into the hash table using the + * actual types. + */ + @VisibleForTesting + public void testPutRow(long currentKey, byte[] currentValue) throws HiveException, IOException { + if (testValueBytesWritable == null) { + testValueBytesWritable = new BytesWritable(); + } + testValueBytesWritable.set(currentValue, 0, currentValue.length); + add(currentKey, testValueBytesWritable); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 94bf706..032233a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -31,8 +31,10 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value multi-set optimized for vector map join. + * An single LONG key hash multi-set optimized for vector map join. 
*/ public class VectorMapJoinFastLongHashMultiSet extends VectorMapJoinFastLongHashTable @@ -45,6 +47,15 @@ public class VectorMapJoinFastLongHashMultiSet return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. + */ + @VisibleForTesting + public void testPutRow(long currentKey) throws HiveException, IOException { + add(currentKey, null); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 2cbc548..21701d4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -18,18 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value 
multi-set optimized for vector map join. + * An single LONG key hash set optimized for vector map join. */ public class VectorMapJoinFastLongHashSet extends VectorMapJoinFastLongHashTable @@ -42,6 +47,15 @@ public class VectorMapJoinFastLongHashSet return new VectorMapJoinFastHashSet.HashSetResult(); } + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. + */ + @VisibleForTesting + public void testPutRow(long currentKey) throws HiveException, IOException { + add(currentKey, null); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index f37f056..0a502e0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -58,8 +58,6 @@ public abstract class VectorMapJoinFastLongHashTable private long min; private long max; - private BytesWritable testValueBytesWritable; - @Override public boolean useMinMax() { return useMinMax; @@ -90,17 +88,6 @@ public abstract class VectorMapJoinFastLongHashTable add(key, currentValue); } - - @VisibleForTesting - public void putRow(long currentKey, byte[] currentValue) throws HiveException, IOException { - if (testValueBytesWritable == null) { - testValueBytesWritable = new BytesWritable(); - } - testValueBytesWritable.set(currentValue, 0, currentValue.length); - add(currentKey, testValueBytesWritable); - } - - protected abstract void 
assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); public void add(long key, BytesWritable currentValue) { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 9a9fb8d..cee3b3b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -18,17 +18,34 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + import com.google.common.annotations.VisibleForTesting; /* * An multi-key value hash map optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). */ public class VectorMapJoinFastMultiKeyHashMap extends VectorMapJoinFastBytesHashMap { + /* + * A Unit Test convenience method for putting key and value into the hash table using the + * actual types. 
+ */ @VisibleForTesting - public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) { - this(false, initialCapacity, loadFactor, wbSize); + public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + testValueBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + testValueBytesWritable.set(currentValue, 0, currentValue.length); + putRow(testKeyBytesWritable, testValueBytesWritable); } public VectorMapJoinFastMultiKeyHashMap( http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index a8744a5..ff82ac4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -18,15 +18,38 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.annotations.VisibleForTesting; + /* - * An multi-key value hash multi-set optimized for vector map join. + * An multi-key hash multi-set optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). 
*/ public class VectorMapJoinFastMultiKeyHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. + */ + @VisibleForTesting + public void testPutRow(byte[] currentKey) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + putRow(testKeyBytesWritable, null); + } + public VectorMapJoinFastMultiKeyHashMultiSet( boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize) { super(initialCapacity, loadFactor, writeBuffersSize); } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index a8048e5..de0666d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -18,15 +18,39 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.annotations.VisibleForTesting; + /* - * An multi-key value hash set optimized for vector map join. + * An multi-key hash set optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). 
*/ public class VectorMapJoinFastMultiKeyHashSet extends VectorMapJoinFastBytesHashSet { + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. + */ + @VisibleForTesting + public void testPutRow(byte[] currentKey) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + putRow(testKeyBytesWritable, null); + } + public VectorMapJoinFastMultiKeyHashSet( boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize) { super(initialCapacity, loadFactor, writeBuffersSize); } + + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index 6f181b2..35af1d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash map optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. 
*/ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 9653b71..36120b7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash multi-set optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. 
*/ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 6419a0b..2ed6ab3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -24,7 +24,9 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash set optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. 
*/ public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSet { http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java index 570a747..f96e32b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java @@ -23,7 +23,9 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; -import org.apache.hadoop.hive.serde2.WriteBuffers.Position;; +import org.apache.hadoop.hive.serde2.WriteBuffers.Position; + +import com.google.common.base.Preconditions; // Supports random access. @@ -142,7 +144,6 @@ public class VectorMapJoinFastValueStore { } public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) { - // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord)); this.valueStore = valueStore; this.valueRefWord = valueRefWord; @@ -217,6 +218,10 @@ public class VectorMapJoinFastValueStore { if (readIndex == 0) { /* + * Positioned to first. + */ + + /* * Extract information from reference word from slot table. */ absoluteValueOffset = @@ -226,19 +231,32 @@ public class VectorMapJoinFastValueStore { valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos); if (isSingleRow) { + /* + * One element. 
+ */ isNextEof = true; valueLength = (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); if (!isValueLengthSmall) { - // And, if current value is big we must read it. + + // {Big Value Len} {Big Value Bytes} valueLength = valueStore.writeBuffers.readVInt(readPos); + } else { + + // {Small Value Bytes} + // (use small length from valueWordRef) } } else { + /* + * First of Multiple elements. + */ isNextEof = false; - // 2nd and beyond records have a relative offset word at the beginning. + /* + * Read the relative offset word at the beginning 2nd and beyond records. + */ long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos); long relativeOffset = @@ -246,25 +264,31 @@ public class VectorMapJoinFastValueStore { nextAbsoluteValueOffset = absoluteValueOffset - relativeOffset; + valueLength = + (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); + boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); + + /* + * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes} + * Since this is the first record, the valueRefWord directs us. + */ + if (!isValueLengthSmall) { + valueLength = valueStore.writeBuffers.readVInt(readPos); + } + isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); isNextValueLengthSmall = ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); - } - valueLength = - (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); - boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); - if (!isValueLengthSmall) { - // And, if current value is big we must read it. - valueLength = valueStore.writeBuffers.readVInt(readPos); - } - - // 2nd and beyond have the next value's small length in the current record. 
- if (isNextValueLengthSmall) { - nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); - } else { - nextSmallValueLength = -1; - } + /* + * Optionally, the next value's small length could be a 2nd integer... + */ + if (isNextValueLengthSmall) { + nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); + } else { + nextSmallValueLength = -1; + } + } } else { if (isNextEof) { @@ -277,24 +301,37 @@ public class VectorMapJoinFastValueStore { valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos); if (isNextLast) { + /* + * No realativeOffsetWord in last value. (This was the first value written.) + */ isNextEof = true; if (isNextValueLengthSmall) { + + // {Small Value Bytes} valueLength = nextSmallValueLength; } else { - valueLength = (int) valueStore.writeBuffers.readVLong(readPos); + + // {Big Value Len} {Big Value Bytes} + valueLength = valueStore.writeBuffers.readVInt(readPos); } } else { + /* + * {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes} + * + * 2nd and beyond records have a relative offset word at the beginning. + */ isNextEof = false; - // 2nd and beyond records have a relative offset word at the beginning. long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos); - // Read current value's big length now, if necessary. + /* + * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes} + */ if (isNextValueLengthSmall) { valueLength = nextSmallValueLength; } else { - valueLength = (int) valueStore.writeBuffers.readVLong(readPos); + valueLength = valueStore.writeBuffers.readVInt(readPos); } long relativeOffset = @@ -305,9 +342,13 @@ public class VectorMapJoinFastValueStore { isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); isNextValueLengthSmall = ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); + + /* + * Optionally, the next value's small length could be a 2nd integer in the value's + * information. 
+ */ if (isNextValueLengthSmall) { - // TODO: Write readVInt - nextSmallValueLength = (int) valueStore.writeBuffers.readVLong(readPos); + nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); } else { nextSmallValueLength = -1; } @@ -396,6 +437,51 @@ public class VectorMapJoinFastValueStore { private static final long flagOnMask = 1L << bitShift; } + private static String valueRefWordToString(long valueRef) { + StringBuilder sb = new StringBuilder(); + + sb.append(Long.toHexString(valueRef)); + sb.append(", "); + if ((valueRef & IsInvalidFlag.flagOnMask) != 0) { + sb.append("(Invalid optimized hash table reference), "); + } + /* + * Extract information. + */ + long absoluteValueOffset = + (valueRef & AbsoluteValueOffset.bitMask); + int smallValueLength = + (int) ((valueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift); + boolean isValueLengthSmall = (smallValueLength != SmallValueLength.allBitsOn); + int cappedCount = + (int) ((valueRef & CappedCount.bitMask) >> CappedCount.bitShift); + boolean isValueLast = + ((valueRef & IsLastFlag.flagOnMask) != 0); + + sb.append("absoluteValueOffset "); + sb.append(absoluteValueOffset); + sb.append(" ("); + sb.append(Long.toHexString(absoluteValueOffset)); + sb.append("), "); + + if (isValueLengthSmall) { + sb.append("smallValueLength "); + sb.append(smallValueLength); + sb.append(", "); + } else { + sb.append("isValueLengthSmall = false, "); + } + + sb.append("cappedCount "); + sb.append(cappedCount); + sb.append(", "); + + sb.append("isValueLast "); + sb.append(isValueLast); + + return sb.toString(); + } + /** * Relative Offset Word stored at the beginning of all but the last value that has a * relative offset and 2 flags. 
@@ -431,6 +517,33 @@ public class VectorMapJoinFastValueStore { private static final long bitMask = allBitsOn << bitShift; } + private static String relativeOffsetWordToString(long relativeOffsetWord) { + StringBuilder sb = new StringBuilder(); + + sb.append(Long.toHexString(relativeOffsetWord)); + sb.append(", "); + + long nextRelativeOffset = + (relativeOffsetWord & NextRelativeValueOffset.bitMask) >> NextRelativeValueOffset.bitShift; + sb.append("nextRelativeOffset "); + sb.append(nextRelativeOffset); + sb.append(" ("); + sb.append(Long.toHexString(nextRelativeOffset)); + sb.append("), "); + + boolean isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); + sb.append("isNextLast "); + sb.append(isNextLast); + sb.append(", "); + + boolean isNextValueLengthSmall = + ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); + sb.append("isNextValueLengthSmall "); + sb.append(isNextValueLengthSmall); + + return sb.toString(); + } + public long addFirst(byte[] valueBytes, int valueStart, int valueLength) { // First value is written without: next relative offset, next value length, is next value last @@ -473,8 +586,6 @@ public class VectorMapJoinFastValueStore { valueRefWord |= SmallValueLength.allBitsOnBitShifted; } - // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord)); - // The lower bits are the absolute value offset. 
valueRefWord |= newAbsoluteOffset; @@ -499,8 +610,6 @@ public class VectorMapJoinFastValueStore { boolean isOldValueLast = ((oldValueRef & IsLastFlag.flagOnMask) != 0); - // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef)); - /* * Write information about the old value (which becomes our next) at the beginning * of our new value. @@ -523,12 +632,6 @@ public class VectorMapJoinFastValueStore { writeBuffers.writeVLong(relativeOffsetWord); - // When the next value is small it was not recorded with the old (i.e. next) value and we - // have to remember it. - if (isOldValueLengthSmall) { - writeBuffers.writeVInt(oldSmallValueLength); - } - // Now, we have written all information about the next value, work on the *new* value. long newValueRef = ((long) newCappedCount) << CappedCount.bitShift; @@ -536,18 +639,28 @@ public class VectorMapJoinFastValueStore { if (!isNewValueSmall) { // Use magic value to indicating we are writing the big value length. newValueRef |= ((long) SmallValueLength.allBitsOn << SmallValueLength.bitShift); + Preconditions.checkState( + (int) ((newValueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift) == + SmallValueLength.allBitsOn); writeBuffers.writeVInt(valueLength); + } else { // Caller must remember small value length. newValueRef |= ((long) valueLength) << SmallValueLength.bitShift; } + + // When the next value is small it was not recorded with the old (i.e. next) value and we + // have to remember it. + if (isOldValueLengthSmall) { + + writeBuffers.writeVInt(oldSmallValueLength); + } + writeBuffers.write(valueBytes, valueStart, valueLength); // The lower bits are the absolute value offset. 
newValueRef |= newAbsoluteOffset; - // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef)); - return newValueRef; } http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java deleted file mode 100644 index 2d4baa0..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -import junit.framework.TestCase; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.common.type.RandomTypeUtil; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; -import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hive.common.util.DateUtils; - -/** - * Generate object inspector and random row object[]. 
- */ -public class RandomRowObjectSource { - - private Random r; - - private int columnCount; - - private List<String> typeNames; - - private PrimitiveCategory[] primitiveCategories; - - private PrimitiveTypeInfo[] primitiveTypeInfos; - - private List<ObjectInspector> primitiveObjectInspectorList; - - private StructObjectInspector rowStructObjectInspector; - - public List<String> typeNames() { - return typeNames; - } - - public PrimitiveCategory[] primitiveCategories() { - return primitiveCategories; - } - - public PrimitiveTypeInfo[] primitiveTypeInfos() { - return primitiveTypeInfos; - } - - public StructObjectInspector rowStructObjectInspector() { - return rowStructObjectInspector; - } - - public void init(Random r) { - this.r = r; - chooseSchema(); - } - - private static String[] possibleHiveTypeNames = { - "boolean", - "tinyint", - "smallint", - "int", - "bigint", - "date", - "float", - "double", - "string", - "char", - "varchar", - "binary", - "date", - "timestamp", - serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, - serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, - "decimal" - }; - - private void chooseSchema() { - columnCount = 1 + r.nextInt(20); - typeNames = new ArrayList<String>(columnCount); - primitiveCategories = new PrimitiveCategory[columnCount]; - primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; - primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount); - List<String> columnNames = new ArrayList<String>(columnCount); - for (int c = 0; c < columnCount; c++) { - columnNames.add(String.format("col%d", c)); - int typeNum = r.nextInt(possibleHiveTypeNames.length); - String typeName = possibleHiveTypeNames[typeNum]; - if (typeName.equals("char")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("char(%d)", maxLength); - } else if (typeName.equals("varchar")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("varchar(%d)", maxLength); - } else if (typeName.equals("decimal")) { - typeName = 
String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); - } - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); - primitiveTypeInfos[c] = primitiveTypeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - primitiveCategories[c] = primitiveCategory; - primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); - typeNames.add(typeName); - } - rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); - } - - public Object[][] randomRows(int n) { - Object[][] result = new Object[n][]; - for (int i = 0; i < n; i++) { - result[i] = randomRow(); - } - return result; - } - - public Object[] randomRow() { - Object row[] = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - Object object = randomObject(c); - if (object == null) { - throw new Error("Unexpected null for column " + c); - } - row[c] = getWritableObject(c, object); - if (row[c] == null) { - throw new Error("Unexpected null for writable for column " + c); - } - } - return row; - } - - public Object getWritableObject(int column, Object object) { - ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - switch (primitiveCategory) { - case BOOLEAN: - return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object); - case BYTE: - return ((WritableByteObjectInspector) objectInspector).create((byte) object); - case SHORT: - return ((WritableShortObjectInspector) objectInspector).create((short) object); - case INT: - return ((WritableIntObjectInspector) objectInspector).create((int) object); - case LONG: - return ((WritableLongObjectInspector) 
objectInspector).create((long) object); - case DATE: - return ((WritableDateObjectInspector) objectInspector).create((Date) object); - case FLOAT: - return ((WritableFloatObjectInspector) objectInspector).create((float) object); - case DOUBLE: - return ((WritableDoubleObjectInspector) objectInspector).create((double) object); - case STRING: - return ((WritableStringObjectInspector) objectInspector).create((String) object); - case CHAR: - { - WritableHiveCharObjectInspector writableCharObjectInspector = - new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); - return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); - } - case VARCHAR: - { - WritableHiveVarcharObjectInspector writableVarcharObjectInspector = - new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); - return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); - } - case BINARY: - return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); - case TIMESTAMP: - return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); - case INTERVAL_YEAR_MONTH: - return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); - case INTERVAL_DAY_TIME: - return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); - case DECIMAL: - { - WritableHiveDecimalObjectInspector writableDecimalObjectInspector = - new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); - return writableDecimalObjectInspector.create(HiveDecimal.ZERO); - } - default: - throw new Error("Unknown primitive category " + primitiveCategory); - } - } - - public Object randomObject(int column) { - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - switch (primitiveCategory) { - case BOOLEAN: - 
return Boolean.valueOf(r.nextInt(1) == 1); - case BYTE: - return Byte.valueOf((byte) r.nextInt()); - case SHORT: - return Short.valueOf((short) r.nextInt()); - case INT: - return Integer.valueOf(r.nextInt()); - case LONG: - return Long.valueOf(r.nextLong()); - case DATE: - return getRandDate(r); - case FLOAT: - return Float.valueOf(r.nextFloat() * 10 - 5); - case DOUBLE: - return Double.valueOf(r.nextDouble() * 10 - 5); - case STRING: - return getRandString(r); - case CHAR: - return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); - case VARCHAR: - return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); - case BINARY: - return getRandBinary(r, 1 + r.nextInt(100)); - case TIMESTAMP: - return RandomTypeUtil.getRandTimestamp(r); - case INTERVAL_YEAR_MONTH: - return getRandIntervalYearMonth(r); - case INTERVAL_DAY_TIME: - return getRandIntervalDayTime(r); - case DECIMAL: - return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); - default: - throw new Error("Unknown primitive category " + primitiveCategory); - } - } - - public static String getRandString(Random r) { - return getRandString(r, null, r.nextInt(10)); - } - - public static String getRandString(Random r, String characters, int length) { - if (characters == null) { - characters = "ABCDEFGHIJKLMabcdefghijklm"; - } - StringBuilder sb = new StringBuilder(); - sb.append(""); - for (int i = 0; i < length; i++) { - if (characters == null) { - sb.append((char) (r.nextInt(128))); - } else { - sb.append(characters.charAt(r.nextInt(characters.length()))); - } - } - return sb.toString(); - } - - public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { - int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); - String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); - HiveChar hiveChar = new HiveChar(randomString, maxLength); - return hiveChar; - } - - public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { - int 
maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); - String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); - HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); - return hiveVarchar; - } - - public static byte[] getRandBinary(Random r, int len){ - byte[] bytes = new byte[len]; - for (int j = 0; j < len; j++){ - bytes[j] = Byte.valueOf((byte) r.nextInt()); - } - return bytes; - } - - private static final String DECIMAL_CHARS = "0123456789"; - - public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { - while (true) { - StringBuilder sb = new StringBuilder(); - int precision = 1 + r.nextInt(18); - int scale = 0 + r.nextInt(precision + 1); - - int integerDigits = precision - scale; - - if (r.nextBoolean()) { - sb.append("-"); - } - - if (integerDigits == 0) { - sb.append("0"); - } else { - sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); - } - if (scale != 0) { - sb.append("."); - sb.append(getRandString(r, DECIMAL_CHARS, scale)); - } - - HiveDecimal bd = HiveDecimal.create(sb.toString()); - if (bd.scale() > bd.precision()) { - // Sometimes weird decimals are produced? - continue; - } - - return bd; - } - } - - public static Date getRandDate(Random r) { - String dateStr = String.format("%d-%02d-%02d", - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(1 + r.nextInt(12)), // month - Integer.valueOf(1 + r.nextInt(28))); // day - Date dateVal = Date.valueOf(dateStr); - return dateVal; - } - - public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { - String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; - String intervalYearMonthStr = String.format("%s%d-%d", - yearMonthSignStr, - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(0 + r.nextInt(12))); // month - HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); - TestCase.assertTrue(intervalYearMonthVal != null); - return intervalYearMonthVal; - } - - public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { - String optionalNanos = ""; - if (r.nextInt(2) == 1) { - optionalNanos = String.format(".%09d", - Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); - } - String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; - String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", - yearMonthSignStr, - Integer.valueOf(1 + r.nextInt(28)), // day - Integer.valueOf(0 + r.nextInt(24)), // hour - Integer.valueOf(0 + r.nextInt(60)), // minute - Integer.valueOf(0 + r.nextInt(60)), // second - optionalNanos); - HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); - TestCase.assertTrue(intervalDayTimeVal != null); - return intervalDayTimeVal; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index 959a2af..c55d951 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -19,13 +19,10 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.Random; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import junit.framework.TestCase; @@ -50,7 +47,7 @@ public class TestVectorRowObject extends TestCase { } } - void testVectorRowObject(int caseNum, Random r) throws HiveException { + void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException { String[] emptyScratchTypeNames = new String[0]; @@ -74,6 +71,9 @@ public class TestVectorRowObject extends TestCase { vectorExtractRow.init(source.typeNames()); Object[][] randomRows = source.randomRows(100000); + if (sort) { + source.sort(randomRows); + } int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; @@ -93,14 +93,22 @@ public class TestVectorRowObject extends TestCase { public void testVectorRowObject() throws Throwable { - try { - Random r = new Random(5678); - for (int c = 0; c < 10; c++) { - testVectorRowObject(c, r); + try { + Random r = new Random(5678); + + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testVectorRowObject(caseNum, false, r); + caseNum++; + } + + // Try one sorted. 
+ testVectorRowObject(caseNum, true, r); + caseNum++; + + } catch (Throwable e) { + e.printStackTrace(); + throw e; } - } catch (Throwable e) { - e.printStackTrace(); - throw e; - } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index e37d2bf..da69ee3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -22,8 +22,6 @@ import java.io.IOException; import java.sql.Date; import java.sql.Timestamp; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.Properties; import java.util.Random; @@ -50,6 +48,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import 
org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hadoop.io.BooleanWritable; @@ -86,7 +84,7 @@ public class TestVectorSerDeRow extends TestCase { LAZY_SIMPLE } - void deserializeAndVerify(Output output, DeserializeRead deserializeRead, + void deserializeAndVerify(Output output, DeserializeRead deserializeRead, RandomRowObjectSource source, Object[] expectedRow) throws HiveException, IOException { deserializeRead.set(output.getData(), 0, output.getLength()); @@ -523,7 +521,7 @@ public class TestVectorSerDeRow extends TestCase { // Set the configuration parameters tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - + tbl.setProperty("columns", fieldNames); tbl.setProperty("columns.types", fieldTypes); http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java new file mode 100644 index 0000000..3a23584 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.TreeMap; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.WritableComparator; + +import com.google.common.base.Preconditions; + +import static org.junit.Assert.*; + +public class CheckFastHashTable { + + public static boolean findMatch(byte[] valueBytes, List<byte[]> actualValues, int actualCount, boolean[] taken) { + for (int i = 0; i < actualCount; i++) { + if (!taken[i]) { + byte[] actualBytes = actualValues.get(i); + if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) { + taken[i] = true; + return true; + } + } + } + return false; + } + + public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult, + List<byte[]> values) { + + int valueCount = values.size(); + + WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); + + // Read 
through all values. + List<byte[]> actualValues = new ArrayList<byte[]>(); + while (true) { + byte[] bytes = ref.getBytes(); + int offset = (int) ref.getOffset(); + int length = ref.getLength(); + + if (length == 0) { + actualValues.add(new byte[0]); + } else { + actualValues.add(Arrays.copyOfRange(bytes, offset, offset + length)); + } + ref = hashMapResult.next(); + if (ref == null) { + break; + } + } + + int actualCount = actualValues.size(); + + if (valueCount != actualCount) { + TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount); + } + + boolean[] taken = new boolean[actualCount]; + + for (int i = 0; i < actualCount; i++) { + byte[] valueBytes = values.get(i); + + if (!findMatch(valueBytes, actualValues, actualCount, taken)) { + List<Integer> availableLengths = new ArrayList<Integer>(); + for (int a = 0; a < actualCount; a++) { + if (!taken[a]) { + availableLengths.add(actualValues.get(a).length); + } + } + TestCase.fail("No match for actual value (valueBytes length " + valueBytes.length + + ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)"); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashMap + */ + public static class FastLongHashMapElement { + private long key; + private List<byte[]> values; + + public FastLongHashMapElement(long key, byte[] firstValue) { + this.key = key; + values = new ArrayList<byte[]>(); + values.add(firstValue); + } + + public long getKey() { + return key; + } + + public int getValueCount() { + return values.size(); + } + + public List<byte[]> getValues() { + return values; + } + + public void addValue(byte[] value) { + values.add(value); + } + } + + /* + * Verify table for Key: Long x Hash Table: HashMap + */ + public static class VerifyFastLongHashMap { + + private int count; + + private FastLongHashMapElement[] array; + + private HashMap<Long, Integer> keyValueMap; + + public VerifyFastLongHashMap() { + count = 0; + array = new 
FastLongHashMapElement[50]; + keyValueMap = new HashMap<Long, Integer>(); + } + + public int getCount() { + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key, byte[] value) { + if (keyValueMap.containsKey(key)) { + int index = keyValueMap.get(key); + array[index].addValue(value); + } else { + if (count >= array.length) { + // Grow. + FastLongHashMapElement[] newArray = new FastLongHashMapElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashMapElement(key, value); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].addValue(value); + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public List<byte[]> getValues(int index) { + return array[index].getValues(); + } + + public void verify(VectorMapJoinFastLongHashMap map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashMapElement element = array[index]; + long key = element.getKey(); + List<byte[]> values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashMap + */ + public static class FastBytesHashMapElement { + private byte[] key; + private List<byte[]> values; + + public FastBytesHashMapElement(byte[] key, byte[] firstValue) { + this.key = key; + values = new ArrayList<byte[]>(); + values.add(firstValue); + } + + public byte[] 
getKey() { + return key; + } + + public int getValueCount() { + return values.size(); + } + + public List<byte[]> getValues() { + return values; + } + + public void addValue(byte[] value) { + values.add(value); + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashMap + */ + public static class VerifyFastBytesHashMap { + + private int count; + + private FastBytesHashMapElement[] array; + + private TreeMap<BytesWritable, Integer> keyValueMap; + + public VerifyFastBytesHashMap() { + count = 0; + array = new FastBytesHashMapElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap<BytesWritable, Integer>(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key, byte[] value) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + int index = keyValueMap.get(keyBytesWritable); + array[index].addValue(value); + } else { + if (count >= array.length) { + // Grow. 
+ FastBytesHashMapElement[] newArray = new FastBytesHashMapElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashMapElement(key, value); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].addValue(value); + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public List<byte[]> getValues(int index) { + return array[index].getValues(); + } + + public void verify(VectorMapJoinFastBytesHashMap map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List<byte[]> values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashMultiSet + */ + public static class FastLongHashMultiSetElement { + private long key; + private int multiSetCount; + + public FastLongHashMultiSetElement(long key) { + this.key = key; + multiSetCount = 1; + } + + public long getKey() { + return key; + } + + public int getMultiSetCount() { + return multiSetCount; + } + + public void incrementMultiSetCount() { + multiSetCount++; + } + } + + /* + * Verify table for Key: Long x Hash Table: HashMultiSet + */ + public static class VerifyFastLongHashMultiSet { + + private int count; + + private FastLongHashMultiSetElement[] array; + + private HashMap<Long, Integer> keyValueMap; + + public 
VerifyFastLongHashMultiSet() { + count = 0; + array = new FastLongHashMultiSetElement[50]; + keyValueMap = new HashMap<Long, Integer>(); + } + + public int getCount() { + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key) { + if (keyValueMap.containsKey(key)) { + int index = keyValueMap.get(key); + array[index].incrementMultiSetCount(); + } else { + if (count >= array.length) { + // Grow. + FastLongHashMultiSetElement[] newArray = new FastLongHashMultiSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashMultiSetElement(key); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].incrementMultiSetCount(); + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public int getMultiSetCount(int index) { + return array[index].getMultiSetCount(); + } + + public void verify(VectorMapJoinFastLongHashMultiSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashMultiSetElement element = array[index]; + long key = element.getKey(); + int multiSetCount = element.getMultiSetCount(); + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, hashMultiSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + assertEquals(hashMultiSetResult.count(), multiSetCount); + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashMultiSet + */ + public static class FastBytesHashMultiSetElement { + private byte[] key; + private int multiSetCount; + + public 
FastBytesHashMultiSetElement(byte[] key) { + this.key = key; + multiSetCount = 1; + } + + public byte[] getKey() { + return key; + } + + public int getMultiSetCount() { + return multiSetCount; + } + + public void incrementMultiSetCount() { + multiSetCount++; + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashMultiSet + */ + public static class VerifyFastBytesHashMultiSet { + + private int count; + + private FastBytesHashMultiSetElement[] array; + + private TreeMap<BytesWritable, Integer> keyValueMap; + + public VerifyFastBytesHashMultiSet() { + count = 0; + array = new FastBytesHashMultiSetElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap<BytesWritable, Integer>(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + int index = keyValueMap.get(keyBytesWritable); + array[index].incrementMultiSetCount(); + } else { + if (count >= array.length) { + // Grow. 
+ FastBytesHashMultiSetElement[] newArray = new FastBytesHashMultiSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashMultiSetElement(key); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].incrementMultiSetCount(); + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public int getMultiSetCount(int index) { + return array[index].getMultiSetCount(); + } + + public void verify(VectorMapJoinFastBytesHashMultiSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashMultiSetElement element = array[index]; + byte[] key = element.getKey(); + int multiSetCount = element.getMultiSetCount(); + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashMultiSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + assertEquals(hashMultiSetResult.count(), multiSetCount); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashSet + */ + public static class FastLongHashSetElement { + private long key; + + public FastLongHashSetElement(long key) { + this.key = key; + } + + public long getKey() { + return key; + } + } + + /* + * Verify table for Key: Long x Hash Table: HashSet + */ + public static class VerifyFastLongHashSet { + + private int count; + + private FastLongHashSetElement[] array; + + private HashMap<Long, Integer> keyValueMap; + + public VerifyFastLongHashSet() { + count = 0; + array = new FastLongHashSetElement[50]; + keyValueMap = new HashMap<Long, Integer>(); + } + + public int getCount() 
{ + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key) { + if (keyValueMap.containsKey(key)) { + // Already exists. + } else { + if (count >= array.length) { + // Grow. + FastLongHashSetElement[] newArray = new FastLongHashSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashSetElement(key); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + + // Already exists. + + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public void verify(VectorMapJoinFastLongHashSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashSetElement element = array[index]; + long key = element.getKey(); + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, hashSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashSet + */ + public static class FastBytesHashSetElement { + private byte[] key; + + public FastBytesHashSetElement(byte[] key) { + this.key = key; + } + + public byte[] getKey() { + return key; + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashSet + */ + public static class VerifyFastBytesHashSet { + + private int count; + + private FastBytesHashSetElement[] array; + + private TreeMap<BytesWritable, Integer> keyValueMap; + + public VerifyFastBytesHashSet() { + count = 0; + array = new FastBytesHashSetElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. 
+ keyValueMap = new TreeMap<BytesWritable, Integer>(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + // Already exists. + } else { + if (count >= array.length) { + // Grow. + FastBytesHashSetElement[] newArray = new FastBytesHashSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashSetElement(key); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + + // Already exists. + + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public void verify(VectorMapJoinFastBytesHashSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashSetElement element = array[index]; + byte[] key = element.getKey(); + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java index c2375e0..90e8f33 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java @@ -18,16 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import java.util.Random; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; -import org.apache.hadoop.hive.serde2.WriteBuffers; - -import static org.junit.Assert.*; - public class CommonFastHashTable { protected static final float LOAD_FACTOR = 0.75f; @@ -39,6 +31,10 @@ public class CommonFastHashTable { protected static final int LARGE_CAPACITY = 8388608; protected static Random random; + protected static int MAX_KEY_LENGTH = 100; + + protected static int MAX_VALUE_LENGTH = 1000; + public static int generateLargeCount() { int count = 0; if (random.nextInt(100) != 0) { @@ -75,54 +71,4 @@ public class CommonFastHashTable { } return count; } - public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult, - RandomByteArrayStream randomByteArrayStream ) { - - List<byte[]> resultBytes = new ArrayList<byte[]>(); - int count = 0; - if (hashMapResult.hasRows()) { - WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); - while (ref != null) { - count++; - byte[] bytes = ref.getBytes(); - int offset = (int) ref.getOffset(); - int length = ref.getLength(); - resultBytes.add(Arrays.copyOfRange(bytes, offset, offset + length)); - ref = hashMapResult.next(); - } - } else { - assertTrue(hashMapResult.isEof()); - } - if (randomByteArrayStream.size() != count) { - assertTrue(false); - } - - for (int i = 0; i < count; ++i) { - byte[] bytes = resultBytes.get(i); - if (!randomByteArrayStream.contains(bytes)) { - assertTrue(false); - } - } - } - - public static void 
verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult, - byte[] valueBytes ) { - - assertTrue(hashMapResult.hasRows()); - WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); - byte[] bytes = ref.getBytes(); - int offset = (int) ref.getOffset(); - int length = ref.getLength(); - assertTrue(valueBytes.length == length); - boolean match = true; // Assume - for (int j = 0; j < length; j++) { - if (valueBytes[j] != bytes[offset + j]) { - match = false; - break; - } - } - if (!match) { - assertTrue(false); - } - } } \ No newline at end of file
