http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java new file mode 100644 index 0000000..bbfa65f --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java @@ -0,0 +1,272 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * Tests for the multi-key value hash map optimized for vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(82733); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(29383); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (verifyTable.contains(key)) { + // Unique keys for this test: skip a duplicate rather than ending the loop. + continue; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(1002); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key1, value); + verifyTable.add(key1, value); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key2, 0, key2.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + + map.testPutRow(key2, value); + verifyTable.add(key2, value); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashMapResult = 
map.createHashMapResult(); + joinResult = map.lookup(key3, 0, key3.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(200001); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(99221); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(21111); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +}
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java new file mode 100644 index 0000000..449a8b2 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java @@ -0,0 +1,253 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class TestVectorMapJoinFastBytesHashMultiSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(5255); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(2374); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (verifyTable.contains(key)) { + // Unique keys for this test: skip a duplicate rather than ending the loop. 
+ continue; + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(98222); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashMultiSetResult = map.createHashMultiSetResult(); + joinResult = map.contains(key3, 0, key3.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(9024); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(2933); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMultiSet map, VerifyFastBytesHashMultiSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(5445); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(5637); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java new file mode 100644 index 0000000..ef7c91c --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(81104); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. 
+ map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(1120); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (verifyTable.contains(key)) { + // Unique keys for this test: skip a duplicate rather than ending the loop. + continue; + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(2293); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashSetResult = map.createHashSetResult(); + joinResult = map.contains(key3, 0, key3.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(219); + + // Make sure the map does not expand; should be able to find space. 
+ VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(773); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashSet map, VerifyFastBytesHashSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(8462); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java index a45275b..e8bbee3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java @@ -18,11 +18,14 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; import java.util.Random; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.junit.Test; @@ -31,91 +34,141 @@ import static org.junit.Assert.*; public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { @Test - public void testPutGetOne() throws Exception { - random = new Random(47496); + public void testOneKey() throws Exception { + random = new Random(33221); VectorMapJoinFastLongHashMap map = - new VectorMapJoinFastLongHashMap(false, false, 
HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(0)); - - key = randomLongKeyStream.next(); - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(1)); + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); } @Test - public void testPutGetMultiple() throws Exception { - random = new Random(2990); + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, value); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); - // Same key, multiple values. 
- for (int i = 0; i < 3; ++i) { - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream); + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } + verifyTable.verify(map); } @Test public void testGetNonExistent() throws Exception { - random = new Random(16916); + random = new Random(450); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); + long key1 = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); - key += 1; - map.putRow(key, value); + map.testPutRow(key1, value); + verifyTable.add(key1, value); + verifyTable.verify(map); - key += 1; + long key2 = key1 + 1; VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); + JoinUtil.JoinResult joinResult = map.lookup(key2, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + + map.testPutRow(key2, value); + verifyTable.add(key2, value); + verifyTable.verify(map); + + long key3 = key2 + 1; + hashMapResult = map.createHashMapResult(); + joinResult = map.lookup(key3, hashMapResult); assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); assertTrue(!hashMapResult.hasRows()); } @Test - public void testPutWithFullMap() throws Exception { - random = 
new Random(26078); + public void testFullMap() throws Exception { + random = new Random(93440); // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - for (int i = 0; i < CAPACITY; ++i) { - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } - for (int i = 0; i < randomLongKeyStream.size(); ++i) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStream.get(i)); + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } } - // assertEquals(CAPACITY, map.getCapacity()); - // Get of non-existent key should terminate.. 
- long anotherKey = randomLongKeyStream.next(); + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); JoinUtil.JoinResult joinResult = map.lookup(anotherKey, hashMapResult); assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); @@ -123,97 +176,91 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { @Test public void testExpand() throws Exception { - random = new Random(22470); + random = new Random(5227); // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); for (int i = 0; i < 18; ++i) { - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - for (int j = 0; j <= i; ++j) { - verifyHashMapResult(map, randomLongKeyStream.get(j), randomByteArrayValueStream.get(j)); + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } + verifyTable.verify(map); // assertEquals(1 << 18, map.getCapacity()); } - @Test - public void testLarge() throws Exception { - random = new Random(40719); - - // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - long key = randomLongKeyStream.next(); - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + // verifyTable.verify(map); } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]); + verifyTable.verify(map); } } - @Test - public void testLargeAndExpand() throws Exception { - random = new Random(46809); + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - long key = randomLongKeyStream.next(); - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key, - RandomByteArrayStream randomByteArrayValueStream) { + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream); + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); } - private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key, - byte[] valueBytes) { + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } + // Use a large capacity that doesn't 
require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes); - } + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java new file mode 100644 index 0000000..9e94611 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMultiSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMultiSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(458); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + long key = random.nextLong(); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(8000); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(4000); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + long key1 = random.nextLong(); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + long key2 = key1 += 1; + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + long key3 = key2 += 1; + hashMultiSetResult = map.createHashMultiSetResult(); + joinResult = map.contains(key3, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(25000); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(30000); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMultiSet map, VerifyFastLongHashMultiSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(333); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(790); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java new file mode 100644 index 0000000..698bcdc --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -0,0 +1,250 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one 
+ * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(4186); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + long key = random.nextLong(); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. 
+ map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(1412); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(100); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + long key1 = random.nextLong(); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + long key2 = key1 += 1; + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + long key3 = key2 += 1; + hashSetResult = map.createHashSetResult(); + joinResult = map.contains(key3, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(2520); + + // Make sure the map does not expand; should be able to find space. 
+ VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(348); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashSet map, VerifyFastLongHashSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? 
+ if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(7778); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(56); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java deleted file mode 100644 index 944bda6..0000000 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java +++ /dev/null @@ -1,231 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import java.util.Random; - -import org.apache.hadoop.hive.ql.exec.JoinUtil; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { - - @Test - public void testPutGetOne() throws Exception { - random = new Random(47496); - - VectorMapJoinFastMultiKeyHashMap map = - new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(0)); - - key = randomByteArrayKeyStream.next(); - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(1)); - } - - @Test - public void testPutGetMultiple() throws Exception { - random = new Random(2990); - - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, value); - - // Same key, multiple values. 
- for (int i = 0; i < 3; ++i) { - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream); - } - } - - @Test - public void testGetNonExistent() throws Exception { - random = new Random(16916); - - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - - key[0] = (byte) (key[0] + 1); - map.putRow(key, value); - - key[0] = (byte) (key[0] + 1); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); - assertTrue(!hashMapResult.hasRows()); - } - - @Test - public void testPutWithFullMap() throws Exception { - random = new Random(26078); - - // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - for (int i = 0; i < CAPACITY; ++i) { - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - } - for (int i = 0; i < randomByteArrayKeyStream.size(); ++i) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStream.get(i)); - } - // assertEquals(CAPACITY, map.getCapacity()); - // Get of non-existent key should terminate.. 
- byte[] anotherKey = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult); - assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); - } - - @Test - public void testExpand() throws Exception { - random = new Random(22470); - - // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - for (int i = 0; i < 18; ++i) { - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - for (int j = 0; j <= i; ++j) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(j), randomByteArrayValueStream.get(j)); - } - } - // assertEquals(1 << 18, map.getCapacity()); - } - - @Test - public void testLarge() throws Exception { - random = new Random(5231); - - // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - byte[] key = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult == JoinUtil.JoinResult.MATCH) { - // A problem or need different random seed / longer key? - assertTrue(false); - } - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - @Test - public void testLargeAndExpand() throws Exception { - random = new Random(46809); - - // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - byte[] key = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult == JoinUtil.JoinResult.MATCH) { - // A problem or need different random seed / longer key? - assertTrue(false); - } - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key, - RandomByteArrayStream randomByteArrayValueStream) { - - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } - - CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream); - } - - private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key, - byte[] valueBytes) { - - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } - - CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes); - } - -} 
http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java new file mode 100644 index 0000000..1bb990c --- /dev/null +++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java @@ -0,0 +1,423 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.RandomTypeUtil; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.common.util.DateUtils; + +/** + * Generate object inspector and random row object[]. 
+ */ +public class RandomRowObjectSource { + + private Random r; + + private int columnCount; + + private List<String> typeNames; + + private PrimitiveCategory[] primitiveCategories; + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private List<ObjectInspector> primitiveObjectInspectorList; + + private StructObjectInspector rowStructObjectInspector; + + public List<String> typeNames() { + return typeNames; + } + + public PrimitiveCategory[] primitiveCategories() { + return primitiveCategories; + } + + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + public StructObjectInspector rowStructObjectInspector() { + return rowStructObjectInspector; + } + + public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) { + ArrayList<ObjectInspector> partialPrimitiveObjectInspectorList = + new ArrayList<ObjectInspector>(partialFieldCount); + List<String> columnNames = new ArrayList<String>(partialFieldCount); + for (int i = 0; i < partialFieldCount; i++) { + columnNames.add(String.format("partial%d", i)); + partialPrimitiveObjectInspectorList.add( + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + primitiveTypeInfos[i])); + } + + return ObjectInspectorFactory.getStandardStructObjectInspector( + columnNames, primitiveObjectInspectorList); + } + + public void init(Random r) { + this.r = r; + chooseSchema(); + } + + /* + * For now, exclude CHAR until we determine why there is a difference (blank padding) + * serializing with LazyBinarySerializeWrite and the regular SerDe... 
+ */ + private static String[] possibleHiveTypeNames = { + "boolean", + "tinyint", + "smallint", + "int", + "bigint", + "date", + "float", + "double", + "string", +// "char", + "varchar", + "binary", + "date", + "timestamp", + "interval_year_month", + "interval_day_time", + "decimal" + }; + + private void chooseSchema() { + HashSet hashSet = null; + boolean allTypes; + boolean onlyOne = (r.nextInt(100) == 7); + if (onlyOne) { + columnCount = 1; + allTypes = false; + } else { + allTypes = r.nextBoolean(); + if (allTypes) { + // One of each type. + columnCount = possibleHiveTypeNames.length; + hashSet = new HashSet<Integer>(); + } else { + columnCount = 1 + r.nextInt(20); + } + } + typeNames = new ArrayList<String>(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + primitiveObjectInspectorList = new ArrayList<ObjectInspector>(columnCount); + List<String> columnNames = new ArrayList<String>(columnCount); + for (int c = 0; c < columnCount; c++) { + columnNames.add(String.format("col%d", c)); + String typeName; + + if (onlyOne) { + typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)]; + } else { + int typeNum; + if (allTypes) { + while (true) { + typeNum = r.nextInt(possibleHiveTypeNames.length); + Integer typeNumInteger = new Integer(typeNum); + if (!hashSet.contains(typeNumInteger)) { + hashSet.add(typeNumInteger); + break; + } + } + } else { + typeNum = r.nextInt(possibleHiveTypeNames.length); + } + typeName = possibleHiveTypeNames[typeNum]; + } + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } + 
PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); + typeNames.add(typeName); + } + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + } + + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public static void sort(Object[][] rows, ObjectInspector oi) { + for (int i = 0; i < rows.length; i++) { + for (int j = i + 1; j < rows.length; j++) { + if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) { + Object[] t = rows[i]; + rows[i] = rows[j]; + rows[j] = t; + } + } + } + } + + public void sort(Object[][] rows) { + RandomRowObjectSource.sort(rows, rowStructObjectInspector); + } + + public Object getWritableObject(int column, Object object) { + ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object); + case BYTE: + return ((WritableByteObjectInspector) 
objectInspector).create((byte) object); + case SHORT: + return ((WritableShortObjectInspector) objectInspector).create((short) object); + case INT: + return ((WritableIntObjectInspector) objectInspector).create((int) object); + case LONG: + return ((WritableLongObjectInspector) objectInspector).create((long) object); + case DATE: + return ((WritableDateObjectInspector) objectInspector).create((Date) object); + case FLOAT: + return ((WritableFloatObjectInspector) objectInspector).create((float) object); + case DOUBLE: + return ((WritableDoubleObjectInspector) objectInspector).create((double) object); + case STRING: + return ((WritableStringObjectInspector) objectInspector).create((String) object); + case CHAR: + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + } + case VARCHAR: + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + } + case BINARY: + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + case TIMESTAMP: + return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + case INTERVAL_YEAR_MONTH: + return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); + case INTERVAL_DAY_TIME: + return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); + case DECIMAL: + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + default: + throw new Error("Unknown 
primitive category " + primitiveCategory); + } + } + + public Object randomObject(int column) { + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return Boolean.valueOf(r.nextInt(1) == 1); + case BYTE: + return Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return RandomTypeUtil.getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return RandomTypeUtil.getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return RandomTypeUtil.getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { + int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, 
maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + if (bd.scale() > bd.precision()) { + // Sometimes weird decimals are produced? + continue; + } + + return bd; + } + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + return intervalDayTimeVal; + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/4533d21b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index 4415431..bbb35c7 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; * * Reading some fields require a results object to receive value information. A separate * results object is created by the caller at initialization per different field even for the same - * type. + * type. * * Some type values are by reference to either bytes in the deserialization buffer or to * other type specific buffers. So, those references are only valid until the next time set is
