http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..a37b5a0 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -25,7 +25,6 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; @@ -33,9 +32,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; -import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; @@ -48,17 +45,23 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorExtractRow; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorRandomBatchSource; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateStream; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@ -69,7 +72,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; -import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; @@ -86,14 +88,13 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.BytesWritable; @@ -101,6 +102,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HashCodeUtil; import org.apache.hive.common.util.ReflectionUtil; import org.junit.Test; +import org.junit.Ignore; import java.io.IOException; import java.util.ArrayList; @@ -120,233 +122,1355 @@ import junit.framework.Assert; public class TestMapJoinOperator { - /* - * This test collector operator is for MapJoin row-mode. - */ - private class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, false); + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, true); + break; + case 2: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + private boolean goodTestVariation(MapJoinTestDescription testDesc) { + final int smallTableValueSize = testDesc.smallTableRetainValueColumnNums.length; + + switch (testDesc.vectorMapJoinVariation) { + case INNER: + return (smallTableValueSize > 0); + case INNER_BIG_ONLY: + case LEFT_SEMI: + return (smallTableValueSize == 0); + case OUTER: + return true; + case FULL_OUTER: + return true; + default: + throw new RuntimeException( + "Unexpected vectorMapJoinVariation " + testDesc.vectorMapJoinVariation); + } + + } + + @Ignore + @Test + public void testLong0() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong0"); + + return false; + } + + @Ignore + @Test + public void testLong0_NoRegularKeys() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "doTestLong0_NoRegularKeys"); + + return false; + } + + @Ignore + @Test + public void testLong1() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong1(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, long value; Small Table: no key retained, string value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong1"); + + return false; + } + + @Test + public void testLong2() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong2(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: short key, no value; Small Table: key retained, timestamp value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.timestampTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong2"); + + return false; + } + + + @Test + public void testLong3() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong3"); + + return false; + } + + @Test + public void testLong3_NoRegularKeys() throws Exception { + long seed = 9934; + int rowCount = 10; - private final RowTestObjectsMultiSet testRowMultiSet; + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "doTestLong3_NoRegularKeys"); + + return false; + } + + @Test + public void testLong4() throws Exception { + long seed = 3982; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong4(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, no value; Small Table: no key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong4"); + + return false; + } + + @Test + public void testLong5() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong5(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong5"); + + return false; + } + + @Test + public void testLong6() throws Exception { + long seed = 9384; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + // Cause there to be no regular FULL OUTER MapJoin MATCHes so only non-match Small Table + // results. + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, timestamp value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.timestampTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testLong6"); + + return false; + } + + private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + @Test + public void testMultiKey0() throws Exception { + long seed = 28322; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey0(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.intTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; + + smallTableRetainKeyColumnNums = new int[] {0, 1}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; + } + + @Test + public void testMultiKey1() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey1(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); - public TestMultiSetCollectorOperator( - ObjectInspector[] outputObjectInspectors, - RowTestObjectsMultiSet testRowMultiSet) { - super(outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; - } + TypeInfo[] bigTableTypeInfos = null; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; - @Override - public String getName() { - return TestMultiSetCollectorOperator.class.getSimpleName(); + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] {new DecimalTypeInfo(38, 18)}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey1"); + + return false; + } + + @Test + public void testMultiKey2() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - private class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + public boolean doTestMultiKey2(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - private final RowTestObjectsMultiSet testRowMultiSet; + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, - ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) - throws HiveException { - super(outputTypeInfos, outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + TypeInfo[] bigTableTypeInfos = null; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + int[] bigTableKeyColumnNums = null; - @Override - public String getName() { - return TestMultiSetVectorCollectorOperator.class.getSimpleName(); - } - } + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; - private static class KeyConfig { - long seed; - PrimitiveTypeInfo primitiveTypeInfo; - KeyConfig(long seed, PrimitiveTypeInfo primitiveTypeInfo) { - this.seed = seed; - this.primitiveTypeInfo = primitiveTypeInfo; + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; } - private static KeyConfig[] longKeyConfigs = new KeyConfig[] { - new KeyConfig(234882L, TypeInfoFactory.longTypeInfo), - new KeyConfig(4600L, TypeInfoFactory.intTypeInfo), - new KeyConfig(98743L, TypeInfoFactory.shortTypeInfo)}; @Test - public void testLong() throws Exception { - for (KeyConfig longKeyConfig : longKeyConfigs) { + public void testMultiKey3() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; - } - doTestLong(longKeyConfig.seed, longKeyConfig.primitiveTypeInfo, vectorMapJoinVariation); + hiveConfVariationsDone = + doTestMultiKey3( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestLong(long seed, TypeInfo numberTypeInfo, - VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestMultiKey3(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"number1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.longTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + int[] bigTableKeyColumnNums = null; - int[] bigTableRetainColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.dateTypeInfo, + TypeInfoFactory.byteTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; + + smallTableRetainKeyColumnNums = new int[] {0, 1}; + + smallTableValueTypeInfos = + new TypeInfo[] {}; + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testMultiKey3"); + + return false; } @Test - public void testMultiKey() throws Exception { + public void testString0() throws Exception { long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - doTestMultiKey(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestMultiKey(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString0(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; - TypeInfo[] bigTableTypeInfos = + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One plain STRING key column. + bigTableTypeInfos = new TypeInfo[] { - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + bigTableKeyColumnNums = new int[] {0}; - String[] smallTableValueColumnNames = new String[] {"sv1"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testString0"); + + return false; + } + + @Test + public void testString1() throws Exception { + long seed = 3422; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestString1(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One BINARY key column. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.binaryTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.floatTypeInfo, + new DecimalTypeInfo(38, 18)}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString1"); + + return false; } @Test - public void testString() throws Exception { - long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + public void testString2() throws Exception { + long seed = 7439; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); } - doTestString(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString2(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One STRING key column; Small Table value: NONE (tests INNER_BIG_ONLY, LEFT_SEMI). + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = - new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + testData = + new MapJoinTestData(rowCount, testDesc, seed); + + executeTest(testDesc, testData, "testString2"); - executeTest(testDesc, testData); + return false; } private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTableRowObjects, @@ -357,14 +1481,32 @@ public class TestMapJoinOperator { } } - private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, - Object[] outputObjects) { + private void addToOutput(MapJoinTestDescription testDesc, + RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects, + RowTestObjectsMultiSet.RowFlag rowFlag) { for (int c = 0; c < outputObjects.length; c++) { - PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); + PrimitiveObjectInspector primitiveObjInsp = + ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); Object outputObject = outputObjects[c]; outputObjects[c] = primitiveObjInsp.copyObject(outputObject); } - expectedTestRowMultiSet.add(new RowTestObjects(outputObjects)); + expectedTestRowMultiSet.add(new RowTestObjects(outputObjects), rowFlag); + } + + private String rowToCsvString(Object[] rowObjects) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rowObjects.length; i++) { + if (sb.length() > 0) { + sb.append(","); + } + Object obj = rowObjects[i]; + if (obj == null) { + sb.append("\\N"); + } else { + sb.append(obj); + } + } + return sb.toString(); } /* @@ -377,7 +1519,7 @@ public class TestMapJoinOperator { RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet(); VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; Object[] bigTableRowObjects = new Object[bigTableColumnCount]; @@ -385,32 +1527,36 @@ public class TestMapJoinOperator { final int bigTableKeyColumnCount = testDesc.bigTableKeyTypeInfos.length; Object[] bigTableKeyObjects = new Object[bigTableKeyColumnCount]; - VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream(); + VectorRandomBatchSource bigTableBatchSource = testData.getBigTableBatchSource(); VectorizedRowBatch batch = testData.getBigTableBatch(); - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - batch.reset(); - bigTableBatchStream.fillNext(batch); + bigTableBatchSource.resetBatchIteration(); + while (bigTableBatchSource.fillNextBatch(batch)) { final int size = testData.bigTableBatch.size; for (int r = 0; r < size; r++) { vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); // Form key object array + boolean hasAnyNulls = false; // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins. for (int k = 0; k < bigTableKeyColumnCount; k++) { int keyColumnNum = testDesc.bigTableKeyColumnNums[k]; - bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum]; + Object keyObject = bigTableRowObjects[keyColumnNum]; + if (keyObject == null) { + hasAnyNulls = true; + } + bigTableKeyObjects[k] = keyObject; bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]); } RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects); - if (testData.smallTableKeyHashMap.containsKey(testKey)) { + if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) { int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey); switch (testDesc.vectorMapJoinVariation) { case INNER: case OUTER: + case FULL_OUTER: { // One row per value. ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex); @@ -420,36 +1566,46 @@ public class TestMapJoinOperator { addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + Object[] valueRow = valueList.get(v).getRow(); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } } break; case INNER_BIG_ONLY: - { - // Value count rows. - final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex); - for (int v = 0; v < valueCount; v++) { - Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; - - addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); - } - } - break; case LEFT_SEMI: { - // One row (existence). Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } break; default: @@ -458,9 +1614,10 @@ public class TestMapJoinOperator { } else { - // No match. + // Big Table non-match. - if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) { + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { // We need to add a non-match row with nulls for small table values. @@ -468,14 +1625,74 @@ public class TestMapJoinOperator { addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = null; + } + + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = null; + outputObjects[outputColumnNum++] = null; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.LEFT_OUTER); + } + } + } + } + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + System.out.println("*BENCHMARK* ----------------------------------------------------------------------"); + System.out.println("*BENCHMARK* FULL OUTER non-match key count " + + testData.fullOuterAdditionalSmallTableKeys.size()); + + // Fill in non-match Small Table key results. + for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) { + + // System.out.println( + // "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString()); + + int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey); + + // One row per value. + ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(smallTableKeyIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; + + // Non-match Small Table keys produce NULL Big Table columns. + final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; + for (int o = 0; o < bigTableRetainColumnNumsLength; o++) { + outputObjects[o] = null; + } + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + // The output result may include 0, 1, or more small key columns... + Object[] smallKeyObjects = smallTableKey.getRow(); + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + Object[] valueRow = valueList.get(v).getRow(); + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; + for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.FULL_OUTER); } } } @@ -483,67 +1700,354 @@ public class TestMapJoinOperator { return expectedTestRowMultiSet; } - private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception { + private void generateBigAndSmallTableRowLogLines(MapJoinTestDescription testDesc, + MapJoinTestData testData) throws HiveException { + + // Generate Big Table rows log lines... + VectorExtractRow vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(testDesc.bigTableTypeInfos); + + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + Object[] bigTableRowObjects = new Object[bigTableColumnCount]; + + /* + PrintStream big_ps; + try { + big_ps = new PrintStream("/Users/mmccline/VecFullOuterRefresh/out_big"); + } catch (Exception e) { + throw new HiveException(e); + } + */ + + VectorRandomBatchSource bigTableBatchSource = testData.getBigTableBatchSource(); + VectorizedRowBatch batch = testData.getBigTableBatch(); + bigTableBatchSource.resetBatchIteration(); + while (bigTableBatchSource.fillNextBatch(batch)) { + + final int size = testData.bigTableBatch.size; + for (int r = 0; r < size; r++) { + vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); + + // big_ps.println(rowToCsvString(bigTableRowObjects)); + } + } + // big_ps.close(); + + /* + PrintStream small_ps; + try { + small_ps = new PrintStream("/Users/mmccline/VecFullOuterRefresh/out_small"); + } catch (Exception e) { + throw new HiveException(e); + } + */ + + // Generate Small Table rows log lines... + final int keyKeyColumnNumsLength = + testDesc.bigTableKeyColumnNums.length; + final int smallTableRetainValueLength = + testDesc.smallTableRetainValueColumnNums.length; + final int smallTableLength = keyKeyColumnNumsLength + smallTableRetainValueLength; + for (Entry<RowTestObjects, Integer> entry : testData.smallTableKeyHashMap.entrySet()) { + if (smallTableRetainValueLength == 0) { + Object[] smallTableRowObjects = entry.getKey().getRow(); + // small_ps.println(rowToCsvString(smallTableRowObjects)); + } else { + Integer valueIndex = entry.getValue(); + ArrayList<RowTestObjects> valueList = testData.smallTableValues.get(valueIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] smallTableRowObjects = new Object[smallTableLength]; + System.arraycopy(entry.getKey().getRow(), 0, smallTableRowObjects, 0, keyKeyColumnNumsLength); + int outputColumnNum = keyKeyColumnNumsLength; + Object[] valueRow = valueList.get(v).getRow(); + for (int o = 0; o < smallTableRetainValueLength; o++) { + smallTableRowObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + // small_ps.println(rowToCsvString(smallTableRowObjects)); + } + } + } + // small_ps.close(); + } + + private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + + // So stack trace is self-explanatory. + switch (testDesc.vectorMapJoinVariation) { + case INNER: + executeTestInner(testDesc, testData, title); + break; + case INNER_BIG_ONLY: + executeTestInnerBigOnly(testDesc, testData, title); + break; + case LEFT_SEMI: + executeTestLeftSemi(testDesc, testData, title); + break; + case OUTER: + executeTestOuter(testDesc, testData, title); + break; + case FULL_OUTER: + executeTestFullOuter(testDesc, testData, title); + break; + default: + throw new RuntimeException("Unexpected Vector MapJoin variation " + + testDesc.vectorMapJoinVariation); + } + } + + private void executeTestInner(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestInnerBigOnly(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestLeftSemi(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestFullOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { RowTestObjectsMultiSet expectedTestRowMultiSet = createExpectedTestRowMultiSet(testDesc, testData); - // UNDONE: Inner count - System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expectedTestRowMultiSet.getRowCount() + - " totalCount " + expectedTestRowMultiSet.getTotalCount()); + generateBigAndSmallTableRowLogLines(testDesc, testData); + + System.out.println("*BENCHMARK* expectedTestRowMultiSet " + + " totalKeyCount " + expectedTestRowMultiSet.getTotalKeyCount() + + " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); // Execute all implementation variations. for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { - executeTestImplementation(mapJoinImplementation, testDesc, testData, - expectedTestRowMultiSet); + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Key match tracking not supported in plain Java HashMap. + continue; + } + switch (mapJoinImplementation) { + case ROW_MODE_HASH_MAP: + executeRowModeHashMap( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case ROW_MODE_OPTIMIZED: + executeRowModeOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case VECTOR_PASS_THROUGH: + executeVectorPassThrough( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_OPTIMIZED: + executeNativeVectorOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_FAST: + executeNativeVectorFast( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + default: + throw new RuntimeException( + "Unexpected vector map join test variation"); + } } } - private boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { - return - (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && - mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + private void executeRowModeHashMap( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_HASH_MAP, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeRowModeOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeVectorPassThrough( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.VECTOR_PASS_THROUGH, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorFast( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_FAST, + testDesc, testData, + expectedTestRowMultiSet, + title); } private void executeTestImplementation( MapJoinTestImplementation mapJoinImplementation, - MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) throws Exception { - System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test"); + System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + + " title " + title); // UNDONE: Parameterize for implementation variation? MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); - final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet(); - Operator<? extends OperatorDesc> testCollectorOperator = - (!isVectorOutput ? - new TestMultiSetCollectorOperator( - testDesc.outputObjectInspectors, outputTestRowMultiSet) : - new TestMultiSetVectorCollectorOperator( - testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)); - - MapJoinOperator operator = + CreateMapJoinResult result = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc); + MapJoinOperator mapJoinOperator = result.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = result.mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = result.mapJoinTableContainerSerDe; + + CountCollectorTestOperator testCollectorOperator; + if (!isVectorOutput) { + testCollectorOperator = + new TestMultiSetCollectorOperator( + testDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + testCollectorOperator = + new TestMultiSetVectorCollectorOperator( + ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().toArray(new Integer[0])), + testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + MapJoinTestConfig.connectOperators(mapJoinOperator, testCollectorOperator); + + CountCollectorTestOperator interceptTestCollectorOperator = null; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + + if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Not supported. + return; + } + + // Wire in FULL OUTER Intercept. + interceptTestCollectorOperator = + MapJoinTestConfig.addFullOuterIntercept( + mapJoinImplementation, testDesc, outputTestRowMultiSet, testData, + mapJoinOperator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } else { + + // Invoke initializeOp methods. + mapJoinOperator.initialize( + testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. + mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + } if (!isVectorOutput) { - MapJoinTestData.driveBigTableData(testDesc, testData, operator); + MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator); } else { - MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator); + MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator); + } + + if (!testCollectorOperator.getIsClosed()) { + Assert.fail("collector operator not closed"); + } + if (testCollectorOperator.getIsAborted()) { + Assert.fail("collector operator aborted"); + } + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + if (!interceptTestCollectorOperator.getIsClosed()) { + Assert.fail("intercept collector operator not closed"); + } + if (interceptTestCollectorOperator.getIsAborted()) { + Assert.fail("intercept collector operator aborted"); + } } System.out.println("*BENCHMARK* executeTestImplementation row count " + - ((CountCollectorTestOperator) testCollectorOperator).getRowCount()); + testCollectorOperator.getRowCount()); // Verify the output! - if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) { - System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation); + String option = ""; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + option = " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name(); + } + if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet, "expected", "actual")) { + System.out.println("*BENCHMARK* " + title + " verify failed" + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); + expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual"); } else { - System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation); + System.out.println("*BENCHMARK* " + title + " verify succeeded " + + " for implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + option); } } } \ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java index 09dcb83..d356588 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -30,10 +30,13 @@ import java.util.TreeMap; import junit.framework.TestCase; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; @@ -197,6 +200,20 @@ public class CheckFastHashTable { return array[index].getValues(); } + private void verifyOne(VectorMapJoinFastLongHashMap map, int index, MatchTracker matchTracker) { + FastLongHashMapElement element = array[index]; + long longKey = element.getKey(); + List<byte[]> values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(longKey, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastLongHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -204,18 +221,77 @@ public class CheckFastHashTable { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(long searchLong) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastLongHashMapElement element = array[index]; - long key = element.getKey(); - List<byte[]> values = element.getValues(); + long longKey = element.getKey(); + if (longKey == searchLong) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastLongHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean[] nonMatched = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean[] returnedNonMatched = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + long longKey = nonMatchedIterator.getNonMatchedLongKey(); + int index = findKeyInArray(longKey); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastLongHashMapElement element = array[index]; + List<byte[]> values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } @@ -247,6 +323,11 @@ public class CheckFastHashTable { public void addValue(byte[] value) { values.add(value); } + + @Override + public String toString() { + return "Key length " + key.length + ", value count " + values.size(); + } } /* @@ -310,6 +391,21 @@ public class CheckFastHashTable { return array[index].getValues(); } + private void verifyOne(VectorMapJoinFastBytesHashMap map, int index, + MatchTracker matchTracker) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List<byte[]> values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastBytesHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -317,18 +413,82 @@ public class CheckFastHashTable { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(byte[] searchKeyBytes, int searchKeyOffset, int searchKeyLength) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastBytesHashMapElement element = array[index]; - byte[] key = element.getKey(); - List<byte[]> values = element.getValues(); + byte[] keyBytes = element.getKey(); + if (keyBytes.length == searchKeyLength && + StringExpr.equal( + keyBytes, 0, keyBytes.length, + searchKeyBytes, searchKeyOffset, searchKeyLength)) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastBytesHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean[] nonMatched = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean[] returnedNonMatched = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey();; + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + int index = findKeyInArray(keyBytes, keyOffset, keyLength); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastBytesHashMapElement element = array[index]; + List<byte[]> values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } }