http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
new file mode 100644
index 0000000..586c850
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+/*
+ * An multi-key value hash map optimized for vector map join.
+ *
+ * The key is uninterpreted bytes.
+ */
+public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable {
+
+  @Test
+  public void testOneKey() throws Exception {
+    random = new Random(82733);
+
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+    random.nextBytes(key);
+    byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    // Second value.
+    value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    // Third value.
+    value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  @Test
+  public void testMultipleKeysSingleValue() throws Exception {
+    random = new Random(29383);
+
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100 + random.nextInt(1000);
+    for (int i = 0; i < keyCount; i++) {
+      byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+      random.nextBytes(key);
+      if (!verifyTable.contains(key)) {
+        // Unique keys for this test.
+        break;
+      }
+      byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+      random.nextBytes(value);
+
+      map.testPutRow(key, value);
+      verifyTable.add(key, value);
+    }
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+      VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable)
+      throws HiveException, IOException {
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1);
+  }
+
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+      VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable,
+      int maxKeyLength, int fixedValueLength)
+      throws HiveException, IOException {
+    for (int i = 0; i < keyCount; i++) {
+      byte[] value;
+      if (fixedValueLength == -1) {
+        value = new byte[generateLargeCount() - 1];
+      } else {
+        value = new byte[fixedValueLength];
+      }
+      random.nextBytes(value);
+
+      // Add a new key or add a value to an existing key?
+      if (random.nextBoolean() || verifyTable.getCount() == 0) {
+        byte[] key;
+        while (true) {
+          key = new byte[random.nextInt(maxKeyLength)];
+          random.nextBytes(key);
+          if (!verifyTable.contains(key)) {
+            // Unique keys for this test.
+            break;
+          }
+        }
+
+        map.testPutRow(key, value);
+        verifyTable.add(key, value);
+      } else {
+        byte[] randomExistingKey = verifyTable.addRandomExisting(value, random);
+        map.testPutRow(randomExistingKey, value);
+      }
+    }
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  @Test
+  public void testMultipleKeysMultipleValue() throws Exception {
+    random = new Random(9332);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100;
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+  }
+
+  @Test
+  public void testReallyBig() throws Exception {
+    random = new Random(42662);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false, LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100;
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+  }
+}
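Every new *NonMatched test above follows the same put/mirror/verify pattern: each row goes into the fast hash map under test via testPutRow() and is mirrored into a reference table (VerifyFastBytesHashMap.add()), and verifyNonMatched() then checks the map's contents together with its non-matched-key bookkeeping, which the FULL OUTER MapJoin support elsewhere in this commit presumably exercises. The following is a minimal sketch of that pattern, assuming the CommonFastHashTable constants (CAPACITY, LOAD_FACTOR, WB_SIZE, MAX_KEY_LENGTH, MAX_VALUE_LENGTH) and the helpers shown in the diff above; it is illustrative only, not part of the patch:

    Random random = new Random(82733);
    // Fast hash map under test plus a simple reference mirror.
    VectorMapJoinFastMultiKeyHashMap map =
        new VectorMapJoinFastMultiKeyHashMap(
            false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();

    // One random key/value pair.
    byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
    random.nextBytes(key);
    byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
    random.nextBytes(value);

    // Put into the map under test and mirror into the reference table.
    map.testPutRow(key, value);
    verifyTable.add(key, value);

    // Verify the stored values and the non-matched bookkeeping.
    verifyTable.verifyNonMatched(map, random);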
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index cbd77d1..6fccde9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -37,7 +37,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -65,7 +65,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -91,7 +91,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -125,7 +125,8 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet( + false, CAPACITY, 1f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -167,7 +168,8 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet( + false, 1, 0.0000001f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -227,7 +229,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -242,7 +244,7 @@ public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java new file mode 100644 index 0000000..5847787 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(33221); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testExpand() throws Exception { + random = new Random(5227); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java index c908f66..9615bf3 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java @@ -599,8 +599,6 @@ public class VerifyFastRow { return getComplexField(deserializeRead, typeInfo); } - static int fake = 0; - private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo) throws IOException { switch (typeInfo.getCategory()) { http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index 793a676..ff88841 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,8 +23,10 @@ import java.util.Random; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -109,15 +111,25 @@ public class VectorBatchGenerator { } private GenerateCategory category; + private boolean allowNulls; public GenerateType(GenerateCategory category) { this.category = category; } + public GenerateType(GenerateCategory category, boolean allowNulls) { + this.category = category; + this.allowNulls = allowNulls; + } + public GenerateCategory getCategory() { return category; } + public boolean getAllowNulls() { + return allowNulls; + } + /* * BOOLEAN .. LONG: Min and max. 
*/ @@ -180,6 +192,7 @@ public class VectorBatchGenerator { case SHORT: case INT: case LONG: + case DATE: colVector = new LongColumnVector(); break; @@ -189,16 +202,22 @@ public class VectorBatchGenerator { break; case STRING: + case CHAR: + case VARCHAR: + case BINARY: colVector = new BytesColumnVector(); break; - // UNDONE - case DATE: case TIMESTAMP: - case BINARY: + colVector = new TimestampColumnVector(); + break; + case DECIMAL: - case VARCHAR: - case CHAR: + colVector = new DecimalColumnVector(38, 18); + break; + + // UNDONE + case LIST: case MAP: case STRUCT: http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index 9bf9d9d..f200aa2 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -22,21 +22,28 @@ import java.sql.Timestamp; import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.Date; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; public class VectorColumnGroupGenerator { private GenerateType[] generateTypes; private int[] columnNums; private Object[] arrays; + private boolean[][] isNullArrays; public VectorColumnGroupGenerator(int columnNum, GenerateType generateType) { columnNums = new int[] {columnNum}; @@ -59,6 +66,7 @@ public class VectorColumnGroupGenerator { private void allocateArrays(int size) { arrays = new Object[generateTypes.length]; + isNullArrays = new boolean[generateTypes.length][]; for (int i = 0; i < generateTypes.length; i++) { GenerateType generateType = generateTypes[i]; GenerateCategory category = generateType.getCategory(); @@ -88,24 +96,34 @@ public class VectorColumnGroupGenerator { case STRING: array = new String[size]; break; + case BINARY: + array = new byte[size][]; + break; + case DATE: + array = new Date[size]; + break; case TIMESTAMP: array = new Timestamp[size]; break; - - // UNDONE - case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + array = new HiveChar[size]; + break; + case VARCHAR: + array = new HiveVarchar[size]; + break; + case DECIMAL: + array = new HiveDecimalWritable[size]; + break; case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected 
generate category " + category); } arrays[i] = array; + isNullArrays[i] = new boolean[size]; } } @@ -139,16 +157,24 @@ public class VectorColumnGroupGenerator { case STRING: Arrays.fill(((String[]) array), null); break; + case BINARY: + Arrays.fill(((byte[][]) array), null); + break; + case DATE: + Arrays.fill(((Date[]) array), null); + break; case TIMESTAMP: Arrays.fill(((Timestamp[]) array), null); break; - - // UNDONE - case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + Arrays.fill(((HiveChar[]) array), null); + break; + case VARCHAR: + Arrays.fill(((HiveVarchar[]) array), null); + break; + case DECIMAL: + Arrays.fill(((HiveDecimalWritable[]) array), null); + break; case LIST: case MAP: @@ -168,6 +194,11 @@ public class VectorColumnGroupGenerator { private void generateRowColumnValue(int rowIndex, int columnIndex, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + if (allowNulls && random.nextInt(100) < 5) { + isNullArrays[columnIndex][rowIndex] = true; + return; + } Object array = arrays[columnIndex]; switch (category) { case BOOLEAN: @@ -228,6 +259,20 @@ public class VectorColumnGroupGenerator { } break; + case BINARY: + { + byte[] value = RandomTypeUtil.getRandBinary(random, 10); + ((byte[][]) array)[rowIndex] = value; + } + break; + + case DATE: + { + Date value = RandomTypeUtil.getRandDate(random); + ((Date[]) array)[rowIndex] = value; + } + break; + case TIMESTAMP: { Timestamp value = RandomTypeUtil.getRandTimestamp(random).toSqlTimestamp(); @@ -235,14 +280,31 @@ public class VectorColumnGroupGenerator { } break; - // UNDONE - case DATE: - // UNDONE: Needed to longTest? + case CHAR: + { + // UNDONE: Use CharTypeInfo.maxLength + HiveChar value = + new HiveChar(RandomTypeUtil.getRandString(random), 10); + ((HiveChar[]) array)[rowIndex] = value; + } + break; - case BINARY: - case DECIMAL: case VARCHAR: - case CHAR: + { + // UNDONE: Use VarcharTypeInfo.maxLength + HiveVarchar value = + new HiveVarchar(RandomTypeUtil.getRandString(random), 10); + ((HiveVarchar[]) array)[rowIndex] = value; + } + break; + + case DECIMAL: + { + HiveDecimalWritable value = + new HiveDecimalWritable(RandomTypeUtil.getRandHiveDecimal(random)); + ((HiveDecimalWritable[]) array)[rowIndex] = value; + } + break; case LIST: case MAP: @@ -261,7 +323,15 @@ public class VectorColumnGroupGenerator { private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCount, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); Object array = arrays[columnIndex]; + boolean[] isNull = isNullArrays[columnIndex]; + if (allowNulls && isNull[rowIndex]) { + for (int i = 1; i < seriesCount; i++) { + isNull[rowIndex + i] = true; + } + return; + } switch (category) { case BOOLEAN: { @@ -335,6 +405,24 @@ public class VectorColumnGroupGenerator { } } break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + byte[] value = byteArrayArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + byteArrayArray[rowIndex + i] = value; + } + } + break; + case DATE: + { + Date[] dateArray = ((Date[]) array); + Date value = dateArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + dateArray[rowIndex + i] = value; + } + } + break; case TIMESTAMP: { Timestamp[] timestampArray = ((Timestamp[]) array); @@ -344,14 +432,33 @@ public 
class VectorColumnGroupGenerator { } } break; - - // UNDONE - case DATE: - - case BINARY: - case DECIMAL: - case VARCHAR: case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + HiveChar value = hiveCharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveCharArray[rowIndex + i] = value; + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + HiveVarchar value = hiveVarcharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveVarcharArray[rowIndex + i] = value; + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + HiveDecimalWritable value = hiveDecimalWritableArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveDecimalWritableArray[rowIndex + i] = value; + } + } + break; case LIST: case MAP: @@ -387,6 +494,16 @@ public class VectorColumnGroupGenerator { GenerateType generateType = generateTypes[logicalColumnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + boolean[] isNull = isNullArrays[logicalColumnIndex]; + if (allowNulls) { + for (int i = 0; i < size; i++) { + if (isNull[i]) { + colVector.isNull[i] = true; + colVector.noNulls = false; + } + } + } Object array = arrays[logicalColumnIndex]; switch (category) { case BOOLEAN: @@ -394,7 +511,11 @@ public class VectorColumnGroupGenerator { boolean[] booleanArray = ((boolean[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = (booleanArray[i] ? 1 : 0); + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = (booleanArray[i] ? 1 : 0); + } } } break; @@ -403,7 +524,11 @@ public class VectorColumnGroupGenerator { byte[] byteArray = ((byte[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = byteArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = byteArray[i]; + } } } break; @@ -412,7 +537,11 @@ public class VectorColumnGroupGenerator { short[] shortArray = ((short[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = shortArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = shortArray[i]; + } } } break; @@ -421,7 +550,11 @@ public class VectorColumnGroupGenerator { int[] intArray = ((int[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = intArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = intArray[i]; + } } } break; @@ -430,7 +563,11 @@ public class VectorColumnGroupGenerator { long[] longArray = ((long[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = longArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = longArray[i]; + } } } break; @@ -439,7 +576,11 @@ public class VectorColumnGroupGenerator { float[] floatArray = ((float[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = floatArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = floatArray[i]; + } } } break; @@ -448,7 +589,11 @@ public class VectorColumnGroupGenerator { double[] doubleArray = ((double[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = doubleArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = 
doubleArray[i]; + } } } break; @@ -457,8 +602,35 @@ public class VectorColumnGroupGenerator { String[] stringArray = ((String[]) array); BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); for (int i = 0; i < size; i++) { - byte[] bytes = stringArray[i].getBytes(); - bytesColVec.setVal(i, bytes); + if (!isNull[i]) { + byte[] bytes = stringArray[i].getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = byteArrayArray[i]; + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DATE: + { + Date[] dateArray = ((Date[]) array); + LongColumnVector longColVec = ((LongColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + Date date = dateArray[i]; + longColVec.vector[i] = + DateWritableV2.dateToDays(date); + } } } break; @@ -467,26 +639,58 @@ public class VectorColumnGroupGenerator { Timestamp[] timestampArray = ((Timestamp[]) array); TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector); for (int i = 0; i < size; i++) { - Timestamp timestamp = timestampArray[i]; - timestampColVec.set(i, timestamp); + if (!isNull[i]) { + Timestamp timestamp = timestampArray[i]; + timestampColVec.set(i, timestamp); + } + } + } + break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveCharArray = ((HiveVarchar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + HiveDecimalWritable decWritable = hiveDecimalWritableArray[i]; + decimalColVec.set(i, decWritable); + } } } break; // UNDONE - case DATE: - - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unepected generate category " + category); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java index b705fd7..fdc8701 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/mapping/TestCounterMapping.java @@ -146,7 +146,7 @@ public class TestCounterMapping { FilterOperator filter2 = filters2.get(0); assertEquals("original check", 7, filter1.getStatistics().getNumRows()); - assertEquals("optimized check", 6, filter2.getStatistics().getNumRows()); + assertEquals("optimized check", 1, filter2.getStatistics().getNumRows()); } 
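The VectorColumnGroupGenerator changes above apply one null-handling pattern per primitive category: when a GenerateType allows nulls, a per-row isNull flag is recorded at generation time, and when the batch is populated the flag is copied into ColumnVector.isNull, noNulls is cleared, and null rows are given a harmless default instead of a generated value. Below is a condensed sketch of that fill for a long column; the field names are taken from the diff, while the surrounding context is assumed for illustration only:

    // Null-aware fill of a LongColumnVector, mirroring the pattern above.
    // 'isNull' is the generator's per-row null-flag array for this column;
    // 'longArray' holds the values generated for the non-null rows.
    long[] longArray = (long[]) array;
    LongColumnVector longColVec = (LongColumnVector) colVector;
    for (int i = 0; i < size; i++) {
      if (isNull[i]) {
        longColVec.isNull[i] = true;   // mark the row as null
        longColVec.noNulls = false;    // vector now contains at least one null
        longColVec.vector[i] = 0;      // placeholder value for the null row
      } else {
        longColVec.vector[i] = longArray[i];
      }
    }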
http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientnegative/join32.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientnegative/join32.q b/ql/src/test/queries/clientnegative/join32.q index 8c93914..3b6babb 100644 --- a/ql/src/test/queries/clientnegative/join32.q +++ b/ql/src/test/queries/clientnegative/join32.q @@ -2,6 +2,9 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.cbo.enable=false; + +-- SORT_QUERY_RESULTS + CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; -- Mapjoin followed by Mapjoin is not supported. http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/auto_join_filters.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_join_filters.q b/ql/src/test/queries/clientpositive/auto_join_filters.q index ea028f6..9282f8f 100644 --- a/ql/src/test/queries/clientpositive/auto_join_filters.q +++ b/ql/src/test/queries/clientpositive/auto_join_filters.q @@ -7,7 +7,13 @@ LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1_n5; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -24,10 +30,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); @@ -51,7 +66,12 @@ SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; 
+SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -68,10 +88,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n5 a FULL OUTER JOIN myinput1_n5 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND 
a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a LEFT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n5 a RIGHT OUTER JOIN myinput1_n5 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1_n5 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/auto_join_nulls.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/auto_join_nulls.q b/ql/src/test/queries/clientpositive/auto_join_nulls.q index 4a2b57b..a385128 100644 --- a/ql/src/test/queries/clientpositive/auto_join_nulls.q +++ b/ql/src/test/queries/clientpositive/auto_join_nulls.q @@ -19,10 +19,19 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOI SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON a.key=b.key and a.value = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1_n2 a FULL OUTER JOIN myinput1_n2 b ON a.value = b.value and a.key=b.key; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a LEFT OUTER JOIN myinput1_n2 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1_n2 c ON (b.value=c.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1_n2 a RIGHT OUTER JOIN myinput1_n2 b ON (a.value=b.value) LEFT OUTER JOIN myinput1_n2 c ON (b.value=c.value); http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q index 8248035..049a4d9 100644 --- a/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q +++ b/ql/src/test/queries/clientpositive/bucket_map_join_tez1.q @@ -18,7 +18,7 @@ load data local inpath '../../data/files/bmj/000001_0' INTO TABLE srcbucket_mapj load data local inpath '../../data/files/bmj/000002_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08'); load data local inpath '../../data/files/bmj/000003_0' INTO TABLE srcbucket_mapjoin_part_n15 partition(ds='2008-04-08'); - +-- SORT_QUERY_RESULTS set hive.optimize.bucketingsorting=false; insert overwrite table tab_part_n9 partition (ds='2008-04-08') http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/correlationoptimizer1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer1.q b/ql/src/test/queries/clientpositive/correlationoptimizer1.q index 1c4f82a..d61d175 100644 --- a/ql/src/test/queries/clientpositive/correlationoptimizer1.q +++ b/ql/src/test/queries/clientpositive/correlationoptimizer1.q @@ -216,6 +216,7 @@ set hive.optimize.correlation=false; -- they share the same key. Because those keys with a null value are not grouped -- in the output of the Full Outer Join, we cannot use a single MR to execute -- these two operators. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -227,7 +228,35 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -238,6 +267,7 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; set hive.auto.convert.join=false; set hive.optimize.correlation=false; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/correlationoptimizer2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer2.q b/ql/src/test/queries/clientpositive/correlationoptimizer2.q index 6656084..cbb6e47 100644 --- a/ql/src/test/queries/clientpositive/correlationoptimizer2.q +++ b/ql/src/test/queries/clientpositive/correlationoptimizer2.q @@ 
-96,6 +96,7 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 set hive.optimize.correlation=false; -- Full Outer Join should be handled. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -109,7 +110,39 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -122,8 +155,28 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; set hive.optimize.correlation=false; + +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=true; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS 
cnt @@ -138,11 +191,13 @@ FROM (SELECT a.key AS key, count(1) AS cnt FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key) GROUP BY a.key) tmp; +SET hive.merge.nway.joins=false; set hive.optimize.correlation=true; -- After FULL OUTER JOIN, keys with null values are not grouped, right now, -- we have to generate 2 MR jobs for tmp, 1 MR job for a join b and another for the -- GroupByOperator on key. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -158,11 +213,30 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=false; -- When Correlation Optimizer is turned off, we need 4 MR jobs. -- When Correlation Optimizer is turned on, the subquery of tmp will be evaluated in -- a single MR job (including the subquery a, the subquery b, and a join b). So, we -- will have 2 MR jobs. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -176,7 +250,39 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON 
(x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -189,3 +295,4 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/correlationoptimizer4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/correlationoptimizer4.q b/ql/src/test/queries/clientpositive/correlationoptimizer4.q index c34ff23..ee95abf 100644 --- a/ql/src/test/queries/clientpositive/correlationoptimizer4.q +++ b/ql/src/test/queries/clientpositive/correlationoptimizer4.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; + CREATE TABLE T1_n146(key INT, val STRING); LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1_n146; CREATE TABLE T2_n86(key INT, val STRING); @@ -24,7 +25,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) @@ -38,7 +39,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=true; set hive.auto.convert.join=true; -- Enable hive.auto.convert.join. -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x JOIN T1_n146 y ON (x.key = y.key) JOIN T3_n34 z ON (y.key = z.key) @@ -53,7 +54,7 @@ set hive.auto.convert.join=false; set hive.optimize.correlation=false; -- This case should be optimized, since the key of GroupByOperator is from the leftmost table -- of a chain of LEFT OUTER JOINs. -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -65,7 +66,7 @@ FROM (SELECT x.key AS key, count(1) AS cnt GROUP BY x.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -80,7 +81,7 @@ set hive.optimize.correlation=true; -- This query will not be optimized by correlation optimizer because -- GroupByOperator uses y.key (a right table of a left outer join) -- as the key. -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x LEFT OUTER JOIN T1_n146 y ON (x.key = y.key) LEFT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -94,7 +95,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should be optimized, since the key of GroupByOperator is from the rightmost table -- of a chain of RIGHT OUTER JOINs. 
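The hunks above all follow one pattern: capture the plan for the same aggregate-over-FULL OUTER JOIN query once with the new FULL OUTER MapJoin disabled and once enabled, switching n-way join merging off around the enabled run. A condensed, illustrative sketch of that pattern follows; it reuses only statements already present in the patch and is not an additional hunk.

-- Baseline: FULL OUTER MapJoin conversion off.
SET hive.mapjoin.full.outer=false;
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
FROM (SELECT a.key AS key, count(1) AS cnt
      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
      ON (a.key = b.key)
      GROUP BY a.key) tmp;

-- Same query with FULL OUTER MapJoin on; n-way join merging is switched off
-- around it, presumably so the converted FULL OUTER MapJoin shows up on its
-- own in the captured plan rather than merged with an adjacent join.
SET hive.mapjoin.full.outer=true;
SET hive.merge.nway.joins=false;
EXPLAIN
SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
FROM (SELECT a.key AS key, count(1) AS cnt
      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
      ON (a.key = b.key)
      GROUP BY a.key) tmp;
SET hive.merge.nway.joins=true;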
-EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -106,7 +107,7 @@ FROM (SELECT z.key AS key, count(1) AS cnt GROUP BY z.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT z.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -121,7 +122,7 @@ set hive.optimize.correlation=true; -- This query will not be optimized by correlation optimizer because -- GroupByOperator uses y.key (a left table of a right outer join) -- as the key. -EXPLAIN +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x RIGHT OUTER JOIN T1_n146 y ON (x.key = y.key) RIGHT OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -135,7 +136,20 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should not be optimized because afer the FULL OUTER JOIN, rows with null keys -- are not grouped. -EXPLAIN +set hive.auto.convert.join=false; +EXPLAIN VECTORIZATION +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -147,7 +161,8 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; -EXPLAIN +set hive.auto.convert.join=false; +EXPLAIN VECTORIZATION SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) @@ -157,3 +172,15 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN VECTORIZATION +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2_n86 x FULL OUTER JOIN T1_n146 y ON (x.key = y.key) FULL OUTER JOIN T3_n34 z ON (y.key = z.key) + GROUP BY y.key) tmp; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q b/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..32b2e04 --- /dev/null +++ 
b/ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q @@ -0,0 +1,290 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. +------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +set hive.mapjoin.hybridgrace.hashtable=false; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. 
+SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + 
+CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b_txt compute statistics; +analyze table fullouter_multikey_big_1b_txt compute statistics for columns; +analyze table fullouter_multikey_small_1b_txt compute statistics; +analyze table fullouter_multikey_small_1b_txt compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, 
s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; +analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/join32_lessSize.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/join32_lessSize.q b/ql/src/test/queries/clientpositive/join32_lessSize.q index fcadbe3..b998ac7 100644 --- a/ql/src/test/queries/clientpositive/join32_lessSize.q +++ b/ql/src/test/queries/clientpositive/join32_lessSize.q @@ -2,6 +2,7 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.mapred.mode=nonstrict; + -- SORT_QUERY_RESULTS CREATE TABLE dest_j1_n21(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE; http://git-wip-us.apache.org/repos/asf/hive/blob/a37827ec/ql/src/test/queries/clientpositive/join33.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/join33.q b/ql/src/test/queries/clientpositive/join33.q index 1527575..6ddf0eb 100644 --- a/ql/src/test/queries/clientpositive/join33.q +++ b/ql/src/test/queries/clientpositive/join33.q @@ -2,6 +2,7 @@ --! qt:dataset:src1 --! qt:dataset:src set hive.mapred.mode=nonstrict; + -- SORT_QUERY_RESULTS CREATE TABLE dest_j1_n7(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
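What the paired big/small _nonull table variants throughout this file probe is NULL-key handling on each side of the FULL OUTER MapJoin: small-table keys with no big-table match must still be emitted, with the big-table columns NULL. A minimal, hypothetical follow-on query over the string-key tables created above would surface exactly those non-matched rows; it is an illustration only and is not part of the patch.

SET hive.mapjoin.full.outer=true;
-- Small-table rows whose (non-NULL) key has no match in the big table;
-- the big-table side of each emitted row is NULL.
SELECT s.key, s.s_date, s.s_timestamp
FROM fullouter_string_big_1a b
FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
WHERE b.key IS NULL AND s.key IS NOT NULL
order by s.key;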