This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 0978d700769 HIVE-29375: FULL OUTER JOIN is failing with Unexpected hash table key type DATE (#6239)
0978d700769 is described below
commit 0978d700769efe43b223fee9cdcc1ee48e814747
Author: Raghav Aggarwal <[email protected]>
AuthorDate: Wed Jan 7 13:11:38 2026 +0530
HIVE-29375: FULL OUTER JOIN is failing with Unexpected hash table key type DATE (#6239)
---
.../vector/mapjoin/VectorMapJoinLongHashUtil.java | 41 ++++
.../VectorMapJoinOuterGenerateResultOperator.java | 1 +
.../VectorMapJoinFastLongHashMapContainer.java | 3 +-
...VectorMapJoinFastLongHashMultiSetContainer.java | 3 +-
.../VectorMapJoinFastLongHashSetContainer.java | 3 +-
.../fast/VectorMapJoinFastLongHashTable.java | 3 +-
.../fast/VectorMapJoinFastLongHashUtil.java | 55 -----
.../VectorMapJoinOptimizedLongHashMap.java | 30 +--
.../ql/exec/vector/mapjoin/MapJoinTestConfig.java | 4 +
.../exec/vector/mapjoin/TestMapJoinOperator.java | 74 ++++++
.../vector/mapjoin/fast/CheckFastRowHashMap.java | 5 +
.../fast/TestVectorMapJoinFastRowHashMap.java | 41 ++++
.../clientpositive/vector_full_outer_join_date.q | 14 ++
.../llap/vector_full_outer_join_date.q.out | 262 +++++++++++++++++++++
14 files changed, 459 insertions(+), 80 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java
new file mode 100644
index 00000000000..8008210c5bb
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLongHashUtil.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin;
+
+import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
+import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
+
+public class VectorMapJoinLongHashUtil {
+
+ public static long deserializeLongKey(
+ BinarySortableDeserializeRead keyBinarySortableDeserializeRead,
+ HashTableKeyType hashTableKeyType)
+ throws RuntimeException {
+ return switch (hashTableKeyType) {
+ case BOOLEAN -> (keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0);
+ case BYTE -> keyBinarySortableDeserializeRead.currentByte;
+ case SHORT -> keyBinarySortableDeserializeRead.currentShort;
+ case INT -> keyBinarySortableDeserializeRead.currentInt;
+ case DATE -> keyBinarySortableDeserializeRead.currentDateWritable.getDays();
+ case LONG -> keyBinarySortableDeserializeRead.currentLong;
+ default ->
+ throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType.name());
+ };
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
index fff2f28a097..e83b178e4dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java
@@ -824,6 +824,7 @@ protected void generateFullOuterSmallTableNoMatches(byte smallTablePos,
case SHORT:
case INT:
case LONG:
+ case DATE:
generateFullOuterLongKeySmallTableNoMatches();
break;
case STRING:
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
index 6ef9b64cba9..f8179c70f3f 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMapContainer.java
@@ -21,6 +21,7 @@
import org.apache.hadoop.hive.common.MemoryEstimate;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
@@ -169,7 +170,7 @@ public long getHashCode(BytesWritable currentKey) throws
HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
- long key =
VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
+ long key =
VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java
index c7184d7e81d..4842c002785 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSetContainer.java
@@ -20,6 +20,7 @@
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMultiSet;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
@@ -87,7 +88,7 @@ public long getHashCode(BytesWritable currentKey) throws
HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
- long key =
VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
+ long key =
VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java
index 1690739cc62..57030de901f 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSetContainer.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import
org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
@@ -86,7 +87,7 @@ public long getHashCode(BytesWritable currentKey) throws
HiveException, IOExcept
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
- long key =
VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
+ long key =
VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
return HashCodeUtil.calculateLongHashCode(key);
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index ba46bfc26db..d26e3d271b4 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -20,6 +20,7 @@
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.slf4j.Logger;
@@ -77,7 +78,7 @@ public boolean adaptPutRow(long hashCode, BytesWritable
currentKey, BytesWritabl
throw new HiveException("DeserializeRead details: " +
keyBinarySortableDeserializeRead.getDetailedReadPositionString(), e);
}
- long key =
VectorMapJoinFastLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
+ long key =
VectorMapJoinLongHashUtil.deserializeLongKey(keyBinarySortableDeserializeRead,
hashTableKeyType);
add(hashCode, key, currentValue);
return true;
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java
deleted file mode 100644
index d3bda217a16..00000000000
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashUtil.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
-
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
-import
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
-
-public class VectorMapJoinFastLongHashUtil {
-
- public static long deserializeLongKey(BinarySortableDeserializeRead
keyBinarySortableDeserializeRead,
- HashTableKeyType hashTableKeyType) throws RuntimeException {
- long key = 0;
- switch (hashTableKeyType) {
- case BOOLEAN:
- key = (keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0);
- break;
- case BYTE:
- key = (long) keyBinarySortableDeserializeRead.currentByte;
- break;
- case SHORT:
- key = (long) keyBinarySortableDeserializeRead.currentShort;
- break;
- case INT:
- key = (long) keyBinarySortableDeserializeRead.currentInt;
- break;
- case DATE:
- key = (long)
keyBinarySortableDeserializeRead.currentDateWritable.getDays();
- break;
- case LONG:
- key = keyBinarySortableDeserializeRead.currentLong;
- break;
- default:
- throw new RuntimeException("Unexpected hash table key type " +
hashTableKeyType.name());
- }
- return key;
- }
-}
\ No newline at end of file
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java
index 65c51270b8e..934bd82d70e 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java
@@ -18,8 +18,6 @@
package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized;
-import java.io.IOException;
-
import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
@@ -34,6 +32,9 @@
import
org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinLongHashUtil;
+
+import java.io.IOException;
/*
* An single long value hash map based on the BytesBytesMultiHashMap.
@@ -89,6 +90,9 @@ public void init() {
case LONG:
integerTypeInfo = TypeInfoFactory.longTypeInfo;
break;
+ case DATE:
+ integerTypeInfo = TypeInfoFactory.dateTypeInfo;
+ break;
default:
throw new RuntimeException("Unexpected key type " +
hashMap.hashTableKeyType);
}
@@ -107,25 +111,9 @@ private boolean readNonMatchedLongKey(ByteSegmentRef
keyRef) throws HiveExceptio
if (!keyBinarySortableDeserializeRead.readNextField()) {
return false;
}
- switch (hashMap.hashTableKeyType) {
- case BOOLEAN:
- longValue = keyBinarySortableDeserializeRead.currentBoolean ? 1 : 0;
- break;
- case BYTE:
- longValue = keyBinarySortableDeserializeRead.currentByte;
- break;
- case SHORT:
- longValue = keyBinarySortableDeserializeRead.currentShort;
- break;
- case INT:
- longValue = keyBinarySortableDeserializeRead.currentInt;
- break;
- case LONG:
- longValue = keyBinarySortableDeserializeRead.currentLong;
- break;
- default:
- throw new RuntimeException("Unexpected key type " +
hashMap.hashTableKeyType);
- }
+ longValue =
+ VectorMapJoinLongHashUtil.deserializeLongKey(
+ keyBinarySortableDeserializeRead, hashMap.hashTableKeyType);
} catch (IOException e) {
throw new HiveException(e);
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
index e4674d81efc..8597229c3e3 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java
@@ -394,6 +394,9 @@ public static VectorMapJoinDesc
createVectorMapJoinDesc(MapJoinTestDescription t
case LONG:
hashTableKeyType = HashTableKeyType.LONG;
break;
+ case DATE:
+ hashTableKeyType = HashTableKeyType.DATE;
+ break;
case STRING:
hashTableKeyType = HashTableKeyType.STRING;
break;
@@ -547,6 +550,7 @@ public static VectorMapJoinCommonOperator
createNativeVectorMapJoinOperator(
case BYTE:
case SHORT:
case INT:
+ case DATE:
case LONG:
switch (VectorMapJoinVariation) {
case INNER:
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
index a38a6c98f47..5571be43574 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java
@@ -815,6 +815,80 @@ public boolean doTestLong6(long seed, int rowCount, int
hiveConfVariation,
return false;
}
+ @Test
+ public void testDate0() throws Exception {
+ long seed = 8322;
+ int rowCount = 10;
+
+ int hiveConfVariation = 0;
+ boolean hiveConfVariationsDone = false;
+ do {
+ for (VectorMapJoinVariation vectorMapJoinVariation :
VectorMapJoinVariation.values()) {
+ hiveConfVariationsDone =
+ doTestDate0(
+ seed,
+ rowCount,
+ hiveConfVariation,
+ vectorMapJoinVariation,
+ MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN);
+ }
+ seed++;
+ hiveConfVariation++;
+ } while (!hiveConfVariationsDone);
+ }
+
+ public boolean doTestDate0(
+ long seed,
+ int rowCount,
+ int hiveConfVariation,
+ VectorMapJoinVariation vectorMapJoinVariation,
+ MapJoinPlanVariation mapJoinPlanVariation)
+ throws Exception {
+
+ HiveConf hiveConf = getHiveConf();
+
+ if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) {
+ return true;
+ }
+
+ TypeInfo[] bigTableTypeInfos;
+ int[] bigTableKeyColumnNums;
+ TypeInfo[] smallTableValueTypeInfos;
+ int[] smallTableRetainKeyColumnNums;
+
+ SmallTableGenerationParameters smallTableGenerationParameters =
+ new SmallTableGenerationParameters();
+
+ MapJoinTestDescription testDesc;
+ MapJoinTestData testData;
+
+ // Big Table: date key; Small Table: key retained, string value
+ bigTableTypeInfos = new TypeInfo[] {TypeInfoFactory.dateTypeInfo};
+ bigTableKeyColumnNums = new int[] {0};
+ smallTableRetainKeyColumnNums = new int[] {0};
+ smallTableValueTypeInfos = new TypeInfo[] {TypeInfoFactory.stringTypeInfo};
+
+ testDesc =
+ new MapJoinTestDescription(
+ hiveConf,
+ vectorMapJoinVariation,
+ bigTableTypeInfos,
+ bigTableKeyColumnNums,
+ smallTableValueTypeInfos,
+ smallTableRetainKeyColumnNums,
+ smallTableGenerationParameters,
+ mapJoinPlanVariation);
+
+ if (!goodTestVariation(testDesc)) {
+ return false;
+ }
+
+ testData = new MapJoinTestData(rowCount, testDesc, seed);
+ executeTest(testDesc, testData, "testDate0");
+
+ return false;
+ }
+
private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf
hiveConf) {
// Set defaults.
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
index 5a9f180b3f3..e0d387718b5 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
@@ -35,6 +35,7 @@
import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.VerifyLazy;
import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead;
@@ -335,6 +336,7 @@ public void verify(VectorMapJoinFastHashTableContainerBase
map,
case SHORT:
case INT:
case LONG:
+ case DATE:
{
Object[] keyRow = element.getKeyRow();
Object keyObject = keyRow[0];
@@ -357,6 +359,9 @@ public void verify(VectorMapJoinFastHashTableContainerBase
map,
case LONG:
longKey = ((LongWritable) keyObject).get();
break;
+ case DATE:
+ longKey = ((DateWritableV2) keyObject).getDays();
+ break;
default:
throw new RuntimeException("Unexpected hash table key type " +
hashTableKeyType.name());
}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
index f5eb68c6ba7..291b6a40b25 100644
---
a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
+++
b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastRowHashMap.java
@@ -495,6 +495,47 @@ public void testBigIntRowsExact() throws Exception {
/* doClipping */ false, /* useExactBytes */ true);
}
+ @Test
+ public void testDateRowsExact() throws Exception {
+ random = new Random(44332);
+
+ // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastLongHashMapContainer map =
+ new VectorMapJoinFastLongHashMapContainer(
+ false,
+ false,
+ HashTableKeyType.DATE,
+ LARGE_CAPACITY,
+ LOAD_FACTOR,
+ LARGE_WB_SIZE,
+ -1,
+ tableDesc,
+ 4);
+
+ VerifyFastRowHashMap verifyTable = new VerifyFastRowHashMap();
+ VectorRandomRowSource valueSource = new VectorRandomRowSource();
+
+ valueSource.init(
+ random,
+ VectorRandomRowSource.SupportedTypes.ALL,
+ 4,
+ false,
+ false);
+
+ int rowCount = 1000;
+ Object[][] rows = valueSource.randomRows(rowCount);
+
+ addAndVerifyRows(
+ valueSource,
+ rows,
+ map,
+ HashTableKeyType.DATE,
+ verifyTable,
+ new String[] {"date"},
+ false,
+ true);
+ }
+
@Test
public void testIntRowsExact() throws Exception {
random = new Random(8238383);
diff --git a/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q
new file mode 100644
index 00000000000..d1e2533578f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_full_outer_join_date.q
@@ -0,0 +1,14 @@
+set hive.vectorized.execution.enabled=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join=true;
+
+create table tbl1 (id int, event_date date);
+create table tbl2 (id int, event_date date);
+
+insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'), (3, '2023-01-03');
+insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'), (4, '2023-01-05');
+
+explain vectorization detail select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id;
+
+select * from tbl1 full outer join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id;
+
diff --git
a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out
b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out
new file mode 100644
index 00000000000..d1d49f77854
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out
@@ -0,0 +1,262 @@
+PREHOOK: query: create table tbl1 (id int, event_date date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: create table tbl1 (id int, event_date date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl1
+PREHOOK: query: create table tbl2 (id int, event_date date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: create table tbl2 (id int, event_date date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl2
+PREHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'),
(3, '2023-01-03')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl1
+POSTHOOK: query: insert into tbl1 values (1, '2023-01-01'), (2, '2023-01-02'),
(3, '2023-01-03')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl1
+POSTHOOK: Lineage: tbl1.event_date SCRIPT []
+POSTHOOK: Lineage: tbl1.id SCRIPT []
+PREHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'),
(4, '2023-01-05')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl2
+POSTHOOK: query: insert into tbl2 values (2, '2023-01-02'), (3, '2023-01-04'),
(4, '2023-01-05')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl2
+POSTHOOK: Lineage: tbl2.event_date SCRIPT []
+POSTHOOK: Lineage: tbl2.id SCRIPT []
+PREHOOK: query: explain vectorization detail select * from tbl1 full outer
join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select * from tbl1 full outer
join tbl2 on tbl1.event_date = tbl2.event_date order by tbl1.id, tbl2.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl1
+ Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:id:int,
1:event_date:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>,
3:ROW__IS__DELETED:boolean]
+ Select Operator
+ expressions: id (type: int), event_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 3 Data size: 180 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: date)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col1 (type: date)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:date
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int
+ Statistics: Num rows: 3 Data size: 180 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: id:int, event_date:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tbl2
+ Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:id:int,
1:event_date:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>,
3:ROW__IS__DELETED:boolean]
+ Select Operator
+ expressions: id (type: int), event_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 3 Data size: 180 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: date)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col1 (type: date)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:date
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int, 4:smallint
+ valueExpressions: ConstantVectorExpression(val 0) ->
4:smallint
+ Statistics: Num rows: 3 Data size: 180 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), 0S (type: smallint)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: id:int, event_date:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder: z
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY.reducesinkkey0:date, VALUE._col0:int
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: date)
+ 1 KEY.reducesinkkey0 (type: date)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:date
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 1:int, 0:date
+ className: VectorMapJoinFullOuterLongOperator
+ fullOuterSmallTableKeyMapping: 0 -> 3
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS
true, hive.vectorized.execution.mapjoin.native.enabled IS true,
hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No
nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true,
Optimized Table and Supports Key Types IS true
+ projectedOutput: 1:int, 0:date, 2:int, 3:date
+ smallTableValueMapping: 2:int
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE
Column stats: COMPLETE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col2 (type: int)
+ null sort order: zz
+ sort order: ++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 1:int, 2:int
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:date, 3:date
+ Statistics: Num rows: 9 Data size: 1080 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: date), _col3 (type: date)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder: zz
+ reduceColumnSortOrder: ++
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY.reducesinkkey0:int,
KEY.reducesinkkey1:int, VALUE._col0:date, VALUE._col1:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0
(type: date), KEY.reducesinkkey1 (type: int), VALUE._col1 (type: date)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 2, 1, 3]
+ Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 9 Data size: 1080 Basic stats:
COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from tbl1 full outer join tbl2 on tbl1.event_date =
tbl2.event_date order by tbl1.id, tbl2.id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl1
+PREHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tbl1 full outer join tbl2 on tbl1.event_date =
tbl2.event_date order by tbl1.id, tbl2.id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl1
+POSTHOOK: Input: default@tbl2
+#### A masked pattern was here ####
+1 2023-01-01 NULL NULL
+2 2023-01-02 2 2023-01-02
+3 2023-01-03 NULL NULL
+NULL NULL 3 2023-01-04
+NULL NULL 4 2023-01-05