This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 1455f6201b0 HIVE-27943: NPE in VectorMapJoinCommonOperator.setUpHashTable when running query with join on date (Stamatis Zampetakis reviewed by Attila Turoczy, Krisztian Kasa)
1455f6201b0 is described below
commit 1455f6201b0f7b061361bc9acc23cb810ff02483
Author: Stamatis Zampetakis <[email protected]>
AuthorDate: Thu Dec 7 16:20:17 2023 +0100
HIVE-27943: NPE in VectorMapJoinCommonOperator.setUpHashTable when running
query with join on date (Stamatis Zampetakis reviewed by Attila Turoczy,
Krisztian Kasa)
Close apache/hive#4929
---
.../VectorMapJoinOptimizedCreateHashTable.java | 1 +
ql/src/test/queries/clientpositive/mapjoin_date.q | 8 +
.../results/clientpositive/llap/mapjoin_date.q.out | 186 +++++++++++++++++++++
3 files changed, 195 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
index 4171f2038c2..dff1f51da60 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java
@@ -55,6 +55,7 @@ public class VectorMapJoinOptimizedCreateHashTable {
case SHORT:
case INT:
case LONG:
+ case DATE:
switch (hashTableKind) {
case HASH_MAP:
hashTable = new VectorMapJoinOptimizedLongHashMap(
diff --git a/ql/src/test/queries/clientpositive/mapjoin_date.q b/ql/src/test/queries/clientpositive/mapjoin_date.q
new file mode 100644
index 00000000000..8126c017a90
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin_date.q
@@ -0,0 +1,8 @@
+set hive.auto.convert.join=true;
+
+CREATE TABLE person (fname string, birthDate date);
+INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28');
+
+EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate;
+
+SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate;
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out
new file mode 100644
index 00000000000..c5dfc75a5f3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: CREATE TABLE person (fname string, birthDate date)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@person
+POSTHOOK: query: CREATE TABLE person (fname string, birthDate date)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@person
+PREHOOK: query: INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@person
+POSTHOOK: query: INSERT INTO person VALUES ('Victor', '2023-11-27'), ('Alexandre', '2023-11-28')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@person
+POSTHOOK: Lineage: person.birthdate SCRIPT []
+POSTHOOK: Lineage: person.fname SCRIPT []
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@person
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@person
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: p1
+ filterExpr: birthdate is not null (type: boolean)
+ probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:fname:string, 1:birthdate:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 1:date)
+ predicate: birthdate is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: fname (type: string), birthdate (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col1 (type: date)
+ 1 _col1 (type: date)
+ Map Join Vectorization:
+ bigTableKeyColumns: 1:date
+ bigTableRetainColumnNums: [0, 1]
+ bigTableValueColumns: 0:string, 1:date
+ className: VectorMapJoinInnerLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nonOuterSmallTableKeyMapping: []
+ projectedOutput: 0:string, 1:date, 4:string, 1:date
+ smallTableValueMapping: 4:string
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2, _col3
+ input vertices:
+ 1 Map 2
+ Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: fname:string, birthdate:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [string]
+ Map 2
+ Map Operator Tree:
+ TableScan
+ alias: p2
+ filterExpr: birthdate is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:fname:string, 1:birthdate:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 3:ROW__IS__DELETED:boolean]
+ Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: SelectColumnIsNotNull(col 1:date)
+ predicate: birthdate is not null (type: boolean)
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: fname (type: string), birthdate (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: date)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: _col1 (type: date)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:date
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:string
+ Statistics: Num rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: fname:string, birthdate:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@person
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM person p1 INNER JOIN person p2 ON p1.birthDate=p2.birthDate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@person
+#### A masked pattern was here ####
+Victor 2023-11-27 Victor 2023-11-27
+Alexandre 2023-11-28 Alexandre 2023-11-28