This is an automated email from the ASF dual-hosted git repository.
hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 04b311d HIVE-23501 : AOOB in VectorDeserializeRow when complex types
are converted to primitive types (Ramesh Kumar via Ashutosh Chauhan)
04b311d is described below
commit 04b311d2ce52fb6ab6cf6fe4edb91cd0de970946
Author: RAMESH KUMAR THANGARAJAN <[email protected]>
AuthorDate: Wed May 20 22:21:07 2020 -0700
HIVE-23501 : AOOB in VectorDeserializeRow when complex types are converted
to primitive types (Ramesh Kumar via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <[email protected]>
---
data/files/arrayofIntdata/base_0000001/000054_0 | 1 +
.../test/resources/testconfiguration.properties | 1 +
.../hive/ql/exec/vector/VectorDeserializeRow.java | 6 +-
.../clientpositive/vector_deserialize_row.q | 33 +++++
.../llap/vector_deserialize_row.q.out | 145 +++++++++++++++++++++
5 files changed, 182 insertions(+), 4 deletions(-)
diff --git a/data/files/arrayofIntdata/base_0000001/000054_0 b/data/files/arrayofIntdata/base_0000001/000054_0
new file mode 100644
index 0000000..a7a5893
--- /dev/null
+++ b/data/files/arrayofIntdata/base_0000001/000054_0
@@ -0,0 +1 @@
+test 1
5208187416695208186759785208123282775208126369095208128960765208133938905208135089945208135646875208135960535208135314935208136316075208136109305208136556885208136690205208137412015208137269385208137339875208137443045208137204395208137666885208137720445208137787775208137264825208137990955208138331935208138811555208139113415208139319435208137474495208138853255208139267335208139179105208137428265208139428805208139564045208139611825208139439395208
[...]
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f48e433..e7c3e43 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -946,6 +946,7 @@ minillaplocal.query.files=\
vector_decimal64_case_when_nvl.q,\
vector_decimal64_case_when_nvl_cbo.q,\
vector_decimal64_multi_vertex.q,\
+ vector_deserialize_row.q,\
vector_full_outer_join.q,\
vector_fullouter_mapjoin_1_fast.q,\
vector_fullouter_mapjoin_1_optimized.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 97166ec..6453069 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -846,8 +846,7 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
// Ensure child size.
final int childCapacity = listColVector.child.isNull.length;
- final int childCount = listColVector.childCount;
- if (childCapacity < childCount / 0.75) {
+ if (childCapacity < offset / 0.75) {
listColVector.child.ensureSize(childCapacity * 2, true);
}
@@ -877,8 +876,7 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
// Ensure child size.
final int childCapacity = mapColVector.keys.isNull.length;
- final int childCount = mapColVector.childCount;
- if (childCapacity < childCount / 0.75) {
+ if (childCapacity < offset / 0.75) {
mapColVector.keys.ensureSize(childCapacity * 2, true);
mapColVector.values.ensureSize(childCapacity * 2, true);
}
diff --git a/ql/src/test/queries/clientpositive/vector_deserialize_row.q b/ql/src/test/queries/clientpositive/vector_deserialize_row.q
new file mode 100644
index 0000000..38c8454
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_deserialize_row.q
@@ -0,0 +1,33 @@
+CREATE external TABLE IF NOT EXISTS sessions
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+LOCATION '../../data/files/arrayofIntdata';
+
+CREATE TABLE IF NOT EXISTS sessions_orc
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+);
+
+describe formatted sessions_orc;
+
+INSERT OVERWRITE TABLE sessions_orc
+SELECT * FROM sessions;
+
+select count(1) from sessions_orc;
+select count(1) from sessions;
+drop table sessions;
+drop table sessions_orc;
+
diff --git a/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out
new file mode 100644
index 0000000..6b0e010
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_deserialize_row.q.out
@@ -0,0 +1,145 @@
+PREHOOK: query: CREATE external TABLE IF NOT EXISTS sessions
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sessions
+POSTHOOK: query: CREATE external TABLE IF NOT EXISTS sessions
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
+STORED AS TEXTFILE
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sessions
+PREHOOK: query: CREATE TABLE IF NOT EXISTS sessions_orc
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sessions_orc
+POSTHOOK: query: CREATE TABLE IF NOT EXISTS sessions_orc
+(
+session_id string,
+uid bigint,
+uids array<bigint>,
+search_ids array<string>,
+total_views int,
+datestamp date
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sessions_orc
+PREHOOK: query: describe formatted sessions_orc
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@sessions_orc
+POSTHOOK: query: describe formatted sessions_orc
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@sessions_orc
+# col_name data_type comment
+session_id string
+uid bigint
+uids array<bigint>
+search_ids array<string>
+total_views int
+datestamp date
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: MANAGED_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"datestamp\":\"true\",\"search_ids\":\"true\",\"session_id\":\"true\",\"total_views\":\"true\",\"uid\":\"true\",\"uids\":\"true\"}}
+ bucketing_version 2
+ numFiles 0
+ numRows 0
+ rawDataSize 0
+ totalSize 0
+#### A masked pattern was here ####
+
+# Storage Information
+SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+InputFormat: org.apache.hadoop.mapred.TextInputFormat
+OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+Compressed: No
+Num Buckets: -1
+Bucket Columns: []
+Sort Columns: []
+Storage Desc Params:
+ serialization.format 1
+PREHOOK: query: INSERT OVERWRITE TABLE sessions_orc
+SELECT * FROM sessions
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sessions
+PREHOOK: Output: default@sessions_orc
+POSTHOOK: query: INSERT OVERWRITE TABLE sessions_orc
+SELECT * FROM sessions
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sessions
+POSTHOOK: Output: default@sessions_orc
+POSTHOOK: Lineage: sessions_orc.datestamp SIMPLE [(sessions)sessions.FieldSchema(name:datestamp, type:date, comment:null), ]
+POSTHOOK: Lineage: sessions_orc.search_ids SIMPLE [(sessions)sessions.FieldSchema(name:search_ids, type:array<string>, comment:null), ]
+POSTHOOK: Lineage: sessions_orc.session_id SIMPLE [(sessions)sessions.FieldSchema(name:session_id, type:string, comment:null), ]
+POSTHOOK: Lineage: sessions_orc.total_views SIMPLE [(sessions)sessions.FieldSchema(name:total_views, type:int, comment:null), ]
+POSTHOOK: Lineage: sessions_orc.uid SIMPLE [(sessions)sessions.FieldSchema(name:uid, type:bigint, comment:null), ]
+POSTHOOK: Lineage: sessions_orc.uids SIMPLE [(sessions)sessions.FieldSchema(name:uids, type:array<bigint>, comment:null), ]
+PREHOOK: query: select count(1) from sessions_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sessions_orc
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from sessions_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sessions_orc
+#### A masked pattern was here ####
+1
+PREHOOK: query: select count(1) from sessions
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sessions
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1) from sessions
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sessions
+#### A masked pattern was here ####
+1
+PREHOOK: query: drop table sessions
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@sessions
+PREHOOK: Output: default@sessions
+POSTHOOK: query: drop table sessions
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@sessions
+POSTHOOK: Output: default@sessions
+PREHOOK: query: drop table sessions_orc
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@sessions_orc
+PREHOOK: Output: default@sessions_orc
+POSTHOOK: query: drop table sessions_orc
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@sessions_orc
+POSTHOOK: Output: default@sessions_orc