This is an automated email from the ASF dual-hosted git repository.
pvary pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new effe7e4 HIVE-22805: Vectorization with conditional array or map is
not implemented and throws an error (Peter Vary reviewed by Ramesh Kumar
Thangarajan and Marta Kuczora)
effe7e4 is described below
commit effe7e4345e7220a880e8cb757f0f2c36ec8f864
Author: Peter Vary <[email protected]>
AuthorDate: Thu Feb 6 11:35:56 2020 +0100
HIVE-22805: Vectorization with conditional array or map is not implemented
and throws an error (Peter Vary reviewed by Ramesh Kumar Thangarajan and Marta
Kuczora)
---
.../clientpositive/vectorization_multi_value.q | 30 +
.../clientpositive/vectorization_multi_value.q.out | 653 +++++++++++++++++++++
.../hive/ql/exec/vector/ListColumnVector.java | 85 ++-
.../hive/ql/exec/vector/MapColumnVector.java | 89 ++-
.../ql/exec/vector/MultiValuedColumnVector.java | 6 +-
5 files changed, 860 insertions(+), 3 deletions(-)
diff --git a/ql/src/test/queries/clientpositive/vectorization_multi_value.q
b/ql/src/test/queries/clientpositive/vectorization_multi_value.q
new file mode 100644
index 0000000..d0100aa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vectorization_multi_value.q
@@ -0,0 +1,30 @@
+
+DROP TABLE IF EXISTS cond_vector;
+CREATE TABLE cond_vector(a STRING) STORED AS ORC;
+INSERT OVERWRITE TABLE cond_vector VALUES("a/b"),("a/b"),("c/d");
+set hive.fetch.task.conversion=minimal;
+set hive.execution.mode=container;
+
+SELECT IF(1=1, MAP("a", "b"), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", "b"), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, MAP("a", MAP("b","c")), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", MAP("b","c")), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, MAP("a", a), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", a), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, MAP("a", MAP("b", a)), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", MAP("b", a)), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, ARRAY(ARRAY("a", "b"), ARRAY("c", "d")), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a", "b"), ARRAY("c",
"d")), NULL) FROM cond_vector;
+
+SELECT IF(1=1, ARRAY("a", a), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", a), NULL) FROM
cond_vector;
+
+SELECT IF(1=1, ARRAY(ARRAY("a", a), ARRAY("b", "c")), NULL) FROM cond_vector;
+EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a", a), ARRAY("b",
"c")), NULL) FROM cond_vector;
diff --git a/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
b/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
new file mode 100644
index 0000000..2fec50d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vectorization_multi_value.q.out
@@ -0,0 +1,653 @@
+PREHOOK: query: DROP TABLE IF EXISTS cond_vector
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS cond_vector
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE cond_vector(a STRING) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@cond_vector
+POSTHOOK: query: CREATE TABLE cond_vector(a STRING) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@cond_vector
+PREHOOK: query: INSERT OVERWRITE TABLE cond_vector
VALUES("a/b"),("a/b"),("c/d")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@cond_vector
+POSTHOOK: query: INSERT OVERWRITE TABLE cond_vector
VALUES("a/b"),("a/b"),("c/d")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@cond_vector
+POSTHOOK: Lineage: cond_vector.a SCRIPT []
+PREHOOK: query: SELECT IF(1=1, MAP("a", "b"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, MAP("a", "b"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+{"a":"b"}
+{"a":"b"}
+{"a":"b"}
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", "b"),
NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", "b"),
NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, map('a':'b'), null) (type:
map<string,string>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
3:map<string,string>, null)(children: ConstantVectorExpression(val 1) ->
2:boolean, VectorUDFAdaptor(map('a':'b')) -> 3:map<string,string>) ->
4:map<string,string>
+ Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: []
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, map<string,string>,
map<string,string>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, MAP("a", MAP("b","c")), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, MAP("a", MAP("b","c")), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+{"a":{"b":"c"}}
+{"a":{"b":"c"}}
+{"a":{"b":"c"}}
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a",
MAP("b","c")), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a",
MAP("b","c")), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, map('a':map('b':'c')), null) (type:
map<string,map<string,string>>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
4:map<string,map<string,string>>, null)(children: ConstantVectorExpression(val
1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':'c')))(children:
VectorUDFAdaptor(map('b':'c')) -> 3:map<string,string>) ->
4:map<string,map<string,string>>) -> 5:map<string,map<string,string>>
+ Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: []
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, map<string,string>,
map<string,map<string,string>>, map<string,map<string,string>>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, MAP("a", a), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, MAP("a", a), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+{"a":"a/b"}
+{"a":"a/b"}
+{"a":"c/d"}
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", a), NULL)
FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", a),
NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, map('a':a), null) (type:
map<string,string>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
3:map<string,string>, null)(children: ConstantVectorExpression(val 1) ->
2:boolean, VectorUDFAdaptor(map('a':a)) -> 3:map<string,string>) ->
4:map<string,string>
+ Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, map<string,string>,
map<string,string>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, MAP("a", MAP("b", a)), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, MAP("a", MAP("b", a)), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+{"a":{"b":"a/b"}}
+{"a":{"b":"a/b"}}
+{"a":{"b":"c/d"}}
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", MAP("b",
a)), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, MAP("a", MAP("b",
a)), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, map('a':map('b':a)), null) (type:
map<string,map<string,string>>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [5]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
4:map<string,map<string,string>>, null)(children: ConstantVectorExpression(val
1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':a)))(children:
VectorUDFAdaptor(map('b':a)) -> 3:map<string,string>) ->
4:map<string,map<string,string>>) -> 5:map<string,map<string,string>>
+ Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, map<string,string>,
map<string,map<string,string>>, map<string,map<string,string>>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, ARRAY("a", "b"), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+["a","b"]
+["a","b"]
+["a","b"]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"),
NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", "b"),
NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, array('a','b'), null) (type: array<string>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
3:array<string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean,
VectorUDFAdaptor(array('a','b')) -> 3:array<string>) -> 4:array<string>
+ Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: []
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, array<string>, array<string>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, ARRAY(ARRAY("a", "b"), ARRAY("c", "d")), NULL)
FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, ARRAY(ARRAY("a", "b"), ARRAY("c", "d")), NULL)
FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+[["a","b"],["c","d"]]
+[["a","b"],["c","d"]]
+[["a","b"],["c","d"]]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a",
"b"), ARRAY("c", "d")), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a",
"b"), ARRAY("c", "d")), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, array(array('a','b'),array('c','d')),
null) (type: array<array<string>>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
5:array<array<string>>, null)(children: ConstantVectorExpression(val 1) ->
2:boolean, VectorUDFAdaptor(array(array('a','b'),array('c','d')))(children:
VectorUDFAdaptor(array('a','b')) -> 3:array<string>,
VectorUDFAdaptor(array('c','d')) -> 4:array<string>) -> 5:array<array<string>>)
-> 6:array<array<string>>
+ Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: []
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, array<string>, array<string>,
array<array<string>>, array<array<string>>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, ARRAY("a", a), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, ARRAY("a", a), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+["a","a/b"]
+["a","a/b"]
+["a","c/d"]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", a),
NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY("a", a),
NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, array('a',a), null) (type: array<string>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [4]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
3:array<string>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean,
VectorUDFAdaptor(array('a',a)) -> 3:array<string>) -> 4:array<string>
+ Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, array<string>, array<string>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT IF(1=1, ARRAY(ARRAY("a", a), ARRAY("b", "c")), NULL)
FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT IF(1=1, ARRAY(ARRAY("a", a), ARRAY("b", "c")), NULL)
FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+[["a","a/b"],["b","c"]]
+[["a","a/b"],["b","c"]]
+[["a","c/d"],["b","c"]]
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a",
a), ARRAY("b", "c")), NULL) FROM cond_vector
+PREHOOK: type: QUERY
+PREHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT IF(1=1, ARRAY(ARRAY("a",
a), ARRAY("b", "c")), NULL) FROM cond_vector
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@cond_vector
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: cond_vector
+ Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:a:string,
1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
+ Select Operator
+ expressions: if(true, array(array('a',a),array('b','c')), null)
(type: array<array<string>>)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [6]
+ selectExpressions: IfExprCondExprNull(col 2:boolean, col
5:array<array<string>>, null)(children: ConstantVectorExpression(val 1) ->
2:boolean, VectorUDFAdaptor(array(array('a',a),array('b','c')))(children:
VectorUDFAdaptor(array('a',a)) -> 3:array<string>,
VectorUDFAdaptor(array('b','c')) -> 4:array<string>) -> 5:array<array<string>>)
-> 6:array<array<string>>
+ Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS
true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: a:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, array<string>, array<string>,
array<array<string>>, array<array<string>>]
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
index 8cbcc02..0e13f24 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.Arrays;
+
/**
* The representation of a vectorized column of list objects.
*
@@ -158,7 +160,88 @@ public class ListColumnVector extends
MultiValuedColumnVector {
@Override
public void copySelected(boolean selectedInUse, int[] sel, int size,
ColumnVector outputColVector) {
- throw new RuntimeException("Not supported");
+ ListColumnVector output = (ListColumnVector) outputColVector;
+ boolean[] outputIsNull = output.isNull;
+
+ // We do not need to do a column reset since we are carefully changing the
output.
+ output.isRepeating = false;
+
+ // Handle repeating case
+ if (isRepeating) {
+ if (noNulls || !isNull[0]) {
+ outputIsNull[0] = false;
+ outputColVector.setElement(0, 0, this);
+ } else {
+ outputIsNull[0] = true;
+ output.noNulls = false;
+ }
+ output.isRepeating = true;
+ return;
+ }
+
+ // Handle normal case
+
+ if (noNulls) {
+ if (selectedInUse) {
+
+ // CONSIDER: For large n, fill n or all of isNull array and use the
tighter ELSE loop.
+
+ if (!outputColVector.noNulls) {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ // Set isNull before call in case it changes it mind.
+ outputIsNull[i] = false;
+ outputColVector.setElement(i, i, this);
+ }
+ } else {
+ for(int j = 0; j != size; j++) {
+ final int i = sel[j];
+ outputColVector.setElement(i, i, this);
+ }
+ }
+ } else {
+ if (!outputColVector.noNulls) {
+
+ // Assume it is almost always a performance win to fill all of
isNull so we can
+ // safely reset noNulls.
+ Arrays.fill(outputIsNull, false);
+ outputColVector.noNulls = true;
+ }
+ child.shallowCopyTo(output.child);
+ System.arraycopy(offsets, 0, output.offsets, 0, size);
+ System.arraycopy(lengths, 0, output.lengths, 0, size);
+ output.childCount = childCount;
+ }
+ } else /* there are nulls in our column */ {
+
+ // Carefully handle NULLs...
+
+ /*
+ * For better performance on LONG/DOUBLE we don't want the conditional
+ * statements inside the for loop.
+ */
+ output.noNulls = false;
+
+ if (selectedInUse) {
+ for (int j = 0; j < size; j++) {
+ int i = sel[j];
+ output.isNull[i] = isNull[i];
+ outputColVector.setElement(i, i, this);
+ }
+ } else {
+ child.shallowCopyTo(output.child);
+ System.arraycopy(isNull, 0, output.isNull, 0, size);
+ System.arraycopy(offsets, 0, output.offsets, 0, size);
+ System.arraycopy(lengths, 0, output.lengths, 0, size);
+ output.childCount = childCount;
+ }
+ }
}
+ @Override
+ public void shallowCopyTo(ColumnVector otherCv) {
+ ListColumnVector other = (ListColumnVector)otherCv;
+ super.shallowCopyTo(other);
+ child.shallowCopyTo(other.child);
+ }
}
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
index 3143a44..4a13ad3 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.util.Arrays;
+
/**
* The representation of a vectorized column of map objects.
*
@@ -173,6 +175,91 @@ public class MapColumnVector extends
MultiValuedColumnVector {
/**
 * Copies rows from this map vector into {@code outputColVector}: the rows named by
 * {@code sel[0..size)} when {@code selectedInUse} is true, otherwise the first
 * {@code size} rows.
 *
 * Behavior notes (from the code below):
 * - If this vector {@code isRepeating}, only row 0 is copied (or nulled out) and the
 *   output is marked repeating.
 * - The selected-in-use paths copy element by element via {@code setElement}, while the
 *   non-selected paths share the {@code keys}/{@code values} vectors by reference via
 *   {@code shallowCopyTo} and bulk-copy {@code offsets}/{@code lengths}.
 * - NOTE(review): because the fast paths alias the key/value vectors, the output
 *   presumably must be treated as read-only afterwards — confirm with callers.
 * - Assumes {@code outputColVector} is a MapColumnVector sized for at least
 *   {@code size} rows (TODO confirm; no bounds checks here).
 *
 * @param selectedInUse whether {@code sel} identifies the rows to copy
 * @param sel selected row indices; only read when {@code selectedInUse} is true
 * @param size number of rows (or selected entries) to copy
 * @param outputColVector destination vector; must be a {@link MapColumnVector}
 */
@Override
public void copySelected(boolean selectedInUse, int[] sel, int size,
    ColumnVector outputColVector) {
  MapColumnVector output = (MapColumnVector) outputColVector;
  boolean[] outputIsNull = output.isNull;

  // We do not need to do a column reset since we are carefully changing the output.
  output.isRepeating = false;

  // Handle repeating case: only row 0 carries data, so copy (or null out) that one
  // row and mark the output repeating too.
  if (isRepeating) {
    if (noNulls || !isNull[0]) {
      outputIsNull[0] = false;
      outputColVector.setElement(0, 0, this);
    } else {
      outputIsNull[0] = true;
      output.noNulls = false;
    }
    output.isRepeating = true;
    return;
  }

  // Handle normal case

  if (noNulls) {
    if (selectedInUse) {

      // CONSIDER: For large n, fill n or all of isNull array and use the tighter ELSE loop.

      if (!outputColVector.noNulls) {
        for (int j = 0; j != size; j++) {
          final int i = sel[j];
          // Set isNull before the call in case it changes its mind.
          outputIsNull[i] = false;
          outputColVector.setElement(i, i, this);
        }
      } else {
        // Output already claims no nulls, so only element values need copying.
        for (int j = 0; j != size; j++) {
          final int i = sel[j];
          outputColVector.setElement(i, i, this);
        }
      }
    } else {
      if (!outputColVector.noNulls) {

        // Assume it is almost always a performance win to fill all of isNull so we can
        // safely reset noNulls.
        Arrays.fill(outputIsNull, false);
        outputColVector.noNulls = true;
      }
      // Fast path: alias the key/value vectors and bulk-copy the per-row
      // offsets/lengths instead of copying element by element.
      keys.shallowCopyTo(output.keys);
      values.shallowCopyTo(output.values);
      System.arraycopy(offsets, 0, output.offsets, 0, size);
      System.arraycopy(lengths, 0, output.lengths, 0, size);
      output.childCount = childCount;
    }
  } else /* there are nulls in our column */ {

    // Carefully handle NULLs...

    /*
     * For better performance on LONG/DOUBLE we don't want the conditional
     * statements inside the for loop.
     */
    output.noNulls = false;

    if (selectedInUse) {
      for (int j = 0; j < size; j++) {
        int i = sel[j];
        // Copy the null flag as-is; setElement is still called for null rows,
        // but isNull[i] marks them so readers ignore the value.
        output.isNull[i] = isNull[i];
        outputColVector.setElement(i, i, this);
      }
    } else {
      // Fast path: alias keys/values and bulk-copy null flags, offsets and lengths.
      keys.shallowCopyTo(output.keys);
      values.shallowCopyTo(output.values);
      System.arraycopy(isNull, 0, output.isNull, 0, size);
      System.arraycopy(offsets, 0, output.offsets, 0, size);
      System.arraycopy(lengths, 0, output.lengths, 0, size);
      output.childCount = childCount;
    }
  }
}
+
+ @Override
+ public void shallowCopyTo(ColumnVector otherCv) {
+ MapColumnVector other = (MapColumnVector)otherCv;
+ super.shallowCopyTo(other);
+ keys.shallowCopyTo(other.keys);
+ values.shallowCopyTo(other.values);
}
}
diff --git
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
index 028084c..8d3c81f 100644
---
a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
+++
b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MultiValuedColumnVector.java
@@ -149,6 +149,10 @@ public abstract class MultiValuedColumnVector extends
ColumnVector {
@Override
public void shallowCopyTo(ColumnVector otherCv) {
- throw new UnsupportedOperationException(); // Implement in future, if
needed.
+ MultiValuedColumnVector other = (MultiValuedColumnVector)otherCv;
+ super.shallowCopyTo(other);
+ other.offsets = offsets;
+ other.lengths = lengths;
+ other.childCount = childCount;
}
}