Repository: hive Updated Branches: refs/heads/master 158f119ee -> 2fb7695b3
HIVE-19116: Vectorization: Vector Map data type doesn't keep the order of the key/values pairs as read (Matt McCline, reviewed by Deepak Jaiswal) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2fb7695b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2fb7695b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2fb7695b Branch: refs/heads/master Commit: 2fb7695b35ddbca631fa7469b2ffa6882c7789d1 Parents: 158f119 Author: Matt McCline <mmccl...@hortonworks.com> Authored: Fri Apr 6 16:07:05 2018 -0500 Committer: Matt McCline <mmccl...@hortonworks.com> Committed: Fri Apr 6 16:07:05 2018 -0500 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/exec/vector/VectorExtractRow.java | 4 +- .../queries/clientpositive/vector_map_order.q | 15 +++ .../llap/parquet_map_type_vectorization.q.out | 60 +++++------ .../clientpositive/llap/vector_map_order.q.out | 106 +++++++++++++++++++ .../parquet_map_type_vectorization.q.out | 60 +++++------ .../clientpositive/vector_map_order.q.out | 101 ++++++++++++++++++ 7 files changed, 285 insertions(+), 62 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index d2e077b..28c14eb 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -370,6 +370,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ vector_left_outer_join.q,\ vector_left_outer_join2.q,\ vector_leftsemi_mapjoin.q,\ + vector_map_order.q,\ vector_mr_diff_schema_alias.q,\ vector_multi_insert.q,\ vector_null_projection.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index d0961b3..152d75b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; -import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -397,7 +397,7 @@ public class VectorExtractRow { final int offset = (int) mapColumnVector.offsets[adjustedIndex]; final int size = (int) mapColumnVector.lengths[adjustedIndex]; - final Map map = new HashMap(); + final Map<Object, Object> map = new LinkedHashMap<Object, Object>(); for (int i = 0; i < size; i++) { final Object key = extractRowColumn( mapColumnVector.keys, http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/test/queries/clientpositive/vector_map_order.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_map_order.q b/ql/src/test/queries/clientpositive/vector_map_order.q new file mode 100644 index 0000000..b1f05d5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_map_order.q @@ -0,0 +1,15 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table map_table (foo STRING , bar MAP<STRING, STRING>) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +STORED AS TEXTFILE; + +load data local inpath "../../data/files/map_table.txt" overwrite into table map_table; + +explain vectorization detail +select * from map_table; +select * from map_table; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out index 1d70e06..a2bb0f3 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_map_type_vectorization.q.out @@ -173,16 +173,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type POSTHOOK: Output: hdfs://### HDFS PATH ### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY @@ -402,16 +402,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type POSTHOOK: Output: hdfs://### HDFS PATH ### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY @@ -468,16 +468,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type POSTHOOK: Output: hdfs://### HDFS PATH ### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/test/results/clientpositive/llap/vector_map_order.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_map_order.q.out b/ql/src/test/results/clientpositive/llap/vector_map_order.q.out new file mode 100644 index 0000000..88748fa --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_map_order.q.out @@ -0,0 +1,106 @@ +PREHOOK: query: create table map_table (foo STRING , bar MAP<STRING, STRING>) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@map_table +POSTHOOK: query: create table map_table (foo STRING , bar MAP<STRING, STRING>) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@map_table +PREHOOK: query: load data local inpath "../../data/files/map_table.txt" overwrite into table map_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@map_table +POSTHOOK: query: load data local inpath "../../data/files/map_table.txt" overwrite into table map_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@map_table +PREHOOK: query: explain vectorization detail +select * from map_table +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * from map_table +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: map_table + Statistics: Num rows: 1 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:foo:string, 1:bar:map<string,string>, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: foo (type: string), bar (type: map<string,string>) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 1104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: foo:string, bar:map<string,string> + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from map_table +PREHOOK: type: QUERY +PREHOOK: Input: default@map_table +#### A masked pattern was here #### +POSTHOOK: query: select * from map_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_table +#### A masked pattern was here #### +foo1 {"k1":"v1","k2":"v2","k3":"v3"} +foo2 {"k21":"v21","k22":"v22","k31":"v31"} http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out index af9754e..289909d 100644 --- a/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out +++ b/ql/src/test/results/clientpositive/parquet_map_type_vectorization.q.out @@ -169,16 +169,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type #### A masked pattern was here #### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: explain vectorization expression select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY @@ -386,16 +386,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type #### A masked pattern was here #### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY @@ -452,16 +452,16 @@ stringMap[stringIndex], intMap[intIndex], doubleMap[doubleIndex] from parquet_ma POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_map_type #### A masked pattern was here #### -{"k1":"v1","k2":"v1-2"} {456:2,123:1} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 -{"k1":"v2","k2":"v2-2"} {456:4,123:3} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 -{"k1":"v3","k2":"v3-2"} {456:6,123:5} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 -{"k1":"v4","k2":"v4-2"} {456:8,123:7} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 -{"k1":"v5","k2":"v5-2"} {456:10,123:9} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 -{"k1":"v6","k2":"v6-2"} {456:12,123:11} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 -{"k1":"v7","k2":"v7-2"} {456:14,123:13} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 -{"k1":"v8","k2":"v8-2"} {456:16,123:15} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 -{"k1":"v9","k2":"v9-2"} {456:18,123:17} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 -{"k1":"v10","k2":"v10-2"} {456:20,123:19} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 +{"k1":"v1","k2":"v1-2"} {123:1,456:2} {123.123:1.1,456.456:1.2} v1-2 2 1.1 v1 1 1.2 +{"k1":"v2","k2":"v2-2"} {123:3,456:4} {123.123:2.1,456.456:2.2} v2-2 4 2.1 v2 3 2.2 +{"k1":"v3","k2":"v3-2"} {123:5,456:6} {123.123:3.1,456.456:3.2} v3-2 6 3.1 v3 5 3.2 +{"k1":"v4","k2":"v4-2"} {123:7,456:8} {123.123:4.1,456.456:4.2} v4-2 8 4.1 v4 7 4.2 +{"k1":"v5","k2":"v5-2"} {123:9,456:10} {123.123:5.1,456.456:5.2} v5-2 10 5.1 v5 9 5.2 +{"k1":"v6","k2":"v6-2"} {123:11,456:12} {123.123:6.1,456.456:6.2} v6-2 12 6.1 v6 11 6.2 +{"k1":"v7","k2":"v7-2"} {123:13,456:14} {123.123:7.1,456.456:7.2} v7-2 14 7.1 v7 13 7.2 +{"k1":"v8","k2":"v8-2"} {123:15,456:16} {123.123:8.1,456.456:8.2} v8-2 16 8.1 v8 15 8.2 +{"k1":"v9","k2":"v9-2"} {123:17,456:18} {123.123:9.1,456.456:9.2} v9-2 18 9.1 v9 17 9.2 +{"k1":"v10","k2":"v10-2"} {123:19,456:20} {123.123:10.1,456.456:10.2} v10-2 20 10.1 v10 19 10.2 PREHOOK: query: select sum(intMap[123]), sum(doubleMap[123.123]), stringMap['k1'] from parquet_map_type where stringMap['k1'] like 'v100%' group by stringMap['k1'] order by stringMap['k1'] limit 10 PREHOOK: type: QUERY http://git-wip-us.apache.org/repos/asf/hive/blob/2fb7695b/ql/src/test/results/clientpositive/vector_map_order.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_map_order.q.out b/ql/src/test/results/clientpositive/vector_map_order.q.out new file mode 100644 index 0000000..6fe6943 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_map_order.q.out @@ -0,0 +1,101 @@ +PREHOOK: query: create table map_table (foo STRING , bar MAP<STRING, STRING>) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@map_table +POSTHOOK: query: create table map_table (foo STRING , bar MAP<STRING, STRING>) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '\t' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@map_table +PREHOOK: query: load data local inpath "../../data/files/map_table.txt" overwrite into table map_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@map_table +POSTHOOK: query: load data local inpath "../../data/files/map_table.txt" overwrite into table map_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@map_table +PREHOOK: query: explain vectorization detail +select * from map_table +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select * from map_table +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: map_table + Statistics: Num rows: 1 Data size: 520 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:foo:string, 1:bar:map<string,string>, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: foo (type: string), bar (type: map<string,string>) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 520 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 520 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: foo:string, bar:map<string,string> + partitionColumnCount: 0 + scratchColumnTypeNames: [] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from map_table +PREHOOK: type: QUERY +PREHOOK: Input: default@map_table +#### A masked pattern was here #### +POSTHOOK: query: select * from map_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_table +#### A masked pattern was here #### +foo1 {"k1":"v1","k2":"v2","k3":"v3"} +foo2 {"k21":"v21","k22":"v22","k31":"v31"}