HIVE-19019: Vectorization: When vectorized, orc_merge_incompat_schema.q throws HiveException "Not implemented yet" from VectorExpressionWriterMap (Matt McCline, reviewed by Teddy Choi)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/fc48d721 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/fc48d721 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/fc48d721 Branch: refs/heads/master Commit: fc48d7218ecf66411079b1e67ea0cf92e37b5ce6 Parents: ba8a99e Author: Matt McCline <[email protected]> Authored: Fri Mar 30 23:45:19 2018 -0500 Committer: Matt McCline <[email protected]> Committed: Fri Mar 30 23:45:19 2018 -0500 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 7 +- .../VectorExpressionWriterFactory.java | 137 +- ql/src/test/queries/clientpositive/nullMap.q | 2 + .../clientpositive/orc_merge_incompat_schema.q | 2 + .../clientpositive/orc_nested_column_pruning.q | 214 ++ .../queries/clientpositive/orc_null_check.q | 2 + .../queries/clientpositive/vector_null_map.q | 21 + .../vector_orc_merge_incompat_schema.q | 53 + .../vector_orc_nested_column_pruning.q | 233 ++ .../clientpositive/vector_orc_null_check.q | 13 + .../clientpositive/vectorization_limit.q | 29 +- .../clientpositive/llap/vector_null_map.q.out | 173 ++ .../llap/vector_orc_merge_incompat_schema.q.out | 305 ++ .../llap/vector_orc_nested_column_pruning.q.out | 2861 ++++++++++++++++++ .../llap/vector_orc_null_check.q.out | 121 + .../llap/vectorization_limit.q.out | 265 +- .../orc_nested_column_pruning.q.out | 1856 ++++++++++++ .../tez/vectorization_limit.q.out | 946 ------ .../clientpositive/vectorization_limit.q.out | 237 +- 19 files changed, 6348 insertions(+), 1129 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index e494f8e..669d0ea 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -46,8 +46,7 @@ minitez.query.files.shared=delete_orig_table.q,\ update_orig_table.q,\ vector_join_part_col_char.q,\ vector_non_string_partition.q,\ - vectorization_div0.q,\ - vectorization_limit.q + vectorization_div0.q # NOTE: Add tests to minitez only if it is very # specific to tez and cannot be added to minillap. @@ -759,7 +758,11 @@ minillaplocal.query.files=\ vector_like_2.q,\ vector_llap_text_1.q,\ vector_mapjoin_reduce.q,\ + vector_null_map.q,\ vector_number_compare_projection.q,\ + vector_orc_merge_incompat_schema.q,\ + vector_orc_nested_column_pruning.q,\ + vector_orc_null_check.q,\ vector_order_null.q,\ vector_outer_reference_windowed.q,\ vector_partitioned_date_time.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index bd594e6..55dc461 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -23,6 +23,7 @@ import java.sql.Timestamp; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; @@ -61,10 +62,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestamp import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.Text; import org.apache.hive.common.util.DateUtils; @@ -1431,11 +1434,13 @@ public final class VectorExpressionWriterFactory { SettableListObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterList() { + private Object obj; private VectorExtractRow vectorExtractRow; private ListTypeInfo listTypeInfo; public VectorExpressionWriter init(SettableListObjectInspector objInspector) throws HiveException { super.init(objInspector); + obj = initValue(null); vectorExtractRow = new VectorExtractRow(); listTypeInfo = (ListTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName()); @@ -1450,24 +1455,43 @@ public final class VectorExpressionWriterFactory { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - return setValue(null, column, row); + final ListColumnVector listColVector = (ListColumnVector) column; + final SettableListObjectInspector listOI = + (SettableListObjectInspector) this.objectInspector; + final List value = (List) vectorExtractRow.extractRowColumn(listColVector, + listTypeInfo, listOI, row); + if (value == null) { + return null; + } + + listOI.resize(obj, value.size()); + for (int i = 0; i < value.size(); i++) { + listOI.set(obj, i, value.get(i)); + } + return obj; } @Override - public Object setValue(Object row, ColumnVector column, int columnRow) + public Object setValue(Object list, ColumnVector column, int row) throws HiveException { + if (list == null) { + list = initValue(null); + } + final ListColumnVector listColVector = (ListColumnVector) column; final SettableListObjectInspector listOI = (SettableListObjectInspector) this.objectInspector; - final List value = (List)vectorExtractRow.extractRowColumn(listColVector, - listTypeInfo, listOI, columnRow); - if (null == row) { - row = ((SettableListObjectInspector) this.objectInspector).create(value.size()); + final List value = (List) vectorExtractRow.extractRowColumn(listColVector, + listTypeInfo, listOI, row); + if (value == null) { + return null; } + + listOI.resize(list, value.size()); for (int i = 0; i < value.size(); i++) { - listOI.set(row, i, value.get(i)); + listOI.set(list, i, value.get(i)); } - return row; + return list; } }.init(fieldObjInspector); @@ -1478,10 +1502,15 @@ public final class VectorExpressionWriterFactory { return new VectorExpressionWriterMap() { private Object obj; + private VectorExtractRow vectorExtractRow; + private MapTypeInfo mapTypeInfo; public VectorExpressionWriter init(SettableMapObjectInspector objInspector) throws HiveException { super.init(objInspector); obj = initValue(null); + vectorExtractRow = new VectorExtractRow(); + mapTypeInfo = (MapTypeInfo) + TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName()); return this; } @@ -1493,13 +1522,45 @@ public final class VectorExpressionWriterFactory { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + final MapColumnVector mapColVector = (MapColumnVector) column; + final SettableMapObjectInspector mapOI = + (SettableMapObjectInspector) this.objectInspector; + final Map<Object, Object> value = + (Map<Object, Object>) vectorExtractRow.extractRowColumn( + mapColVector, mapTypeInfo, mapOI, row); + if (value == null) { + return null; + } + + mapOI.clear(obj); + for (Map.Entry<Object, Object> entry : value.entrySet()) { + mapOI.put(obj, entry.getKey(), entry.getValue()); + } + return obj; } @Override - public Object setValue(Object row, ColumnVector column, int columnRow) + public Object setValue(Object map, ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + if (map == null) { + map = initValue(null); + } + + final MapColumnVector mapColVector = (MapColumnVector) column; + final SettableMapObjectInspector mapOI = + (SettableMapObjectInspector) this.objectInspector; + final Map<Object, Object> value = + (Map<Object, Object>) vectorExtractRow.extractRowColumn( + mapColVector, mapTypeInfo, mapOI, row); + if (value == null) { + return null; + } + + mapOI.clear(map); + for (Map.Entry<Object, Object> entry : value.entrySet()) { + mapOI.put(map, entry.getKey(), entry.getValue()); + } + return map; } }.init(fieldObjInspector); } @@ -1546,9 +1607,9 @@ public final class VectorExpressionWriterFactory { } @Override - public Object setValue(Object field, ColumnVector column, int row) throws HiveException { - if (null == field) { - field = initValue(null); + public Object setValue(Object struct, ColumnVector column, int row) throws HiveException { + if (struct == null) { + struct = initValue(null); } final StructColumnVector structColVector = (StructColumnVector) column; @@ -1562,9 +1623,9 @@ public final class VectorExpressionWriterFactory { final StructField structField = fields.get(i); final Object value = vectorExtractRow.extractRowColumn(structColVector.fields[i], fieldTypeInfos.get(i), structField.getFieldObjectInspector(), row); - structOI.setStructFieldData(obj, structField, value); + structOI.setStructFieldData(struct, structField, value); } - return field; + return struct; } }.init(fieldObjInspector); } @@ -1574,10 +1635,15 @@ public final class VectorExpressionWriterFactory { return new VectorExpressionWriterMap() { private Object obj; + private VectorExtractRow vectorExtractRow; + private UnionTypeInfo unionTypeInfo; public VectorExpressionWriter init(SettableUnionObjectInspector objInspector) throws HiveException { super.init(objInspector); obj = initValue(null); + vectorExtractRow = new VectorExtractRow(); + unionTypeInfo = (UnionTypeInfo) + TypeInfoUtils.getTypeInfoFromTypeString(objInspector.getTypeName()); return this; } @@ -1589,13 +1655,46 @@ public final class VectorExpressionWriterFactory { @Override public Object writeValue(ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + + final UnionColumnVector unionColumnVector = (UnionColumnVector) column; + final int tag = unionColumnVector.tags[row]; + final SettableUnionObjectInspector unionOI = + (SettableUnionObjectInspector) this.objectInspector; + ObjectInspector fieldOI = unionOI.getObjectInspectors().get(tag); + ColumnVector fieldColVector = unionColumnVector.fields[tag]; + final Object value = + vectorExtractRow.extractRowColumn( + fieldColVector, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), fieldOI, row); + if (value == null) { + return null; + } + + unionOI.setFieldAndTag(obj, value, (byte) tag); + return obj; } @Override - public Object setValue(Object row, ColumnVector column, int columnRow) + public Object setValue(Object union, ColumnVector column, int row) throws HiveException { - throw new HiveException("Not implemented yet"); + if (union == null) { + union = initValue(null); + } + + final UnionColumnVector unionColumnVector = (UnionColumnVector) column; + final int tag = unionColumnVector.tags[row]; + final SettableUnionObjectInspector unionOI = + (SettableUnionObjectInspector) this.objectInspector; + ObjectInspector fieldOI = unionOI.getObjectInspectors().get(tag); + ColumnVector fieldColVector = unionColumnVector.fields[tag]; + final Object value = + vectorExtractRow.extractRowColumn( + fieldColVector, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), fieldOI, row); + if (value == null) { + return null; + } + + unionOI.setFieldAndTag(union, value, (byte) tag); + return union; } }.init(fieldObjInspector); } http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/nullMap.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/nullMap.q b/ql/src/test/queries/clientpositive/nullMap.q index d2784b7..f272bb9 100644 --- a/ql/src/test/queries/clientpositive/nullMap.q +++ b/ql/src/test/queries/clientpositive/nullMap.q @@ -1,3 +1,5 @@ +SET hive.vectorized.execution.enabled=false; + create table map_txt ( id int, content map<int,string> http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q b/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q index 098b41e..2396194 100644 --- a/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q +++ b/ql/src/test/queries/clientpositive/orc_merge_incompat_schema.q @@ -1,3 +1,5 @@ +SET hive.vectorized.execution.enabled=false; + set hive.metastore.disallow.incompatible.col.type.changes=false; CREATE TABLE orc_create_staging ( http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/orc_nested_column_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_nested_column_pruning.q b/ql/src/test/queries/clientpositive/orc_nested_column_pruning.q new file mode 100644 index 0000000..700fdd4 --- /dev/null +++ b/ql/src/test/queries/clientpositive/orc_nested_column_pruning.q @@ -0,0 +1,214 @@ +SET hive.vectorized.execution.enabled=false; +set hive.fetch.task.conversion = none; +set hive.exec.dynamic.partition.mode = nonstrict; +set hive.strict.checks.cartesian.product=false; + +-- First, create source tables +DROP TABLE IF EXISTS dummy; +CREATE TABLE dummy (i int); +INSERT INTO TABLE dummy VALUES (42); + +DROP TABLE IF EXISTS nested_tbl_1; +CREATE TABLE nested_tbl_1 ( + a int, + s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>, + s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>, + s3 struct<f12: array<struct<f13:string, f14:int>>>, + s4 map<string, struct<f15:int>>, + s5 struct<f16: array<struct<f17:string, f18:struct<f19:int>>>>, + s6 map<string, struct<f20:array<struct<f21:struct<f22:int>>>>> +) STORED AS ORC; + +INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy; + +DROP TABLE IF EXISTS nested_tbl_2; +CREATE TABLE nested_tbl_2 LIKE nested_tbl_1; + +INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy; + +-- Testing only select statements + +EXPLAIN SELECT a FROM nested_tbl_1; +SELECT a FROM nested_tbl_1; + +EXPLAIN SELECT s1.f1 FROM nested_tbl_1; +SELECT s1.f1 FROM nested_tbl_1; + +EXPLAIN SELECT s1.f1, s1.f2 FROM nested_tbl_1; +SELECT s1.f1, s1.f2 FROM nested_tbl_1; + +-- In this case 's1.f3' and 's1.f3.f4' should be merged +EXPLAIN SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; + +-- Testing select array and index shifting +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1; +SELECT s1.f3.f5 FROM nested_tbl_1; + +-- Testing select from multiple structs +EXPLAIN SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; + + +-- Testing select with filter + +EXPLAIN SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; + +EXPLAIN SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; + +EXPLAIN SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; + + +-- Testing lateral view + +EXPLAIN SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; + + +-- Testing UDFs +EXPLAIN SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; + + +-- Testing aggregations + +EXPLAIN SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; + +EXPLAIN SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; + + +-- Testing joins + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; + +EXPLAIN SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; + +EXPLAIN SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; + +-- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3; +CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC; + +INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1; + +SELECT * FROM nested_tbl_3; + +-- Testing select struct field from elements in array or map + +EXPLAIN +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +EXPLAIN +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +EXPLAIN +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +EXPLAIN +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +EXPLAIN +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22; + +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22; + +EXPLAIN +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22; + +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/orc_null_check.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/orc_null_check.q b/ql/src/test/queries/clientpositive/orc_null_check.q index 2cb1190..e5453fc 100644 --- a/ql/src/test/queries/clientpositive/orc_null_check.q +++ b/ql/src/test/queries/clientpositive/orc_null_check.q @@ -1,3 +1,5 @@ +SET hive.vectorized.execution.enabled=false; + create table listtable(l array<string>); create table listtable_orc(l array<string>) stored as orc; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/vector_null_map.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_null_map.q b/ql/src/test/queries/clientpositive/vector_null_map.q new file mode 100644 index 0000000..bda6705 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_null_map.q @@ -0,0 +1,21 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table map_txt ( + id int, + content map<int,string> +) +row format delimited +null defined as '\\N' +stored as textfile +; + +LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt; + +explain vectorization expression +select * from map_txt; +select * from map_txt; + +explain vectorization expression +select id, map_keys(content) from map_txt; +select id, map_keys(content) from map_txt; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q b/ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q new file mode 100644 index 0000000..b03a084 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_orc_merge_incompat_schema.q @@ -0,0 +1,53 @@ +SET hive.vectorized.execution.enabled=true; + +set hive.metastore.disallow.incompatible.col.type.changes=false; + +CREATE TABLE orc_create_staging ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING> +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':'; + +LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging; + +CREATE TABLE orc_create_complex ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING>, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000"); + +INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging; +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; + +-- will be merged as the schema is the same +ALTER TABLE orc_create_complex CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; + +ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT<A:STRING,B:STRING,C:STRING>; + +EXPLAIN VECTORIZATION +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging; +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +EXPLAIN VECTORIZATION +select sum(hash(*)) from orc_create_complex; +select sum(hash(*)) from orc_create_complex; + +-- schema is different for both files, will not be merged +ALTER TABLE orc_create_complex CONCATENATE; + +dfs -ls ${hiveconf:hive.metastore.warehouse.dir}/orc_create_complex/; +select sum(hash(*)) from orc_create_complex; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q new file mode 100644 index 0000000..3121ec8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_orc_nested_column_pruning.q @@ -0,0 +1,233 @@ +set hive.fetch.task.conversion = none; +set hive.exec.dynamic.partition.mode = nonstrict; +set hive.strict.checks.cartesian.product=false; +SET hive.vectorized.execution.enabled=true; + +-- First, create source tables +DROP TABLE IF EXISTS dummy; +CREATE TABLE dummy (i int); +INSERT INTO TABLE dummy VALUES (42); + +DROP TABLE IF EXISTS nested_tbl_1; +CREATE TABLE nested_tbl_1 ( + a int, + s1 struct<f1: boolean, f2: string, f3: struct<f4: int, f5: double>, f6: int>, + s2 struct<f7: string, f8: struct<f9 : boolean, f10: array<int>, f11: map<string, boolean>>>, + s3 struct<f12: array<struct<f13:string, f14:int>>>, + s4 map<string, struct<f15:int>>, + s5 struct<f16: array<struct<f17:string, f18:struct<f19:int>>>>, + s6 map<string, struct<f20:array<struct<f21:struct<f22:int>>>>> +) STORED AS ORC; + +INSERT INTO TABLE nested_tbl_1 SELECT + 1, named_struct('f1', false, 'f2', 'foo', 'f3', named_struct('f4', 4, 'f5', cast(5.0 as double)), 'f6', 4), + named_struct('f7', 'f7', 'f8', named_struct('f9', true, 'f10', array(10, 11), 'f11', map('key1', true, 'key2', false))), + named_struct('f12', array(named_struct('f13', 'foo', 'f14', 14), named_struct('f13', 'bar', 'f14', 28))), + map('key1', named_struct('f15', 1), 'key2', named_struct('f15', 2)), + named_struct('f16', array(named_struct('f17', 'foo', 'f18', named_struct('f19', 14)), named_struct('f17', 'bar', 'f18', named_struct('f19', 28)))), + map('key1', named_struct('f20', array(named_struct('f21', named_struct('f22', 1)))), + 'key2', named_struct('f20', array(named_struct('f21', named_struct('f22', 2))))) +FROM dummy; + +DROP TABLE IF EXISTS nested_tbl_2; +CREATE TABLE nested_tbl_2 LIKE nested_tbl_1; + +INSERT INTO TABLE nested_tbl_2 SELECT + 2, named_struct('f1', true, 'f2', 'bar', 'f3', named_struct('f4', 4, 'f5', cast(6.5 as double)), 'f6', 4), + named_struct('f7', 'f72', 'f8', named_struct('f9', false, 'f10', array(20, 22), 'f11', map('key3', true, 'key4', false))), + named_struct('f12', array(named_struct('f13', 'bar', 'f14', 28), named_struct('f13', 'foo', 'f14', 56))), + map('key3', named_struct('f15', 3), 'key4', named_struct('f15', 4)), + named_struct('f16', array(named_struct('f17', 'bar', 'f18', named_struct('f19', 28)), named_struct('f17', 'foo', 'f18', named_struct('f19', 56)))), + map('key3', named_struct('f20', array(named_struct('f21', named_struct('f22', 3)))), + 'key4', named_struct('f20', array(named_struct('f21', named_struct('f22', 4))))) +FROM dummy; + +-- Testing only select statements + +EXPLAIN VECTORIZATION +SELECT a FROM nested_tbl_1; +SELECT a FROM nested_tbl_1; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1 FROM nested_tbl_1; +SELECT s1.f1 FROM nested_tbl_1; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f1, s1.f2 FROM nested_tbl_1; +SELECT s1.f1, s1.f2 FROM nested_tbl_1; + +-- In this case 's1.f3' and 's1.f3.f4' should be merged +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; +SELECT s1.f3, s1.f3.f4 FROM nested_tbl_1; + +-- Testing select array and index shifting +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1; +SELECT s1.f3.f5 FROM nested_tbl_1; + +-- Testing select from multiple structs +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; +SELECT s1.f3.f4, s2.f8.f9 FROM nested_tbl_1; + + +-- Testing select with filter + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; +SELECT s1.f2 FROM nested_tbl_1 WHERE s1.f1 = FALSE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; +SELECT s1.f3.f5 FROM nested_tbl_1 WHERE s1.f3.f4 = 4; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; +SELECT s2.f8 FROM nested_tbl_1 WHERE s1.f2 = 'foo' AND size(s2.f8.f10) > 1 AND s2.f8.f11['key1'] = TRUE; + + +-- Testing lateral view + +EXPLAIN VECTORIZATION EXPRESSION +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; +SELECT col1, col2 FROM nested_tbl_1 +LATERAL VIEW explode(s2.f8.f10) tbl1 AS col1 +LATERAL VIEW explode(s3.f12) tbl2 AS col2; + + +-- Testing UDFs +EXPLAIN VECTORIZATION EXPRESSION +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; +SELECT pmod(s2.f8.f10[1], s1.f3.f4) FROM nested_tbl_1; + + +-- Testing aggregations + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; +SELECT s1.f3.f5, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3.f5; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; +SELECT s1.f3, count(s1.f3.f4) FROM nested_tbl_1 GROUP BY s1.f3 ORDER BY s1.f3; + + +-- Testing joins + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_2 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == FALSE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5, t2.s2.f8 +FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 +WHERE t2.s2.f8.f9 == TRUE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f1 <> t2.s2.f8.f9; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; +SELECT t1.s1.f3.f5 +FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2 +ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9; + +-- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3; +CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS ORC; + +INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1; + +SELECT * FROM nested_tbl_3; + +-- Testing select struct field from elements in array or map + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +SELECT count(s1.f6), s3.f12[0].f14 +FROM nested_tbl_1 +GROUP BY s3.f12[0].f14; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +SELECT count(s1.f6), s4['key1'].f15 +FROM nested_tbl_1 +GROUP BY s4['key1'].f15; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +SELECT count(s1.f6), s5.f16[0].f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16[0].f18.f19; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +SELECT count(s1.f6), s5.f16.f18.f19 +FROM nested_tbl_1 +GROUP BY s5.f16.f18.f19; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22; + +SELECT count(s1.f6), s6['key1'].f20[0].f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20[0].f21.f22; + +EXPLAIN VECTORIZATION EXPRESSION +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22; + +SELECT count(s1.f6), s6['key1'].f20.f21.f22 +FROM nested_tbl_1 +GROUP BY s6['key1'].f20.f21.f22; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/vector_orc_null_check.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_orc_null_check.q b/ql/src/test/queries/clientpositive/vector_orc_null_check.q new file mode 100644 index 0000000..8f415c3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_orc_null_check.q @@ -0,0 +1,13 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +create table listtable(l array<string>); +create table listtable_orc(l array<string>) stored as orc; + +insert overwrite table listtable select array(null) from src; +insert overwrite table listtable_orc select * from listtable; + +explain vectorization expression +select size(l) from listtable_orc limit 10; +select size(l) from listtable_orc limit 10; + http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/queries/clientpositive/vectorization_limit.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorization_limit.q b/ql/src/test/queries/clientpositive/vectorization_limit.q index 8044484..30d5500 100644 --- a/ql/src/test/queries/clientpositive/vectorization_limit.q +++ b/ql/src/test/queries/clientpositive/vectorization_limit.q @@ -5,8 +5,9 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -explain vectorization SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7; -SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 limit 7; +explain vectorization +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7; +SELECT cbigint, cdouble FROM alltypesorc WHERE cbigint < cdouble and cint > 0 order by cbigint, cdouble limit 7; set hive.optimize.reducededuplication.min.reducer=1; set hive.limit.pushdown.memory.usage=0.3f; @@ -14,30 +15,30 @@ set hive.limit.pushdown.memory.usage=0.3f; -- HIVE-3562 Some limit can be pushed down to map stage - c/p parts from limit_pushdown explain vectorization detail -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20; -select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble limit 20; +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20; +select ctinyint,cdouble,csmallint from alltypesorc where ctinyint is not null order by ctinyint,cdouble,csmallint limit 20; -- deduped RS explain vectorization detail -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20; -select ctinyint,avg(cdouble + 1) from alltypesorc group by ctinyint order by ctinyint limit 20; +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20; +select ctinyint,avg(cdouble + 1) as cavg from alltypesorc group by ctinyint order by ctinyint, cavg limit 20; -- distincts explain vectorization detail -select distinct(ctinyint) from alltypesorc limit 20; -select distinct(ctinyint) from alltypesorc limit 20; +select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20; +select distinct(ctinyint) as cdistinct from alltypesorc order by cdistinct limit 20; explain vectorization detail -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20; -select ctinyint, count(distinct(cdouble)) from alltypesorc group by ctinyint order by ctinyint limit 20; +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20; +select ctinyint, count(distinct(cdouble)) as count_distinct from alltypesorc group by ctinyint order by ctinyint, count_distinct limit 20; -- limit zero explain vectorization detail -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0; -select ctinyint,cdouble from alltypesorc order by ctinyint limit 0; +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0; +select ctinyint,cdouble from alltypesorc order by ctinyint,cdouble limit 0; -- 2MR (applied to last RS) explain vectorization detail -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20; -select cdouble, sum(ctinyint) as sum from alltypesorc where ctinyint is not null group by cdouble order by sum, cdouble limit 20; +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20; +select cdouble, sum(ctinyint) as csum from alltypesorc where ctinyint is not null group by cdouble order by csum, cdouble limit 20; http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/llap/vector_null_map.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_null_map.q.out b/ql/src/test/results/clientpositive/llap/vector_null_map.q.out new file mode 100644 index 0000000..666f7fd --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_null_map.q.out @@ -0,0 +1,173 @@ +PREHOOK: query: create table map_txt ( + id int, + content map<int,string> +) +row format delimited +null defined as '\\N' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@map_txt +POSTHOOK: query: create table map_txt ( + id int, + content map<int,string> +) +row format delimited +null defined as '\\N' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@map_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@map_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/mapNull.txt' INTO TABLE map_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@map_txt +PREHOOK: query: explain vectorization expression +select * from map_txt +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select * from map_txt +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: map_txt + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: id (type: int), content (type: map<int,string>) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from map_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@map_txt +#### A masked pattern was here #### +POSTHOOK: query: select * from map_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_txt +#### A masked pattern was here #### +1 NULL +PREHOOK: query: explain vectorization expression +select id, map_keys(content) from map_txt +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization expression +select id, map_keys(content) from map_txt +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: map_txt + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + Select Operator + expressions: id (type: int), map_keys(content) (type: array<int>) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 3] + selectExpressions: VectorUDFAdaptor(map_keys(content)) -> 3:array<int> + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 744 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id, map_keys(content) from map_txt +PREHOOK: type: QUERY +PREHOOK: Input: default@map_txt +#### A masked pattern was here #### +POSTHOOK: query: select id, map_keys(content) from map_txt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@map_txt +#### A masked pattern was here #### +1 [] http://git-wip-us.apache.org/repos/asf/hive/blob/fc48d721/ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out b/ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out new file mode 100644 index 0000000..ebf6853 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_orc_merge_incompat_schema.q.out @@ -0,0 +1,305 @@ +PREHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING> +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: CREATE TABLE orc_create_staging ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING> +) ROW FORMAT DELIMITED + FIELDS TERMINATED BY '|' + COLLECTION ITEMS TERMINATED BY ',' + MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@orc_create_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/orc_create.txt' OVERWRITE INTO TABLE orc_create_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@orc_create_staging +PREHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING>, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: CREATE TABLE orc_create_complex ( + str STRING, + mp MAP<STRING,STRING>, + lst ARRAY<STRING>, + strct STRUCT<A:STRING,B:STRING>, + val INT +) STORED AS ORC tblproperties("orc.row.index.stride"="1000", "orc.stripe.size"="1000", "orc.compress.size"="10000") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT OVERWRITE TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array<string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct<A:string,B:string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,strct,0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array<string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct<A:string,B:string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 1 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +953053114 +PREHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT<A:STRING,B:STRING,C:STRING> +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex +CHANGE COLUMN strct strct STRUCT<A:STRING,B:STRING,C:STRING> +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +PREHOOK: query: EXPLAIN VECTORIZATION +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_staging + Pruned Column Paths: strct.a, strct.b + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str (type: string), mp (type: map<string,string>), lst (type: array<string>), named_struct('A',strct.a,'B',strct.b,'C','c') (type: struct<a:string,b:string,c:string>), 0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + Execution mode: llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: SELECT operator: Unable to vectorize custom UDF. Encountered unsupported expr desc : Column[strct].a + vectorized: false + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + + Stage: Stage-3 + Stats Work + Basic Stats Work: + +PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_staging +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: INSERT INTO TABLE orc_create_complex SELECT str,mp,lst,NAMED_STRUCT('A',strct.A,'B',strct.B,'C','c'),0 FROM orc_create_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_staging +POSTHOOK: Output: default@orc_create_complex +POSTHOOK: Lineage: orc_create_complex.lst SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:lst, type:array<string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.mp SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:mp, type:map<string,string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:str, type:string, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.strct EXPRESSION [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct<A:string,B:string>, comment:null), ] +POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION +select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION +select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 6 Data size: 21816 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(str,mp,lst,strct,val) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 21816 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3644 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594 +PREHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +PREHOOK: type: ALTER_TABLE_MERGE +PREHOOK: Input: default@orc_create_complex +PREHOOK: Output: default@orc_create_complex +POSTHOOK: query: ALTER TABLE orc_create_complex CONCATENATE +POSTHOOK: type: ALTER_TABLE_MERGE +POSTHOOK: Input: default@orc_create_complex +POSTHOOK: Output: default@orc_create_complex +Found 2 items +#### A masked pattern was here #### +PREHOOK: query: select sum(hash(*)) from orc_create_complex +PREHOOK: type: QUERY +PREHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +POSTHOOK: query: select sum(hash(*)) from orc_create_complex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@orc_create_complex +#### A masked pattern was here #### +4334574594
