HIVE-19992: Vectorization: Follow-on to HIVE-19951 --> add call to SchemaEvolution.isOnlyImplicitConversion to disable encoded LLAP I/O for ORC only when data type conversion is not implicit (Matt McCline, reviewed by Prasanth Jayachandran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/64ceb7ba
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/64ceb7ba
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/64ceb7ba

Branch: refs/heads/master-txnstats
Commit: 64ceb7baece29da0babe927385fdd954796ccca6
Parents: 1b5903b
Author: Matt McCline <mmccl...@hortonworks.com>
Authored: Sun Jul 15 23:07:13 2018 -0500
Committer: Matt McCline <mmccl...@hortonworks.com>
Committed: Sun Jul 15 23:07:13 2018 -0500

----------------------------------------------------------------------
 .../hive/llap/io/api/impl/LlapRecordReader.java | 60 +-------------------
 .../llap/orc_ppd_schema_evol_3a.q.out           | 52 ++++-----------
 2 files changed, 13 insertions(+), 99 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/64ceb7ba/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
index be748e9..3455d16 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapRecordReader.java
@@ -289,68 +289,10 @@ class LlapRecordReader
     executor.submit(rp.getReadCallable());
   }
 
-  private boolean hasSchemaEvolutionStringFamilyTruncateIssue(SchemaEvolution evolution) {
-    return hasStringFamilyTruncateTypeIssue(evolution, evolution.getReaderSchema());
-  }
-
-  // We recurse through the types.
-  private boolean hasStringFamilyTruncateTypeIssue(SchemaEvolution evolution,
-      TypeDescription readerType) {
-    TypeDescription fileType = evolution.getFileType(readerType);
-    if (fileType == null) {
-      return false;
-    }
-    switch (fileType.getCategory()) {
-    case BOOLEAN:
-    case BYTE:
-    case SHORT:
-    case INT:
-    case LONG:
-    case DOUBLE:
-    case FLOAT:
-    case STRING:
-    case TIMESTAMP:
-    case BINARY:
-    case DATE:
-    case DECIMAL:
-      // We are only looking for the CHAR/VARCHAR truncate issue.
-      return false;
-    case CHAR:
-    case VARCHAR:
-      if (readerType.getCategory().equals(TypeDescription.Category.CHAR) ||
-          readerType.getCategory().equals(TypeDescription.Category.VARCHAR)) {
-        return (fileType.getMaxLength() > readerType.getMaxLength());
-      }
-      return false;
-    case UNION:
-    case MAP:
-    case LIST:
-    case STRUCT:
-      {
-        List<TypeDescription> readerChildren = readerType.getChildren();
-        final int childCount = readerChildren.size();
-        for (int i = 0; i < childCount; ++i) {
-          if (hasStringFamilyTruncateTypeIssue(evolution, readerChildren.get(i))) {
-            return true;
-          }
-        }
-      }
-      return false;
-    default:
-      throw new IllegalArgumentException("Unknown type " + fileType);
-    }
-  }
-
   private boolean checkOrcSchemaEvolution() {
     SchemaEvolution evolution = rp.getSchemaEvolution();
 
-    /*
-     * FUTURE: When SchemaEvolution.isOnlyImplicitConversion becomes available:
-     *  1) Replace the hasSchemaEvolutionStringFamilyTruncateIssue call with
-     *     !isOnlyImplicitConversion.
-     *  2) Delete hasSchemaEvolutionStringFamilyTruncateIssue code.
-     */
-    if (evolution.hasConversion() && hasSchemaEvolutionStringFamilyTruncateIssue(evolution)) {
+    if (evolution.hasConversion() && !evolution.isOnlyImplicitConversion()) {
 
       // We do not support data type conversion when reading encoded ORC data.
       return false;


http://git-wip-us.apache.org/repos/asf/hive/blob/64ceb7ba/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out
index 45586be..e197126 100644
--- a/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_ppd_schema_evol_3a.q.out
@@ -1635,9 +1635,9 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n3
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
+   HDFS_BYTES_READ: 20860
    HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 5
    HDFS_LARGE_READ_OPS: 0
    HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -1655,13 +1655,7 @@ Stage-1 HIVE COUNTERS:
    RECORDS_OUT_OPERATOR_SEL_2: 6
    RECORDS_OUT_OPERATOR_TS_0: 2100
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 4346
-   CACHE_MISS_BYTES: 0
-   METADATA_CACHE_HIT: 2
-   NUM_DECODED_BATCHES: 3
-   NUM_VECTOR_BATCHES: 3
-   ROWS_EMITTED: 2100
-   SELECTED_ROWGROUPS: 3
+   METADATA_CACHE_HIT: 1
 Stage-1 INPUT COUNTERS:
    GROUPED_INPUT_SPLITS_Map_1: 1
    INPUT_DIRECTORIES_Map_1: 1
@@ -1673,9 +1667,9 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n3
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 0
+   HDFS_BYTES_READ: 22586
    HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 3
+   HDFS_READ_OPS: 5
    HDFS_LARGE_READ_OPS: 0
    HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -1693,13 +1687,7 @@ Stage-1 HIVE COUNTERS:
    RECORDS_OUT_OPERATOR_SEL_2: 6
    RECORDS_OUT_OPERATOR_TS_0: 2100
 Stage-1 LLAP IO COUNTERS:
-   CACHE_HIT_BYTES: 5935
-   CACHE_MISS_BYTES: 0
-   METADATA_CACHE_HIT: 2
-   NUM_DECODED_BATCHES: 3
-   NUM_VECTOR_BATCHES: 3
-   ROWS_EMITTED: 2100
-   SELECTED_ROWGROUPS: 3
+   METADATA_CACHE_HIT: 1
 Stage-1 INPUT COUNTERS:
    GROUPED_INPUT_SPLITS_Map_1: 1
    INPUT_DIRECTORIES_Map_1: 1
@@ -1863,9 +1851,9 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n3
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 2062
+   HDFS_BYTES_READ: 18628
    HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 4
+   HDFS_READ_OPS: 5
    HDFS_LARGE_READ_OPS: 0
    HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -1883,15 +1871,7 @@ Stage-1 HIVE COUNTERS:
    RECORDS_OUT_OPERATOR_SEL_2: 4
    RECORDS_OUT_OPERATOR_TS_0: 2100
 Stage-1 LLAP IO COUNTERS:
-   ALLOCATED_BYTES: 786432
-   ALLOCATED_USED_BYTES: 4264
-   CACHE_HIT_BYTES: 24
-   CACHE_MISS_BYTES: 2062
-   METADATA_CACHE_HIT: 2
-   NUM_DECODED_BATCHES: 3
-   NUM_VECTOR_BATCHES: 3
-   ROWS_EMITTED: 2100
-   SELECTED_ROWGROUPS: 3
+   METADATA_CACHE_HIT: 1
 Stage-1 INPUT COUNTERS:
    GROUPED_INPUT_SPLITS_Map_1: 1
    INPUT_DIRECTORIES_Map_1: 1
@@ -1935,9 +1915,9 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_ppd_n3
 PREHOOK: Output: hdfs://### HDFS PATH ###
 Stage-1 FILE SYSTEM COUNTERS:
-   HDFS_BYTES_READ: 1215
+   HDFS_BYTES_READ: 19952
    HDFS_BYTES_WRITTEN: 101
-   HDFS_READ_OPS: 4
+   HDFS_READ_OPS: 5
    HDFS_LARGE_READ_OPS: 0
    HDFS_WRITE_OPS: 2
 Stage-1 HIVE COUNTERS:
@@ -1955,15 +1935,7 @@ Stage-1 HIVE COUNTERS:
    RECORDS_OUT_OPERATOR_SEL_2: 4
    RECORDS_OUT_OPERATOR_TS_0: 1000
 Stage-1 LLAP IO COUNTERS:
-   ALLOCATED_BYTES: 262144
-   ALLOCATED_USED_BYTES: 2376
-   CACHE_HIT_BYTES: 2086
-   CACHE_MISS_BYTES: 1215
-   METADATA_CACHE_HIT: 2
-   NUM_DECODED_BATCHES: 1
-   NUM_VECTOR_BATCHES: 1
-   ROWS_EMITTED: 1000
-   SELECTED_ROWGROUPS: 1
+   METADATA_CACHE_HIT: 1
 Stage-1 INPUT COUNTERS:
    GROUPED_INPUT_SPLITS_Map_1: 1
    INPUT_DIRECTORIES_Map_1: 1