HIVE-19629: Enable Decimal64 reader after orc version upgrade (Prasanth Jayachandran reviewed by Matt McCline)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dd512593 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dd512593 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dd512593 Branch: refs/heads/master-txnstats Commit: dd5125939b5b5ae652e39725cfcf2379e6cb0fea Parents: 040c078 Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Sat Jun 16 11:23:55 2018 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Sat Jun 16 11:23:55 2018 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 3 + .../hive/llap/io/api/impl/LlapInputFormat.java | 6 + .../io/decode/GenericColumnVectorProducer.java | 9 +- .../llap/io/decode/OrcEncodedDataConsumer.java | 40 +- .../llap/io/encoded/OrcEncodedDataReader.java | 65 +- .../llap/io/encoded/SerDeEncodedDataReader.java | 3 + .../io/encoded/VectorDeserializeOrcWriter.java | 49 +- .../llap/io/metadata/ConsumerFileMetadata.java | 2 + .../hive/llap/io/metadata/OrcFileMetadata.java | 9 +- .../hadoop/hive/ql/exec/FetchOperator.java | 4 +- .../ql/exec/vector/VectorizationContext.java | 7 +- .../vector/VectorizedInputFormatInterface.java | 1 + .../ql/exec/vector/VectorizedRowBatchCtx.java | 5 + .../VectorInBloomFilterColDynamicValue.java | 1 + .../aggregates/VectorUDAFBloomFilter.java | 1 + .../ql/exec/vector/udf/VectorUDFAdaptor.java | 8 + .../hadoop/hive/ql/io/BatchToRowReader.java | 8 +- .../hadoop/hive/ql/io/NullRowsInputFormat.java | 6 + .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 8 +- .../hive/ql/io/orc/OrcRawRecordMerger.java | 22 +- .../apache/hadoop/hive/ql/io/orc/Reader.java | 12 +- .../hadoop/hive/ql/io/orc/ReaderImpl.java | 9 +- .../hadoop/hive/ql/io/orc/RecordReaderImpl.java | 25 +- .../io/orc/VectorizedOrcAcidRowBatchReader.java | 24 +- .../ql/io/orc/VectorizedOrcInputFormat.java | 8 +- .../hadoop/hive/ql/io/orc/WriterImpl.java | 23 +- .../orc/encoded/EncodedTreeReaderFactory.java | 205 +++- .../ql/io/parquet/MapredParquetInputFormat.java | 6 + .../hive/ql/optimizer/physical/Vectorizer.java | 115 +- .../hive/ql/io/orc/TestInputOutputFormat.java | 81 +- .../hive/ql/io/orc/TestOrcRawRecordMerger.java | 10 +- .../hive/ql/io/orc/TestVectorizedORCReader.java | 3 +- .../TestVectorizedOrcAcidRowBatchReader.java | 2 +- .../queries/clientpositive/explainanalyze_3.q | 2 +- ql/src/test/queries/clientpositive/llap_acid2.q | 31 +- .../clientpositive/llap_decimal64_reader.q | 54 + .../queries/clientpositive/llap_uncompressed.q | 13 +- ql/src/test/queries/clientpositive/orc_create.q | 4 +- .../queries/clientpositive/orc_llap_counters.q | 6 +- .../queries/clientpositive/orc_llap_counters1.q | 7 +- .../test/queries/clientpositive/orc_merge11.q | 8 +- ql/src/test/queries/clientpositive/orc_merge5.q | 4 +- ql/src/test/queries/clientpositive/orc_merge6.q | 4 +- ql/src/test/queries/clientpositive/orc_merge7.q | 4 +- .../clientpositive/orc_merge_incompat1.q | 4 +- .../clientpositive/orc_merge_incompat2.q | 4 +- .../test/queries/clientpositive/orc_ppd_basic.q | 7 +- .../clientpositive/orc_ppd_schema_evol_3a.q | 6 +- .../clientpositive/orc_schema_evolution_float.q | 2 + .../clientpositive/orc_split_elimination.q | 4 +- .../schema_evol_orc_nonvec_part_all_primitive.q | 2 + ...evol_orc_nonvec_part_all_primitive_llap_io.q | 2 + .../schema_evol_orc_vec_part_all_primitive.q | 2 + ...ma_evol_orc_vec_part_all_primitive_llap_io.q | 2 + .../clientpositive/type_change_test_int.q | 3 + 
.../type_change_test_int_vectorized.q | 2 + .../queries/clientpositive/vector_case_when_1.q | 22 +- .../queries/clientpositive/vector_decimal_5.q | 3 +- .../clientpositive/vector_decimal_mapjoin.q | 6 + .../vectorized_dynamic_semijoin_reduction2.q | 2 +- .../clientpositive/llap/acid_no_buckets.q.out | 32 +- .../llap/acid_vectorization_original.q.out | 14 +- .../llap/enforce_constraint_notnull.q.out | 14 +- .../results/clientpositive/llap/llap_acid.q.out | 12 +- .../clientpositive/llap/llap_acid2.q.out | 302 +++-- .../clientpositive/llap/llap_acid_fast.q.out | 12 +- .../llap/llap_decimal64_reader.q.out | 303 +++++ .../clientpositive/llap/llap_partitioned.q.out | 11 +- .../results/clientpositive/llap/llap_text.q.out | 1082 ++++++++++++++++++ .../clientpositive/llap/llap_uncompressed.q.out | 283 +++++ .../llap/llap_vector_nohybridgrace.q.out | 16 +- .../llap/materialized_view_create.q.out | 6 +- .../materialized_view_create_rewrite_5.q.out | 2 +- .../llap/materialized_view_describe.q.out | 6 +- .../results/clientpositive/llap/mergejoin.q.out | 76 +- .../clientpositive/llap/orc_create.q.out | 12 +- .../clientpositive/llap/orc_llap_counters.q.out | 86 +- .../llap/orc_llap_counters1.q.out | 16 +- .../clientpositive/llap/orc_merge11.q.out | 416 +++---- .../clientpositive/llap/orc_merge5.q.out | 30 +- .../clientpositive/llap/orc_merge6.q.out | 40 +- .../clientpositive/llap/orc_merge7.q.out | 76 +- .../llap/orc_merge_incompat1.q.out | 28 +- .../llap/orc_merge_incompat2.q.out | 50 +- .../clientpositive/llap/orc_ppd_basic.q.out | 106 +- .../llap/orc_ppd_schema_evol_3a.q.out | 88 +- .../llap/orc_split_elimination.q.out | 8 +- .../llap/orc_struct_type_vectorization.q.out | 8 +- .../schema_evol_orc_acidvec_part_llap_io.q.out | 56 +- .../llap/schema_evol_orc_acidvec_table.q.out | 56 +- .../schema_evol_orc_acidvec_table_llap_io.q.out | 56 +- .../llap/schema_evol_orc_vec_part.q.out | 36 +- .../schema_evol_orc_vec_part_all_complex.q.out | 12 +- ..._evol_orc_vec_part_all_complex_llap_io.q.out | 12 +- ...schema_evol_orc_vec_part_all_primitive.q.out | 15 +- ...vol_orc_vec_part_all_primitive_llap_io.q.out | 15 +- .../llap/schema_evol_orc_vec_table.q.out | 20 +- .../schema_evol_orc_vec_table_llap_io.q.out | 20 +- ...evol_text_vec_part_all_complex_llap_io.q.out | 12 +- ...ol_text_vec_part_all_primitive_llap_io.q.out | 19 +- .../schema_evol_text_vec_part_llap_io.q.out | 27 +- .../schema_evol_text_vec_table_llap_io.q.out | 42 +- .../llap/vector_adaptor_usage_mode.q.out | 16 +- .../llap/vector_aggregate_9.q.out | 12 +- .../llap/vector_aggregate_without_gby.q.out | 4 +- .../llap/vector_annotate_stats_select.q.out | 96 +- .../llap/vector_auto_smb_mapjoin_14.q.out | 8 +- .../llap/vector_between_columns.q.out | 16 +- .../clientpositive/llap/vector_between_in.q.out | 48 +- .../llap/vector_binary_join_groupby.q.out | 27 +- .../clientpositive/llap/vector_bround.q.out | 4 +- .../llap/vector_case_when_1.q.out | 433 +++---- .../llap/vector_case_when_2.q.out | 8 +- .../llap/vector_cast_constant.q.out | 4 +- .../clientpositive/llap/vector_char_2.q.out | 8 +- .../clientpositive/llap/vector_char_4.q.out | 4 +- .../llap/vector_char_mapjoin1.q.out | 24 +- .../llap/vector_char_simple.q.out | 12 +- .../llap/vector_char_varchar_1.q.out | 6 +- .../clientpositive/llap/vector_coalesce.q.out | 24 +- .../clientpositive/llap/vector_coalesce_2.q.out | 16 +- .../clientpositive/llap/vector_coalesce_3.q.out | 8 +- .../clientpositive/llap/vector_coalesce_4.q.out | 4 +- .../llap/vector_complex_all.q.out | 40 +- .../llap/vector_complex_join.q.out 
| 24 +- .../clientpositive/llap/vector_count.q.out | 8 +- .../llap/vector_count_distinct.q.out | 4 +- .../llap/vector_create_struct_table.q.out | 9 +- .../clientpositive/llap/vector_data_types.q.out | 8 +- .../clientpositive/llap/vector_date_1.q.out | 28 +- .../clientpositive/llap/vector_decimal_1.q.out | 144 +-- .../llap/vector_decimal_10_0.q.out | 15 +- .../clientpositive/llap/vector_decimal_2.q.out | 344 +++--- .../clientpositive/llap/vector_decimal_5.q.out | 154 ++- .../clientpositive/llap/vector_decimal_6.q.out | 94 +- .../llap/vector_decimal_aggregate.q.out | 32 +- .../llap/vector_decimal_cast.q.out | 7 +- .../llap/vector_decimal_expressions.q.out | 24 +- .../llap/vector_decimal_mapjoin.q.out | 554 ++------- .../llap/vector_decimal_math_funcs.q.out | 20 +- .../llap/vector_decimal_precision.q.out | 7 +- .../llap/vector_decimal_round.q.out | 62 +- .../llap/vector_decimal_round_2.q.out | 16 +- .../llap/vector_decimal_trailing.q.out | 8 +- .../llap/vector_decimal_udf.q.out | 488 ++++---- .../llap/vector_decimal_udf2.q.out | 38 +- .../clientpositive/llap/vector_distinct_2.q.out | 4 +- .../clientpositive/llap/vector_elt.q.out | 8 +- .../clientpositive/llap/vector_groupby4.q.out | 4 +- .../clientpositive/llap/vector_groupby6.q.out | 4 +- .../clientpositive/llap/vector_groupby_3.q.out | 4 +- .../llap/vector_groupby_cube1.q.out | 15 +- .../llap/vector_groupby_grouping_id1.q.out | 24 +- .../llap/vector_groupby_grouping_id2.q.out | 36 +- .../llap/vector_groupby_grouping_id3.q.out | 8 +- .../llap/vector_groupby_grouping_sets1.q.out | 28 +- .../llap/vector_groupby_grouping_sets2.q.out | 12 +- .../vector_groupby_grouping_sets3_dec.q.out | 30 +- .../llap/vector_groupby_grouping_sets4.q.out | 12 +- .../llap/vector_groupby_grouping_sets5.q.out | 12 +- .../llap/vector_groupby_grouping_sets6.q.out | 8 +- .../vector_groupby_grouping_sets_grouping.q.out | 60 +- .../vector_groupby_grouping_sets_limit.q.out | 24 +- .../llap/vector_groupby_grouping_window.q.out | 4 +- .../llap/vector_groupby_mapjoin.q.out | 6 +- .../llap/vector_groupby_reduce.q.out | 16 +- .../llap/vector_groupby_rollup1.q.out | 12 +- .../llap/vector_groupby_sort_11.q.out | 12 +- .../llap/vector_groupby_sort_8.q.out | 3 +- .../llap/vector_grouping_sets.q.out | 8 +- .../clientpositive/llap/vector_if_expr.q.out | 4 +- .../clientpositive/llap/vector_if_expr_2.q.out | 4 +- .../llap/vector_include_no_sel.q.out | 8 +- .../clientpositive/llap/vector_inner_join.q.out | 72 +- .../clientpositive/llap/vector_interval_1.q.out | 32 +- .../clientpositive/llap/vector_interval_2.q.out | 40 +- .../llap/vector_interval_arithmetic.q.out | 32 +- .../llap/vector_interval_mapjoin.q.out | 8 +- .../clientpositive/llap/vector_join30.q.out | 84 +- .../llap/vector_left_outer_join.q.out | 12 +- .../llap/vector_left_outer_join2.q.out | 32 +- .../llap/vector_leftsemi_mapjoin.q.out | 768 ++++++------- .../clientpositive/llap/vector_like_2.q.out | 4 +- .../llap/vector_llap_text_1.q.out | 6 +- .../clientpositive/llap/vector_map_order.q.out | 3 +- .../llap/vector_mapjoin_reduce.q.out | 18 +- .../llap/vector_mr_diff_schema_alias.q.out | 8 +- .../llap/vector_multi_insert.q.out | 4 +- .../clientpositive/llap/vector_null_map.q.out | 6 +- .../llap/vector_null_projection.q.out | 4 +- .../llap/vector_nullsafe_join.q.out | 112 +- .../llap/vector_number_compare_projection.q.out | 8 +- .../clientpositive/llap/vector_nvl.q.out | 16 +- .../llap/vector_orc_merge_incompat_schema.q.out | 7 +- .../llap/vector_orc_nested_column_pruning.q.out | 96 +- .../llap/vector_orc_null_check.q.out | 4 
+- .../clientpositive/llap/vector_order_null.q.out | 33 +- .../clientpositive/llap/vector_orderby_5.q.out | 4 +- .../llap/vector_outer_join0.q.out | 16 +- .../llap/vector_outer_join1.q.out | 28 +- .../llap/vector_outer_join2.q.out | 12 +- .../llap/vector_outer_reference_windowed.q.out | 176 ++- .../llap/vector_partition_diff_num_cols.q.out | 20 +- .../llap/vector_partitioned_date_time.q.out | 32 +- .../clientpositive/llap/vector_ptf_1.q.out | 3 +- .../llap/vector_ptf_part_simple.q.out | 120 +- .../clientpositive/llap/vector_reduce1.q.out | 4 +- .../clientpositive/llap/vector_reduce2.q.out | 4 +- .../clientpositive/llap/vector_reduce3.q.out | 4 +- .../llap/vector_reduce_groupby_decimal.q.out | 4 +- .../vector_reduce_groupby_duplicate_cols.q.out | 3 +- .../llap/vector_retry_failure.q.out | 3 +- .../llap/vector_reuse_scratchcols.q.out | 8 +- .../llap/vector_string_concat.q.out | 8 +- .../llap/vector_string_decimal.q.out | 6 +- .../clientpositive/llap/vector_struct_in.q.out | 32 +- .../clientpositive/llap/vector_udf1.q.out | 112 +- .../clientpositive/llap/vector_udf2.q.out | 12 +- .../llap/vector_udf_adaptor_1.q.out | 16 +- .../clientpositive/llap/vector_varchar_4.q.out | 4 +- .../llap/vector_varchar_mapjoin1.q.out | 24 +- .../llap/vector_varchar_simple.q.out | 12 +- .../llap/vector_when_case_null.q.out | 4 +- .../clientpositive/llap/vector_windowing.q.out | 141 +-- .../llap/vector_windowing_expressions.q.out | 50 +- .../llap/vector_windowing_gby.q.out | 7 +- .../llap/vector_windowing_gby2.q.out | 16 +- .../vector_windowing_multipartitioning.q.out | 42 +- .../llap/vector_windowing_navfn.q.out | 57 +- .../llap/vector_windowing_order_null.q.out | 56 +- .../vector_windowing_range_multiorder.q.out | 77 +- .../llap/vector_windowing_rank.q.out | 70 +- .../llap/vector_windowing_streaming.q.out | 17 +- .../llap/vector_windowing_windowspec.q.out | 77 +- .../llap/vector_windowing_windowspec4.q.out | 3 +- .../clientpositive/llap/vectorization_0.q.out | 40 +- .../clientpositive/llap/vectorization_1.q.out | 4 +- .../clientpositive/llap/vectorization_10.q.out | 4 +- .../clientpositive/llap/vectorization_11.q.out | 4 +- .../clientpositive/llap/vectorization_12.q.out | 4 +- .../clientpositive/llap/vectorization_13.q.out | 8 +- .../clientpositive/llap/vectorization_14.q.out | 4 +- .../clientpositive/llap/vectorization_15.q.out | 4 +- .../clientpositive/llap/vectorization_16.q.out | 4 +- .../clientpositive/llap/vectorization_17.q.out | 4 +- .../clientpositive/llap/vectorization_2.q.out | 4 +- .../clientpositive/llap/vectorization_3.q.out | 4 +- .../clientpositive/llap/vectorization_4.q.out | 4 +- .../clientpositive/llap/vectorization_5.q.out | 4 +- .../clientpositive/llap/vectorization_6.q.out | 4 +- .../clientpositive/llap/vectorization_7.q.out | 8 +- .../clientpositive/llap/vectorization_8.q.out | 8 +- .../clientpositive/llap/vectorization_9.q.out | 4 +- .../llap/vectorization_decimal_date.q.out | 4 +- .../llap/vectorization_div0.q.out | 16 +- .../llap/vectorization_limit.q.out | 24 +- .../llap/vectorization_nested_udf.q.out | 4 +- .../llap/vectorization_part_project.q.out | 4 +- .../llap/vectorization_pushdown.q.out | 4 +- .../llap/vectorization_short_regress.q.out | 80 +- .../clientpositive/llap/vectorized_case.q.out | 68 +- .../clientpositive/llap/vectorized_casts.q.out | 4 +- .../llap/vectorized_context.q.out | 12 +- .../llap/vectorized_date_funcs.q.out | 20 +- .../llap/vectorized_distinct_gby.q.out | 8 +- .../vectorized_dynamic_partition_pruning.q.out | 287 ++--- 
.../vectorized_dynamic_semijoin_reduction.q.out | 52 +- ...vectorized_dynamic_semijoin_reduction2.q.out | 138 ++- .../llap/vectorized_mapjoin.q.out | 8 +- .../llap/vectorized_mapjoin3.q.out | 69 +- .../llap/vectorized_math_funcs.q.out | 4 +- .../llap/vectorized_nested_mapjoin.q.out | 12 +- .../clientpositive/llap/vectorized_ptf.q.out | 100 +- .../llap/vectorized_shufflejoin.q.out | 8 +- .../llap/vectorized_string_funcs.q.out | 4 +- .../llap/vectorized_timestamp.q.out | 16 +- .../llap/vectorized_timestamp_funcs.q.out | 28 +- .../llap/vectorized_timestamp_ints_casts.q.out | 8 +- .../test/results/clientpositive/mergejoin.q.out | 8 +- .../results/clientpositive/orc_file_dump.q.out | 54 +- .../results/clientpositive/orc_merge11.q.out | 416 +++---- .../results/clientpositive/orc_merge5.q.out | 30 +- .../results/clientpositive/orc_merge6.q.out | 40 +- .../clientpositive/orc_merge_incompat1.q.out | 28 +- .../clientpositive/orc_merge_incompat2.q.out | 50 +- .../orc_struct_type_vectorization.q.out | 8 +- .../clientpositive/spark/orc_merge5.q.out | 22 +- .../clientpositive/spark/orc_merge6.q.out | 32 +- .../clientpositive/spark/orc_merge7.q.out | 64 +- .../spark/orc_merge_incompat1.q.out | 22 +- .../spark/orc_merge_incompat2.q.out | 46 +- ...k_vectorized_dynamic_partition_pruning.q.out | 240 ++-- .../spark/vector_between_in.q.out | 48 +- .../spark/vector_cast_constant.q.out | 4 +- .../clientpositive/spark/vector_char_4.q.out | 4 +- .../spark/vector_count_distinct.q.out | 4 +- .../spark/vector_data_types.q.out | 8 +- .../spark/vector_decimal_aggregate.q.out | 8 +- .../spark/vector_decimal_mapjoin.q.out | 82 +- .../spark/vector_distinct_2.q.out | 4 +- .../clientpositive/spark/vector_elt.q.out | 8 +- .../clientpositive/spark/vector_groupby_3.q.out | 4 +- .../spark/vector_inner_join.q.out | 72 +- .../spark/vector_left_outer_join.q.out | 12 +- .../clientpositive/spark/vector_orderby_5.q.out | 4 +- .../spark/vector_outer_join0.q.out | 16 +- .../spark/vector_outer_join1.q.out | 28 +- .../spark/vector_outer_join2.q.out | 12 +- .../spark/vector_string_concat.q.out | 8 +- .../clientpositive/spark/vector_varchar_4.q.out | 4 +- .../clientpositive/spark/vectorization_0.q.out | 40 +- .../clientpositive/spark/vectorization_1.q.out | 4 +- .../clientpositive/spark/vectorization_10.q.out | 4 +- .../clientpositive/spark/vectorization_11.q.out | 4 +- .../clientpositive/spark/vectorization_12.q.out | 4 +- .../clientpositive/spark/vectorization_13.q.out | 8 +- .../clientpositive/spark/vectorization_14.q.out | 4 +- .../clientpositive/spark/vectorization_15.q.out | 4 +- .../clientpositive/spark/vectorization_16.q.out | 4 +- .../clientpositive/spark/vectorization_17.q.out | 4 +- .../clientpositive/spark/vectorization_2.q.out | 4 +- .../clientpositive/spark/vectorization_3.q.out | 4 +- .../clientpositive/spark/vectorization_4.q.out | 4 +- .../clientpositive/spark/vectorization_5.q.out | 4 +- .../clientpositive/spark/vectorization_6.q.out | 4 +- .../clientpositive/spark/vectorization_9.q.out | 4 +- .../spark/vectorization_decimal_date.q.out | 4 +- .../spark/vectorization_div0.q.out | 16 +- .../spark/vectorization_nested_udf.q.out | 4 +- .../spark/vectorization_part_project.q.out | 4 +- .../spark/vectorization_pushdown.q.out | 4 +- .../spark/vectorization_short_regress.q.out | 80 +- .../clientpositive/spark/vectorized_case.q.out | 68 +- .../spark/vectorized_mapjoin.q.out | 8 +- .../spark/vectorized_math_funcs.q.out | 4 +- .../spark/vectorized_nested_mapjoin.q.out | 12 +- .../clientpositive/spark/vectorized_ptf.q.out | 100 +- 
.../spark/vectorized_shufflejoin.q.out | 8 +- .../spark/vectorized_string_funcs.q.out | 4 +- .../spark/vectorized_timestamp_funcs.q.out | 28 +- .../tez/acid_vectorization_original_tez.q.out | 38 +- .../clientpositive/tez/explainanalyze_3.q.out | 4 +- .../tez/vector_non_string_partition.q.out | 8 +- .../clientpositive/vector_aggregate_9.q.out | 12 +- .../vector_aggregate_without_gby.q.out | 4 +- .../clientpositive/vector_between_columns.q.out | 8 +- .../vector_binary_join_groupby.q.out | 14 +- .../results/clientpositive/vector_bround.q.out | 4 +- .../clientpositive/vector_case_when_1.q.out | 426 +++---- .../clientpositive/vector_case_when_2.q.out | 8 +- .../clientpositive/vector_cast_constant.q.out | 4 +- .../results/clientpositive/vector_char_2.q.out | 8 +- .../results/clientpositive/vector_char_4.q.out | 4 +- .../clientpositive/vector_char_mapjoin1.q.out | 12 +- .../clientpositive/vector_char_simple.q.out | 12 +- .../clientpositive/vector_coalesce.q.out | 24 +- .../clientpositive/vector_coalesce_2.q.out | 16 +- .../clientpositive/vector_coalesce_3.q.out | 4 +- .../clientpositive/vector_coalesce_4.q.out | 4 +- .../results/clientpositive/vector_count.q.out | 8 +- .../clientpositive/vector_data_types.q.out | 8 +- .../results/clientpositive/vector_date_1.q.out | 28 +- .../clientpositive/vector_decimal_1.q.out | 126 +- .../clientpositive/vector_decimal_10_0.q.out | 8 +- .../clientpositive/vector_decimal_5.q.out | 150 ++- .../clientpositive/vector_decimal_6.q.out | 28 +- .../vector_decimal_aggregate.q.out | 8 +- .../clientpositive/vector_decimal_cast.q.out | 4 +- .../vector_decimal_expressions.q.out | 22 +- .../clientpositive/vector_decimal_mapjoin.q.out | 74 +- .../vector_decimal_math_funcs.q.out | 20 +- .../vector_decimal_precision.q.out | 4 +- .../clientpositive/vector_decimal_round.q.out | 28 +- .../clientpositive/vector_decimal_round_2.q.out | 16 +- .../vector_decimal_trailing.q.out | 8 +- .../clientpositive/vector_decimal_udf2.q.out | 20 +- .../vector_delete_orig_table.q.out | 4 +- .../clientpositive/vector_distinct_2.q.out | 4 +- .../results/clientpositive/vector_elt.q.out | 8 +- .../clientpositive/vector_empty_where.q.out | 16 +- .../clientpositive/vector_groupby4.q.out | 4 +- .../clientpositive/vector_groupby6.q.out | 4 +- .../clientpositive/vector_groupby_3.q.out | 4 +- .../clientpositive/vector_groupby_reduce.q.out | 16 +- .../clientpositive/vector_grouping_sets.q.out | 8 +- .../results/clientpositive/vector_if_expr.q.out | 4 +- .../clientpositive/vector_include_no_sel.q.out | 4 +- .../clientpositive/vector_interval_1.q.out | 32 +- .../vector_interval_arithmetic.q.out | 32 +- .../vector_interval_mapjoin.q.out | 4 +- .../clientpositive/vector_left_outer_join.q.out | 4 +- .../vector_left_outer_join2.q.out | 16 +- .../clientpositive/vector_multi_insert.q.out | 4 +- .../vector_non_string_partition.q.out | 8 +- .../clientpositive/vector_null_projection.q.out | 4 +- .../results/clientpositive/vector_nvl.q.out | 16 +- .../clientpositive/vector_orderby_5.q.out | 4 +- .../clientpositive/vector_outer_join0.q.out | 8 +- .../clientpositive/vector_outer_join1.q.out | 12 +- .../clientpositive/vector_outer_join2.q.out | 4 +- .../clientpositive/vector_outer_join3.q.out | 6 +- .../clientpositive/vector_outer_join4.q.out | 6 +- .../clientpositive/vector_outer_join6.q.out | 4 +- .../vector_outer_join_no_keys.q.out | 8 +- .../results/clientpositive/vector_reduce1.q.out | 4 +- .../results/clientpositive/vector_reduce2.q.out | 4 +- .../results/clientpositive/vector_reduce3.q.out | 4 +- 
.../vector_reduce_groupby_decimal.q.out | 4 +- .../clientpositive/vector_string_concat.q.out | 8 +- .../clientpositive/vector_struct_in.q.out | 32 +- .../vector_tablesample_rows.q.out | 8 +- .../results/clientpositive/vector_udf3.q.out | 4 +- .../clientpositive/vector_varchar_4.q.out | 4 +- .../vector_varchar_mapjoin1.q.out | 12 +- .../clientpositive/vector_varchar_simple.q.out | 12 +- .../clientpositive/vector_when_case_null.q.out | 4 +- .../clientpositive/vectorization_1.q.out | 4 +- .../clientpositive/vectorization_10.q.out | 4 +- .../clientpositive/vectorization_11.q.out | 4 +- .../clientpositive/vectorization_12.q.out | 4 +- .../clientpositive/vectorization_13.q.out | 8 +- .../clientpositive/vectorization_14.q.out | 4 +- .../clientpositive/vectorization_15.q.out | 4 +- .../clientpositive/vectorization_16.q.out | 4 +- .../clientpositive/vectorization_17.q.out | 4 +- .../clientpositive/vectorization_2.q.out | 4 +- .../clientpositive/vectorization_3.q.out | 4 +- .../clientpositive/vectorization_4.q.out | 4 +- .../clientpositive/vectorization_5.q.out | 4 +- .../clientpositive/vectorization_6.q.out | 4 +- .../clientpositive/vectorization_7.q.out | 8 +- .../clientpositive/vectorization_8.q.out | 8 +- .../clientpositive/vectorization_9.q.out | 4 +- .../vectorization_decimal_date.q.out | 4 +- .../clientpositive/vectorization_limit.q.out | 20 +- .../vectorization_nested_udf.q.out | 4 +- .../vectorization_offset_limit.q.out | 8 +- .../vectorization_part_project.q.out | 4 +- .../clientpositive/vectorization_pushdown.q.out | 4 +- .../clientpositive/vectorized_case.q.out | 68 +- .../clientpositive/vectorized_casts.q.out | 4 +- .../clientpositive/vectorized_context.q.out | 4 +- .../clientpositive/vectorized_date_funcs.q.out | 20 +- .../clientpositive/vectorized_mapjoin.q.out | 4 +- .../clientpositive/vectorized_mapjoin2.q.out | 4 +- .../clientpositive/vectorized_mapjoin3.q.out | 12 +- .../clientpositive/vectorized_math_funcs.q.out | 4 +- .../vectorized_string_funcs.q.out | 4 +- .../clientpositive/vectorized_timestamp.q.out | 16 +- .../vectorized_timestamp_funcs.q.out | 28 +- .../vectorized_timestamp_ints_casts.q.out | 8 +- 451 files changed, 8616 insertions(+), 6642 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index c7d2285..aeb6211 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -539,6 +539,9 @@ minillaplocal.query.files=\ llap_partitioned.q,\ llap_smb.q,\ llap_vector_nohybridgrace.q,\ + llap_uncompressed.q,\ + llap_decimal64_reader.q,\ + llap_text.q,\ load_data_acid_rename.q,\ load_data_using_job.q,\ load_dyn_part5.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java index 40f7c83..ac1aca8 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java +++ 
b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapInputFormat.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.llap.io.api.impl; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport; import org.apache.hadoop.hive.ql.io.BatchToRowInputFormat; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -232,4 +233,9 @@ public class LlapInputFormat implements InputFormat<NullWritable, VectorizedRowB } return tableScanOperator; } + + @Override + public VectorizedSupport.Support[] getSupportedFeatures() { + return new VectorizedSupport.Support[] {VectorizedSupport.Support.DECIMAL_64}; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java index 7af1b05..32f3bed 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/GenericColumnVectorProducer.java @@ -31,13 +31,11 @@ import org.apache.hadoop.hive.llap.cache.SerDeLowLevelCacheImpl; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; import org.apache.hadoop.hive.llap.io.api.impl.ColumnVectorBatch; import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; -import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.Includes; import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader; import org.apache.hadoop.hive.llap.io.metadata.ConsumerFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.ConsumerStripeMetadata; import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.encoded.Consumer; import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace; @@ -52,8 +50,8 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.hive.common.util.FixedSizedObjectPool; import org.apache.orc.CompressionKind; +import org.apache.orc.OrcFile; import org.apache.orc.OrcProto; -import org.apache.orc.OrcUtils; import org.apache.orc.OrcProto.ColumnEncoding; import org.apache.orc.OrcProto.RowIndex; import org.apache.orc.OrcProto.RowIndexEntry; @@ -289,5 +287,10 @@ public class GenericColumnVectorProducer implements ColumnVectorProducer { public TypeDescription getSchema() { return schema; } + + @Override + public OrcFile.Version getFileVersion() { + return null; + } } } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java index feccb87..0d7435c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/decode/OrcEncodedDataConsumer.java @@ -33,6 +33,7 @@ import 
org.apache.hadoop.hive.llap.io.metadata.ConsumerStripeMetadata; import org.apache.hadoop.hive.llap.metrics.LlapDaemonIOMetrics; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector; @@ -73,10 +74,11 @@ public class OrcEncodedDataConsumer private IoTrace trace; private final Includes includes; private TypeDescription[] batchSchemas; + private boolean useDecimal64ColumnVectors; public OrcEncodedDataConsumer( - Consumer<ColumnVectorBatch> consumer, Includes includes, boolean skipCorrupt, - QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) { + Consumer<ColumnVectorBatch> consumer, Includes includes, boolean skipCorrupt, + QueryFragmentCounters counters, LlapDaemonIOMetrics ioMetrics) { super(consumer, includes.getPhysicalColumnIds().size(), ioMetrics); this.includes = includes; // TODO: get rid of this @@ -84,6 +86,10 @@ public class OrcEncodedDataConsumer this.counters = counters; } + public void setUseDecimal64ColumnVectors(final boolean useDecimal64ColumnVectors) { + this.useDecimal64ColumnVectors = useDecimal64ColumnVectors; + } + public void setFileMetadata(ConsumerFileMetadata f) { assert fileMetadata == null; fileMetadata = f; @@ -153,7 +159,7 @@ public class OrcEncodedDataConsumer if (cvb.cols[idx] == null) { // Orc store rows inside a root struct (hive writes it this way). // When we populate column vectors we skip over the root struct. - cvb.cols[idx] = createColumn(batchSchemas[idx], VectorizedRowBatch.DEFAULT_SIZE); + cvb.cols[idx] = createColumn(batchSchemas[idx], VectorizedRowBatch.DEFAULT_SIZE, useDecimal64ColumnVectors); } trace.logTreeReaderNextVector(idx); @@ -217,10 +223,10 @@ public class OrcEncodedDataConsumer TreeReaderFactory.Context context = new TreeReaderFactory.ReaderContext() .setSchemaEvolution(evolution).skipCorrupt(skipCorrupt) .writerTimeZone(stripeMetadata.getWriterTimezone()) - ; + .fileFormat(fileMetadata == null ? 
null : fileMetadata.getFileVersion()); this.batchSchemas = includes.getBatchReaderTypes(fileSchema); StructTreeReader treeReader = EncodedTreeReaderFactory.createRootTreeReader( - batchSchemas, stripeMetadata.getEncodings(), batch, codec, context); + batchSchemas, stripeMetadata.getEncodings(), batch, codec, context, useDecimal64ColumnVectors); this.columnReaders = treeReader.getChildReaders(); if (LlapIoImpl.LOG.isDebugEnabled()) { @@ -232,7 +238,7 @@ public class OrcEncodedDataConsumer positionInStreams(columnReaders, batch.getBatchKey(), stripeMetadata); } - private ColumnVector createColumn(TypeDescription type, int batchSize) { + private ColumnVector createColumn(TypeDescription type, int batchSize, final boolean useDecimal64ColumnVectors) { switch (type.getCategory()) { case BOOLEAN: case BYTE: @@ -252,30 +258,34 @@ public class OrcEncodedDataConsumer case TIMESTAMP: return new TimestampColumnVector(batchSize); case DECIMAL: - return new DecimalColumnVector(batchSize, type.getPrecision(), - type.getScale()); + if (useDecimal64ColumnVectors && type.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) { + return new Decimal64ColumnVector(batchSize, type.getPrecision(), type.getScale()); + } else { + return new DecimalColumnVector(batchSize, type.getPrecision(), type.getScale()); + } case STRUCT: { List<TypeDescription> subtypeIdxs = type.getChildren(); ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()]; - for(int i = 0; i < fieldVector.length; ++i) { - fieldVector[i] = createColumn(subtypeIdxs.get(i), batchSize); + for (int i = 0; i < fieldVector.length; ++i) { + fieldVector[i] = createColumn(subtypeIdxs.get(i), batchSize, useDecimal64ColumnVectors); } return new StructColumnVector(batchSize, fieldVector); } case UNION: { List<TypeDescription> subtypeIdxs = type.getChildren(); ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()]; - for(int i=0; i < fieldVector.length; ++i) { - fieldVector[i] = createColumn(subtypeIdxs.get(i), batchSize); + for (int i = 0; i < fieldVector.length; ++i) { + fieldVector[i] = createColumn(subtypeIdxs.get(i), batchSize, useDecimal64ColumnVectors); } return new UnionColumnVector(batchSize, fieldVector); } case LIST: - return new ListColumnVector(batchSize, createColumn(type.getChildren().get(0), batchSize)); + return new ListColumnVector(batchSize, createColumn(type.getChildren().get(0), batchSize, + useDecimal64ColumnVectors)); case MAP: List<TypeDescription> subtypeIdxs = type.getChildren(); - return new MapColumnVector(batchSize, createColumn(subtypeIdxs.get(0), batchSize), - createColumn(subtypeIdxs.get(1), batchSize)); + return new MapColumnVector(batchSize, createColumn(subtypeIdxs.get(0), batchSize, useDecimal64ColumnVectors), + createColumn(subtypeIdxs.get(1), batchSize, useDecimal64ColumnVectors)); default: throw new IllegalArgumentException("LLAP does not support " + type.getCategory()); } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java index 2947c16..b76b0de 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java @@ -24,37 +24,17 @@ 
import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.llap.counters.LlapIOCounters; -import org.apache.orc.CompressionCodec; -import org.apache.orc.OrcProto.BloomFilterIndex; -import org.apache.orc.OrcProto.FileTail; -import org.apache.orc.OrcProto.RowIndex; -import org.apache.orc.OrcProto.Stream; -import org.apache.orc.OrcProto.StripeStatistics; -import org.apache.orc.TypeDescription; -import org.apache.orc.impl.BufferChunk; -import org.apache.orc.impl.DataReaderProperties; -import org.apache.orc.impl.InStream; -import org.apache.orc.impl.OrcCodecPool; -import org.apache.orc.impl.OrcIndex; -import org.apache.orc.impl.OrcTail; -import org.apache.orc.impl.ReaderImpl; -import org.apache.orc.impl.SchemaEvolution; -import org.apache.orc.impl.WriterImpl; -import org.apache.tez.common.counters.TezCounters; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.Pool; import org.apache.hadoop.hive.common.Pool.PoolObjectHelper; +import org.apache.hadoop.hive.common.io.Allocator; import org.apache.hadoop.hive.common.io.Allocator.BufferObjectFactory; import org.apache.hadoop.hive.common.io.DataCache; -import org.apache.hadoop.hive.common.io.Allocator; -import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData; import org.apache.hadoop.hive.common.io.DiskRange; import org.apache.hadoop.hive.common.io.DiskRangeList; +import org.apache.hadoop.hive.common.io.encoded.EncodedColumnBatch.ColumnStreamData; import org.apache.hadoop.hive.common.io.encoded.MemoryBuffer; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; @@ -65,38 +45,58 @@ import org.apache.hadoop.hive.llap.cache.BufferUsageManager; import org.apache.hadoop.hive.llap.cache.LlapDataBuffer; import org.apache.hadoop.hive.llap.cache.LowLevelCache; import org.apache.hadoop.hive.llap.cache.LowLevelCache.Priority; +import org.apache.hadoop.hive.llap.counters.LlapIOCounters; import org.apache.hadoop.hive.llap.counters.QueryFragmentCounters; import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.Includes; import org.apache.hadoop.hive.llap.io.decode.ColumnVectorProducer.SchemaEvolutionFactory; import org.apache.hadoop.hive.llap.io.decode.OrcEncodedDataConsumer; -import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.MetadataCache; import org.apache.hadoop.hive.llap.io.metadata.MetadataCache.LlapBufferOrBuffers; +import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata; import org.apache.hadoop.hive.ql.io.HdfsUtils; -import org.apache.orc.CompressionKind; -import org.apache.orc.DataReader; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions; -import org.apache.orc.OrcConf; import org.apache.hadoop.hive.ql.io.orc.OrcSplit; -import org.apache.hadoop.hive.ql.io.orc.encoded.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReaderImpl; import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedOrcFile; import org.apache.hadoop.hive.ql.io.orc.encoded.EncodedReader; import org.apache.hadoop.hive.ql.io.orc.encoded.IoTrace; import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; +import 
org.apache.hadoop.hive.ql.io.orc.encoded.Reader; import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.OrcEncodedColumnBatch; import org.apache.hadoop.hive.ql.io.orc.encoded.Reader.PoolFactory; -import org.apache.orc.impl.RecordReaderUtils; -import org.apache.orc.StripeInformation; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hive.common.util.FixedSizedObjectPool; +import org.apache.orc.CompressionCodec; +import org.apache.orc.CompressionKind; +import org.apache.orc.DataReader; +import org.apache.orc.OrcConf; import org.apache.orc.OrcProto; +import org.apache.orc.OrcProto.BloomFilterIndex; +import org.apache.orc.OrcProto.FileTail; +import org.apache.orc.OrcProto.RowIndex; +import org.apache.orc.OrcProto.Stream; +import org.apache.orc.OrcProto.StripeStatistics; +import org.apache.orc.StripeInformation; +import org.apache.orc.TypeDescription; +import org.apache.orc.impl.BufferChunk; +import org.apache.orc.impl.DataReaderProperties; +import org.apache.orc.impl.InStream; +import org.apache.orc.impl.OrcCodecPool; +import org.apache.orc.impl.OrcIndex; +import org.apache.orc.impl.OrcTail; +import org.apache.orc.impl.ReaderImpl; +import org.apache.orc.impl.RecordReaderUtils; +import org.apache.orc.impl.SchemaEvolution; +import org.apache.orc.impl.WriterImpl; import org.apache.tez.common.CallableWithNdc; +import org.apache.tez.common.counters.TezCounters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.collect.Lists; @@ -231,6 +231,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> this.jobConf = jobConf; // TODO: setFileMetadata could just create schema. Called in two places; clean up later. this.evolution = sef.createSchemaEvolution(fileMetadata.getSchema()); + consumer.setUseDecimal64ColumnVectors(HiveConf.getVar(jobConf, + ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64")); consumer.setFileMetadata(fileMetadata); consumer.setSchemaEvolution(evolution); } @@ -569,7 +571,8 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> stripes.add(new ReaderImpl.StripeInformationImpl(stripeProto)); } return new OrcFileMetadata( - fileKey, tail.getFooter(), tail.getPostscript(), stats, stripes); + fileKey, tail.getFooter(), tail.getPostscript(), stats, stripes, + ReaderImpl.getFileVersion(tail.getPostscript().getVersionList())); } finally { // We don't need the buffer anymore. 
metadataCache.decRefBuffer(tailBuffers); @@ -586,7 +589,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void> } FileTail ft = orcReader.getFileTail(); return new OrcFileMetadata(fileKey, ft.getFooter(), ft.getPostscript(), - orcReader.getOrcProtoStripeStatistics(), orcReader.getStripes()); + orcReader.getOrcProtoStripeStatistics(), orcReader.getStripes(), orcReader.getFileVersion()); } private OrcProto.StripeFooter buildStripeFooter( http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java index bed5887..5b54af5 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java @@ -221,6 +221,9 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void> this.sourceSerDe = sourceSerDe; this.reporter = reporter; this.jobConf = jobConf; + final boolean useDecimal64ColumnVectors = HiveConf.getVar(jobConf, ConfVars + .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64"); + consumer.setUseDecimal64ColumnVectors(useDecimal64ColumnVectors); this.schema = schema; this.writerIncludes = OrcInputFormat.genIncludedColumns(schema, columnIds); SchemaEvolution evolution = new SchemaEvolution(schema, null, http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java index de19b1d..ca6d696 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/VectorDeserializeOrcWriter.java @@ -20,14 +20,19 @@ package org.apache.hadoop.hive.llap.io.encoded; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.llap.DebugUtils; @@ -35,10 +40,12 @@ import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.CacheWriter; import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.DeserializerOrcWriter; import org.apache.hadoop.hive.llap.io.encoded.SerDeEncodedDataReader.EncodingWriter; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; import 
org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.orc.Writer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -46,14 +53,20 @@ import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BinaryComparable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.InputFormat; @@ -113,7 +126,7 @@ class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable { } LlapIoImpl.LOG.info("Creating VertorDeserializeOrcWriter for " + path); return new VectorDeserializeOrcWriter( - daemonConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize); + jobConf, tblProps, sourceOi, sourceIncludes, cacheIncludes, allocSize); } private VectorDeserializeOrcWriter(Configuration conf, Properties tblProps, @@ -121,12 +134,12 @@ class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable { int allocSize) throws IOException { super(sourceOi, allocSize); // See also: the usage of VectorDeserializeType, for binary. For now, we only want text. 
- this.vrbCtx = createVrbCtx(sourceOi); + this.vrbCtx = createVrbCtx(sourceOi, tblProps, conf); this.sourceIncludes = sourceIncludes; this.cacheIncludes = cacheIncludes; this.sourceBatch = vrbCtx.createVectorizedRowBatch(); deserializeRead = new LazySimpleDeserializeRead(vrbCtx.getRowColumnTypeInfos(), - /* useExternalBuffer */ true, createSerdeParams(conf, tblProps)); + vrbCtx.getRowdataTypePhysicalVariations(),/* useExternalBuffer */ true, createSerdeParams(conf, tblProps)); vectorDeserializeRow = new VectorDeserializeRow<LazySimpleDeserializeRead>(deserializeRead); int colCount = vrbCtx.getRowColumnTypeInfos().length; boolean[] includes = null; @@ -192,13 +205,41 @@ class VectorDeserializeOrcWriter extends EncodingWriter implements Runnable { this.orcThread.start(); } - private static VectorizedRowBatchCtx createVrbCtx(StructObjectInspector oi) throws IOException { + private static VectorizedRowBatchCtx createVrbCtx(StructObjectInspector oi, final Properties tblProps, + final Configuration conf) throws IOException { + final boolean useDecimal64ColumnVectors = HiveConf.getVar(conf, ConfVars + .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64"); + final String serde = tblProps.getProperty(serdeConstants.SERIALIZATION_LIB); + final String inputFormat = tblProps.getProperty(hive_metastoreConstants.FILE_INPUT_FORMAT); + final boolean isTextFormat = inputFormat != null && inputFormat.equals(TextInputFormat.class.getName()) && + serde != null && serde.equals(LazySimpleSerDe.class.getName()); + List<DataTypePhysicalVariation> dataTypePhysicalVariations = new ArrayList<>(); + if (isTextFormat) { + StructTypeInfo structTypeInfo = (StructTypeInfo) TypeInfoUtils.getTypeInfoFromObjectInspector(oi); + int dataColumnCount = structTypeInfo.getAllStructFieldTypeInfos().size(); + for (int i = 0; i < dataColumnCount; i++) { + DataTypePhysicalVariation dataTypePhysicalVariation = DataTypePhysicalVariation.NONE; + if (useDecimal64ColumnVectors) { + TypeInfo typeInfo = structTypeInfo.getAllStructFieldTypeInfos().get(i); + if (typeInfo instanceof DecimalTypeInfo) { + DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo; + if (HiveDecimalWritable.isPrecisionDecimal64(decimalTypeInfo.precision())) { + dataTypePhysicalVariation = DataTypePhysicalVariation.DECIMAL_64; + } + } + } + dataTypePhysicalVariations.add(dataTypePhysicalVariation); + } + } VectorizedRowBatchCtx vrbCtx = new VectorizedRowBatchCtx(); try { vrbCtx.init(oi, new String[0]); } catch (HiveException e) { throw new IOException(e); } + if (!dataTypePhysicalVariations.isEmpty()) { + vrbCtx.setRowDataTypePhysicalVariations(dataTypePhysicalVariations.toArray(new DataTypePhysicalVariation[0])); + } return vrbCtx; } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java index 89ad4aa..d6b16ef 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/ConsumerFileMetadata.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.orc.CompressionKind; import org.apache.orc.FileFormatException; +import org.apache.orc.OrcFile; import org.apache.orc.OrcProto.Type; 
import org.apache.orc.TypeDescription; @@ -29,4 +30,5 @@ public interface ConsumerFileMetadata { CompressionKind getCompressionKind(); List<Type> getTypes(); TypeDescription getSchema() throws FileFormatException; + OrcFile.Version getFileVersion(); } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java index 5cd6f9f..5eb713c 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileMetadata.java @@ -50,9 +50,10 @@ public final class OrcFileMetadata implements FileMetadata, ConsumerFileMetadata private final long contentLength; private final long numberOfRows; private final boolean isOriginalFormat; + private final OrcFile.Version fileVersion; public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScript ps, - List<StripeStatistics> stats, List<StripeInformation> stripes) { + List<StripeStatistics> stats, List<StripeInformation> stripes, final OrcFile.Version fileVersion) { this.stripeStats = stats; this.compressionKind = CompressionKind.valueOf(ps.getCompression().name()); this.compressionBufferSize = (int)ps.getCompressionBlockSize(); @@ -67,6 +68,7 @@ public final class OrcFileMetadata implements FileMetadata, ConsumerFileMetadata this.numberOfRows = footer.getNumberOfRows(); this.fileStats = footer.getStatisticsList(); this.fileKey = fileKey; + this.fileVersion = fileVersion; } // FileMetadata @@ -163,4 +165,9 @@ public final class OrcFileMetadata implements FileMetadata, ConsumerFileMetadata public TypeDescription getSchema() throws FileFormatException { return OrcUtils.convertTypeFromProtobuf(this.types, 0); } + + @Override + public OrcFile.Version getFileVersion() { + return fileVersion; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 2246901..183fae5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -214,8 +214,8 @@ public class FetchOperator implements Serializable { private static final Map<String, InputFormat> inputFormats = new HashMap<String, InputFormat>(); @SuppressWarnings("unchecked") - static InputFormat getInputFormatFromCache( - Class<? extends InputFormat> inputFormatClass, JobConf conf) throws IOException { + public static InputFormat getInputFormatFromCache( + Class<? 
extends InputFormat> inputFormatClass, Configuration conf) throws IOException { if (Configurable.class.isAssignableFrom(inputFormatClass) || JobConfigurable.class.isAssignableFrom(inputFormatClass)) { return ReflectionUtil.newInstance(inputFormatClass, conf); http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 9ddb136..84a0a3a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -584,8 +584,8 @@ public class VectorizationContext { // Re-use an existing, available column of the same required type. if (usedOutputColumns.contains(i) || - !(scratchVectorTypeNames)[i].equalsIgnoreCase(columnType) && - scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation) { + !(scratchVectorTypeNames[i].equalsIgnoreCase(columnType) && + scratchDataTypePhysicalVariations[i] == dataTypePhysicalVariation)) { continue; } //Use i @@ -874,6 +874,7 @@ public class VectorizationContext { LOG.debug("Input Expression = " + exprDesc.toString() + ", Vectorized Expression = " + ve.toString()); } + return ve; } @@ -1965,7 +1966,7 @@ public class VectorizationContext { return cleaned; } - private VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo, + public VectorExpression instantiateExpression(Class<?> vclass, TypeInfo returnTypeInfo, DataTypePhysicalVariation returnDataTypePhysicalVariation, Object...args) throws HiveException { VectorExpression ve = null; http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java index e74b185..8ee59e4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedInputFormatInterface.java @@ -24,4 +24,5 @@ package org.apache.hadoop.hive.ql.exec.vector; */ public interface VectorizedInputFormatInterface { + VectorizedSupport.Support[] getSupportedFeatures(); } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 6588385..ffbfb6f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -163,6 +163,11 @@ public class VectorizedRowBatchCtx { return rowDataTypePhysicalVariations; } + public void setRowDataTypePhysicalVariations( + final DataTypePhysicalVariation[] rowDataTypePhysicalVariations) { + this.rowDataTypePhysicalVariations = rowDataTypePhysicalVariations; + } + public int[] getDataColumnNums() { return dataColumnNums; } 
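A minimal standalone sketch of the column-vector selection that the OrcEncodedDataConsumer hunk above introduces; the helper class and method names here are illustrative only, not part of the patch. When decimal_64 support is enabled and the declared precision fits within TypeDescription.MAX_DECIMAL64_PRECISION, the reader allocates a long-backed Decimal64ColumnVector instead of the HiveDecimalWritable-backed DecimalColumnVector:

    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.orc.TypeDescription;

    // Illustrative helper (not part of the patch) mirroring the DECIMAL branch added to
    // OrcEncodedDataConsumer.createColumn(): small-precision decimals get the long-backed
    // Decimal64ColumnVector, everything else keeps the regular DecimalColumnVector.
    final class DecimalVectorFactory {
      static ColumnVector createDecimalColumn(TypeDescription type, int batchSize,
          boolean useDecimal64ColumnVectors) {
        if (useDecimal64ColumnVectors
            && type.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
          return new Decimal64ColumnVector(batchSize, type.getPrecision(), type.getScale());
        }
        return new DecimalColumnVector(batchSize, type.getPrecision(), type.getScale());
      }
    }

The useDecimal64ColumnVectors flag itself is derived, as in the OrcEncodedDataReader and SerDeEncodedDataReader hunks above, from whether HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED resolves to "decimal_64".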
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java index d8a3cac..8bf990a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorInBloomFilterColDynamicValue.java @@ -82,6 +82,7 @@ public class VectorInBloomFilterColDynamicValue extends VectorExpression { // Instantiate BloomFilterCheck based on input column type switch (colVectorType) { case LONG: + case DECIMAL_64: bfCheck = new LongBloomFilterCheck(); break; case DOUBLE: http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 18bacc5..fdb067f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -107,6 +107,7 @@ public class VectorUDAFBloomFilter extends VectorAggregateExpression { } switch (colVectorType) { case LONG: + case DECIMAL_64: valueProcessor = new ValueProcessorLong(); break; case DOUBLE: http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index 5b2cb4c..e011657 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -235,4 +235,12 @@ public class VectorUDFAdaptor extends VectorExpression { public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()).build(); } + + public VectorUDFArgDesc[] getArgDescs() { + return argDescs; + } + + public void setArgDescs(final VectorUDFArgDesc[] argDescs) { + this.argDescs = argDescs; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java index 2b005c4..c88ee99 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.llap.DebugUtils; import java.util.Arrays; +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -416,7 +417,12 @@ public abstract class 
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java
index 2b005c4..c88ee99 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/BatchToRowReader.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.llap.DebugUtils;
 import java.util.Arrays;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -416,7 +417,12 @@ public abstract class BatchToRowReader<StructType, UnionType>
     } else {
       result = (HiveDecimalWritable) previous;
     }
-    result.set(((DecimalColumnVector) vector).vector[row]);
+    if (vector instanceof Decimal64ColumnVector) {
+      long value = ((Decimal64ColumnVector) vector).vector[row];
+      result.deserialize64(value, ((Decimal64ColumnVector) vector).scale);
+    } else {
+      result.set(((DecimalColumnVector) vector).vector[row]);
+    }
     return result;
   } else {
     return null;

http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
index e632d43..6434414 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/NullRowsInputFormat.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.io.NullWritable;
@@ -50,6 +51,11 @@ public class NullRowsInputFormat implements InputFormat<NullWritable, NullWritab
   static final int MAX_ROW = 100; // to prevent infinite loop
   static final Logger LOG = LoggerFactory.getLogger(NullRowsRecordReader.class.getName());
 
+  @Override
+  public VectorizedSupport.Support[] getSupportedFeatures() {
+    return null;
+  }
+
   public static class DummyInputSplit extends FileSplit {
     @SuppressWarnings("unused") // Serialization ctor.
     private DummyInputSplit() {

http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index b6f92e3..3c11847 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
@@ -161,6 +162,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
   SelfDescribingInputFormatInterface, AcidInputFormat<NullWritable, OrcStruct>,
   CombineHiveInputFormat.AvoidSplitCombination, BatchToRowInputFormat {
 
+  @Override
+  public VectorizedSupport.Support[] getSupportedFeatures() {
+    return new VectorizedSupport.Support[] {VectorizedSupport.Support.DECIMAL_64};
+  }
+
   static enum SplitStrategyKind {
     HYBRID,
     BI,
@@ -328,7 +334,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
     options.include(genIncludedColumns(schema, conf));
     setSearchArgument(options, types, conf, isOriginal);
-    return file.rowsOptions(options);
+    return file.rowsOptions(options, conf);
   }
 
   public static boolean isOriginal(Reader file) {
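Note: getSupportedFeatures() lets a caller ask an input format which ColumnVector variants it can hand back; OrcInputFormat advertises DECIMAL_64 while NullRowsInputFormat advertises nothing. A rough sketch of how such a check might look on the caller side (only getSupportedFeatures and the Support enum come from this patch; the surrounding names are illustrative):

    // Hypothetical caller-side check: only request decimal_64 batches if the
    // input format reports that it can produce them.
    VectorizedSupport.Support[] supports = inputFormat.getSupportedFeatures();
    boolean supportsDecimal64 = supports != null
        && Arrays.asList(supports).contains(VectorizedSupport.Support.DECIMAL_64);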
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
index 9d954ca..6571a24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
@@ -23,6 +23,7 @@ import java.util.Map;
 import java.util.TreeMap;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.AbstractFileMergeOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -231,16 +232,17 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
    * @param maxKey only return keys less than or equal to maxKey if it is
    *               non-null
    * @param options options to provide to read the rows.
+   * @param conf
    * @throws IOException
    */
   @VisibleForTesting
   ReaderPairAcid(ReaderKey key, Reader reader,
-                 RecordIdentifier minKey, RecordIdentifier maxKey,
-                 ReaderImpl.Options options) throws IOException {
+      RecordIdentifier minKey, RecordIdentifier maxKey,
+      ReaderImpl.Options options, final Configuration conf) throws IOException {
     this.reader = reader;
     this.key = key;
     // TODO use stripe statistics to jump over stripes
-    recordReader = reader.rowsOptions(options);
+    recordReader = reader.rowsOptions(options, conf);
     this.minKey = minKey;
     this.maxKey = maxKey;
     // advance the reader until we reach the minimum key
@@ -440,7 +442,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
     RecordIdentifier newMinKey = minKey;
     RecordIdentifier newMaxKey = maxKey;
-    recordReader = reader.rowsOptions(options);
+    recordReader = reader.rowsOptions(options, conf);
     /**
      * Logically each bucket consists of 0000_0, 0000_0_copy_1... 0000_0_copy_N. etc We don't
      * know N a priori so if this is true, then the current split is from 0000_0_copy_N file.
@@ -589,7 +591,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
       throw new IllegalStateException("No 'original' files found for bucketId=" + this.bucketId +
           " in " + mergerOptions.getRootPath());
     }
-    recordReader = getReader().rowsOptions(options);
+    recordReader = getReader().rowsOptions(options, conf);
     next(nextRecord());//load 1st row
   }
   @Override public RecordReader getRecordReader() {
@@ -623,7 +625,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
         nextRecord = null;
         return;
       }
-      recordReader = reader.rowsOptions(options);
+      recordReader = reader.rowsOptions(options, conf);
     }
   }
 }
@@ -1043,7 +1045,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
       //required (on Tez) that base_x/ doesn't have a file for 'bucket'
       reader = OrcFile.createReader(bucketPath, OrcFile.readerOptions(conf));
       pair = new ReaderPairAcid(baseKey, reader, keyInterval.getMinKey(), keyInterval.getMaxKey(),
-        eventOptions);
+        eventOptions, conf);
     }
     else {
       pair = new EmptyReaderPair();
@@ -1053,7 +1055,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
     else {
       assert reader != null : "no reader? " + mergerOptions.getRootPath();
       pair = new ReaderPairAcid(baseKey, reader, keyInterval.getMinKey(), keyInterval.getMaxKey(),
-        eventOptions);
+        eventOptions, conf);
     }
   }
   minKey = pair.getMinKey();
@@ -1113,7 +1115,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
       //HIVE-17320: we should compute a SARG to push down min/max key to delete_delta
       Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf));
       ReaderPair deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey,
-        deltaEventOptions);
+        deltaEventOptions, conf);
       if (deltaPair.nextRecord() != null) {
         ensurePutReader(key, deltaPair);
         key = new ReaderKey();
@@ -1128,7 +1130,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{
       assert length >= 0;
       Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length));
       //must get statementId from file name since Acid 1.0 doesn't write it into bucketProperty
-      ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, deltaEventOptions);
+      ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, deltaEventOptions, conf);
       if (deltaPair.nextRecord() != null) {
         ensurePutReader(key, deltaPair);
         key = new ReaderKey();
" + mergerOptions.getRootPath(); pair = new ReaderPairAcid(baseKey, reader, keyInterval.getMinKey(), keyInterval.getMaxKey(), - eventOptions); + eventOptions, conf); } } minKey = pair.getMinKey(); @@ -1113,7 +1115,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{ //HIVE-17320: we should compute a SARG to push down min/max key to delete_delta Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf)); ReaderPair deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, - deltaEventOptions); + deltaEventOptions, conf); if (deltaPair.nextRecord() != null) { ensurePutReader(key, deltaPair); key = new ReaderKey(); @@ -1128,7 +1130,7 @@ public class OrcRawRecordMerger implements AcidInputFormat.RawReader<OrcStruct>{ assert length >= 0; Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); //must get statementId from file name since Acid 1.0 doesn't write it into bucketProperty - ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, deltaEventOptions); + ReaderPairAcid deltaPair = new ReaderPairAcid(key, deltaReader, minKey, maxKey, deltaEventOptions, conf); if (deltaPair.nextRecord() != null) { ensurePutReader(key, deltaPair); key = new ReaderKey(); http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java index 7485e60..8fd9b90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/Reader.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.io.orc; import java.io.IOException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -55,7 +56,16 @@ public interface Reader extends org.apache.orc.Reader { * @throws IOException */ RecordReader rowsOptions(Options options) throws IOException; - + + /** + * Create a RecordReader that reads everything with the given options. + * @param options the options to use + * @param conf conf object + * @return a new RecordReader + * @throws IOException + */ + RecordReader rowsOptions(Options options, Configuration conf) throws IOException; + /** * Create a RecordReader that will scan the entire file. * This is a legacy method and rowsOptions is preferred. 
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
index 1a6db1f..171b02b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.io.orc;
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -73,11 +74,17 @@ public class ReaderImpl extends org.apache.orc.impl.ReaderImpl
   @Override
   public RecordReader rowsOptions(Options options) throws IOException {
+    return rowsOptions(options, null);
+  }
+
+  @Override
+  public RecordReader rowsOptions(Options options, Configuration conf) throws IOException {
     LOG.info("Reading ORC rows from " + path + " with " + options);
-    return new RecordReaderImpl(this, options);
+    return new RecordReaderImpl(this, options, conf);
   }
 
+  @Override
   public RecordReader rows(boolean[] include) throws IOException {
     return rowsOptions(new Options().include(include));

http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
index 5b001a0..c6fe4fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
@@ -23,8 +23,11 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
@@ -48,6 +51,7 @@ import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.orc.OrcFile;
 import org.apache.orc.TypeDescription;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -60,9 +64,15 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
   private long baseRow;
 
   protected RecordReaderImpl(ReaderImpl fileReader,
-      Reader.Options options) throws IOException {
+      Reader.Options options, final Configuration conf) throws IOException {
     super(fileReader, options);
-    batch = this.schema.createRowBatch();
+    final boolean useDecimal64ColumnVectors = conf != null && HiveConf.getVar(conf,
+        HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
+    if (useDecimal64ColumnVectors){
+      batch = this.schema.createRowBatchV2();
+    } else {
+      batch = this.schema.createRowBatch();
+    }
     rowInBatch = 0;
   }
 
@@ -80,8 +90,8 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
     return true;
   }
 
-  public VectorizedRowBatch createRowBatch() {
-    return this.schema.createRowBatch();
+  public VectorizedRowBatch createRowBatch(boolean useDecimal64) {
+    return useDecimal64 ? this.schema.createRowBatchV2() : this.schema.createRowBatch();
   }
 
   @Override
@@ -393,7 +403,12 @@ public class RecordReaderImpl extends org.apache.orc.impl.RecordReaderImpl
     } else {
       result = (HiveDecimalWritable) previous;
     }
-    result.set(((DecimalColumnVector) vector).vector[row]);
+    if (vector instanceof Decimal64ColumnVector) {
+      long value = ((Decimal64ColumnVector) vector).vector[row];
+      result.deserialize64(value, ((Decimal64ColumnVector) vector).scale);
+    } else {
+      result.set(((DecimalColumnVector) vector).vector[row]);
+    }
     return result;
   } else {
     return null;
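Note: createRowBatchV2() produces batches whose short-precision decimal columns are Decimal64ColumnVector rather than DecimalColumnVector, which is why the row-level readers above must handle both types; deserialize64 rebuilds a HiveDecimalWritable from the scaled long. A small illustration of the conversion (the values are made up):

    // A decimal(10,2) value 12.34 is stored in a Decimal64ColumnVector as the long 1234.
    HiveDecimalWritable result = new HiveDecimalWritable();
    result.deserialize64(1234L, 2);   // scale 2 -> 12.34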
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
index 66ffcae..1a91d4a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowBatchReader.java
@@ -112,7 +112,7 @@ public class VectorizedOrcAcidRowBatchReader
     final Reader reader = OrcInputFormat.createOrcReaderForSplit(conf, (OrcSplit) inputSplit);
     // Careful with the range here now, we do not want to read the whole base file like deltas.
-    innerReader = reader.rowsOptions(readerOptions.range(offset, length));
+    innerReader = reader.rowsOptions(readerOptions.range(offset, length), conf);
     baseReader = new org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>() {
 
       @Override
@@ -145,7 +145,13 @@ public class VectorizedOrcAcidRowBatchReader
         return innerReader.getProgress();
       }
     };
-    this.vectorizedRowBatchBase = ((RecordReaderImpl) innerReader).createRowBatch();
+    final boolean useDecimal64ColumnVectors = HiveConf
+        .getVar(conf, ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
+    if (useDecimal64ColumnVectors) {
+      this.vectorizedRowBatchBase = ((RecordReaderImpl) innerReader).createRowBatch(true);
+    } else {
+      this.vectorizedRowBatchBase = ((RecordReaderImpl) innerReader).createRowBatch(false);
+    }
   }
 
   /**
@@ -864,11 +870,17 @@ public class VectorizedOrcAcidRowBatchReader
     private final Reader reader;
 
     DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
-        ValidWriteIdList validWriteIdList, boolean isBucketedTable) throws IOException {
+        ValidWriteIdList validWriteIdList, boolean isBucketedTable, final JobConf conf) throws IOException {
       this.reader = deleteDeltaReader;
-      this.recordReader = deleteDeltaReader.rowsOptions(readerOptions);
+      this.recordReader = deleteDeltaReader.rowsOptions(readerOptions, conf);
       this.bucketForSplit = bucket;
-      this.batch = deleteDeltaReader.getSchema().createRowBatch();
+      final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
+          .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
+      if (useDecimal64ColumnVector) {
+        this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
+      } else {
+        this.batch = deleteDeltaReader.getSchema().createRowBatch();
+      }
       if (!recordReader.nextBatch(batch)) { // Read the first batch.
         this.batch = null; // Oh! the first batch itself was null. Close the reader.
       }
@@ -1067,7 +1079,7 @@ public class VectorizedOrcAcidRowBatchReader
         throw new DeleteEventsOverflowMemoryException();
       }
       DeleteReaderValue deleteReaderValue = new DeleteReaderValue(deleteDeltaReader,
-          readerOptions, bucket, validWriteIdList, isBucketedTable);
+          readerOptions, bucket, validWriteIdList, isBucketedTable, conf);
       DeleteRecordKey deleteRecordKey = new DeleteRecordKey();
       if (deleteReaderValue.next(deleteRecordKey)) {
         sortMerger.put(deleteRecordKey, deleteReaderValue);
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
index c581bba..892fcc0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcInputFormat.java
@@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
 import org.apache.hadoop.hive.ql.io.SelfDescribingInputFormatInterface;
@@ -99,7 +100,7 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
     options.include(OrcInputFormat.genIncludedColumns(schema, conf));
     OrcInputFormat.setSearchArgument(options, types, conf, true);
 
-    this.reader = file.rowsOptions(options);
+    this.reader = file.rowsOptions(options, conf);
 
     int partitionColumnCount = rbCtx.getPartitionColumnCount();
     if (partitionColumnCount > 0) {
@@ -204,4 +205,9 @@ public class VectorizedOrcInputFormat extends FileInputFormat<NullWritable, Vect
     }
     return true;
   }
+
+  @Override
+  public VectorizedSupport.Support[] getSupportedFeatures() {
+    return new VectorizedSupport.Support[] {VectorizedSupport.Support.DECIMAL_64};
+  }
 }
http://git-wip-us.apache.org/repos/asf/hive/blob/dd512593/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
index 71682af..91a01e9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
@@ -24,6 +24,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -63,6 +65,7 @@ import org.apache.hadoop.io.Text;
 import com.google.common.annotations.VisibleForTesting;
 
 import org.apache.orc.PhysicalWriter;
+import org.apache.orc.TypeDescription;
 
 /**
  * An ORC file writer. The file is divided into stripes, which is the natural
@@ -93,7 +96,15 @@ public class WriterImpl extends org.apache.orc.impl.WriterImpl implements Writer
                     OrcFile.WriterOptions opts) throws IOException {
     super(fs, path, opts);
     this.inspector = opts.getInspector();
-    this.internalBatch = opts.getSchema().createRowBatch(opts.getBatchSize());
+    boolean useDecimal64ColumnVectors = opts.getConfiguration() != null &&
+        HiveConf.getVar(opts.getConfiguration(), HiveConf.ConfVars.HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED)
+            .equalsIgnoreCase("decimal_64");
+    if (useDecimal64ColumnVectors) {
+      this.internalBatch = opts.getSchema().createRowBatch(TypeDescription.RowBatchVersion.USE_DECIMAL64,
+          opts.getBatchSize());
+    } else {
+      this.internalBatch = opts.getSchema().createRowBatch(opts.getBatchSize());
+    }
     this.fields = initializeFieldsFromOi(inspector);
   }
@@ -207,9 +218,15 @@ public class WriterImpl extends org.apache.orc.impl.WriterImpl implements Writer
         break;
       }
       case DECIMAL: {
-        DecimalColumnVector vector = (DecimalColumnVector) column;
-        vector.set(rowId, ((HiveDecimalObjectInspector) inspector)
+        if (column instanceof Decimal64ColumnVector) {
+          Decimal64ColumnVector vector = (Decimal64ColumnVector) column;
+          vector.set(rowId, ((HiveDecimalObjectInspector) inspector)
+              .getPrimitiveWritableObject(obj));
+        } else {
+          DecimalColumnVector vector = (DecimalColumnVector) column;
+          vector.set(rowId, ((HiveDecimalObjectInspector) inspector)
              .getPrimitiveWritableObject(obj));
+        }
         break;
       }
     }
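Note: on the write side, Decimal64ColumnVector.set(row, HiveDecimalWritable) scales the value into the vector's long storage, so the writer only needs to pick the right vector flavor when it builds its internal batch. A rough standalone sketch of the two batch flavors, assuming a decimal column with precision <= 18 (the schema and values here are illustrative):

    // createRowBatchV2() gives the decimal column a Decimal64ColumnVector;
    // createRowBatch() would give it a DecimalColumnVector instead.
    TypeDescription schema = TypeDescription.fromString("struct<d:decimal(10,2)>");
    VectorizedRowBatch batch = schema.createRowBatchV2();
    Decimal64ColumnVector d = (Decimal64ColumnVector) batch.cols[0];
    d.set(0, new HiveDecimalWritable("12.34"));   // stored internally as 1234 with scale 2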