This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch revert-37530-pick_21_opt_fill_partition in repository https://gitbox.apache.org/repos/asf/doris.git
commit 5ff2703f3abf1de0fa528a6aef8e62647eb13988 Author: Mingyu Chen <[email protected]> AuthorDate: Wed Jul 17 17:19:37 2024 +0800 Revert "[opt](serde)Optimize the filling of fixed values into block columns…" This reverts commit 6932eef65e54d2ae060b4973da3aec0d919d7154. --- .../serde/data_type_datetimev2_serde.cpp | 21 ----------------- .../data_types/serde/data_type_datetimev2_serde.h | 5 ----- .../data_types/serde/data_type_datev2_serde.cpp | 21 ----------------- .../vec/data_types/serde/data_type_datev2_serde.h | 6 ----- .../data_types/serde/data_type_decimal_serde.cpp | 26 ---------------------- .../vec/data_types/serde/data_type_decimal_serde.h | 6 ----- .../data_types/serde/data_type_nullable_serde.cpp | 20 ----------------- .../data_types/serde/data_type_nullable_serde.h | 3 --- .../data_types/serde/data_type_number_serde.cpp | 22 ------------------ .../vec/data_types/serde/data_type_number_serde.h | 6 ----- be/src/vec/data_types/serde/data_type_serde.h | 21 ----------------- .../vec/data_types/serde/data_type_string_serde.h | 25 --------------------- be/src/vec/exec/format/orc/vorc_reader.cpp | 9 +++++--- .../exec/format/parquet/vparquet_group_reader.cpp | 9 +++++--- be/src/vec/exec/scan/vfile_scanner.cpp | 9 +++++--- 15 files changed, 18 insertions(+), 191 deletions(-) diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 850ac5766fc..63a199199a0 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -247,25 +247,4 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone, return Status::OK(); } -Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, - const FormatOptions& options) const { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - - DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); -} - -void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { - auto& col = static_cast<ColumnVector<UInt64>&>(column); - auto sz = col.size(); - UInt64 val = col.get_element(sz - 1); - col.insert_many_vals(val, times); -} - } // namespace doris::vectorized diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h index ef4aa6843a0..00b05f5fcd6 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h @@ -77,11 +77,6 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override; - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; - private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index f2d595b87c4..eb9122dd240 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -175,26 +175,5 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con return Status::OK(); } -Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice, - int rows, int* num_deserialized, - const FormatOptions& options) const { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - DataTypeDateV2SerDe::insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); -} - -void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column, - int times) const { - auto& col = static_cast<ColumnVector<UInt32>&>(column); - auto sz = col.size(); - UInt32 val = col.get_element(sz - 1); - - col.insert_many_vals(val, times); -} - } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h b/be/src/vec/data_types/serde/data_type_datev2_serde.h index 52e4cec364e..9a8b050eeba 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.h +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h @@ -74,12 +74,6 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override; - - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; - private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index e979211d6d7..a59fdedbfe6 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -275,32 +275,6 @@ Status DataTypeDecimalSerDe<T>::write_column_to_orc(const std::string& timezone, } return Status::OK(); } -template <typename T> - -Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, - const FormatOptions& options) const { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - - DataTypeDecimalSerDe::insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); -} - -template <typename T> -void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& column, - int times) const { - auto& col = static_cast<ColumnDecimal<T>&>(column); - auto sz = col.size(); - - T val = col.get_element(sz - 1); - for (int i = 0; i < times; i++) { - col.insert_value(val); - } -} template class DataTypeDecimalSerDe<Decimal32>; template class DataTypeDecimalSerDe<Decimal64>; diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index 484c6686bc5..55e68699f01 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -114,12 +114,6 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override; - - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; - private: template <bool is_binary_format> Status _write_column_to_mysql(const IColumn& column, MysqlRowBuffer<is_binary_format>& result, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 98ff1eb7f81..faa3c8eb1f4 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -127,26 +127,6 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector( return Status::OK(); } -Status DataTypeNullableSerDe::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, - const FormatOptions& options) const { - auto& col = static_cast<ColumnNullable&>(column); - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - auto& null_map = col.get_null_map_data(); - auto& nested_column = col.get_nested_column(); - - null_map.resize_fill( - rows, null_map.back()); // data_type_nullable::insert_column_last_value_multiple_times() - if (rows - 1 != 0) { - nested_serde->insert_column_last_value_multiple_times(nested_column, rows - 1); - } - *num_deserialized = rows; - return Status::OK(); -} - Status DataTypeNullableSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { auto& null_column = assert_cast<ColumnNullable&>(column); diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 7b4841dcbdf..09d2fbde409 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -47,9 +47,6 @@ public: int* num_deserialized, const FormatOptions& options) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override; Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, int hive_text_complex_type_delimiter_level = 1) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 299779ea267..0ba338ce399 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -224,28 +224,6 @@ void DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column, const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start; col_data.insert(raw_data, raw_data + row_count); } -template <typename T> -Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json( - IColumn& column, Slice& slice, int rows, int* num_deserialized, - const FormatOptions& options) const { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - - DataTypeNumberSerDe::insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); -} - -template <typename T> -void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& column, - int times) const { - auto& col = static_cast<ColumnVector<T>&>(column); - auto sz = col.size(); - T val = col.get_element(sz - 1); - col.insert_many_vals(val, times); -} template <typename T> template <bool is_binary_format> diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index 18ba2fb26c7..c66bc994605 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -70,12 +70,6 @@ public: int* num_deserialized, const FormatOptions& options) const override; - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override; - - void insert_column_last_value_multiple_times(IColumn& column, int times) const override; - Status write_column_to_pb(const IColumn& column, PValues& result, int start, int end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 1f6e24aef3f..77663e1d43a 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -234,27 +234,6 @@ public: virtual Status deserialize_column_from_json_vector(IColumn& column, std::vector<Slice>& slices, int* num_deserialized, const FormatOptions& options) const = 0; - // deserialize fixed values.Repeatedly insert the value row times into the column. - virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - *num_deserialized = 0; - return st; - } - insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); - } - // Insert the last value to the end of this column multiple times. - virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const { - //If you try to simplify this operation by using `column.insert_many_from(column, column.size() - 1, rows - 1);` - // you are likely to get incorrect data results. - MutableColumnPtr dum_col = column.clone_empty(); - dum_col->insert_from(column, column.size() - 1); - column.insert_many_from(*dum_col.get(), 0, times); - } virtual Status deserialize_one_cell_from_hive_text( IColumn& column, Slice& slice, const FormatOptions& options, diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 0f0f1d0dfe8..b74b5857086 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -132,31 +132,6 @@ public: } return Status::OK(); } - - Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, - int* num_deserialized, - const FormatOptions& options) const override { - Status st = deserialize_one_cell_from_json(column, slice, options); - if (!st.ok()) { - return st; - } - - DataTypeStringSerDeBase::insert_column_last_value_multiple_times(column, rows - 1); - *num_deserialized = rows; - return Status::OK(); - } - - void insert_column_last_value_multiple_times(IColumn& column, int times) const override { - auto& col = static_cast<ColumnString&>(column); - auto sz = col.size(); - - StringRef ref = col.get_data_at(sz - 1); - String str(ref.data, ref.size); - std::vector<StringRef> refs(times, {str.data(), str.size()}); - - col.insert_many_strings(refs.data(), refs.size()); - } - Status read_column_from_pb(IColumn& column, const PValues& arg) const override { auto& column_dest = assert_cast<ColumnType&>(column); column_dest.reserve(column_dest.size() + arg.string_value_size()); diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 54d94dcecc7..16909f0023a 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -935,10 +935,13 @@ Status OrcReader::_fill_partition_columns( auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); + vector<Slice> slices(rows); + for (int i = 0; i < rows; i++) { + slices[i] = {value.data(), value.size()}; + } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, - &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 9ec1235be1d..5e824f34817 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -631,10 +631,13 @@ Status RowGroupReader::_fill_partition_columns( auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); + vector<Slice> slices(rows); + for (int i = 0; i < rows; i++) { + slices[i] = {value.data(), value.size()}; + } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, - &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 0688f2c0712..944884434f4 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -499,10 +499,13 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) { auto& [value, slot_desc] = kv.second; auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); + vector<Slice> slices(rows); + for (int i = 0; i < rows; i++) { + slices[i] = {value.data(), value.size()}; + } int num_deserialized = 0; - if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, - &num_deserialized, - _text_formatOptions) != Status::OK()) { + if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, &num_deserialized, + _text_formatOptions) != Status::OK()) { return Status::InternalError("Failed to fill partition column: {}={}", slot_desc->col_name(), value); } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
