This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch revert-45667-seralize-variant in repository https://gitbox.apache.org/repos/asf/doris.git
commit f47a7297142b4ca2d7b959bb696a73bb4f1089b1 Author: lihangyu <[email protected]> AuthorDate: Fri Dec 20 01:21:23 2024 +0800 Revert "[fix](serialize) fix column serialize and deserialize (#45667)" This reverts commit c15122c043defa4c417663bf34f5e48d98dc2f42. --- be/src/vec/columns/column_object.cpp | 13 +++------- .../vec/data_types/serde/data_type_array_serde.cpp | 7 +++--- .../vec/data_types/serde/data_type_array_serde.h | 2 +- .../vec/data_types/serde/data_type_jsonb_serde.cpp | 5 ++-- .../vec/data_types/serde/data_type_jsonb_serde.h | 2 +- .../data_types/serde/data_type_nullable_serde.cpp | 29 +++++++++++----------- .../data_types/serde/data_type_nullable_serde.h | 4 +-- .../data_types/serde/data_type_number_serde.cpp | 4 ++- .../vec/data_types/serde/data_type_number_serde.h | 2 +- be/src/vec/data_types/serde/data_type_serde.h | 2 +- .../vec/data_types/serde/data_type_string_serde.h | 4 ++- 11 files changed, 36 insertions(+), 38 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 17c02172da8..91a0936673f 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1053,10 +1053,8 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std: auto& nullable_col = assert_cast<const ColumnNullable&>(*part); // insert value - ColumnString::Chars& chars = value->get_chars(); nullable_serde->get_nested_serde()->write_one_cell_to_binary( - nullable_col.get_nested_column(), chars, row); - value->get_offsets().push_back(chars.size()); + nullable_col.get_nested_column(), value, row); } return; } @@ -1116,11 +1114,6 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi end = data + size; break; } - case TypeIndex::Nothing: { - res = Null(); - end = data; - break; - } case TypeIndex::Array: { const size_t size = *reinterpret_cast<const size_t*>(data); data += sizeof(size_t); @@ -1130,9 +1123,9 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi for (size_t i = 0; i < size; ++i) { Field nested_field; const auto nested_type = - static_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++)); + assert_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++)); data = parse_binary_from_sparse_column(nested_type, data, nested_field, info_res); - array[i] = std::move(nested_field); + array.emplace_back(std::move(nested_field)); } end = data; break; diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 2c906ce4c49..57a43fbb381 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a } void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString::Chars& chars, - int64_t row_num) const { + ColumnString* dst_column, int64_t row_num) const { const uint8_t type = static_cast<uint8_t>(TypeIndex::Array); + ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t); chars.resize(new_size); @@ -453,8 +453,9 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column, memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&size), sizeof(size_t)); for (size_t offset = start; offset != end; ++offset) { - nested_serde->write_one_cell_to_binary(nested_column, chars, offset); + nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset); } + dst_column->get_offsets().push_back(chars.size()); } } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index 25da83f2cff..aaf1a425512 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -101,7 +101,7 @@ public: nested_serde->set_return_object_as_string(value); } - void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 69dbae7241c..7279a0fc4a6 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn& column, const PValues& a } void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString::Chars& chars, - int64_t row_num) const { + ColumnString* dst_column, int64_t row_num) const { const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB); const auto& col = assert_cast<const ColumnString&>(src_column); const auto& data_ref = col.get_data_at(row_num); size_t data_size = data_ref.size; + ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size; chars.resize(new_size); @@ -294,6 +294,7 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column, memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size), sizeof(size_t)); memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size); + dst_column->get_offsets().push_back(new_size); } } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index 95e510516ed..d6d29cce556 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -71,7 +71,7 @@ public: int64_t end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; - void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 9193a3b0100..b325ec88e9f 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -393,21 +393,20 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column, return Status::OK(); } -void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString::Chars& chars, - int64_t row_num) const { - auto& col = assert_cast<const ColumnNullable&>(src_column); - if (col.is_null_at(row_num)) [[unlikely]] { - const uint8_t type = static_cast<uint8_t>(TypeIndex::Nothing); - const size_t old_size = chars.size(); - const size_t new_size = old_size + sizeof(uint8_t); - chars.resize(new_size); - memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t)); - } else { - auto& nested_col = col.get_nested_column(); - nested_serde->write_one_cell_to_binary(nested_col, chars, row_num); - } -} +// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column, +// ColumnString* dst_column, +// int64_t row_num) const { +// auto& col = assert_cast<const ColumnNullable&>(src_column); +// uint8_t is_null = 0; +// if (col.is_null_at(row_num)) [[unlikely]] { +// is_null = 1; +// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t)); +// } else { +// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t)); +// auto& nested_col = col.get_nested_column(); +// nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num); +// } +// } } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 828c079244b..33cf86ab694 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -99,8 +99,8 @@ public: int64_t row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; - void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, - int64_t row_num) const override; + // void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, + // int64_t row_num) const override; DataTypeSerDeSPtr get_nested_serde() { return nested_serde; } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index fcf72e6f992..c5f2994f6b2 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -395,17 +395,19 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const std::string& timezone, template <typename T> void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column, - ColumnString::Chars& chars, + ColumnString* dst_column, int64_t row_num) const { const uint8_t type = static_cast<uint8_t>(TypeId<T>::value); const auto& data_ref = assert_cast<const ColumnType&>(src_column).get_data_at(row_num); + ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size; chars.resize(new_size); memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t)); memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size); + dst_column->get_offsets().push_back(new_size); } /// Explicit template instantiations - to avoid code bloat in headers. diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index db4373e646c..c9073f5e868 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -107,7 +107,7 @@ public: int64_t row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; - void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 8a879b5df26..38b2590b062 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -337,7 +337,7 @@ public: Arena& mem_pool, int64_t row_num) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; - virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst, int64_t row_num) const { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary"); } diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 39a623316a2..50acf28c6f2 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -366,13 +366,14 @@ public: return Status::OK(); } - void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, int64_t row_num) const override { const uint8_t type = static_cast<uint8_t>(TypeIndex::String); const auto& col = assert_cast<const ColumnType&>(src_column); const auto& data_ref = col.get_data_at(row_num); const size_t data_size = data_ref.size; + ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size; chars.resize(new_size); @@ -382,6 +383,7 @@ public: sizeof(size_t)); memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size); + dst_column->get_offsets().push_back(chars.size()); } private: --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
