This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit aa684d85d7529c83a128a88ee5b9aedb06137177 Author: lihangyu <[email protected]> AuthorDate: Thu May 9 17:54:20 2024 +0800 [Bug](Variant) fix rapidjson::Allocator may cause mem allocate issue when build with `DENABLE_CLANG_COVERAGE` (#34150) --- be/src/vec/columns/column_object.cpp | 19 ++++++++++++------- be/src/vec/data_types/serde/data_type_array_serde.cpp | 5 +++-- be/src/vec/data_types/serde/data_type_array_serde.h | 2 +- be/src/vec/data_types/serde/data_type_jsonb_serde.cpp | 2 +- be/src/vec/data_types/serde/data_type_jsonb_serde.h | 2 +- .../vec/data_types/serde/data_type_nullable_serde.cpp | 6 +++--- .../vec/data_types/serde/data_type_nullable_serde.h | 2 +- be/src/vec/data_types/serde/data_type_number_serde.h | 4 ++-- be/src/vec/data_types/serde/data_type_serde.cpp | 2 +- be/src/vec/data_types/serde/data_type_serde.h | 2 +- be/src/vec/data_types/serde/data_type_string_serde.h | 2 +- 11 files changed, 27 insertions(+), 21 deletions(-) diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 6d7104647c7..ddb5bee6e01 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -51,6 +51,7 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" +#include "vec/common/arena.h" #include "vec/common/assert_cast.h" #include "vec/common/field_visitors.h" #include "vec/common/schema_util.h" @@ -956,7 +957,8 @@ rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& json, const PathInDat Status find_and_set_leave_value(const IColumn* column, const PathInData& path, const DataTypeSerDeSPtr& type_serde, const DataTypePtr& type, rapidjson::Value& root, - rapidjson::Document::AllocatorType& allocator, int row) { + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, + int row) { // sanitize type and column if (column->get_name() != type->create_column()->get_name()) { return Status::InternalError( @@ -977,7 +979,7 @@ Status find_and_set_leave_value(const IColumn* column, const PathInData& path, << ", root: " << std::string(buffer.GetString(), buffer.GetSize()); return Status::NotFound("Not found path {}", path.get_path()); } - RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, allocator, row)); + RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, allocator, mem_pool, row)); return Status::OK(); } @@ -1079,14 +1081,16 @@ Status ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String if (!doc_structure->IsNull()) { root.CopyFrom(*doc_structure, doc_structure->GetAllocator()); } + Arena mem_pool; #ifndef NDEBUG VLOG_DEBUG << "dump structure " << JsonFunctions::print_json_value(*doc_structure); #endif for (const auto& subcolumn : subcolumns) { - RETURN_IF_ERROR(find_and_set_leave_value( - subcolumn->data.get_finalized_column_ptr(), subcolumn->path, - subcolumn->data.get_least_common_type_serde(), - subcolumn->data.get_least_common_type(), root, doc_structure->GetAllocator(), row)); + RETURN_IF_ERROR(find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(), + subcolumn->path, + subcolumn->data.get_least_common_type_serde(), + subcolumn->data.get_least_common_type(), root, + doc_structure->GetAllocator(), mem_pool, row)); if (subcolumn->path.empty() && !root.IsObject()) { // root was modified, only handle root node break; @@ -1147,6 +1151,7 @@ Status ColumnObject::merge_sparse_to_root_column() { root.CopyFrom(*doc_structure, doc_structure->GetAllocator()); } size_t null_count = 0; + Arena mem_pool; for (const auto& subcolumn : sparse_columns) { auto& column = subcolumn->data.get_finalized_column_ptr(); if (assert_cast<const ColumnNullable&>(*column).is_null_at(i)) { @@ -1156,7 +1161,7 @@ Status ColumnObject::merge_sparse_to_root_column() { bool succ = find_and_set_leave_value(column, subcolumn->path, subcolumn->data.get_least_common_type_serde(), subcolumn->data.get_least_common_type(), root, - doc_structure->GetAllocator(), i); + doc_structure->GetAllocator(), mem_pool, i); if (succ && subcolumn->path.empty() && !root.IsObject()) { // root was modified, only handle root node break; diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index dfe39b2c8a4..51f6efc78fe 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -228,14 +228,15 @@ void DataTypeArraySerDe::write_one_cell_to_jsonb(const IColumn& column, JsonbWri Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const { + Arena& mem_pool, int row_num) const { // Use allocator instead of stack memory, since rapidjson hold the reference of String value // otherwise causes stack use after free auto& column_array = static_cast<const ColumnArray&>(column); if (row_num > column_array.size()) { return Status::InternalError("row num {} out of range {}!", row_num, column_array.size()); } - void* mem = allocator.Malloc(sizeof(vectorized::Field)); + // void* mem = allocator.Malloc(sizeof(vectorized::Field)); + void* mem = mem_pool.alloc(sizeof(vectorized::Field)); if (!mem) { return Status::InternalError("Malloc failed"); } diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index d7d709727d4..ac4fcbadd57 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -72,7 +72,7 @@ public: int32_t col_id, int row_num) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, - rapidjson::Document::AllocatorType& allocator, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index f632b5d83e8..9899732ed07 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -205,7 +205,7 @@ static void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value& Status DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const { + Arena& mem_pool, int row_num) const { const auto& data = assert_cast<const ColumnString&>(column); const auto jsonb_val = data.get_data_at(row_num); if (jsonb_val.empty()) { diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index d0eaabc81e2..fe8f5e7f6a3 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -62,7 +62,7 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, - rapidjson::Document::AllocatorType& allocator, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; Status write_column_to_pb(const IColumn& column, PValues& result, int start, diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 1393913b5c4..43ee1dce5d9 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -338,14 +338,14 @@ Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone, Status DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const { + Arena& mem_pool, int row_num) const { auto& col = static_cast<const ColumnNullable&>(column); auto& nested_col = col.get_nested_column(); if (col.is_null_at(row_num)) { result.SetNull(); } else { - RETURN_IF_ERROR( - nested_serde->write_one_cell_to_json(nested_col, result, allocator, row_num)); + RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_col, result, allocator, + mem_pool, row_num)); } return Status::OK(); } diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 24f1946614c..06417b1aea0 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -90,7 +90,7 @@ public: } Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, - rapidjson::Document::AllocatorType& allocator, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index e7f3b7f4deb..78adf009393 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -95,7 +95,7 @@ public: int start, int end, std::vector<StringRef>& buffer_list) const override; Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, - rapidjson::Document::AllocatorType& allocator, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; @@ -295,7 +295,7 @@ template <typename T> Status DataTypeNumberSerDe<T>::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const { + Arena& mem_pool, int row_num) const { const auto& data = reinterpret_cast<const ColumnType&>(column).get_data(); if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, Int16> || std::is_same_v<T, Int32>) { result.SetInt(data[row_num]); diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp b/be/src/vec/data_types/serde/data_type_serde.cpp index 6ed6086b0f9..2bdd63aa989 100644 --- a/be/src/vec/data_types/serde/data_type_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_serde.cpp @@ -84,7 +84,7 @@ void DataTypeSerDe::convert_field_to_rapidjson(const vectorized::Field& field, Status DataTypeSerDe::write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const { + Arena& mem_pool, int row_num) const { return Status::InternalError("Not support write {} to rapidjson", column.get_name()); } diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 464e23a9319..4352504ff62 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -278,7 +278,7 @@ public: // rapidjson virtual Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, rapidjson::Document::AllocatorType& allocator, - int row_num) const; + Arena& mem_pool, int row_num) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; protected: diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index c6cef1babd1..f5d3e47ba16 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -229,7 +229,7 @@ public: return Status::OK(); } Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& result, - rapidjson::Document::AllocatorType& allocator, + rapidjson::Document::AllocatorType& allocator, Arena& mem_pool, int row_num) const override { const auto& col = assert_cast<const ColumnType&>(column); const auto& data_ref = col.get_data_at(row_num); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
