This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit aa684d85d7529c83a128a88ee5b9aedb06137177
Author: lihangyu <[email protected]>
AuthorDate: Thu May 9 17:54:20 2024 +0800

    [Bug](Variant) fix rapidjson::Allocator memory allocation issue when building with `DENABLE_CLANG_COVERAGE` (#34150)
---
 be/src/vec/columns/column_object.cpp                  | 19 ++++++++++++-------
 be/src/vec/data_types/serde/data_type_array_serde.cpp |  5 +++--
 be/src/vec/data_types/serde/data_type_array_serde.h   |  2 +-
 be/src/vec/data_types/serde/data_type_jsonb_serde.cpp |  2 +-
 be/src/vec/data_types/serde/data_type_jsonb_serde.h   |  2 +-
 .../vec/data_types/serde/data_type_nullable_serde.cpp |  6 +++---
 .../vec/data_types/serde/data_type_nullable_serde.h   |  2 +-
 be/src/vec/data_types/serde/data_type_number_serde.h  |  4 ++--
 be/src/vec/data_types/serde/data_type_serde.cpp       |  2 +-
 be/src/vec/data_types/serde/data_type_serde.h         |  2 +-
 be/src/vec/data_types/serde/data_type_string_serde.h  |  2 +-
 11 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 6d7104647c7..ddb5bee6e01 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -51,6 +51,7 @@
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
 #include "vec/columns/columns_number.h"
+#include "vec/common/arena.h"
 #include "vec/common/assert_cast.h"
 #include "vec/common/field_visitors.h"
 #include "vec/common/schema_util.h"
@@ -956,7 +957,8 @@ rapidjson::Value* find_leaf_node_by_path(rapidjson::Value& 
json, const PathInDat
 Status find_and_set_leave_value(const IColumn* column, const PathInData& path,
                                 const DataTypeSerDeSPtr& type_serde, const 
DataTypePtr& type,
                                 rapidjson::Value& root,
-                                rapidjson::Document::AllocatorType& allocator, 
int row) {
+                                rapidjson::Document::AllocatorType& allocator, 
Arena& mem_pool,
+                                int row) {
     // sanitize type and column
     if (column->get_name() != type->create_column()->get_name()) {
         return Status::InternalError(
@@ -977,7 +979,7 @@ Status find_and_set_leave_value(const IColumn* column, 
const PathInData& path,
                      << ", root: " << std::string(buffer.GetString(), 
buffer.GetSize());
         return Status::NotFound("Not found path {}", path.get_path());
     }
-    RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, 
allocator, row));
+    RETURN_IF_ERROR(type_serde->write_one_cell_to_json(*column, *target, 
allocator, mem_pool, row));
     return Status::OK();
 }
 
@@ -1079,14 +1081,16 @@ Status 
ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String
     if (!doc_structure->IsNull()) {
         root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
     }
+    Arena mem_pool;
 #ifndef NDEBUG
     VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
 #endif
     for (const auto& subcolumn : subcolumns) {
-        RETURN_IF_ERROR(find_and_set_leave_value(
-                subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
-                subcolumn->data.get_least_common_type_serde(),
-                subcolumn->data.get_least_common_type(), root, 
doc_structure->GetAllocator(), row));
+        
RETURN_IF_ERROR(find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(),
+                                                 subcolumn->path,
+                                                 
subcolumn->data.get_least_common_type_serde(),
+                                                 
subcolumn->data.get_least_common_type(), root,
+                                                 
doc_structure->GetAllocator(), mem_pool, row));
         if (subcolumn->path.empty() && !root.IsObject()) {
             // root was modified, only handle root node
             break;
@@ -1147,6 +1151,7 @@ Status ColumnObject::merge_sparse_to_root_column() {
             root.CopyFrom(*doc_structure, doc_structure->GetAllocator());
         }
         size_t null_count = 0;
+        Arena mem_pool;
         for (const auto& subcolumn : sparse_columns) {
             auto& column = subcolumn->data.get_finalized_column_ptr();
             if (assert_cast<const ColumnNullable&>(*column).is_null_at(i)) {
@@ -1156,7 +1161,7 @@ Status ColumnObject::merge_sparse_to_root_column() {
             bool succ = find_and_set_leave_value(column, subcolumn->path,
                                                  
subcolumn->data.get_least_common_type_serde(),
                                                  
subcolumn->data.get_least_common_type(), root,
-                                                 
doc_structure->GetAllocator(), i);
+                                                 
doc_structure->GetAllocator(), mem_pool, i);
             if (succ && subcolumn->path.empty() && !root.IsObject()) {
                 // root was modified, only handle root node
                 break;
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index dfe39b2c8a4..51f6efc78fe 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -228,14 +228,15 @@ void DataTypeArraySerDe::write_one_cell_to_jsonb(const 
IColumn& column, JsonbWri
 
 Status DataTypeArraySerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
                                                   
rapidjson::Document::AllocatorType& allocator,
-                                                  int row_num) const {
+                                                  Arena& mem_pool, int 
row_num) const {
     // Use allocator instead of stack memory, since rapidjson hold the 
reference of String value
     // otherwise causes stack use after free
     auto& column_array = static_cast<const ColumnArray&>(column);
     if (row_num > column_array.size()) {
         return Status::InternalError("row num {} out of range {}!", row_num, 
column_array.size());
     }
-    void* mem = allocator.Malloc(sizeof(vectorized::Field));
+    // void* mem = allocator.Malloc(sizeof(vectorized::Field));
+    void* mem = mem_pool.alloc(sizeof(vectorized::Field));
     if (!mem) {
         return Status::InternalError("Malloc failed");
     }
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h
index d7d709727d4..ac4fcbadd57 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.h
+++ b/be/src/vec/data_types/serde/data_type_array_serde.h
@@ -72,7 +72,7 @@ public:
                                  int32_t col_id, int row_num) const override;
 
     Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& 
result,
-                                  rapidjson::Document::AllocatorType& 
allocator,
+                                  rapidjson::Document::AllocatorType& 
allocator, Arena& mem_pool,
                                   int row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
 
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index f632b5d83e8..9899732ed07 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -205,7 +205,7 @@ static void convert_jsonb_to_rapidjson(const JsonbValue& 
val, rapidjson::Value&
 
 Status DataTypeJsonbSerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
                                                   
rapidjson::Document::AllocatorType& allocator,
-                                                  int row_num) const {
+                                                  Arena& mem_pool, int 
row_num) const {
     const auto& data = assert_cast<const ColumnString&>(column);
     const auto jsonb_val = data.get_data_at(row_num);
     if (jsonb_val.empty()) {
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
index d0eaabc81e2..fe8f5e7f6a3 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h
@@ -62,7 +62,7 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
     Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& 
result,
-                                  rapidjson::Document::AllocatorType& 
allocator,
+                                  rapidjson::Document::AllocatorType& 
allocator, Arena& mem_pool,
                                   int row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
     Status write_column_to_pb(const IColumn& column, PValues& result, int 
start,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 1393913b5c4..43ee1dce5d9 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -338,14 +338,14 @@ Status DataTypeNullableSerDe::write_column_to_orc(const 
std::string& timezone,
 Status DataTypeNullableSerDe::write_one_cell_to_json(const IColumn& column,
                                                      rapidjson::Value& result,
                                                      
rapidjson::Document::AllocatorType& allocator,
-                                                     int row_num) const {
+                                                     Arena& mem_pool, int 
row_num) const {
     auto& col = static_cast<const ColumnNullable&>(column);
     auto& nested_col = col.get_nested_column();
     if (col.is_null_at(row_num)) {
         result.SetNull();
     } else {
-        RETURN_IF_ERROR(
-                nested_serde->write_one_cell_to_json(nested_col, result, 
allocator, row_num));
+        RETURN_IF_ERROR(nested_serde->write_one_cell_to_json(nested_col, 
result, allocator,
+                                                             mem_pool, 
row_num));
     }
     return Status::OK();
 }
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 24f1946614c..06417b1aea0 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -90,7 +90,7 @@ public:
     }
 
     Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& 
result,
-                                  rapidjson::Document::AllocatorType& 
allocator,
+                                  rapidjson::Document::AllocatorType& 
allocator, Arena& mem_pool,
                                   int row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h
index e7f3b7f4deb..78adf009393 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -95,7 +95,7 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
     Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& 
result,
-                                  rapidjson::Document::AllocatorType& 
allocator,
+                                  rapidjson::Document::AllocatorType& 
allocator, Arena& mem_pool,
                                   int row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& 
result) const override;
 
@@ -295,7 +295,7 @@ template <typename T>
 Status DataTypeNumberSerDe<T>::write_one_cell_to_json(const IColumn& column,
                                                       rapidjson::Value& result,
                                                       
rapidjson::Document::AllocatorType& allocator,
-                                                      int row_num) const {
+                                                      Arena& mem_pool, int 
row_num) const {
     const auto& data = reinterpret_cast<const ColumnType&>(column).get_data();
     if constexpr (std::is_same_v<T, Int8> || std::is_same_v<T, Int16> || 
std::is_same_v<T, Int32>) {
         result.SetInt(data[row_num]);
diff --git a/be/src/vec/data_types/serde/data_type_serde.cpp b/be/src/vec/data_types/serde/data_type_serde.cpp
index 6ed6086b0f9..2bdd63aa989 100644
--- a/be/src/vec/data_types/serde/data_type_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_serde.cpp
@@ -84,7 +84,7 @@ void DataTypeSerDe::convert_field_to_rapidjson(const 
vectorized::Field& field,
 
 Status DataTypeSerDe::write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
                                              
rapidjson::Document::AllocatorType& allocator,
-                                             int row_num) const {
+                                             Arena& mem_pool, int row_num) 
const {
     return Status::InternalError("Not support write {} to rapidjson", 
column.get_name());
 }
 
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 464e23a9319..4352504ff62 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -278,7 +278,7 @@ public:
     // rapidjson
     virtual Status write_one_cell_to_json(const IColumn& column, 
rapidjson::Value& result,
                                           rapidjson::Document::AllocatorType& 
allocator,
-                                          int row_num) const;
+                                          Arena& mem_pool, int row_num) const;
     virtual Status read_one_cell_from_json(IColumn& column, const 
rapidjson::Value& result) const;
 
 protected:
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index c6cef1babd1..f5d3e47ba16 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -229,7 +229,7 @@ public:
         return Status::OK();
     }
     Status write_one_cell_to_json(const IColumn& column, rapidjson::Value& 
result,
-                                  rapidjson::Document::AllocatorType& 
allocator,
+                                  rapidjson::Document::AllocatorType& 
allocator, Arena& mem_pool,
                                   int row_num) const override {
         const auto& col = assert_cast<const ColumnType&>(column);
         const auto& data_ref = col.get_data_at(row_num);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to