This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fbc060f1112c9b365536392be881fe846d691885
Author: TengJianPing <[email protected]>
AuthorDate: Mon Dec 26 21:23:58 2022 +0800

    [fix](string) fix offsets overflow for extremely large String column (#15360)
    
    * [fix](string) fix offsets overflow for extremely large String column
    
    * fix
---
 be/src/vec/columns/column_string.cpp               |  6 ++++++
 be/src/vec/columns/column_string.h                 | 24 +++++++++++++++++++++-
 .../main/java/org/apache/doris/common/Config.java  |  2 +-
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp
index 0d702ef89c..3bada76599 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -83,6 +83,7 @@ void ColumnString::insert_range_from(const IColumn& src, 
size_t start, size_t le
     size_t nested_length = src_concrete.offsets[start + length - 1] - 
nested_offset;
 
     size_t old_chars_size = chars.size();
+    check_chars_length(old_chars_size + nested_length);
     chars.resize(old_chars_size + nested_length);
     memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], 
nested_length);
 
@@ -218,6 +219,7 @@ const char* 
ColumnString::deserialize_and_insert_from_arena(const char* pos) {
 
     const size_t old_size = chars.size();
     const size_t new_size = old_size + string_size;
+    check_chars_length(new_size);
     chars.resize(new_size);
     memcpy(chars.data() + old_size, pos, string_size);
 
@@ -300,6 +302,7 @@ ColumnPtr ColumnString::index_impl(const 
PaddedPODArray<Type>& indexes, size_t l
     for (size_t i = 0; i < limit; ++i) {
         new_chars_size += size_at(indexes[i]);
     }
+    check_chars_length(new_chars_size);
     res_chars.resize(new_chars_size);
 
     res_offsets.resize(limit);
@@ -399,6 +402,7 @@ ColumnPtr ColumnString::replicate(const Offsets& 
replicate_offsets) const {
         prev_string_offset = offsets[i];
     }
 
+    check_chars_length(res_chars.size());
     return res;
 }
 
@@ -436,6 +440,8 @@ void ColumnString::replicate(const uint32_t* counts, size_t 
target_size, IColumn
 
         prev_string_offset = offsets[i];
     }
+
+    check_chars_length(res_chars.size());
 }
 
 void ColumnString::reserve(size_t n) {
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
index 26a734fb08..4bbf77a477 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -42,6 +42,10 @@ public:
     using Chars = PaddedPODArray<UInt8>;
 
 private:
+    // Currently Offsets is uint32: if chars.size() exceeds 4G, the offsets will overflow.
+    // Limit chars.size() and check the size when inserting data into ColumnString.
+    static constexpr size_t MAX_STRING_SIZE = 1024 * 1024 * 1024;
+
     friend class COWHelper<IColumn, ColumnString>;
     friend class OlapBlockDataConvertor;
 
@@ -57,6 +61,12 @@ private:
     /// Size of i-th element, including terminating zero.
     size_t ALWAYS_INLINE size_at(ssize_t i) const { return offsets[i] - 
offsets[i - 1]; }
 
+    void ALWAYS_INLINE check_chars_length(size_t length) const {
+        if (UNLIKELY(length > MAX_STRING_SIZE)) {
+            LOG(FATAL) << "string column length is too large.";
+        }
+    }
+
     template <bool positive>
     struct less;
 
@@ -113,6 +123,8 @@ public:
         const size_t size_to_append = s.size();
         const size_t new_size = old_size + size_to_append;
 
+        check_chars_length(new_size);
+
         chars.resize(new_size);
         memcpy(chars.data() + old_size, s.c_str(), size_to_append);
         offsets.push_back(new_size);
@@ -135,6 +147,8 @@ public:
             const size_t offset = src.offsets[n - 1];
             const size_t new_size = old_size + size_to_append;
 
+            check_chars_length(new_size);
+
             chars.resize(new_size);
             memcpy_small_allow_read_write_overflow15(chars.data() + old_size, 
&src.chars[offset],
                                                      size_to_append);
@@ -147,6 +161,7 @@ public:
         const size_t new_size = old_size + length;
 
         if (length) {
+            check_chars_length(new_size);
             chars.resize(new_size);
             memcpy(chars.data() + old_size, pos, length);
         }
@@ -158,6 +173,7 @@ public:
         const size_t new_size = old_size + length;
 
         if (length) {
+            check_chars_length(new_size);
             chars.resize(new_size);
             memcpy(chars.data() + old_size, pos, length);
         }
@@ -188,6 +204,7 @@ public:
                 length = 0;
             }
         }
+        check_chars_length(offset);
         chars.resize(offset);
     }
 
@@ -199,8 +216,9 @@ public:
         }
         const auto old_size = chars.size();
         const auto begin_offset = offsets_[0];
-        const auto total_mem_size = offsets_[num] - begin_offset;
+        const size_t total_mem_size = offsets_[num] - begin_offset;
         if (LIKELY(total_mem_size > 0)) {
+            check_chars_length(total_mem_size + old_size);
             chars.resize(total_mem_size + old_size);
             memcpy(chars.data() + old_size, data + begin_offset, 
total_mem_size);
         }
@@ -224,6 +242,7 @@ public:
         }
 
         const size_t old_size = chars.size();
+        check_chars_length(old_size + new_size);
         chars.resize(old_size + new_size);
 
         Char* data = chars.data();
@@ -245,6 +264,7 @@ public:
         }
 
         const size_t old_size = chars.size();
+        check_chars_length(old_size + new_size);
         chars.resize(old_size + new_size);
 
         Char* data = chars.data();
@@ -272,6 +292,7 @@ public:
             offsets[offset_size + i] = new_size;
         }
 
+        check_chars_length(new_size);
         chars.resize(new_size);
 
         for (size_t i = start_index; i < start_index + num; i++) {
@@ -428,6 +449,7 @@ public:
             chars.clear();
             offsets[self_row] = data.size;
         } else {
+            check_chars_length(chars.size() + data.size);
             offsets[self_row] = offsets[self_row - 1] + data.size;
         }
 
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 89eb179acd..1b4a77cd01 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1470,7 +1470,7 @@ public class Config extends ConfigBase {
      * When the result set is large, you may need to increase this value.
      */
     @ConfField
-    public static int grpc_max_message_size_bytes = 1 * 1024 * 1024 * 1024; // 
1GB
+    public static int grpc_max_message_size_bytes = 2147483647; // 2GB
 
     /**
      * Used to set minimal number of replication per tablet.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to