This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit fbc060f1112c9b365536392be881fe846d691885 Author: TengJianPing <[email protected]> AuthorDate: Mon Dec 26 21:23:58 2022 +0800 [fix](string) fix offsets over flow for extreme large String column (#15360) * [fix](string) fix offsets over flow for extreme large String column * fix --- be/src/vec/columns/column_string.cpp | 6 ++++++ be/src/vec/columns/column_string.h | 24 +++++++++++++++++++++- .../main/java/org/apache/doris/common/Config.java | 2 +- 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 0d702ef89c..3bada76599 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -83,6 +83,7 @@ void ColumnString::insert_range_from(const IColumn& src, size_t start, size_t le size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset; size_t old_chars_size = chars.size(); + check_chars_length(old_chars_size + nested_length); chars.resize(old_chars_size + nested_length); memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length); @@ -218,6 +219,7 @@ const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) { const size_t old_size = chars.size(); const size_t new_size = old_size + string_size; + check_chars_length(new_size); chars.resize(new_size); memcpy(chars.data() + old_size, pos, string_size); @@ -300,6 +302,7 @@ ColumnPtr ColumnString::index_impl(const PaddedPODArray<Type>& indexes, size_t l for (size_t i = 0; i < limit; ++i) { new_chars_size += size_at(indexes[i]); } + check_chars_length(new_chars_size); res_chars.resize(new_chars_size); res_offsets.resize(limit); @@ -399,6 +402,7 @@ ColumnPtr ColumnString::replicate(const Offsets& replicate_offsets) const { prev_string_offset = offsets[i]; } + check_chars_length(res_chars.size()); return res; } @@ -436,6 +440,8 @@ void ColumnString::replicate(const uint32_t* counts, size_t target_size, IColumn prev_string_offset = offsets[i]; } + + check_chars_length(res_chars.size()); } void ColumnString::reserve(size_t n) { diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 26a734fb08..4bbf77a477 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -42,6 +42,10 @@ public: using Chars = PaddedPODArray<UInt8>; private: + // currently Offsets is uint32, if chars.size() exceeds 4G, offset will overflow. + // limit chars.size() and check the size when inserting data into ColumnString. + static constexpr size_t MAX_STRING_SIZE = 1024 * 1024 * 1024; + friend class COWHelper<IColumn, ColumnString>; friend class OlapBlockDataConvertor; @@ -57,6 +61,12 @@ private: /// Size of i-th element, including terminating zero. size_t ALWAYS_INLINE size_at(ssize_t i) const { return offsets[i] - offsets[i - 1]; } + void ALWAYS_INLINE check_chars_length(size_t length) const { + if (UNLIKELY(length > MAX_STRING_SIZE)) { + LOG(FATAL) << "string column length is too large."; + } + } + template <bool positive> struct less; @@ -113,6 +123,8 @@ public: const size_t size_to_append = s.size(); const size_t new_size = old_size + size_to_append; + check_chars_length(new_size); + chars.resize(new_size); memcpy(chars.data() + old_size, s.c_str(), size_to_append); offsets.push_back(new_size); @@ -135,6 +147,8 @@ public: const size_t offset = src.offsets[n - 1]; const size_t new_size = old_size + size_to_append; + check_chars_length(new_size); + chars.resize(new_size); memcpy_small_allow_read_write_overflow15(chars.data() + old_size, &src.chars[offset], size_to_append); @@ -147,6 +161,7 @@ public: const size_t new_size = old_size + length; if (length) { + check_chars_length(new_size); chars.resize(new_size); memcpy(chars.data() + old_size, pos, length); } @@ -158,6 +173,7 @@ public: const size_t new_size = old_size + length; if (length) { + check_chars_length(new_size); chars.resize(new_size); memcpy(chars.data() + old_size, pos, length); } @@ -188,6 +204,7 @@ public: length = 0; } } + check_chars_length(offset); chars.resize(offset); } @@ -199,8 +216,9 @@ public: } const auto old_size = chars.size(); const auto begin_offset = offsets_[0]; - const auto total_mem_size = offsets_[num] - begin_offset; + const size_t total_mem_size = offsets_[num] - begin_offset; if (LIKELY(total_mem_size > 0)) { + check_chars_length(total_mem_size + old_size); chars.resize(total_mem_size + old_size); memcpy(chars.data() + old_size, data + begin_offset, total_mem_size); } @@ -224,6 +242,7 @@ public: } const size_t old_size = chars.size(); + check_chars_length(old_size + new_size); chars.resize(old_size + new_size); Char* data = chars.data(); @@ -245,6 +264,7 @@ public: } const size_t old_size = chars.size(); + check_chars_length(old_size + new_size); chars.resize(old_size + new_size); Char* data = chars.data(); @@ -272,6 +292,7 @@ public: offsets[offset_size + i] = new_size; } + check_chars_length(new_size); chars.resize(new_size); for (size_t i = start_index; i < start_index + num; i++) { @@ -428,6 +449,7 @@ public: chars.clear(); offsets[self_row] = data.size; } else { + check_chars_length(chars.size() + data.size); offsets[self_row] = offsets[self_row - 1] + data.size; } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 89eb179acd..1b4a77cd01 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1470,7 +1470,7 @@ public class Config extends ConfigBase { * When the result set is large, you may need to increase this value. */ @ConfField - public static int grpc_max_message_size_bytes = 1 * 1024 * 1024 * 1024; // 1GB + public static int grpc_max_message_size_bytes = 2147483647; // 2GB /** * Used to set minimal number of replication per tablet. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
