This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b96cde9e007 [fix](inverted index) fix StringRef to std::string
reinterpret_cast overflow (#61120)
b96cde9e007 is described below
commit b96cde9e00771824f4aa57cda491b5e82d0a70ef
Author: Jack <[email protected]>
AuthorDate: Wed Mar 11 19:39:59 2026 +0800
[fix](inverted index) fix StringRef to std::string reinterpret_cast
overflow (#61120)
## Proposed changes
Fix `reinterpret_cast<std::string*>` applied to a `StringRef*`, which causes
a buffer overflow (out-of-bounds read) on ARM64, where `std::string` is
24 bytes but `StringRef` is only 16 bytes.
### 1. `function_multi_match.cpp`
Convert the `StringRef` to a `std::string` before passing it as `query_value`.
Downstream, `FullTextIndexReader::query()` does
`reinterpret_cast<const std::string*>(query_value)`, which reads 8 bytes past
the end of the `StringRef` object.
### 2. `in_list_predicate.h`
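Fix 3 sites where the `HybridSet` iterator's `get_value()` returns a
`StringRef*` but the code casts it to `std::string*`. Each site now has an
`if constexpr (is_string_type(Type))` guard that reads the value as a
`StringRef`: two sites construct a `std::string` from
`StringRef::data`/`StringRef::size`, and the `bf->test_bytes(...)` site
passes `data`/`size` directly without building a `std::string`.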
---
be/src/exprs/function/function_multi_match.cpp | 5 +-
be/src/storage/predicate/in_list_predicate.h | 84 +++++++++++++++-----------
2 files changed, 52 insertions(+), 37 deletions(-)
diff --git a/be/src/exprs/function/function_multi_match.cpp
b/be/src/exprs/function/function_multi_match.cpp
index 948af1a8c5b..2ba2a42b08d 100644
--- a/be/src/exprs/function/function_multi_match.cpp
+++ b/be/src/exprs/function/function_multi_match.cpp
@@ -73,12 +73,15 @@ Status FunctionMultiMatch::evaluate_inverted_index(
}
// query
- auto query_str = arguments[1].column->get_data_at(0);
+ auto query_str_ref = arguments[1].column->get_data_at(0);
auto param_type = arguments[1].type->get_primitive_type();
if (!is_string_type(param_type)) {
return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
"arguments for multi_match must be string");
}
+ // Must convert StringRef to std::string because downstream readers
+ // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as
std::string*.
+ std::string query_str(query_str_ref.data, query_str_ref.size);
// search
InvertedIndexParam param;
diff --git a/be/src/storage/predicate/in_list_predicate.h
b/be/src/storage/predicate/in_list_predicate.h
index 9b05405518b..df41d24d6b4 100644
--- a/be/src/storage/predicate/in_list_predicate.h
+++ b/be/src/storage/predicate/in_list_predicate.h
@@ -114,8 +114,15 @@ public:
}
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
- const T* value = (const T*)(iter->get_value());
- _update_min_max(*value);
+ if constexpr (is_string_type(Type)) {
+ // get_value() returns StringRef*, not std::string*
+ const auto* ref = (const StringRef*)(iter->get_value());
+ T str(ref->data, ref->size);
+ _update_min_max(str);
+ } else {
+ const T* value = (const T*)(iter->get_value());
+ _update_min_max(*value);
+ }
iter->next();
}
}
@@ -167,12 +174,18 @@ public:
roaring::Roaring indices;
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
- const void* ptr = iter->get_value();
- // auto&& value =
PrimitiveTypeConvertor<Type>::to_storage_field_type(
- // *reinterpret_cast<const T*>(ptr));
std::unique_ptr<InvertedIndexQueryParamFactory> query_param =
nullptr;
-
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>((const
T*)ptr,
-
query_param));
+ if constexpr (is_string_type(Type)) {
+ // get_value() returns StringRef*, not std::string*
+ const auto* ref = (const StringRef*)(iter->get_value());
+ T str(ref->data, ref->size);
+
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+ &str, query_param));
+ } else {
+ const T* value = (const T*)(iter->get_value());
+
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+ value, query_param));
+ }
InvertedIndexQueryType query_type =
InvertedIndexQueryType::EQUAL_QUERY;
InvertedIndexParam param;
param.column_name = name_with_type.first;
@@ -412,44 +425,43 @@ public:
if constexpr (PT == PredicateType::IN_LIST) {
HybridSetBase::IteratorBase* iter = _values->begin();
while (iter->has_next()) {
- const T* value = (const T*)(iter->get_value());
-
auto test_bytes = [&]<typename V>(const V& val) {
return
bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&val)),
sizeof(V));
};
- // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as int32
- if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
- Type == PrimitiveType::TYPE_SMALLINT ||
- Type == PrimitiveType::TYPE_INT) {
- int32_t int32_value = static_cast<int32_t>(*value);
- if (test_bytes(int32_value)) {
- return true;
- }
- } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
- // BIGINT -> hash as int64
- if (test_bytes(*value)) {
- return true;
- }
- } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
- // DOUBLE -> hash as double
- if (test_bytes(*value)) {
- return true;
- }
- } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
- // FLOAT -> hash as float
- if (test_bytes(*value)) {
+ if constexpr (is_string_type(Type)) {
+ // get_value() returns StringRef*, not std::string*
+ const auto* ref = (const StringRef*)(iter->get_value());
+ if (bf->test_bytes(ref->data, ref->size)) {
return true;
}
- } else if constexpr (is_string_type(Type)) {
- // VARCHAR/STRING -> hash bytes
- if (bf->test_bytes(value->data(), value->size())) {
+ } else {
+ const T* value = (const T*)(iter->get_value());
+ // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as
int32
+ if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
+ Type == PrimitiveType::TYPE_SMALLINT ||
+ Type == PrimitiveType::TYPE_INT) {
+ int32_t int32_value = static_cast<int32_t>(*value);
+ if (test_bytes(int32_value)) {
+ return true;
+ }
+ } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
+ if (test_bytes(*value)) {
+ return true;
+ }
+ } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
+ if (test_bytes(*value)) {
+ return true;
+ }
+ } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
+ if (test_bytes(*value)) {
+ return true;
+ }
+ } else {
+ // Unsupported types: return true (accept)
return true;
}
- } else {
- // Unsupported types: return true (accept)
- return true;
}
iter->next();
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]