This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b96cde9e007 [fix](inverted index) fix StringRef to std::string 
reinterpret_cast overflow (#61120)
b96cde9e007 is described below

commit b96cde9e00771824f4aa57cda491b5e82d0a70ef
Author: Jack <[email protected]>
AuthorDate: Wed Mar 11 19:39:59 2026 +0800

    [fix](inverted index) fix StringRef to std::string reinterpret_cast 
overflow (#61120)
    
    ## Proposed changes
    
    Fix `reinterpret_cast<std::string*>` being applied to a `StringRef*`,
    which causes a buffer overflow (an 8-byte over-read) on ARM64, where
    `std::string` is 24 bytes but `StringRef` is only 16 bytes.
    
    ### 1. `function_multi_match.cpp`
    Convert `StringRef` to `std::string` before passing as `query_value`.
    Downstream `FullTextIndexReader::query()` does `reinterpret_cast<const
    std::string*>(query_value)`, reading 8 bytes past the `StringRef`
    buffer.
    
    ### 2. `in_list_predicate.h`
    Fix 3 sites where the `HybridSet` iterator's `get_value()` returns a
    `StringRef*`, but the code cast it to `std::string*`. Each site is now
    guarded by `if constexpr (is_string_type(Type))`: two sites construct a
    `std::string` from `StringRef::data`/`StringRef::size`, and the
    `bf->test_bytes` site passes `data`/`size` to it directly.
---
 be/src/exprs/function/function_multi_match.cpp |  5 +-
 be/src/storage/predicate/in_list_predicate.h   | 84 +++++++++++++++-----------
 2 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/be/src/exprs/function/function_multi_match.cpp 
b/be/src/exprs/function/function_multi_match.cpp
index 948af1a8c5b..2ba2a42b08d 100644
--- a/be/src/exprs/function/function_multi_match.cpp
+++ b/be/src/exprs/function/function_multi_match.cpp
@@ -73,12 +73,15 @@ Status FunctionMultiMatch::evaluate_inverted_index(
     }
 
     // query
-    auto query_str = arguments[1].column->get_data_at(0);
+    auto query_str_ref = arguments[1].column->get_data_at(0);
     auto param_type = arguments[1].type->get_primitive_type();
     if (!is_string_type(param_type)) {
         return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
                 "arguments for multi_match must be string");
     }
+    // Must convert StringRef to std::string because downstream readers
+    // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as 
std::string*.
+    std::string query_str(query_str_ref.data, query_str_ref.size);
 
     // search
     InvertedIndexParam param;
diff --git a/be/src/storage/predicate/in_list_predicate.h 
b/be/src/storage/predicate/in_list_predicate.h
index 9b05405518b..df41d24d6b4 100644
--- a/be/src/storage/predicate/in_list_predicate.h
+++ b/be/src/storage/predicate/in_list_predicate.h
@@ -114,8 +114,15 @@ public:
         }
         HybridSetBase::IteratorBase* iter = _values->begin();
         while (iter->has_next()) {
-            const T* value = (const T*)(iter->get_value());
-            _update_min_max(*value);
+            if constexpr (is_string_type(Type)) {
+                // get_value() returns StringRef*, not std::string*
+                const auto* ref = (const StringRef*)(iter->get_value());
+                T str(ref->data, ref->size);
+                _update_min_max(str);
+            } else {
+                const T* value = (const T*)(iter->get_value());
+                _update_min_max(*value);
+            }
             iter->next();
         }
     }
@@ -167,12 +174,18 @@ public:
         roaring::Roaring indices;
         HybridSetBase::IteratorBase* iter = _values->begin();
         while (iter->has_next()) {
-            const void* ptr = iter->get_value();
-            //            auto&& value = 
PrimitiveTypeConvertor<Type>::to_storage_field_type(
-            //                    *reinterpret_cast<const T*>(ptr));
             std::unique_ptr<InvertedIndexQueryParamFactory> query_param = 
nullptr;
-            
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>((const 
T*)ptr,
-                                                                               
      query_param));
+            if constexpr (is_string_type(Type)) {
+                // get_value() returns StringRef*, not std::string*
+                const auto* ref = (const StringRef*)(iter->get_value());
+                T str(ref->data, ref->size);
+                
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+                        &str, query_param));
+            } else {
+                const T* value = (const T*)(iter->get_value());
+                
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+                        value, query_param));
+            }
             InvertedIndexQueryType query_type = 
InvertedIndexQueryType::EQUAL_QUERY;
             InvertedIndexParam param;
             param.column_name = name_with_type.first;
@@ -412,44 +425,43 @@ public:
         if constexpr (PT == PredicateType::IN_LIST) {
             HybridSetBase::IteratorBase* iter = _values->begin();
             while (iter->has_next()) {
-                const T* value = (const T*)(iter->get_value());
-
                 auto test_bytes = [&]<typename V>(const V& val) {
                     return 
bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&val)),
                                           sizeof(V));
                 };
 
-                // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as int32
-                if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
-                              Type == PrimitiveType::TYPE_SMALLINT ||
-                              Type == PrimitiveType::TYPE_INT) {
-                    int32_t int32_value = static_cast<int32_t>(*value);
-                    if (test_bytes(int32_value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
-                    // BIGINT -> hash as int64
-                    if (test_bytes(*value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
-                    // DOUBLE -> hash as double
-                    if (test_bytes(*value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
-                    // FLOAT -> hash as float
-                    if (test_bytes(*value)) {
+                if constexpr (is_string_type(Type)) {
+                    // get_value() returns StringRef*, not std::string*
+                    const auto* ref = (const StringRef*)(iter->get_value());
+                    if (bf->test_bytes(ref->data, ref->size)) {
                         return true;
                     }
-                } else if constexpr (is_string_type(Type)) {
-                    // VARCHAR/STRING -> hash bytes
-                    if (bf->test_bytes(value->data(), value->size())) {
+                } else {
+                    const T* value = (const T*)(iter->get_value());
+                    // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as 
int32
+                    if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
+                                  Type == PrimitiveType::TYPE_SMALLINT ||
+                                  Type == PrimitiveType::TYPE_INT) {
+                        int32_t int32_value = static_cast<int32_t>(*value);
+                        if (test_bytes(int32_value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else {
+                        // Unsupported types: return true (accept)
                         return true;
                     }
-                } else {
-                    // Unsupported types: return true (accept)
-                    return true;
                 }
                 iter->next();
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to