This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new bb2f70b855d [branch-4.0][fix](inverted index) fix StringRef to std::string reinterpret_cast overflow (#61150)
bb2f70b855d is described below

commit bb2f70b855df3e892734a12de01e5a1f6941829f
Author: Jack <[email protected]>
AuthorDate: Tue Mar 10 11:55:03 2026 +0800

    [branch-4.0][fix](inverted index) fix StringRef to std::string reinterpret_cast overflow (#61150)
    
    ## Proposed changes
    
    Cherry-pick of #61120 to branch-4.0.
    
    On ARM64, std::string is 24 bytes but StringRef is 16 bytes. Several
    places pass StringRef* through void* and then reinterpret_cast to
    std::string*, reading 8 bytes beyond the buffer.
    
    1. **function_multi_match.cpp**: Convert the StringRef to a std::string
    before passing it as query_value. Downstream, FullTextIndexReader::query()
    reinterpret_casts query_value as std::string* (24 bytes on ARM64), but
    StringRef is only 16 bytes, causing a stack-buffer-overflow.
    
    2. **in_list_predicate.h**: Fix 3 sites where the HybridSet iterator
    returns StringRef* via get_value() but the code treats it as std::string*.
    Add an `if constexpr (is_string_type(Type))` guard that reads the value as
    a StringRef: two sites construct a std::string from its data/size before
    use, and the bloom-filter site passes data/size to test_bytes() directly.
    
    Cherry-pick applied cleanly with no conflicts.
    
    Co-authored-by: Claude Opus 4.6 <[email protected]>
---
 be/src/olap/in_list_predicate.h               | 84 +++++++++++++++------------
 be/src/vec/functions/function_multi_match.cpp |  5 +-
 2 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index b271fa85d1c..80b00b9af3b 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -115,8 +115,15 @@ public:
         }
         HybridSetBase::IteratorBase* iter = _values->begin();
         while (iter->has_next()) {
-            const T* value = (const T*)(iter->get_value());
-            _update_min_max(*value);
+            if constexpr (is_string_type(Type)) {
+                // get_value() returns StringRef*, not std::string*
+                const auto* ref = (const StringRef*)(iter->get_value());
+                T str(ref->data, ref->size);
+                _update_min_max(str);
+            } else {
+                const T* value = (const T*)(iter->get_value());
+                _update_min_max(*value);
+            }
             iter->next();
         }
     }
@@ -169,12 +176,18 @@ public:
         roaring::Roaring indices;
         HybridSetBase::IteratorBase* iter = _values->begin();
         while (iter->has_next()) {
-            const void* ptr = iter->get_value();
-            //            auto&& value = 
PrimitiveTypeConvertor<Type>::to_storage_field_type(
-            //                    *reinterpret_cast<const T*>(ptr));
             std::unique_ptr<InvertedIndexQueryParamFactory> query_param = 
nullptr;
-            
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>((const 
T*)ptr,
-                                                                               
      query_param));
+            if constexpr (is_string_type(Type)) {
+                // get_value() returns StringRef*, not std::string*
+                const auto* ref = (const StringRef*)(iter->get_value());
+                T str(ref->data, ref->size);
+                
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+                        &str, query_param));
+            } else {
+                const T* value = (const T*)(iter->get_value());
+                
RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value<Type>(
+                        value, query_param));
+            }
             InvertedIndexQueryType query_type = 
InvertedIndexQueryType::EQUAL_QUERY;
             InvertedIndexParam param;
             param.column_name = name_with_type.first;
@@ -417,44 +430,43 @@ public:
         if constexpr (PT == PredicateType::IN_LIST) {
             HybridSetBase::IteratorBase* iter = _values->begin();
             while (iter->has_next()) {
-                const T* value = (const T*)(iter->get_value());
-
                 auto test_bytes = [&]<typename V>(const V& val) {
                     return 
bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&val)),
                                           sizeof(V));
                 };
 
-                // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as int32
-                if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
-                              Type == PrimitiveType::TYPE_SMALLINT ||
-                              Type == PrimitiveType::TYPE_INT) {
-                    int32_t int32_value = static_cast<int32_t>(*value);
-                    if (test_bytes(int32_value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
-                    // BIGINT -> hash as int64
-                    if (test_bytes(*value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
-                    // DOUBLE -> hash as double
-                    if (test_bytes(*value)) {
-                        return true;
-                    }
-                } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
-                    // FLOAT -> hash as float
-                    if (test_bytes(*value)) {
+                if constexpr (is_string_type(Type)) {
+                    // get_value() returns StringRef*, not std::string*
+                    const auto* ref = (const StringRef*)(iter->get_value());
+                    if (bf->test_bytes(ref->data, ref->size)) {
                         return true;
                     }
-                } else if constexpr (is_string_type(Type)) {
-                    // VARCHAR/STRING -> hash bytes
-                    if (bf->test_bytes(value->data(), value->size())) {
+                } else {
+                    const T* value = (const T*)(iter->get_value());
+                    // Small integers (TINYINT, SMALLINT, INTEGER) -> hash as 
int32
+                    if constexpr (Type == PrimitiveType::TYPE_TINYINT ||
+                                  Type == PrimitiveType::TYPE_SMALLINT ||
+                                  Type == PrimitiveType::TYPE_INT) {
+                        int32_t int32_value = static_cast<int32_t>(*value);
+                        if (test_bytes(int32_value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_BIGINT) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_DOUBLE) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else if constexpr (Type == PrimitiveType::TYPE_FLOAT) {
+                        if (test_bytes(*value)) {
+                            return true;
+                        }
+                    } else {
+                        // Unsupported types: return true (accept)
                         return true;
                     }
-                } else {
-                    // Unsupported types: return true (accept)
-                    return true;
                 }
                 iter->next();
             }
diff --git a/be/src/vec/functions/function_multi_match.cpp 
b/be/src/vec/functions/function_multi_match.cpp
index 676ab446635..e9d66d74abf 100644
--- a/be/src/vec/functions/function_multi_match.cpp
+++ b/be/src/vec/functions/function_multi_match.cpp
@@ -73,12 +73,15 @@ Status FunctionMultiMatch::evaluate_inverted_index(
     }
 
     // query
-    auto query_str = arguments[1].column->get_data_at(0);
+    auto query_str_ref = arguments[1].column->get_data_at(0);
     auto param_type = arguments[1].type->get_primitive_type();
     if (!is_string_type(param_type)) {
         return Status::Error<ErrorCode::INDEX_INVALID_PARAMETERS>(
                 "arguments for multi_match must be string");
     }
+    // Must convert StringRef to std::string because downstream readers
+    // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as 
std::string*.
+    std::string query_str(query_str_ref.data, query_str_ref.size);
 
     // search
     InvertedIndexParam param;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to