This is an automated email from the ASF dual-hosted git repository.

BiteTheDDDDt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0d8654bd33d [fix](be) Fix varbinary literal construction (#64089)
0d8654bd33d is described below

commit 0d8654bd33da9dd93552057f753054936b0ebb3f
Author: Pxl <[email protected]>
AuthorDate: Thu Jun 4 11:34:59 2026 +0800

    [fix](be) Fix varbinary literal construction (#64089)
    
    `create_texpr_literal_node<TYPE_VARBINARY>` treated the input pointer as
    `std::string*`, but Doris `Field` stores `TYPE_VARBINARY` values as
    `StringView`. When TopN predicate conversion builds a VARBINARY literal
    from a `Field`, the helper reinterprets a `StringView*` as a
    `std::string*`, which can make `std::string` assignment read a bogus
    size and request a huge allocation under ASAN.
    
    This PR reads VARBINARY literal input as a `StringView`, copies the exact
    byte range into the Thrift literal, and adds VARBINARY test coverage for
    `create_texpr_node_from(Field, TYPE_VARBINARY, ...)` and for a `VLiteral`
    round trip. It also adds a `TYPE_VARBINARY` case to the `const void*`
    overload of `create_texpr_node_from`.
---
 be/src/exprs/vexpr.cpp       |  4 ++++
 be/src/exprs/vexpr.h         | 11 ++++++-----
 be/test/exprs/vexpr_test.cpp | 26 +++++++++++++++++++++++---
 3 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp
index 61e3effbdd1..65dcd4eb9de 100644
--- a/be/src/exprs/vexpr.cpp
+++ b/be/src/exprs/vexpr.cpp
@@ -164,6 +164,10 @@ TExprNode create_texpr_node_from(const void* data, const PrimitiveType& type, in
         THROW_IF_ERROR(create_texpr_literal_node<TYPE_STRING>(data, &node));
         break;
     }
+    case TYPE_VARBINARY: {
+        THROW_IF_ERROR(create_texpr_literal_node<TYPE_VARBINARY>(data, &node));
+        break;
+    }
     case TYPE_IPV4: {
         THROW_IF_ERROR(create_texpr_literal_node<TYPE_IPV4>(data, &node));
         break;
diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h
index 79f3485b3be..458f469c85e 100644
--- a/be/src/exprs/vexpr.h
+++ b/be/src/exprs/vexpr.h
@@ -39,6 +39,7 @@
 #include "core/data_type/data_type_ipv6.h"
 #include "core/data_type/define_primitive_type.h"
 #include "core/extended_types.h"
+#include "core/string_view.h"
 #include "core/types.h"
 #include "core/value/large_int_value.h"
 #include "core/value/timestamptz_value.h"
@@ -491,7 +492,7 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_BIGINT));
     } else if constexpr (T == TYPE_LARGEINT) {
         // data may not be 16-byte aligned; use unaligned_load to avoid UB.
-        int128_t origin_value = unaligned_load<int128_t>(data);
+        auto origin_value = unaligned_load<int128_t>(data);
         (*node).__set_node_type(TExprNodeType::LARGE_INT_LITERAL);
         TLargeIntLiteral large_int_literal;
         large_int_literal.__set_value(LargeIntValue::to_string(origin_value));
@@ -540,7 +541,7 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
     } else if constexpr (T == TYPE_DECIMALV2) {
         // data may not be 16-byte aligned (DecimalV2Value stores int128_t);
         // use unaligned_load to avoid UB.
-        DecimalV2Value origin_value = unaligned_load<DecimalV2Value>(data);
+        auto origin_value = unaligned_load<DecimalV2Value>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
         decimal_literal.__set_value(origin_value.to_string());
@@ -562,7 +563,7 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale));
     } else if constexpr (T == TYPE_DECIMAL128I) {
         // data may not be 16-byte aligned; use unaligned_load to avoid UB.
-        Decimal<int128_t> origin_value = unaligned_load<Decimal<int128_t>>(data);
+        auto origin_value = unaligned_load<Decimal<int128_t>>(data);
         (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
         TDecimalLiteral decimal_literal;
         // e.g. For a decimal(26,6) column, the initial value of the _min of the MinMax RF
@@ -627,10 +628,10 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
         (*node).__set_node_type(TExprNodeType::TIMEV2_LITERAL);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIMEV2, precision, scale));
     } else if constexpr (T == TYPE_VARBINARY) {
-        const auto* origin_value = reinterpret_cast<const std::string*>(data);
+        const auto* origin_value = reinterpret_cast<const StringView*>(data);
         (*node).__set_node_type(TExprNodeType::VARBINARY_LITERAL);
         TVarBinaryLiteral varbinary_literal;
-        varbinary_literal.__set_value(*origin_value);
+        varbinary_literal.__set_value(std::string(origin_value->data(), origin_value->size()));
         (*node).__set_varbinary_literal(varbinary_literal);
         (*node).__set_type(create_type_desc(PrimitiveType::TYPE_VARBINARY));
     } else {
diff --git a/be/test/exprs/vexpr_test.cpp b/be/test/exprs/vexpr_test.cpp
index c2269efb39b..dc430d55915 100644
--- a/be/test/exprs/vexpr_test.cpp
+++ b/be/test/exprs/vexpr_test.cpp
@@ -489,11 +489,11 @@ TEST(TEST_VEXPR, LITERALTEST) {
     }
     // float
     {
-        VLiteral literal(create_literal<TYPE_FLOAT, float>(1024.0f));
+        VLiteral literal(create_literal<TYPE_FLOAT, float>(1024.0F));
         ColumnPtr result_column;
         static_cast<void>(literal.execute_column(nullptr, nullptr, nullptr, 1, result_column));
         auto v = (*result_column)[0].get<TYPE_FLOAT>();
-        EXPECT_FLOAT_EQ(v, 1024.0f);
+        EXPECT_FLOAT_EQ(v, 1024.0F);
         EXPECT_EQ("1024", literal.value());
 
         auto node = std::make_shared<VLiteral>(
@@ -709,6 +709,26 @@ TEST(TEST_VEXPR, LITERALTEST) {
                 create_texpr_node_from((*result_column)[0], TYPE_STRING, 0, 0), true);
         EXPECT_EQ(s, node->value());
     }
+    // varbinary
+    {
+        const std::vector<std::string> values = {std::string("bin\0ary", 7),
+                                                 std::string("0123456789abc\0xyz", 17)};
+        for (const auto& value : values) {
+            auto field = Field::create_field<TYPE_VARBINARY>(
+                    StringView(value.data(), cast_set<uint32_t>(value.size())));
+            auto texpr_node = create_texpr_node_from(field, TYPE_VARBINARY, 0, 0);
+            EXPECT_EQ(TExprNodeType::VARBINARY_LITERAL, texpr_node.node_type);
+            EXPECT_EQ(value, texpr_node.varbinary_literal.value);
+
+            VLiteral literal(texpr_node);
+            EXPECT_EQ(value, literal.value());
+
+            ColumnPtr result_column;
+            ASSERT_TRUE(literal.execute_column(nullptr, nullptr, nullptr, 1, result_column).ok());
+            auto sv = (*result_column)[0].get<TYPE_VARBINARY>();
+            EXPECT_EQ(value, std::string(sv.data(), sv.size()));
+        }
+    }
     // decimalv2
     {
         VLiteral literal(create_literal<TYPE_DECIMALV2, std::string>(std::string("1234.56")));
@@ -912,5 +932,5 @@ TEST(VExprExecuteColumnTest, CorrectColumnPasses) {
     ColumnPtr result;
     auto st = expr.execute_column(nullptr, nullptr, nullptr, 1, result);
     EXPECT_TRUE(st.ok());
-    EXPECT_EQ(result->size(), 1u);
+    EXPECT_EQ(result->size(), 1U);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to