This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e8717f2587b [fix](core) Fix the core issue caused by extra fields when 
casting a string to a struct. (#50851)
e8717f2587b is described below

commit e8717f2587b00793f037357dfe8bab164b62a017
Author: Mryange <[email protected]>
AuthorDate: Fri May 16 15:54:40 2025 +0800

    [fix](core) Fix the core issue caused by extra fields when casting a string 
to a struct. (#50851)
    
    ```
    select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, b:int>);
    
    ==3393513==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x506001118e20 at pc 0x55808f91364a bp 0x7f9ec8816af0 sp 0x7f9ec8816ae8
    READ of size 8 at 0x506001118e20 thread T1298 (brpc_light)
        #0 0x55808f913649 in std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char>>::_M_data() const 
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/basic_string.h:223:28
        #1 0x55808f9386f6 in std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char>>::data() const 
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/basic_string.h:2597:16
        #2 0x55808fc4e57a in 
doris::Slice::Slice(std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char>> const&) 
/mnt/disk12/yanxuecheng/doris/be/src/util/slice.h:72:40
        #3 0x5580bebb7ed9 in 
doris::vectorized::DataTypeStructSerDe::deserialize_one_cell_from_json(doris::vectorized::IColumn&,
 doris::Slice&, doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk12/yanxuecheng/doris/be/src/vec/data_types/serde/data_type_struct_serde.cpp:141:17
        #4 0x5580c90cf235 in 
doris::vectorized::ConvertImplGenericFromString::execute(doris::FunctionContext*,
 doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned 
int>> const&, unsigned int, unsigned long) 
/mnt/disk12/yanxuecheng/doris/be/src/vec/functions/function_cast.h:513:36
        #5 0x5580c89607e6 in doris::Status std::__invoke_impl<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long), doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long>(std::__invoke_other, doris::Status (*&)(doris::FunctionContext*, 
doris::vectorized::Block&, std::vector<u [...]
        #6 0x5580c89605ca in std::enable_if<is_invocable_r_v<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long), doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long>, doris::Status>::type std::__invoke_r<doris::Status, 
doris::Status (*&)(doris::FunctionContext*, doris::ve [...]
        #7 0x5580c8960201 in std::_Function_handler<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned long), 
doris::Status (*)(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)>::_M_invoke(std::_Any_data const&, doris::FunctionContext*&&, 
doris::vectorized::Block&, std::vector<unsigned int, s [...]
        #8 0x5580c827ff5c in std::function<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned 
long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long) const 
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
        #9 0x5580c88fb7a7 in 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)::operator()(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std:: [...]
        #10 0x5580c88fae32 in doris::Status std::__invoke_impl<doris::Status, 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)&, doris::FunctionContext*, doris::vectorized:: [...]
        #11 0x5580c88fac0a in std::enable_if<is_invocable_r_v<doris::Status, 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int, 
unsigned long)&, doris::FunctionContext*, doris::vectorized::B [...]
        #12 0x5580c88fa641 in std::_Function_handler<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned long), 
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
 std::shared_ptr<doris::vectorized::IDataType const> const&, 
std::shared_ptr<doris::vectorized::IDataType const> const&, bool) 
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, st [...]
        #13 0x5580c827ff5c in std::function<doris::Status 
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int, 
std::allocator<unsigned int>> const&, unsigned long, unsigned 
long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&, 
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long, 
unsigned long) const 
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
    
    ```
    
    Now the same query returns NULL instead of crashing:
    ```
    mysql> select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, 
b:int>);
    +-----------------------------------------------------------------+
    | cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, b:int>) |
    +-----------------------------------------------------------------+
    | NULL                                                            |
    +-----------------------------------------------------------------+
    ```
---
 be/src/vec/data_types/serde/data_type_struct_serde.cpp  |  10 ++++++++++
 .../sql_functions/cast_function/test_cast_struct.out    | Bin 496 -> 513 bytes
 .../sql_functions/cast_function/test_cast_struct.groovy |   3 +++
 3 files changed, 13 insertions(+)

diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp 
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index beb829dc521..d6c0c26a06c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -111,6 +111,7 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
     char quote_char = 0;
 
     auto elem_size = elem_serdes_ptrs.size();
+    DCHECK_EQ(elem_size, elem_names.size());
     int field_pos = 0;
 
     for (; idx < slice_size; ++idx) {
@@ -138,6 +139,15 @@ Status 
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
             next.trim_prefix();
             next.trim_quote();
             // check field_name
+            if (field_pos >= elem_size) {
+                // we should do column revert if error
+                for (size_t j = 0; j < field_pos; j++) {
+                    struct_column.get_column(j).pop_back(1);
+                }
+                return Status::InvalidArgument(
+                        "Actual struct field number is more than schema field 
number {}.",
+                        field_pos, elem_size);
+            }
             if (elem_names[field_pos] != next) {
                 // we should do column revert if error
                 for (size_t j = 0; j < field_pos; j++) {
diff --git 
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
 
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
index fa63c0da504..b27ed779acb 100644
Binary files 
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
 and 
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
 differ
diff --git 
a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
 
b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
index 3056dab2a36..2d177154b49 100644
--- 
a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
@@ -40,6 +40,9 @@ suite("test_cast_struct") {
     // struct type cast to struct
     qt_sql14 "select cast(cast({1,'2022-10-10'} as struct<f1:int,f2:date>) as 
struct<f1:double,f2:datetime>)"
 
+    // string cast to struct when the string has more fields than the struct schema
+    qt_sql15 """select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, 
b:int>)"""
+
     // basic types except string can not cast to struct 
     test {
         sql "select cast(cast(1 as int) as struct<f1:int>)"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to