This is an automated email from the ASF dual-hosted git repository.
panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new e8717f2587b [fix](core) Fix the core issue caused by extra fields when
casting a string to a struct. (#50851)
e8717f2587b is described below
commit e8717f2587b00793f037357dfe8bab164b62a017
Author: Mryange <[email protected]>
AuthorDate: Fri May 16 15:54:40 2025 +0800
[fix](core) Fix the core issue caused by extra fields when casting a string
to a struct. (#50851)
```
select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, b:int>);
==3393513==ERROR: AddressSanitizer: heap-buffer-overflow on address
0x506001118e20 at pc 0x55808f91364a bp 0x7f9ec8816af0 sp 0x7f9ec8816ae8
READ of size 8 at 0x506001118e20 thread T1298 (brpc_light)
#0 0x55808f913649 in std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char>>::_M_data() const
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/basic_string.h:223:28
#1 0x55808f9386f6 in std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char>>::data() const
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/basic_string.h:2597:16
#2 0x55808fc4e57a in
doris::Slice::Slice(std::__cxx11::basic_string<char, std::char_traits<char>,
std::allocator<char>> const&)
/mnt/disk12/yanxuecheng/doris/be/src/util/slice.h:72:40
#3 0x5580bebb7ed9 in
doris::vectorized::DataTypeStructSerDe::deserialize_one_cell_from_json(doris::vectorized::IColumn&,
doris::Slice&, doris::vectorized::DataTypeSerDe::FormatOptions const&) const
/mnt/disk12/yanxuecheng/doris/be/src/vec/data_types/serde/data_type_struct_serde.cpp:141:17
#4 0x5580c90cf235 in
doris::vectorized::ConvertImplGenericFromString::execute(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<unsigned int, std::allocator<unsigned
int>> const&, unsigned int, unsigned long)
/mnt/disk12/yanxuecheng/doris/be/src/vec/functions/function_cast.h:513:36
#5 0x5580c89607e6 in doris::Status std::__invoke_impl<doris::Status,
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long), doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long,
unsigned long>(std::__invoke_other, doris::Status (*&)(doris::FunctionContext*,
doris::vectorized::Block&, std::vector<u [...]
#6 0x5580c89605ca in std::enable_if<is_invocable_r_v<doris::Status,
doris::Status (*&)(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long), doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long,
unsigned long>, doris::Status>::type std::__invoke_r<doris::Status,
doris::Status (*&)(doris::FunctionContext*, doris::ve [...]
#7 0x5580c8960201 in std::_Function_handler<doris::Status
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int,
std::allocator<unsigned int>> const&, unsigned long, unsigned long),
doris::Status (*)(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long)>::_M_invoke(std::_Any_data const&, doris::FunctionContext*&&,
doris::vectorized::Block&, std::vector<unsigned int, s [...]
#8 0x5580c827ff5c in std::function<doris::Status
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int,
std::allocator<unsigned int>> const&, unsigned long, unsigned
long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long,
unsigned long) const
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
#9 0x5580c88fb7a7 in
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
std::shared_ptr<doris::vectorized::IDataType const> const&,
std::shared_ptr<doris::vectorized::IDataType const> const&, bool)
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long)::operator()(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std:: [...]
#10 0x5580c88fae32 in doris::Status std::__invoke_impl<doris::Status,
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
std::shared_ptr<doris::vectorized::IDataType const> const&,
std::shared_ptr<doris::vectorized::IDataType const> const&, bool)
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long)&, doris::FunctionContext*, doris::vectorized:: [...]
#11 0x5580c88fac0a in std::enable_if<is_invocable_r_v<doris::Status,
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
std::shared_ptr<doris::vectorized::IDataType const> const&,
std::shared_ptr<doris::vectorized::IDataType const> const&, bool)
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned int,
unsigned long)&, doris::FunctionContext*, doris::vectorized::B [...]
#12 0x5580c88fa641 in std::_Function_handler<doris::Status
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int,
std::allocator<unsigned int>> const&, unsigned long, unsigned long),
doris::vectorized::FunctionCast::prepare_remove_nullable(doris::FunctionContext*,
std::shared_ptr<doris::vectorized::IDataType const> const&,
std::shared_ptr<doris::vectorized::IDataType const> const&, bool)
const::'lambda'(doris::FunctionContext*, doris::vectorized::Block&, st [...]
#13 0x5580c827ff5c in std::function<doris::Status
(doris::FunctionContext*, doris::vectorized::Block&, std::vector<unsigned int,
std::allocator<unsigned int>> const&, unsigned long, unsigned
long)>::operator()(doris::FunctionContext*, doris::vectorized::Block&,
std::vector<unsigned int, std::allocator<unsigned int>> const&, unsigned long,
unsigned long) const
/mnt/disk6/common/ldb_toolchain_robin/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9
```
With this fix, the same query now returns NULL instead of crashing:
```
mysql> select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int,
b:int>);
+-----------------------------------------------------------------+
| cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int, b:int>) |
+-----------------------------------------------------------------+
| NULL |
+-----------------------------------------------------------------+
```
---
be/src/vec/data_types/serde/data_type_struct_serde.cpp | 10 ++++++++++
.../sql_functions/cast_function/test_cast_struct.out | Bin 496 -> 513 bytes
.../sql_functions/cast_function/test_cast_struct.groovy | 3 +++
3 files changed, 13 insertions(+)
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index beb829dc521..d6c0c26a06c 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -111,6 +111,7 @@ Status
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
char quote_char = 0;
auto elem_size = elem_serdes_ptrs.size();
+ DCHECK_EQ(elem_size, elem_names.size());
int field_pos = 0;
for (; idx < slice_size; ++idx) {
@@ -138,6 +139,15 @@ Status
DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic
next.trim_prefix();
next.trim_quote();
// check field_name
+ if (field_pos >= elem_size) {
+ // we should do column revert if error
+ for (size_t j = 0; j < field_pos; j++) {
+ struct_column.get_column(j).pop_back(1);
+ }
+ return Status::InvalidArgument(
+ "Actual struct field number is more than schema field
number {}.",
+ field_pos, elem_size);
+ }
if (elem_names[field_pos] != next) {
// we should do column revert if error
for (size_t j = 0; j < field_pos; j++) {
diff --git
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
index fa63c0da504..b27ed779acb 100644
Binary files
a/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
and
b/regression-test/data/query_p0/sql_functions/cast_function/test_cast_struct.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
index 3056dab2a36..2d177154b49 100644
---
a/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
+++
b/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_struct.groovy
@@ -40,6 +40,9 @@ suite("test_cast_struct") {
// struct type cast to struct
qt_sql14 "select cast(cast({1,'2022-10-10'} as struct<f1:int,f2:date>) as
struct<f1:double,f2:datetime>)"
+ // struct type cast to struct with different field name
+ qt_sql15 """select cast('{"a":1,"b":"1","c":"1","d":"1"}' as struct<a:int,
b:int>)"""
+
// basic types except string can not cast to struct
test {
sql "select cast(cast(1 as int) as struct<f1:int>)"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]