This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new da78c26cc00 [fix](mysql)fix mysql row buffer open_dynamic_mode make 
_pos pointer out range of _buf (#37936)
da78c26cc00 is described below

commit da78c26cc002b22c5b95bbdb900d6f467ec74cfa
Author: amory <[email protected]>
AuthorDate: Thu Jul 18 11:09:44 2024 +0800

    [fix](mysql)fix mysql row buffer open_dynamic_mode make _pos pointer out 
range of _buf (#37936)
    
    If we select a nested type such as map/array/struct after a large string,
    the reserve() made for the string column in mysql_row_buf may leave the
    buffer not large enough. The nested type's open_dynamic_mode then moves
    the _pos pointer out of range of mysql_row_buf, and when the nested type
    calls push_string, reserve() triggers a heap-buffer-overflow:
    ```
    ==200769==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x62d0051c12ba at pc 0x55a77788692d bp 0x7fb52f474a30 sp 0x7fb52f4741f8
    READ of size 36541 at 0x62d0051c12ba thread T2309 (Pipe_normal [wo)
        #0 0x55a77788692c in __asan_memcpy 
(/mnt/disk1/wangqiannan/amory/doris/output/be/lib/doris_be+0x60c1c92c) 
(BuildId: 4513940b6b9e22fa)
        #1 0x55a7a1f622fa in doris::MysqlRowBuffer<false>::reserve(long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/mysql_row_buffer.cpp:140:5
        #2 0x55a7a1f638eb in doris::MysqlRowBuffer<false>::push_string(char 
const*, long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/mysql_row_buffer.cpp:473:5
        #3 0x55a7a21f16eb in doris::Status 
doris::vectorized::DataTypeMapSerDe::_write_column_to_mysql<false>(doris::vectorized::IColumn
 const&, doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_map_serde.cpp:410:21
        #4 0x55a7a21e4c1e in 
doris::vectorized::DataTypeMapSerDe::write_column_to_mysql(doris::vectorized::IColumn
 const&, doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_map_serde.cpp:478:12
        #5 0x55a7a22070e6 in doris::Status 
doris::vectorized::DataTypeNullableSerDe::_write_column_to_mysql<false>(doris::vectorized::IColumn
 const&, doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_nullable_serde.cpp:300:9
        #6 0x55a7a21fbc5e in 
doris::vectorized::DataTypeNullableSerDe::write_column_to_mysql(doris::vectorized::IColumn
 const&, doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_nullable_serde.cpp:317:12
        #7 0x55a7c2e97e6c in 
doris::vectorized::VMysqlResultWriter<false>::write(doris::RuntimeState*, 
doris::vectorized::Block&) 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/sink/vmysql_result_writer.cpp:216:17
        #8 0x55a7c8031b83 in 
doris::pipeline::ResultSinkOperatorX::sink(doris::RuntimeState*, 
doris::vectorized::Block*, bool) 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/exec/result_sink_operator.cpp:142:5
        #9 0x55a7c99a81d6 in 
doris::pipeline::PipelineTask::execute(bool*)::$_1::operator()() const 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/pipeline_task.cpp:361:38
        #10 0x55a7c99a4b27 in doris::pipeline::PipelineTask::execute(bool*) 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/pipeline_task.cpp:364:22
        #11 0x55a7c9a23a2b in doris::pipeline::TaskScheduler::_do_work(unsigned 
long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/task_scheduler.cpp:138:9
        #12 0x55a7c9a269ca in 
doris::pipeline::TaskScheduler::start()::$_0::operator()() const 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/task_scheduler.cpp:64:9
        #13 0x55a7c9a2694e in void std::__invoke_impl<void, 
doris::pipeline::TaskScheduler::start()::$_0&>(std::__invoke_other, 
doris::pipeline::TaskScheduler::start()::$_0&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
        #14 0x55a7c9a268ae in std::enable_if<is_invocable_r_v<void, 
doris::pipeline::TaskScheduler::start()::$_0&>, void>::type 
std::__invoke_r<void, 
doris::pipeline::TaskScheduler::start()::$_0&>(doris::pipeline::TaskScheduler::start()::$_0&)
 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
        #15 0x55a7c9a26635 in std::_Function_handler<void (), 
doris::pipeline::TaskScheduler::start()::$_0>::_M_invoke(std::_Any_data const&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
        #16 0x55a777b226da in std::function<void ()>::operator()() const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9
        #17 0x55a77e95ec94 in doris::FunctionRunnable::run() 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/threadpool.cpp:48:27
        #18 0x55a77e941015 in doris::ThreadPool::dispatch_thread() 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/threadpool.cpp:543:24
        #19 0x55a77e97eb23 in void std::__invoke_impl<void, void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(std::__invoke_memfun_deref, 
void (doris::ThreadPool::*&)(), doris::ThreadPool*&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:74:14
        #20 0x55a77e97e928 in std::__invoke_result<void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>::type std::__invoke<void 
(doris::ThreadPool::*&)(), doris::ThreadPool*&>(void (doris::ThreadPool::*&)(), 
doris::ThreadPool*&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:96:14
        #21 0x55a77e97e860 in void std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>::__call<void, 0ul>(std::tuple<>&&, 
std::_Index_tuple<0ul>) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/functional:420:11
        #22 0x55a77e97e655 in void std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>::operator()<void>() 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/functional:503:17
        #23 0x55a77e97e54e in void std::__invoke_impl<void, std::_Bind<void 
(doris::ThreadPool::* (doris::ThreadPool*))()>&>(std::__invoke_other, 
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14
        #24 0x55a77e97e48e in std::enable_if<is_invocable_r_v<void, 
std::_Bind<void (doris::ThreadPool::* (doris::ThreadPool*))()>&>, void>::type 
std::__invoke_r<void, std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>&>(std::_Bind<void (doris::ThreadPool::* 
(doris::ThreadPool*))()>&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2
        #25 0x55a77e97dd85 in std::_Function_handler<void (), std::_Bind<void 
(doris::ThreadPool::* (doris::ThreadPool*))()>>::_M_invoke(std::_Any_data 
const&) 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9
        #26 0x55a777b226da in std::function<void ()>::operator()() const 
/mnt/disk1/wangqiannan/tool/ldb_toolchain_16/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9
        #27 0x55a77e8fb841 in doris::Thread::supervise_thread(void*) 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/thread.cpp:498:5
        #28 0x7fc1c3a111c9 in start_thread (/lib64/libpthread.so.0+0x81c9) 
(BuildId: 823fccea3475e5870a4167dfe47df20e53222db0)
        #29 0x7fc1c4400e72 in clone (/lib64/libc.so.6+0x39e72) (BuildId: 
ec3d7025354f1f1985831ff08ef0eb3b50aefbce)
    
    0x62d0051c12ba is located 0 bytes after 36538-byte region 
[0x62d0051b8400,0x62d0051c12ba)
    allocated by thread T2309 (Pipe_normal [wo) here:
        #0 0x55a7778c20bd in operator new[](unsigned long) 
(/mnt/disk1/wangqiannan/amory/doris/output/be/lib/doris_be+0x60c580bd) 
(BuildId: 4513940b6b9e22fa)
        #1 0x55a7a1f621c1 in doris::MysqlRowBuffer<false>::reserve(long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/mysql_row_buffer.cpp:137:21
        #2 0x55a7a1f638eb in doris::MysqlRowBuffer<false>::push_string(char 
const*, long) 
/mnt/disk1/wangqiannan/amory/doris/be/src/util/mysql_row_buffer.cpp:473:5
        #3 0x55a7a1fd0d75 in doris::Status 
doris::vectorized::DataTypeStringSerDeBase<doris::vectorized::ColumnStr<unsigned
 int>>::_write_column_to_mysql<false>(doris::vectorized::IColumn const&, 
doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_string_serde.h:260:16
        #4 0x55a7a1fccc1e in 
doris::vectorized::DataTypeStringSerDeBase<doris::vectorized::ColumnStr<unsigned
 int>>::write_column_to_mysql(doris::vectorized::IColumn const&, 
doris::MysqlRowBuffer<false>&, int, bool, 
doris::vectorized::DataTypeSerDe::FormatOptions const&) const 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_string_serde.h:215:16
        #5 0x55a7c2e97e6c in 
doris::vectorized::VMysqlResultWriter<false>::write(doris::RuntimeState*, 
doris::vectorized::Block&) 
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/sink/vmysql_result_writer.cpp:216:17
        #6 0x55a7c8031b83 in 
doris::pipeline::ResultSinkOperatorX::sink(doris::RuntimeState*, 
doris::vectorized::Block*, bool) 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/exec/result_sink_operator.cpp:142:5
        #7 0x55a7c99a81d6 in 
doris::pipeline::PipelineTask::execute(bool*)::$_1::operator()() const 
/mnt/disk1/wangqiannan/amory/doris/be/src/pipeline/pipeline_task.cpp:361:38
    ```
    
    ## Proposed changes
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 be/src/util/mysql_row_buffer.cpp                   |  5 ++-
 .../test_large_string_and_nested_type.out          |  4 ++
 .../test_large_string_and_nested_type.groovy       | 46 ++++++++++++++++++++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/be/src/util/mysql_row_buffer.cpp b/be/src/util/mysql_row_buffer.cpp
index 3e4aa332cea..4823920508a 100644
--- a/be/src/util/mysql_row_buffer.cpp
+++ b/be/src/util/mysql_row_buffer.cpp
@@ -44,6 +44,9 @@ namespace doris {
 static uint8_t NEXT_TWO_BYTE = 252;
 static uint8_t NEXT_THREE_BYTE = 253;
 static uint8_t NEXT_EIGHT_BYTE = 254;
+// EXTRA_RESERVE_BYTE is extra space added to each allocation to make sure the
+// _pos pointer is always inside the _buf memory.
+// Used in reserve() when allocating the current buffer.
+static size_t EXTRA_RESERVE_BYTE = 16;
 
 // the first byte:
 // <= 250: length
@@ -133,7 +136,7 @@ int MysqlRowBuffer<is_binary_format>::reserve(int64_t size) 
{
         return 0;
     }
 
-    int64_t alloc_size = std::max(need_size, _buf_size * 2);
+    int64_t alloc_size = std::max(need_size, _buf_size * 2) + 
EXTRA_RESERVE_BYTE;
     char* new_buf = new char[alloc_size];
 
     size_t offset = _pos - _buf;
diff --git 
a/regression-test/data/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.out
 
b/regression-test/data/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.out
new file mode 100644
index 00000000000..a9e54877534
--- /dev/null
+++ 
b/regression-test/data/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.out
@@ -0,0 +1,4 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+1      
bbyNAEofVG2QVSEnKaKtNIZRA9BgH409IxsionnxaH5z70Ttfz5mobCvUA5xM7O6jPMLHEoSkngDfficKjKJISfl822rQlrYithBeQLVw1upYyXCTxtS2Nl0B3vi3tGeN4j6kPuhJoCQLLSivaHQrTgCCpV8ItDuQIoiRlkolabnjDHKShh3YracboXMHAYebiUxuaq3G3glxz09zXccRCgXzyDUIV4sJMO0GsE8JDo2QnzZpIcUvngS8uJhyIes0eIuPTAVFWQzYlEPopQKViLSzckSi86yIUnNABwFSTp6CTL651nNe9XifaXqQphZdnrLs7ngSJVexlSVLc2ECV1e1ZCP98L18gtgsG8P0LbUaTiWQULfSiMBtt1h2N5AjOVLQaHrkquETQDmyAhYipPfHxp9In6GFlpilAJQFpOxRe54xkiYkBMQAO4hTq0reOjDH71hqH1fWrRe1CYbjhbh1PUQlWUsjgEM9FwVGz
 [...]
+
diff --git 
a/regression-test/suites/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.groovy
 
b/regression-test/suites/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.groovy
new file mode 100644
index 00000000000..28a68681f7c
--- /dev/null
+++ 
b/regression-test/suites/datatype_p0/nested_types/corner_cases/test_large_string_and_nested_type.groovy
@@ -0,0 +1,46 @@
+import org.apache.commons.lang3.StringUtils
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_large_string_and_nested_type", "p0") {
+    sql """ DROP TABLE IF EXISTS test_large_string_and_nested_type"""
+    sql """ CREATE TABLE `test_large_string_and_nested_type` (
+              `col1` INT NULL,
+              `col2` VARCHAR(49053) NOT NULL,
+              `col3` MAP<TEXT,TEXT> NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`col1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`col1`) BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "min_load_replica_num" = "-1",
+            "is_being_synced" = "false",
+            "storage_medium" = "hdd",
+            "storage_format" = "V2",
+            "inverted_index_storage_format" = "V2",
+            "light_schema_change" = "true",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false",
+            "group_commit_interval_ms" = "10000",
+            "group_commit_data_bytes" = "134217728"
+            );
+            """
+    sql """INSERT INTO test_large_string_and_nested_type VALUES (1, 
'bbyNAEofVG2QVSEnKaKtNIZRA9BgH409IxsionnxaH5z70Ttfz5mobCvUA5xM7O6jPMLHEoSkngDfficKjKJISfl822rQlrYithBeQLVw1upYyXCTxtS2Nl0B3vi3tGeN4j6kPuhJoCQLLSivaHQrTgCCpV8ItDuQIoiRlkolabnjDHKShh3YracboXMHAYebiUxuaq3G3glxz09zXccRCgXzyDUIV4sJMO0GsE8JDo2QnzZpIcUvngS8uJhyIes0eIuPTAVFWQzYlEPopQKViLSzckSi86yIUnNABwFSTp6CTL651nNe9XifaXqQphZdnrLs7ngSJVexlSVLc2ECV1e1ZCP98L18gtgsG8P0LbUaTiWQULfSiMBtt1h2N5AjOVLQaHrkquETQDmyAhYipPfHxp9In6GFlpilAJ
 [...]
+    qt_sql """ select * from  test_large_string_and_nested_type """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to