This is an automated email from the ASF dual-hosted git repository.

morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new d5577ecd6d0 branch-3.1: [fix](load)fix ingestion load error case cause be core. (#55500)
d5577ecd6d0 is described below

commit d5577ecd6d084a9c2330daf2a8f449176961c1e5
Author: daidai <[email protected]>
AuthorDate: Thu Sep 4 10:21:17 2025 +0800

    branch-3.1: [fix](load)fix ingestion load error case cause be core. (#55500)
    
    ### What problem does this PR solve?
    Related PR: #45937
    
    Problem Summary:
    Fix the error case in ingestion load and the core dump in the parquet reader.
    
    ==8898==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x62f0020603fc at pc 0x55f634e64ded bp 0x7fba0d03c410 sp 0x7fba0d03bbd8
    READ of size 4 at 0x62f0020603fc thread T768 (PUSH-9699)
        #0 0x55f634e64dec in __asan_memcpy (/mnt/hdd01/ci/doris-deploy-branch-3.1-local/be/lib/doris_be+0x39a24dec) (BuildId: 9b04e7f7d3075dac)
        #1 0x55f634eca93f in std::char_traits<char>::copy(char*, char const*, unsigned long) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/char_traits.h:409:33
        #2 0x55f634eca93f in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy(char*, char const*, unsigned long) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:351:4
        #3 0x55f634eca93f in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy_chars(char*, char const*, char const*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:398:9
        #4 0x55f634eca93f in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:225:6
        #5 0x55f654a4f74d in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct_aux<char const*>(char const*, char const*, std::__false_type) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:247:11
        #6 0x55f654a4f74d in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:266:4
        #7 0x55f654a4f74d in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(char const*, unsigned long, std::allocator<char> const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:513:9
        #8 0x55f654a4f74d in doris::vectorized::parse_thrift_footer(std::shared_ptr<doris::io::FileReader>, doris::vectorized::FileMetaData**, unsigned long*, doris::io::IOContext*) /home/zcp/repo_center/doris_branch-3.1/doris/be/src/vec/exec/format/parquet/parquet_thrift_util.h:55:17
---
 be/src/vec/exec/format/parquet/parquet_thrift_util.h      |   7 +++++--
 .../test_ingestion_load_alter_partition.out               | Bin 0 -> 162 bytes
 .../load_p0/ingestion_load/test_ingestion_load.groovy     |  10 +++++-----
 .../test_ingestion_load_alter_column.groovy               |  10 +++++-----
 .../test_ingestion_load_alter_partition.groovy            |  11 ++++++-----
 .../ingestion_load/test_ingestion_load_drop_table.groovy  |   3 +--
 .../ingestion_load/test_ingestion_load_multi_table.groovy |   2 +-
 .../test_ingestion_load_with_inverted_index.groovy        |   2 +-
 .../test_ingestion_load_with_partition.groovy             |  10 +++++-----
 9 files changed, 29 insertions(+), 26 deletions(-)
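
The trace bottoms out in the error path of parse_thrift_footer: magic_ptr is footer.data() + bytes_read - 4, so when a short read returns fewer than four bytes the pointer lands before the start of the buffer, and formatting those four bytes into the "read magic" part of the Corruption message is the out-of-bounds read ASAN reports at parquet_thrift_util.h:55. Below is a minimal, self-contained C++ sketch of the pattern the patch enforces (validate the length before trusting any pointer derived from it); the constant values and helper names are illustrative stand-ins, not the Doris sources:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <iostream>
    #include <string>

    // Illustrative stand-ins (assumed values), not the Doris constants.
    constexpr size_t kFooterSize = 8;                // 4-byte metadata length + 4-byte magic
    constexpr char kMagic[4] = {'P', 'A', 'R', '1'}; // parquet trailing magic "PAR1"

    // Mirrors the shape of the patched check: length first, magic second.
    std::string validate_footer(const uint8_t* footer, size_t bytes_read) {
        if (bytes_read < kFooterSize) {
            // Short read: report sizes only; never touch footer + bytes_read - 4,
            // which would point before the buffer whenever bytes_read < 4.
            return "short footer read: " + std::to_string(bytes_read) + " bytes";
        }
        const char* magic_ptr = reinterpret_cast<const char*>(footer) + bytes_read - 4;
        if (std::memcmp(magic_ptr, kMagic, sizeof(kMagic)) != 0) {
            // Only now is it safe to echo the magic bytes back in the message.
            return "bad magic: " + std::string(magic_ptr, 4);
        }
        return "ok";
    }

    int main() {
        uint8_t buf[8] = {};
        std::cout << validate_footer(buf, 2) << "\n"; // rejected before any out-of-bounds read
    }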

diff --git a/be/src/vec/exec/format/parquet/parquet_thrift_util.h b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
index 15927fe4f65..b8475ffa989 100644
--- a/be/src/vec/exec/format/parquet/parquet_thrift_util.h
+++ b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
@@ -46,8 +46,11 @@ static Status parse_thrift_footer(io::FileReaderSPtr file, FileMetaData** file_m
 
     // validate magic
     uint8_t* magic_ptr = footer.data() + bytes_read - 4;
-    if (bytes_read < PARQUET_FOOTER_SIZE ||
-        memcmp(magic_ptr, PARQUET_VERSION_NUMBER, sizeof(PARQUET_VERSION_NUMBER)) != 0) {
+    if (bytes_read < PARQUET_FOOTER_SIZE) {
+        return Status::Corruption(
+                "Read parquet file footer fail, bytes read: {}, file size: {}, 
path: {}",
+                bytes_read, file_size, file->path().native());
+    } else if (memcmp(magic_ptr, PARQUET_VERSION_NUMBER, sizeof(PARQUET_VERSION_NUMBER)) != 0) {
         return Status::Corruption(
                 "Invalid magic number in parquet file, bytes read: {}, file 
size: {}, path: {}, "
                 "read magic: {}",
diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out
new file mode 100644
index 00000000000..37d0553e58c
Binary files /dev/null and b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out differ
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
index 91e20070c09..74f5f9398fe 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
@@ -21,7 +21,7 @@ import java.nio.file.StandardCopyOption
 
 suite('test_ingestion_load', 'p0,external') {
 
-    def testIngestLoadJob = { testTable, loadLabel, String dataFile ->
+    def testIngestLoadJob = { testTable, loadLabel, String dataFile , filesize ->
 
         sql "TRUNCATE TABLE ${testTable}"
 
@@ -85,7 +85,7 @@ suite('test_ingestion_load', 'p0,external') {
                     "msg": "",
                     "appId": "",
                     "dppResult": "${dppResult}",
-                    "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                    "filePathToSize": "{\\"${etlResultFilePath}\\": 
${filesize}}",
                     "hadoopProperties": 
"{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                 }
             }"""
@@ -156,7 +156,7 @@ suite('test_ingestion_load', 'p0,external') {
 
         def label = "test_ingestion_load"
 
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet',5745)
 
         tableName = 'tbl_test_spark_load_unique_mor'
 
@@ -189,7 +189,7 @@ suite('test_ingestion_load', 'p0,external') {
 
         label = "test_ingestion_load_unique_mor"
 
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet',5745)
 
         tableName = 'tbl_test_spark_load_agg'
 
@@ -215,7 +215,7 @@ suite('test_ingestion_load', 'p0,external') {
 
         label = "test_ingestion_load_agg"
 
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data1.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data1.parquet',4057)
 
     }
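
The Groovy changes are the other half of the fix: the mocked ETL result hardcoded filePathToSize at 81758 bytes regardless of which parquet file was actually staged (5745 bytes for data.parquet, 4057 for data1.parquet), so the BE derived the footer offset from a claimed size larger than the real file, the likely trigger for the short read in the trace above. The closure now takes the true size per file, and the suites below get the same literal corrected.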
 
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
index 89be972b5bf..a4f9617ca76 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
                     "msg": "",
                     "appId": "",
                     "dppResult": "${dppResult}",
-                    "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                    "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                     "hadoopProperties": 
"{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                 }
             }"""
@@ -112,7 +112,7 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
         while (max_try_milli_secs) {
             def result = sql "show load where label = '${loadLabel}'"
             if (result[0][2] == "CANCELLED") {
-                msg = result[0][7]
+                def  msg = result[0][7]
                 logger.info("err msg: " + msg)
                assertTrue((result[0][7] =~ /schema of index \[\d+\] has changed/).find())
                 break
@@ -134,6 +134,8 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
 
         try {
 
+            sql "DROP TABLE if exists ${tableName1}"
+            sql "DROP TABLE if exists ${tableName2}"
             sql """
                 CREATE TABLE IF NOT EXISTS ${tableName1} (
                     c_int int(11) NULL,
@@ -199,10 +201,8 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
             })
 
         } finally {
-            //sql "DROP TABLE ${tableName1}"
-            //sql "DROP TABLE ${tableName2}"
         }
 
     }
 
-}
\ No newline at end of file
+}
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
index 83492d1bf1c..56002a7318b 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
@@ -123,8 +123,8 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
                 qt_select "select c1, count(*) from ${testTable} group by c1 
order by c1"
                 break
             } else if (result[0][2] == "CANCELLED") {
-                msg = result[0][7]
-                logger.info("err msg: " + msg)
+                def msg2 = result[0][7]
+                logger.info("err msg: " + msg2)
                 assertTrue((result[0][7] =~ /partition does not exist/).find())
                 break
             } else {
@@ -146,6 +146,10 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
 
         try {
 
+            sql "DROP TABLE if exists ${tableName1}"
+            sql "DROP TABLE if exists ${tableName2}"
+            sql "DROP TABLE if exists ${tableName3}"
+
             sql """
                 CREATE TABLE IF NOT EXISTS ${tableName1} (
                     c0 int not null,
@@ -214,9 +218,6 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
             })
 
         } finally {
-//            sql "DROP TABLE ${tableName1}"
-//            sql "DROP TABLE ${tableName2}"
-//            sql "DROP TABLE ${tableName3}"
         }
 
     }
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
index 1f0adb8c1c0..c5b5fc90de9 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_drop_table', 'p0,external') {
                     "msg": "",
                     "appId": "",
                     "dppResult": "${dppResult}",
-                    "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                    "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                     "hadoopProperties": 
"{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                 }
             }"""
@@ -188,7 +188,6 @@ suite('test_ingestion_load_drop_table', 'p0,external') {
             })
 
         } finally {
-            sql "DROP TABLE ${tableName}"
         }
 
     }
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
index e536b57c204..34de65761d0 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
@@ -103,7 +103,7 @@ suite('test_ingestion_load_multi_table', 'p0,external') {
                     "msg": "",
                     "appId": "",
                     "dppResult": "${dppResult}",
-                    "filePathToSize": "{\\"${etlResultFilePath1}\\": 81758, 
\\"${etlResultFilePath2}\\": 81758}",
+                    "filePathToSize": "{\\"${etlResultFilePath1}\\": 5745, 
\\"${etlResultFilePath2}\\": 5745}",
                     "hadoopProperties": 
"{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                 }
             }"""
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
index 15db777ddee..08e1aeea353 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_with_inverted_index', 'p0,external') {
                     "msg": "",
                     "appId": "",
                     "dppResult": "${dppResult}",
-                    "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                    "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                     "hadoopProperties": 
"{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                 }
             }"""
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
index 12a904f15d8..c7843d5a866 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
@@ -71,7 +71,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
             }
         }
 
-        etlResultFilePaths = []
+        def etlResultFilePaths = []
         for(int i=0; i < dataFiles.size(); i++) {
             Files.copy(Paths.get(dataFiles[i]),
                Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileNames[i]}"), StandardCopyOption.REPLACE_EXISTING)
@@ -115,7 +115,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
 
         def max_try_milli_secs = 120000
         while (max_try_milli_secs) {
-            result = sql "show load where label = '${loadLabel}'"
+            def result = sql "show load where label = '${loadLabel}'"
             if (result[0][2] == "FINISHED") {
                 sql "sync"
                 qt_select "select c1, count(*) from ${testTable} group by c1 
order by c1"
@@ -133,8 +133,8 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
 
     if (enableHdfs()) {
 
-        def tableName = 'tbl_test_spark_load_partition'
-
+        def tableName = 'tbl_test_spark_load_with_partition'
+        sql "DROP TABLE if exists ${tableName}"
         sql """
             CREATE TABLE IF NOT EXISTS ${tableName} (
                 c0 int not null,
@@ -151,7 +151,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
             )
             """
 
-        def label = "test_ingestion_load_partition"
+        def label = "test_ingestion_load_with_partition__"
 
        testIngestLoadJob.call(tableName, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet'])
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
