This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new d5577ecd6d0 branch-3.1: [fix](load) fix ingestion load error case causing BE core (#55500)
d5577ecd6d0 is described below
commit d5577ecd6d084a9c2330daf2a8f449176961c1e5
Author: daidai <[email protected]>
AuthorDate: Thu Sep 4 10:21:17 2025 +0800
branch-3.1: [fix](load) fix ingestion load error case causing BE core (#55500)
### What problem does this PR solve?
Related PR: #45937
Problem Summary:
Fix the error case in ingestion load and the resulting core dump in the parquet reader.
==8898==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x62f0020603fc at pc 0x55f634e64ded bp 0x7fba0d03c410 sp 0x7fba0d03bbd8
READ of size 4 at 0x62f0020603fc thread T768 (PUSH-9699)
    #0 0x55f634e64dec in __asan_memcpy (/mnt/hdd01/ci/doris-deploy-branch-3.1-local/be/lib/doris_be+0x39a24dec) (BuildId: 9b04e7f7d3075dac)
    #1 0x55f634eca93f in std::char_traits<char>::copy(char*, char const*, unsigned long) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/char_traits.h:409:33
    #2 0x55f634eca93f in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy(char*, char const*, unsigned long) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:351:4
    #3 0x55f634eca93f in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_S_copy_chars(char*, char const*, char const*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:398:9
    #4 0x55f634eca93f in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*, std::forward_iterator_tag) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:225:6
    #5 0x55f654a4f74d in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct_aux<char const*>(char const*, char const*, std::__false_type) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:247:11
    #6 0x55f654a4f74d in void std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::_M_construct<char const*>(char const*, char const*) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:266:4
    #7 0x55f654a4f74d in std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(char const*, unsigned long, std::allocator<char> const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:513:9
    #8 0x55f654a4f74d in doris::vectorized::parse_thrift_footer(std::shared_ptr<doris::io::FileReader>, doris::vectorized::FileMetaData**, unsigned long*, doris::io::IOContext*) /home/zcp/repo_center/doris_branch-3.1/doris/be/src/vec/exec/format/parquet/parquet_thrift_util.h:55:17
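
Frame #8 pins the read to `parse_thrift_footer`: `magic_ptr` is computed as `footer.data() + bytes_read - 4` before the footer length is validated, and the old error path copied four bytes from that pointer into the `std::string` placed in the Corruption message, so a truncated read with `bytes_read < 4` makes the copy start outside the buffer. A minimal repro sketch of that failure mode, with a hypothetical `footer` vector standing in for the real read buffer (compile with `-fsanitize=address` to get the report above):

```cpp
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

int main() {
    // Hypothetical stand-in for the footer buffer: a truncated parquet
    // file can yield fewer than the 4 magic bytes here.
    std::vector<uint8_t> footer(2);
    std::size_t bytes_read = footer.size();

    // Same pointer arithmetic as parse_thrift_footer: with bytes_read < 4
    // this points before the start of the heap allocation.
    const uint8_t* magic_ptr = footer.data() + bytes_read - 4;

    // The pre-fix error path built the "read magic: {}" argument from
    // magic_ptr unconditionally; this 4-byte copy is the READ of size 4
    // that ASan flags inside basic_string's constructor.
    std::string read_magic(reinterpret_cast<const char*>(magic_ptr), 4);
    return static_cast<int>(read_magic.size());
}
```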
---
be/src/vec/exec/format/parquet/parquet_thrift_util.h | 7 +++++--
.../test_ingestion_load_alter_partition.out | Bin 0 -> 162 bytes
.../load_p0/ingestion_load/test_ingestion_load.groovy | 10 +++++-----
.../test_ingestion_load_alter_column.groovy | 10 +++++-----
.../test_ingestion_load_alter_partition.groovy | 11 ++++++-----
.../ingestion_load/test_ingestion_load_drop_table.groovy | 3 +--
.../ingestion_load/test_ingestion_load_multi_table.groovy | 2 +-
.../test_ingestion_load_with_inverted_index.groovy | 2 +-
.../test_ingestion_load_with_partition.groovy | 10 +++++-----
9 files changed, 29 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_thrift_util.h b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
index 15927fe4f65..b8475ffa989 100644
--- a/be/src/vec/exec/format/parquet/parquet_thrift_util.h
+++ b/be/src/vec/exec/format/parquet/parquet_thrift_util.h
@@ -46,8 +46,11 @@ static Status parse_thrift_footer(io::FileReaderSPtr file, FileMetaData** file_m
     // validate magic
     uint8_t* magic_ptr = footer.data() + bytes_read - 4;
-    if (bytes_read < PARQUET_FOOTER_SIZE ||
-        memcmp(magic_ptr, PARQUET_VERSION_NUMBER, sizeof(PARQUET_VERSION_NUMBER)) != 0) {
+    if (bytes_read < PARQUET_FOOTER_SIZE) {
+        return Status::Corruption(
+                "Read parquet file footer fail, bytes read: {}, file size: {}, path: {}",
+                bytes_read, file_size, file->path().native());
+    } else if (memcmp(magic_ptr, PARQUET_VERSION_NUMBER, sizeof(PARQUET_VERSION_NUMBER)) != 0) {
         return Status::Corruption(
                 "Invalid magic number in parquet file, bytes read: {}, file size: {}, path: {}, "
                 "read magic: {}",
diff --git a/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out
new file mode 100644
index 00000000000..37d0553e58c
Binary files /dev/null and b/regression-test/data/load_p0/ingestion_load/test_ingestion_load_alter_partition.out differ
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
index 91e20070c09..74f5f9398fe 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load.groovy
@@ -21,7 +21,7 @@ import java.nio.file.StandardCopyOption
 suite('test_ingestion_load', 'p0,external') {
-    def testIngestLoadJob = { testTable, loadLabel, String dataFile ->
+    def testIngestLoadJob = { testTable, loadLabel, String dataFile , filesize ->
         sql "TRUNCATE TABLE ${testTable}"
@@ -85,7 +85,7 @@ suite('test_ingestion_load', 'p0,external') {
                         "msg": "",
                         "appId": "",
                         "dppResult": "${dppResult}",
-                        "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                        "filePathToSize": "{\\"${etlResultFilePath}\\": ${filesize}}",
                         "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                     }
                 }"""
@@ -156,7 +156,7 @@ suite('test_ingestion_load', 'p0,external') {
         def label = "test_ingestion_load"
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet',5745)
         tableName = 'tbl_test_spark_load_unique_mor'
@@ -189,7 +189,7 @@ suite('test_ingestion_load', 'p0,external') {
         label = "test_ingestion_load_unique_mor"
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data.parquet',5745)
         tableName = 'tbl_test_spark_load_agg'
@@ -215,7 +215,7 @@ suite('test_ingestion_load', 'p0,external') {
         label = "test_ingestion_load_agg"
-        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data1.parquet')
+        testIngestLoadJob.call(tableName, label, context.config.dataPath + '/load_p0/ingestion_load/data1.parquet',4057)
 }
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
index 89be972b5bf..a4f9617ca76 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_column.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
                         "msg": "",
                         "appId": "",
                         "dppResult": "${dppResult}",
-                        "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                        "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                         "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                     }
                 }"""
@@ -112,7 +112,7 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
             while (max_try_milli_secs) {
                 def result = sql "show load where label = '${loadLabel}'"
                 if (result[0][2] == "CANCELLED") {
-                    msg = result[0][7]
+                    def msg = result[0][7]
                     logger.info("err msg: " + msg)
                     assertTrue((result[0][7] =~ /schema of index \[\d+\] has changed/).find())
                     break
@@ -134,6 +134,8 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
     try {
+        sql "DROP TABLE if exists ${tableName1}"
+        sql "DROP TABLE if exists ${tableName2}"
         sql """
         CREATE TABLE IF NOT EXISTS ${tableName1} (
             c_int int(11) NULL,
@@ -199,10 +201,8 @@ suite('test_ingestion_load_alter_column', 'p0,external') {
             })
         } finally {
-            //sql "DROP TABLE ${tableName1}"
-            //sql "DROP TABLE ${tableName2}"
         }
     }
-}
\ No newline at end of file
+}
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
index 83492d1bf1c..56002a7318b 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_alter_partition.groovy
@@ -123,8 +123,8 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
                     qt_select "select c1, count(*) from ${testTable} group by c1 order by c1"
                     break
                 } else if (result[0][2] == "CANCELLED") {
-                    msg = result[0][7]
-                    logger.info("err msg: " + msg)
+                    def msg2 = result[0][7]
+                    logger.info("err msg: " + msg2)
                     assertTrue((result[0][7] =~ /partition does not exist/).find())
                     break
                 } else {
@@ -146,6 +146,10 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
     try {
+        sql "DROP TABLE if exists ${tableName1}"
+        sql "DROP TABLE if exists ${tableName2}"
+        sql "DROP TABLE if exists ${tableName3}"
+
         sql """
         CREATE TABLE IF NOT EXISTS ${tableName1} (
             c0 int not null,
@@ -214,9 +218,6 @@ suite('test_ingestion_load_alter_partition', 'p0,external') {
             })
         } finally {
-            // sql "DROP TABLE ${tableName1}"
-            // sql "DROP TABLE ${tableName2}"
-            // sql "DROP TABLE ${tableName3}"
         }
     }
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
index 1f0adb8c1c0..c5b5fc90de9 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_drop_table.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_drop_table', 'p0,external') {
                         "msg": "",
                         "appId": "",
                         "dppResult": "${dppResult}",
-                        "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                        "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                         "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                     }
                 }"""
@@ -188,7 +188,6 @@ suite('test_ingestion_load_drop_table', 'p0,external') {
             })
         } finally {
-            sql "DROP TABLE ${tableName}"
         }
     }
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
index e536b57c204..34de65761d0 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_multi_table.groovy
@@ -103,7 +103,7 @@ suite('test_ingestion_load_multi_table', 'p0,external') {
                         "msg": "",
                         "appId": "",
                         "dppResult": "${dppResult}",
-                        "filePathToSize": "{\\"${etlResultFilePath1}\\": 81758, \\"${etlResultFilePath2}\\": 81758}",
+                        "filePathToSize": "{\\"${etlResultFilePath1}\\": 5745, \\"${etlResultFilePath2}\\": 5745}",
                         "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                     }
                 }"""
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
index 15db777ddee..08e1aeea353 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_inverted_index.groovy
@@ -85,7 +85,7 @@ suite('test_ingestion_load_with_inverted_index', 'p0,external') {
                         "msg": "",
                         "appId": "",
                         "dppResult": "${dppResult}",
-                        "filePathToSize": "{\\"${etlResultFilePath}\\": 81758}",
+                        "filePathToSize": "{\\"${etlResultFilePath}\\": 5745}",
                         "hadoopProperties": "{\\"fs.defaultFS\\":\\"${getHdfsFs()}\\",\\"hadoop.username\\":\\"${getHdfsUser()}\\",\\"hadoop.password\\":\\"${getHdfsPasswd()}\\"}"
                     }
                 }"""
diff --git a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
index 12a904f15d8..c7843d5a866 100644
--- a/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
+++ b/regression-test/suites/load_p0/ingestion_load/test_ingestion_load_with_partition.groovy
@@ -71,7 +71,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
             }
         }
-        etlResultFilePaths = []
+        def etlResultFilePaths = []
         for(int i=0; i < dataFiles.size(); i++) {
             Files.copy(Paths.get(dataFiles[i]),
                     Paths.get(context.config.dataPath + "/load_p0/ingestion_load/${resultFileNames[i]}"),
                     StandardCopyOption.REPLACE_EXISTING)
@@ -115,7 +115,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
         def max_try_milli_secs = 120000
         while (max_try_milli_secs) {
-            result = sql "show load where label = '${loadLabel}'"
+            def result = sql "show load where label = '${loadLabel}'"
             if (result[0][2] == "FINISHED") {
                 sql "sync"
                 qt_select "select c1, count(*) from ${testTable} group by c1 order by c1"
@@ -133,8 +133,8 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
     if (enableHdfs()) {
-        def tableName = 'tbl_test_spark_load_partition'
-
+        def tableName = 'tbl_test_spark_load_with_partition'
+        sql "DROP TABLE if exists ${tableName}"
         sql """
         CREATE TABLE IF NOT EXISTS ${tableName} (
             c0 int not null,
@@ -151,7 +151,7 @@ suite('test_ingestion_load_with_partition', 'p0,external') {
         )
         """
-        def label = "test_ingestion_load_partition"
+        def label = "test_ingestion_load_with_partition__"
         testIngestLoadJob.call(tableName, label, [context.config.dataPath + '/load_p0/ingestion_load/data2-0.parquet', context.config.dataPath + '/load_p0/ingestion_load/data2-1.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-2.parquet',context.config.dataPath + '/load_p0/ingestion_load/data2-3.parquet'])