This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 547e88b1ee7 branch-2.1: [fix](csv reader) fix core dump when parsing
csv with enclose #45485 (#45889)
547e88b1ee7 is described below
commit 547e88b1ee76163c583671c5a3e3ac50a9342a7e
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Dec 25 12:09:20 2024 +0800
branch-2.1: [fix](csv reader) fix core dump when parsing csv with enclose
#45485 (#45889)
Cherry-picked from #45485
Co-authored-by: hui lai <[email protected]>
---
be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp | 3 ++-
.../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out | 2 ++
.../data/load_p0/stream_load/test_csv_with_enclose_and_escape.out | 2 ++
.../load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy | 2 +-
.../suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy | 2 +-
5 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index 9a09a90d1aa..ad86cca212b 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -173,8 +173,9 @@ void
EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, si
void EncloseCsvLineReaderContext::_on_match_enclose(const uint8_t* start,
size_t& len) {
const uint8_t* curr_start = start + _idx;
+ size_t curr_len = len - _idx;
const uint8_t* delim_pos =
- find_col_sep_func(curr_start, _column_sep_len,
_column_sep.c_str(), _column_sep_len);
+ find_col_sep_func(curr_start, curr_len, _column_sep.c_str(),
_column_sep_len);
if (delim_pos != nullptr) [[likely]] {
on_col_sep_found(start, delim_pos);
diff --git
a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
index 53aea0d8f89..0c1450e35fe 100644
---
a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
+++
b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out
@@ -9,3 +9,5 @@
3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20:05:48:31
ghi
6 ab"c 2023-07-20 d"ef" 2023-07-20:05:48:31 "g"hi
7 aaa 2023-07-20 2023-07-20:05:48:31
+8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20:05:48:31 aa"bb"
+9 aa,"bbb cc" 2023-07-20 ""aa"bb ,2023-07-20:05:48:31,"aa"bb"
diff --git
a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
index 18b4968b7cd..5646d96230f 100644
---
a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
+++
b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
@@ -5,6 +5,8 @@
3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20T05:48:31
ghi
6 ab"c 2023-07-20 d"ef" 2023-07-20T05:48:31 "g"hi
7 aaa 2023-07-20 2023-07-20T05:48:31
+8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20T05:48:31 aa"bb"
+9 aa,"bbb \N 2023-07-20 \N ,2023-07-20:05:48:31,"aa"bb"
10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31
@@ab$$$cd$$$
10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31
@@ab$$$cd$$$
10 abc \N "def" \N "ghi"
diff --git
a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
index d1294e40731..a552dacd133 100644
---
a/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
+++
b/regression-test/suites/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.groovy
@@ -18,7 +18,7 @@
suite("test_csv_with_enclose_and_escapeS3_load", "load_p0") {
- def tableName = "test_csv_with_enclose_and_escape"
+ def tableName = "test_csv_with_enclose_and_escapeS3_load"
def s3BucketName = getS3BucketName()
sql """ DROP TABLE IF EXISTS ${tableName} """
diff --git
a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
index 7e24b4158c8..1562fa35cfd 100644
---
a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
+++
b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
@@ -85,7 +85,7 @@ suite("test_csv_with_enclose_and_escape", "p0") {
result, exception, startTime, endTime ->
assertTrue(exception == null)
def json = parseJson(result)
- assertEquals("Fail", json.Status)
+ assertEquals("Success", json.Status)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org