This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 72c42941da7 branch-3.1: [fix](csv reader) fix wrong result when escape same as enclose #57632 (#57762)
72c42941da7 is described below
commit 72c42941da7340d6e8adc6dab1fd0a6b3e5e5a1b
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Nov 7 13:05:11 2025 +0800
branch-3.1: [fix](csv reader) fix wrong result when escape same as enclose #57632 (#57762)
Cherry-picked from #57632
Co-authored-by: hui lai <[email protected]>
---
.../vec/exec/format/file_reader/new_plain_text_line_reader.cpp | 4 +++-
.../data/load_p0/stream_load/enclose_with_same_escape.csv | 2 ++
.../load_p0/stream_load/test_csv_with_enclose_and_escape.out | 2 ++
.../load_p0/stream_load/test_csv_with_enclose_and_escape.groovy | 9 +++++++++
4 files changed, 16 insertions(+), 1 deletion(-)
diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
index 80534fb9471..ca1ffa521dc 100644
--- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
+++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp
@@ -150,7 +150,9 @@ void EncloseCsvLineReaderCtx::_on_normal(const uint8_t* start, size_t& len) {
void EncloseCsvLineReaderCtx::_on_pre_match_enclose(const uint8_t* start, size_t& len) {
do {
do {
- if (start[_idx] == _escape) [[unlikely]] {
+ // When escape and enclose are the same, only use quote-escape logic (double quote escaping)
+ // to avoid conflicts between escape and enclose handling
+ if (_escape != _enclose && start[_idx] == _escape) [[unlikely]] {
_should_escape = !_should_escape;
} else if (_should_escape) [[unlikely]] {
_should_escape = false;
diff --git a/regression-test/data/load_p0/stream_load/enclose_with_same_escape.csv b/regression-test/data/load_p0/stream_load/enclose_with_same_escape.csv
new file mode 100644
index 00000000000..b721f087788
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/enclose_with_same_escape.csv
@@ -0,0 +1,2 @@
+"12","{""a"": 1}",2023-07-15,def,"2023-07-20 05:48:31","{""a"": 1}"
+"13","{""a"": 2}",2023-07-15,def,"2023-07-20 05:48:31","{""a"": 2}"
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
index e00ca42ea84..7bc642d024a 100644
--- a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
+++ b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out
@@ -13,4 +13,6 @@
10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31 @@ab$$$cd$$$
10 abc 2023-07-15 def 2023-07-20T05:48:31 ghi
11 abc,def 2023-07-15 ghi 2023-07-20T05:48:31 jkl\nmne
+12 {"a": 1} 2023-07-15 def 2023-07-20T05:48:31 {"a": 1}
+13 {"a": 2} 2023-07-15 def 2023-07-20T05:48:31 {"a": 2}
diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
index d15c702ff4d..e8f5e08ec20 100644
--- a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy
@@ -106,6 +106,15 @@ suite("test_csv_with_enclose_and_escape", "p0") {
file "enclose_not_trim_quotes.csv"
}
+ streamLoad {
+ table "${tableName}"
+ set 'column_separator', ','
+ set 'enclose', "\""
+ set 'escape', '\"'
+
+ file "enclose_with_same_escape.csv"
+ }
+
sql "sync"
qt_select """
SELECT * FROM ${tableName} ORDER BY k1, k2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]