This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new bebc7ae4914 branch-4.1: [fix](serde) fix split_by_delimiter missing
backslash escape handling #61995 (#64432)
bebc7ae4914 is described below
commit bebc7ae49140efe4111b9ade0c4dc8c54e8e7b7f
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 16 13:37:03 2026 +0800
branch-4.1: [fix](serde) fix split_by_delimiter missing backslash escape
handling #61995 (#64432)
Cherry-picked from #61995
Co-authored-by: Chenyang Sun <[email protected]>
---
.../complex_type_deserialize_util.h | 4 +-
.../data_type_serde/data_type_serde_map_test.cpp | 57 ++++++++++++++++++++++
regression-test/data/jsonb_p0/test_jsonb_cast.csv | 2 +-
regression-test/data/jsonb_p0/test_jsonb_cast.out | 6 +--
.../data/jsonb_p0/test_jsonb_unescaped.csv | 2 +-
.../jsonb_p0/test_jsonb_with_unescaped_string.out | 4 +-
6 files changed, 67 insertions(+), 8 deletions(-)
diff --git a/be/src/core/data_type_serde/complex_type_deserialize_util.h
b/be/src/core/data_type_serde/complex_type_deserialize_util.h
index 20f49636c76..ff76d64dcd6 100644
--- a/be/src/core/data_type_serde/complex_type_deserialize_util.h
+++ b/be/src/core/data_type_serde/complex_type_deserialize_util.h
@@ -42,7 +42,9 @@ struct ComplexTypeDeserializeUtil {
std::vector<SplitResult> elements;
for (int pos = 0; pos < str.size; ++pos) {
char c = str.data[pos];
- if (c == '"' || c == '\'') {
+ if (c == '\\' && pos + 1 < static_cast<int>(str.size)) {
+ ++pos; // skip escaped character
+ } else if (c == '"' || c == '\'') {
if (!has_quote) {
quote_char = c;
has_quote = !has_quote;
diff --git a/be/test/core/data_type_serde/data_type_serde_map_test.cpp
b/be/test/core/data_type_serde/data_type_serde_map_test.cpp
index 33d72e3b7e8..cfe6dee93e0 100644
--- a/be/test/core/data_type_serde/data_type_serde_map_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_map_test.cpp
@@ -41,6 +41,7 @@
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_string.h"
#include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/complex_type_deserialize_util.h"
#include "core/field.h"
#include "core/types.h"
#include "storage/olap_common.h"
@@ -178,4 +179,60 @@ TEST_F(DataTypeMapSerDeTest, ArrowMemNotAligned) {
EXPECT_TRUE(st.ok());
}
+// Stream Load JSON stores Map as String via to_json_string, then converts back
+// via from_string → split_by_delimiter. The splitter must handle '\' escapes
+// so that '\"' inside a value doesn't flip quote state and expose inner ':'/','.
+TEST_F(DataTypeMapSerDeTest, SplitByDelimiterHandlesBackslashEscape) {
+ DataTypeSerDe::FormatOptions opts;
+ opts.map_key_delim = ':';
+ opts.collection_delim = ',';
+
+ auto make_map_type = []() {
+ auto str =
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+ return std::make_shared<DataTypeMap>(str, str);
+ };
+
+ // split_by_delimiter: '\"' must not toggle quote state
+ // Input (after stripping outer {}): "k":"[{\"a\":\"b\\nc:"
+ // Expected: 2 elements — key "k" and value "[{\"a\":\"b\\nc:"
+ {
+ std::string inner = "\"k\":\"[{\\\"a\\\":\\\"b\\\\nc:\"";
+ StringRef str(inner.data(), inner.size());
+ auto result = ComplexTypeDeserializeUtil::split_by_delimiter(
+ str, [&](char c) { return c == opts.map_key_delim || c ==
opts.collection_delim; });
+ EXPECT_EQ(result.size(), 2u);
+ }
+
+ // from_string: value ending with ':' (map_key_delim) must not cause split error
+ // Simulates to_json_string output: {"k":"[{\"a\":\"b\\nc:"}
+ {
+ auto map_type = make_map_type();
+ auto col = map_type->create_column();
+ std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc:\"}";
+ StringRef ref(map_str.data(), map_str.size());
+ EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+ EXPECT_EQ(col->size(), 1u);
+ }
+
+ // from_string: value ending with ',' (collection_delim) — same class of bug
+ {
+ auto map_type = make_map_type();
+ auto col = map_type->create_column();
+ std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc,\"}";
+ StringRef ref(map_str.data(), map_str.size());
+ EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+ EXPECT_EQ(col->size(), 1u);
+ }
+
+ // Control: value ending with ')' (not a delimiter) — always worked
+ {
+ auto map_type = make_map_type();
+ auto col = map_type->create_column();
+ std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc)\"}";
+ StringRef ref(map_str.data(), map_str.size());
+ EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+ EXPECT_EQ(col->size(), 1u);
+ }
+}
+
} // namespace doris
diff --git a/regression-test/data/jsonb_p0/test_jsonb_cast.csv
b/regression-test/data/jsonb_p0/test_jsonb_cast.csv
index d4d64bebe19..3efda7706dc 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_cast.csv
+++ b/regression-test/data/jsonb_p0/test_jsonb_cast.csv
@@ -1,4 +1,4 @@
1 \N
2 ['{\'x\':\'{"y":1}\', \'t\':\'{"y":2}\'}', '{"x":1}']
-3 ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+3 ['foo\'bar', 'foo"bar', 'foo\'bar', 'foo\'\'bar']
4 ['\/some\/cool\/url', '/some/cool/url',
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
diff --git a/regression-test/data/jsonb_p0/test_jsonb_cast.out
b/regression-test/data/jsonb_p0/test_jsonb_cast.out
index bb47b7249a5..0046ea38e0a 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_cast.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_cast.out
@@ -2,13 +2,13 @@
-- !select_1 --
1 \N
2 ["{'x':'{"y":1}', 't':'{"y":2}'}", "{"x":1}"]
-3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3 ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
-- !select_2 --
1 \N
2 ["{'x':'{"y":1}', 't':'{"y":2}'}", "{"x":1}"]
-3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3 ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
27 ["{"k1":"v1", "k2":200}"]
28 ["{"a.b.c":{"k1.a1":"v31", "k2":300},"a":"niu"}"]
@@ -18,7 +18,7 @@
-- !select_json --
1 \N
2 ["{'x':'{\\"y\\":1}', 't':'{\\"y\\":2}'}","{\\"x\\":1}"]
-3 ["foo'bar', 'foo\\"bar', 'foo\\\\'bar', 'foo''bar"]
+3 ["foo'bar","foo\\"bar","foo'bar","foo''bar"]
4
["/some/cool/url","/some/cool/url","a\\\\_\\\\c\\\\l\\\\i\\\\c\\\\k\\\\h\\\\o\\\\u\\\\s\\\\e"]
27 ["{\\"k1\\":\\"v1\\", \\"k2\\":200}"]
28 ["{\\"a.b.c\\":{\\"k1.a1\\":\\"v31\\",
\\"k2\\":300},\\"a\\":\\"niu\\"}"]
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
index e4f859e7511..37c07297cbf 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
@@ -1,5 +1,5 @@
1 \N
2 ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}']
-3 ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+3 ['foo\'bar', 'foo"bar', 'foo\'bar', 'foo\'\'bar']
4 ['\/some\/cool\/url', '/some/cool/url',
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
5 ["\"双引号\"", "反斜\\线"]
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
index 99fb23ef9ee..f7df0f30c14 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
@@ -2,14 +2,14 @@
-- !select_csv --
1 \N
2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
-3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3 ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
5 [""双引号"", "反斜\\线"]
-- !select_json --
1 \N
2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "'{"x" : 1}'"]
-3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3 ["foo'bar", "foo"bar", "foo\\'bar", "foo''bar"]
4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
5 [""双引号"", "反斜\\线"]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]