This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 6fd672c470a [fix](jsonb)fix jsonb deserialze #37251 (#38629)
6fd672c470a is described below
commit 6fd672c470ad31309d572e535560e39b94d7cfa5
Author: amory <[email protected]>
AuthorDate: Thu Aug 1 19:07:06 2024 +0800
[fix](jsonb)fix jsonb deserialze #37251 (#38629)
---
be/src/vec/functions/function_cast.h | 1 +
.../data/jsonb_p0/test_jsonb_unescaped.csv | 5 ++
.../data/jsonb_p0/test_jsonb_unescaped.json | 5 ++
.../jsonb_p0/test_jsonb_with_unescaped_string.out | 15 ++++
.../test_jsonb_with_unescaped_string.groovy | 99 ++++++++++++++++++++++
5 files changed, 125 insertions(+)
diff --git a/be/src/vec/functions/function_cast.h
b/be/src/vec/functions/function_cast.h
index 8bcc531231b..f246246bdbf 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -625,6 +625,7 @@ struct ConvertImplGenericFromString {
const bool is_complex = is_complex_type(data_type_to);
DataTypeSerDe::FormatOptions format_options;
format_options.converted_from_string = true;
+ format_options.escape_char = '\\';
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
new file mode 100644
index 00000000000..e4f859e7511
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
@@ -0,0 +1,5 @@
+1 \N
+2 ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}']
+3 ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+4 ['\/some\/cool\/url', '/some/cool/url',
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
+5 ["\"双引号\"", "反斜\\线"]
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.json
b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json
new file mode 100644
index 00000000000..de718c8efde
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json
@@ -0,0 +1,5 @@
+{"id":1,"a":null}
+{"id":2,"a":['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', \'{"x" : 1}']}
+{"id":3,"a":['foo\'bar', 'foo\"bar', 'foo\\\'bar', 'foo\'\'bar']}
+{"id":4,"a":['\/some\/cool\/url', '/some/cool/url',
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']}
+{"id":5,"a":["\"双引号\"", "反斜\\线"]}
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
new file mode 100644
index 00000000000..99fb23ef9ee
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select_csv --
+1 \N
+2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
+3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
+5 [""双引号"", "反斜\\线"]
+
+-- !select_json --
+1 \N
+2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "'{"x" : 1}'"]
+3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+4 ["/some/cool/url", "/some/cool/url",
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
+5 [""双引号"", "反斜\\线"]
+
diff --git
a/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy
b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy
new file mode 100644
index 00000000000..b728c46cb20
--- /dev/null
+++ b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_jsonb_with_unescaped_string", "p0") {
+
+ // Define a table with an ARRAY<TEXT> column whose values contain escape characters and should also be castable to JSON
+ def testTable = "tbl_unescaped_jsonb"
+ def dataFile = "test_jsonb_unescaped.csv"
+ def dataFileJson = "test_jsonb_unescaped.json"
+
+ sql """ set experimental_enable_nereids_planner = true """
+ sql """ set enable_fallback_to_original_planner = true """
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ id INT,
+ a ARRAY<TEXT>,
+ )
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 3
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // load the jsonb data from csv file
+ streamLoad {
+ table testTable
+
+ file dataFile // import csv file
+ time 10000 // limit inflight 10s
+ set 'strict_mode', 'true'
+
+ // If a check callback is declared, the default check condition is ignored,
+ // so you must check every condition yourself.
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals(5, json.NumberTotalRows)
+ assertEquals(5, json.NumberLoadedRows)
+ assertTrue(json.LoadBytes > 0)
+ }
+ }
+
+ sql """ sync; """
+
+ // check result
+ qt_select_csv "SELECT * FROM ${testTable} ORDER BY id"
+
+ sql "truncate table ${testTable}"
+ // load the jsonb data from json file
+ streamLoad {
+ table testTable
+
+ file dataFileJson // import json file
+ time 10000 // limit inflight 10s
+ set 'format', 'json' // import format
+ set 'read_json_by_line', 'true' // read json by line
+ set 'strict_mode', 'true'
+
+ // If a check callback is declared, the default check condition is ignored,
+ // so you must check every condition yourself.
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals(5, json.NumberTotalRows)
+ assertEquals(5, json.NumberLoadedRows)
+ assertTrue(json.LoadBytes > 0)
+ }
+ }
+
+
+ sql """ sync; """
+
+ // check result
+ qt_select_json "SELECT * FROM ${testTable} ORDER BY id"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]