This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 6fd672c470a [fix](jsonb)fix jsonb deserialze #37251 (#38629)
6fd672c470a is described below

commit 6fd672c470ad31309d572e535560e39b94d7cfa5
Author: amory <[email protected]>
AuthorDate: Thu Aug 1 19:07:06 2024 +0800

    [fix](jsonb)fix jsonb deserialze #37251 (#38629)
---
 be/src/vec/functions/function_cast.h               |  1 +
 .../data/jsonb_p0/test_jsonb_unescaped.csv         |  5 ++
 .../data/jsonb_p0/test_jsonb_unescaped.json        |  5 ++
 .../jsonb_p0/test_jsonb_with_unescaped_string.out  | 15 ++++
 .../test_jsonb_with_unescaped_string.groovy        | 99 ++++++++++++++++++++++
 5 files changed, 125 insertions(+)

diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 8bcc531231b..f246246bdbf 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -625,6 +625,7 @@ struct ConvertImplGenericFromString {
             const bool is_complex = is_complex_type(data_type_to);
             DataTypeSerDe::FormatOptions format_options;
             format_options.converted_from_string = true;
+            format_options.escape_char = '\\';
 
             for (size_t i = 0; i < size; ++i) {
                 const auto& val = col_from_string->get_data_at(i);
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
new file mode 100644
index 00000000000..e4f859e7511
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
@@ -0,0 +1,5 @@
+1      \N
+2      ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}']
+3      ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+4      ['\/some\/cool\/url', '/some/cool/url', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
+5      ["\"双引号\"", "反斜\\线"]
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.json b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json
new file mode 100644
index 00000000000..de718c8efde
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json
@@ -0,0 +1,5 @@
+{"id":1,"a":null}
+{"id":2,"a":['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', \'{"x" : 1}']}
+{"id":3,"a":['foo\'bar', 'foo\"bar', 'foo\\\'bar', 'foo\'\'bar']}
+{"id":4,"a":['\/some\/cool\/url', '/some/cool/url', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']}
+{"id":5,"a":["\"双引号\"", "反斜\\线"]}
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
new file mode 100644
index 00000000000..99fb23ef9ee
--- /dev/null
+++ b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
@@ -0,0 +1,15 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_csv --
+1      \N
+2      ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
+3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+4      ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
+5      [""双引号"", "反斜\\线"]
+
+-- !select_json --
+1      \N
+2      ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "'{"x" : 1}'"]
+3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+4      ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
+5      [""双引号"", "反斜\\线"]
+
diff --git a/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy
new file mode 100644
index 00000000000..b728c46cb20
--- /dev/null
+++ b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_jsonb_with_unescaped_string", "p0") {
+
+    // define a sql table with an array<text> column whose values contain escape characters and should also be castable to json
+    def testTable = "tbl_unescaped_jsonb"
+    def dataFile = "test_jsonb_unescaped.csv"
+    def dataFileJson = "test_jsonb_unescaped.json"
+
+    sql """ set experimental_enable_nereids_planner = true """
+    sql """ set enable_fallback_to_original_planner = true """
+
+    sql "DROP TABLE IF EXISTS ${testTable}"
+
+    sql """
+        CREATE TABLE IF NOT EXISTS ${testTable} (
+            id INT,
+            a ARRAY<TEXT>,
+        )
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 3
+        PROPERTIES("replication_num" = "1");
+        """
+
+    // load the jsonb data from csv file
+    streamLoad {
+        table testTable
+        
+        file dataFile // import csv file
+        time 10000 // limit inflight 10s
+        set 'strict_mode', 'true'
+
+        // if a check callback is declared, the default check conditions are ignored,
+        // so you must check all conditions yourself
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals(5, json.NumberTotalRows)
+            assertEquals(5, json.NumberLoadedRows)
+            assertTrue(json.LoadBytes > 0)
+        }
+    }
+
+    sql """ sync; """
+
+    // check result
+    qt_select_csv "SELECT * FROM ${testTable} ORDER BY id"
+
+    sql "truncate table ${testTable}"
+    // load the jsonb data from json file
+    streamLoad {
+        table testTable
+
+        file dataFileJson // import json file
+        time 10000 // limit inflight 10s
+        set 'format', 'json' // import format
+        set 'read_json_by_line', 'true' // read json by line
+        set 'strict_mode', 'true'
+
+        // if a check callback is declared, the default check conditions are ignored,
+        // so you must check all conditions yourself
+        check { result, exception, startTime, endTime ->
+            if (exception != null) {
+                throw exception
+            }
+            log.info("Stream load result: ${result}".toString())
+            def json = parseJson(result)
+            assertEquals(5, json.NumberTotalRows)
+            assertEquals(5, json.NumberLoadedRows)
+            assertTrue(json.LoadBytes > 0)
+        }
+    }
+
+
+    sql """ sync; """
+
+    // check result
+    qt_select_json "SELECT * FROM ${testTable} ORDER BY id"
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to