This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d20036ea29b [improvement](json load) Compatible with "$" and "$." that appear during json load (#56703)
d20036ea29b is described below
commit d20036ea29b7362944cd10ab80cde2d50610aa9d
Author: Refrain <[email protected]>
AuthorDate: Sat Oct 11 19:39:51 2025 +0800
[improvement](json load) Compatible with "$" and "$." that appear during json load (#56703)
For JSON load, both `$` and `$.` are now accepted as the `json_root`, and likewise as an entry in `jsonpaths`.
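For illustration only, a stream load request exercising the new behavior could look like the following sketch; fe_host, the 8030 HTTP port, test_db, table t, and the user:passwd credentials are placeholders and not part of this change. With this patch, 'json_root: $' and 'json_root: $.' load the same rows:

    # send simple_json.json via stream load; json_root '$' selects the document root
    curl --location-trusted -u user:passwd \
        -H 'format: json' \
        -H 'strip_outer_array: true' \
        -H 'json_root: $' \
        -H 'columns: id,city,code' \
        -T simple_json.json \
        http://fe_host:8030/api/test_db/t/_stream_load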
---
be/src/vec/exec/format/json/new_json_reader.cpp | 16 ++-
regression-test/data/json_p0/simple_json.json | 8 ++
regression-test/data/json_p0/test_json_root.out | 29 ++++
.../suites/json_p0/test_json_root.groovy | 159 +++++++++++++++++++++
4 files changed, 209 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index 0b8139a7d2d..bd143d9c4bd 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -452,8 +452,13 @@ Status NewJsonReader::_parse_jsonpath_and_json_root() {
if (!path.IsString()) {
return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
}
+ std::string json_path = path.GetString();
+ // $ -> $. in json_path
+ if (UNLIKELY(json_path.size() == 1 && json_path[0] == '$')) {
+ json_path.insert(1, ".");
+ }
std::vector<JsonPath> parsed_paths;
- JsonFunctions::parse_json_paths(path.GetString(), &parsed_paths);
+ JsonFunctions::parse_json_paths(json_path, &parsed_paths);
_parsed_jsonpaths.push_back(std::move(parsed_paths));
}
@@ -464,7 +469,12 @@ Status NewJsonReader::_parse_jsonpath_and_json_root() {
// parse jsonroot
if (!_json_root.empty()) {
- JsonFunctions::parse_json_paths(_json_root, &_parsed_json_root);
+ std::string json_root = _json_root;
+ // $ -> $. in json_root
+ if (json_root.size() == 1 && json_root[0] == '$') {
+ json_root.insert(1, ".");
+ }
+ JsonFunctions::parse_json_paths(json_root, &_parsed_json_root);
}
return Status::OK();
}
@@ -1410,7 +1420,7 @@ Status NewJsonReader::_simdjson_write_columns_by_jsonpath(
}
}
if (i < _parsed_jsonpaths.size() && JsonFunctions::is_root_path(_parsed_jsonpaths[i])) {
- // Indicate that the jsonpath is "$.", read the full root json object, insert the original doc directly
+ // Indicate that the jsonpath is "$" or "$.", read the full root json object, insert the original doc directly
ColumnNullable* nullable_column = nullptr;
IColumn* target_column_ptr = nullptr;
if (slot_desc->is_nullable()) {
diff --git a/regression-test/data/json_p0/simple_json.json b/regression-test/data/json_p0/simple_json.json
new file mode 100644
index 00000000000..ef9301b80ff
--- /dev/null
+++ b/regression-test/data/json_p0/simple_json.json
@@ -0,0 +1,8 @@
+[
+ {"id": 1, "city": "Beijing", "code": 100},
+ {"id": 2, "city": "Shanghai", "code": 200},
+ {"id": 3, "city": "Guangzhou", "code": 300},
+ {"id": 4, "city": "Shenzhen", "code": 400},
+ {"id": 5, "city": "Hangzhou", "code": 500}
+]
+
diff --git a/regression-test/data/json_p0/test_json_root.out b/regression-test/data/json_p0/test_json_root.out
new file mode 100644
index 00000000000..9959f007da7
--- /dev/null
+++ b/regression-test/data/json_p0/test_json_root.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_jsonroot_dollar_dot --
+1 Beijing 100
+2 Shanghai 200
+3 Guangzhou 300
+4 Shenzhen 400
+5 Hangzhou 500
+
+-- !select_jsonroot_dollar --
+1 Beijing 100
+2 Shanghai 200
+3 Guangzhou 300
+4 Shenzhen 400
+5 Hangzhou 500
+
+-- !select_jsonpath_dollar --
+1	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+2	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+3	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+4	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+5	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+
+-- !select_jsonpath_dollar_dot --
+1	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+2	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+3	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+4	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+5	[{"id":1,"city":"Beijing","code":100},{"id":2,"city":"Shanghai","code":200},{"id":3,"city":"Guangzhou","code":300},{"id":4,"city":"Shenzhen","code":400},{"id":5,"city":"Hangzhou","code":500}]
+
diff --git a/regression-test/suites/json_p0/test_json_root.groovy b/regression-test/suites/json_p0/test_json_root.groovy
new file mode 100644
index 00000000000..21f4d83b355
--- /dev/null
+++ b/regression-test/suites/json_p0/test_json_root.groovy
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_json_root", "p0") {
+ def testTable = "t"
+ def dataFile = "simple_json.json"
+ def s3BucketName = getS3BucketName()
+ def s3Endpoint = getS3Endpoint()
+ def s3Region = getS3Region()
+ def ak = getS3AK()
+ def sk = getS3SK()
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${testTable} (
+ id INT DEFAULT '10',
+ city VARCHAR(32) DEFAULT '',
+ code BIGINT SUM DEFAULT '0')
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // case1: use "$." in json_root
+ streamLoad {
+ table testTable
+ file dataFile
+ time 10000
+ set 'format', 'json'
+ set 'strip_outer_array', 'true'
+ set 'json_root', '$.'
+ set 'columns', 'id,city,code'
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertTrue(json.NumberLoadedRows > 0)
+ }
+ }
+
+ sql """ sync; """
+
+ qt_select_jsonroot_dollar_dot "SELECT * FROM ${testTable} ORDER BY id"
+
+ sql "TRUNCATE TABLE ${testTable}"
+
+ // case2: use "$" in json_root
+ streamLoad {
+ table testTable
+ file dataFile
+ time 10000
+ set 'format', 'json'
+ set 'strip_outer_array', 'true'
+ set 'json_root', '$'
+ set 'columns', 'id,city,code'
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertTrue(json.NumberLoadedRows > 0)
+ }
+ }
+
+ sql """ sync; """
+
+ qt_select_jsonroot_dollar "SELECT * FROM ${testTable} ORDER BY id"
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+
+ testTable = "t_with_json"
+
+ sql """
+ CREATE TABLE IF NOT EXISTS ${testTable}(
+ c1 INT DEFAULT '10',
+ c2 Json
+ )
+ DISTRIBUTED BY RANDOM BUCKETS 10
+ PROPERTIES("replication_num" = "1");
+ """
+
+ // case3: use "$" in json_path
+
+ streamLoad {
+ table testTable
+ file dataFile
+ time 10000
+ set 'format', 'json'
+ set 'strip_outer_array', 'true'
+ set 'jsonpaths', '["$.id", "$"]'
+ set 'columns', 'c1,c2'
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertTrue(json.NumberLoadedRows > 0)
+ }
+ }
+
+ sql """ sync; """
+
+ qt_select_jsonpath_dollar "SELECT * FROM ${testTable} ORDER BY c1"
+
+ sql "TRUNCATE TABLE ${testTable}"
+
+
+ // case4: use "$." in json_path
+
+ streamLoad {
+ table testTable
+ file dataFile
+ time 10000
+ set 'format', 'json'
+ set 'strip_outer_array', 'true'
+ set 'jsonpaths', '["$.id", "$."]'
+ set 'columns', 'c1,c2'
+
+ check { result, exception, startTime, endTime ->
+ if (exception != null) {
+ throw exception
+ }
+ log.info("Stream load result: ${result}".toString())
+ def json = parseJson(result)
+ assertEquals("success", json.Status.toLowerCase())
+ assertTrue(json.NumberLoadedRows > 0)
+ }
+ }
+
+ sql """ sync; """
+
+ qt_select_jsonpath_dollar_dot "SELECT * FROM ${testTable} ORDER BY c1"
+
+ sql "DROP TABLE IF EXISTS ${testTable}"
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]