This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 58d2ac15fc9 [Bug](json reader) object should stop processing when
encounter error #31159 (#31174)
58d2ac15fc9 is described below
commit 58d2ac15fc9ccc688cfe765da75015623aab6609
Author: lihangyu <[email protected]>
AuthorDate: Wed Feb 21 14:00:51 2024 +0800
[Bug](json reader) object should stop processing when encounter error
#31159 (#31174)
If a DATA_QUALITY_ERROR is encountered, we should stop processing this document.
Otherwise there will be undefined behavior (UB) in simdjson.
---
be/src/exprs/json_functions.cpp | 2 +-
be/src/vec/exec/format/json/new_json_reader.cpp | 6 ++---
.../data/load_p0/stream_load/test_json_load.out | 4 +++
.../stream_load/test_malformed_json_with_path.json | 3 +++
.../load_p0/stream_load/test_json_load.groovy | 29 ++++++++++++++++++++++
5 files changed, 40 insertions(+), 4 deletions(-)
diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index 30608adeb25..8612432b160 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -260,7 +260,7 @@ Status
JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
const std::string& _msg = msg;
\
if (UNLIKELY(_err)) {
\
if (_err == simdjson::NO_SUCH_FIELD || _err ==
simdjson::INDEX_OUT_OF_BOUNDS) { \
- return Status::DataQualityError(
\
+ return Status::NotFound<false>(
\
fmt::format("Not found target filed, err: {}, msg:
{}", \
simdjson::error_message(_err), _msg));
\
}
\
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp
b/be/src/vec/exec/format/json/new_json_reader.cpp
index f9f434dc229..feed55037a5 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1253,7 +1253,7 @@ Status
NewJsonReader::_simdjson_handle_flat_array_complex_json(
simdjson::ondemand::value val;
Status st = JsonFunctions::extract_from_object(cur,
_parsed_json_root, &val);
if (UNLIKELY(!st.ok())) {
- if (st.is<DATA_QUALITY_ERROR>()) {
+ if (st.is_not_found()) {
RETURN_IF_ERROR(_append_error_msg(nullptr,
st.to_string(), "", nullptr));
ADVANCE_ROW();
continue;
@@ -1698,11 +1698,11 @@ Status
NewJsonReader::_simdjson_write_columns_by_jsonpath(
Status st;
if (i < _parsed_jsonpaths.size()) {
st = JsonFunctions::extract_from_object(*value,
_parsed_jsonpaths[i], &json_value);
- if (!st.ok() && !st.is<DATA_QUALITY_ERROR>()) {
+ if (!st.ok() && !st.is_not_found()) {
return st;
}
}
- if (i >= _parsed_jsonpaths.size() || st.is<DATA_QUALITY_ERROR>()) {
+ if (i >= _parsed_jsonpaths.size() || st.is_not_found()) {
// not match in jsondata, filling with default value
RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr,
valid));
if (!(*valid)) {
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out
b/regression-test/data/load_p0/stream_load/test_json_load.out
index b9250608475..7351891633d 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -241,3 +241,7 @@ John 30 New York
{"email":"[email protected]","phone":"+1-123-456-7890"}
100 2345676
200 755
+-- !select26 --
+android \N \N \N \N \N
+android \N \N \N \N \N
+
diff --git
a/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
new file mode 100644
index 00000000000..f87ebaa5d3b
--- /dev/null
+++
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
@@ -0,0 +1,3 @@
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","event_value":"device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","syscode":123,
"media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index ea0ab0fd04a..6d0c221ca03 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -705,4 +705,33 @@ suite("test_json_load", "p0") {
set_be_param.call("enable_simdjson_reader", "true")
try_sql("DROP TABLE IF EXISTS ${testTable}")
}
+
+ // case27: import json with malformed json along with json path
+ try {
+ sql "DROP TABLE IF EXISTS ${testTable}"
+
+ sql """CREATE TABLE IF NOT EXISTS ${testTable}
+ (
+ `syscode` VARCHAR(20) NOT NULL COMMENT "",
+ `event_dt` DateTime NULL COMMENT "",
+ `pro_brand` VARCHAR(20) COMMENT "",
+ `app_package` VARCHAR(50) COMMENT "",
+ `platform` VARCHAR(20) COMMENT "",
+ `log_num` BIGINT DEFAULT "0" COMMENT ""
+ )
+ DUPLICATE KEY(`syscode`,
`event_dt`,`pro_brand`,`app_package`,`platform`)
+ COMMENT ''
+ DISTRIBUTED BY RANDOM BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );"""
+
+ load_json_data.call("${testTable}", "${testTable}_case27_1", 'false',
'true', 'json', 'id= id * 10',
'[\"$.platform\",\"$.app_package\",\"$.sysCode\",\"$.sys_code\",\"$.proBrand\",\"$.pro_brand\",\"$.event_time\"]',
+ '', '', '', 'test_malformed_json_with_path.json',
false, 2)
+ sql "sync"
+ qt_select26 "select * from ${testTable}"
+
+ } finally {
+ try_sql("DROP TABLE IF EXISTS ${testTable}")
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]