This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 58d2ac15fc9 [Bug](json reader) object should stop processing when 
encounter error #31159 (#31174)
58d2ac15fc9 is described below

commit 58d2ac15fc9ccc688cfe765da75015623aab6609
Author: lihangyu <[email protected]>
AuthorDate: Wed Feb 21 14:00:51 2024 +0800

    [Bug](json reader) object should stop processing when encounter error 
#31159 (#31174)
    
    If a DATA_QUALITY_ERROR is encountered, we should stop processing this document.
    Otherwise there will be undefined behavior (UB) in simdjson.
---
 be/src/exprs/json_functions.cpp                    |  2 +-
 be/src/vec/exec/format/json/new_json_reader.cpp    |  6 ++---
 .../data/load_p0/stream_load/test_json_load.out    |  4 +++
 .../stream_load/test_malformed_json_with_path.json |  3 +++
 .../load_p0/stream_load/test_json_load.groovy      | 29 ++++++++++++++++++++++
 5 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp
index 30608adeb25..8612432b160 100644
--- a/be/src/exprs/json_functions.cpp
+++ b/be/src/exprs/json_functions.cpp
@@ -260,7 +260,7 @@ Status 
JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
         const std::string& _msg = msg;                                         
             \
         if (UNLIKELY(_err)) {                                                  
             \
             if (_err == simdjson::NO_SUCH_FIELD || _err == 
simdjson::INDEX_OUT_OF_BOUNDS) { \
-                return Status::DataQualityError(                               
             \
+                return Status::NotFound<false>(                                
             \
                         fmt::format("Not found target filed, err: {}, msg: 
{}",             \
                                     simdjson::error_message(_err), _msg));     
             \
             }                                                                  
             \
diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index f9f434dc229..feed55037a5 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1253,7 +1253,7 @@ Status 
NewJsonReader::_simdjson_handle_flat_array_complex_json(
                 simdjson::ondemand::value val;
                 Status st = JsonFunctions::extract_from_object(cur, 
_parsed_json_root, &val);
                 if (UNLIKELY(!st.ok())) {
-                    if (st.is<DATA_QUALITY_ERROR>()) {
+                    if (st.is_not_found()) {
                         RETURN_IF_ERROR(_append_error_msg(nullptr, 
st.to_string(), "", nullptr));
                         ADVANCE_ROW();
                         continue;
@@ -1698,11 +1698,11 @@ Status 
NewJsonReader::_simdjson_write_columns_by_jsonpath(
         Status st;
         if (i < _parsed_jsonpaths.size()) {
             st = JsonFunctions::extract_from_object(*value, 
_parsed_jsonpaths[i], &json_value);
-            if (!st.ok() && !st.is<DATA_QUALITY_ERROR>()) {
+            if (!st.ok() && !st.is_not_found()) {
                 return st;
             }
         }
-        if (i >= _parsed_jsonpaths.size() || st.is<DATA_QUALITY_ERROR>()) {
+        if (i >= _parsed_jsonpaths.size() || st.is_not_found()) {
             // not match in jsondata, filling with default value
             RETURN_IF_ERROR(_fill_missing_column(slot_desc, column_ptr, 
valid));
             if (!(*valid)) {
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out 
b/regression-test/data/load_p0/stream_load/test_json_load.out
index b9250608475..7351891633d 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -241,3 +241,7 @@ John        30      New York        
{"email":"[email protected]","phone":"+1-123-456-7890"}
 100    2345676
 200    755
 
+-- !select26 --
+android        \N      \N      \N      \N      \N
+android        \N      \N      \N      \N      \N
+
diff --git 
a/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json 
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
new file mode 100644
index 00000000000..f87ebaa5d3b
--- /dev/null
+++ 
b/regression-test/data/load_p0/stream_load/test_malformed_json_with_path.json
@@ -0,0 +1,3 @@
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","event_value":"device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
+{"app_version":"v1.0.0","app_package":"com.fdf.listen","subject":"USER","ip":"45334","platform":"android","app_name":"听听","pro_brand":"图书","report_time":0,"user_id":"unknown","platform_ID":"1","action":"CLICK","event_name":"section_play","phone_num":"45645642692","pro_code":"unknown","device_id":"gikj78675678","syscode":123,
 
"media_id":"67867","album_id":"1734","duration":"60","event_time":1706841911773,"object":"play_content"}
\ No newline at end of file
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy 
b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index ea0ab0fd04a..6d0c221ca03 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -705,4 +705,33 @@ suite("test_json_load", "p0") {
         set_be_param.call("enable_simdjson_reader", "true")
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
+
+    // case27: import json with malformed json along with json path
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+
+        sql """CREATE TABLE IF NOT EXISTS ${testTable} 
+            (
+                `syscode` VARCHAR(20)  NOT NULL COMMENT "",
+                `event_dt` DateTime NULL COMMENT "",
+                `pro_brand` VARCHAR(20)  COMMENT "",
+                `app_package`  VARCHAR(50) COMMENT "",
+                `platform` VARCHAR(20) COMMENT "",
+                `log_num`  BIGINT DEFAULT "0" COMMENT ""
+            )
+            DUPLICATE KEY(`syscode`, 
`event_dt`,`pro_brand`,`app_package`,`platform`)
+            COMMENT ''
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );"""
+
+        load_json_data.call("${testTable}", "${testTable}_case27_1", 'false', 
'true', 'json', 'id= id * 10', 
'[\"$.platform\",\"$.app_package\",\"$.sysCode\",\"$.sys_code\",\"$.proBrand\",\"$.pro_brand\",\"$.event_time\"]',
+                             '', '', '', 'test_malformed_json_with_path.json', 
false, 2)
+        sql "sync"
+        qt_select26 "select * from ${testTable}"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to