worker24h commented on a change in pull request #4020:
URL: https://github.com/apache/incubator-doris/pull/4020#discussion_r449954838



##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -406,54 +459,60 @@ Status JsonReader::handle_simple_json(Tuple* tuple, const 
std::vector<SlotDescri
     return Status::OK();
 }
 
-Status JsonReader::set_tuple_value_from_map(Tuple* tuple, const 
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
+// for complex format json with strip_outer_array = false
+Status JsonReader::_set_tuple_value_from_jmap(Tuple* tuple, const 
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
     std::unordered_map<std::string, JsonDataInternal>::iterator it_map;
     for (auto v : slot_descs) {
         it_map = _jmap.find(v->col_name());
         if (it_map == _jmap.end()) {
-            return Status::RuntimeError("The column name of table is not foud 
in jsonpath.");
+            return Status::RuntimeError("The column name of table is not foud 
in jsonpath: " + v->col_name());
         }
-        rapidjson::Value::ConstValueIterator value = it_map->second.get_next();
+        rapidjson::Value* value = it_map->second.get_value();
         if (value == nullptr) {
             if (v->is_nullable()) {
                 tuple->set_null(v->null_indicator_offset());
             } else  {
                 std::stringstream str_error;
                 str_error << "The column `" << it_map->first << "` is not 
nullable, but it's not found in jsondata.";
-                _state->append_error_msg_to_file("", str_error.str());
+                _state->append_error_msg_to_file(_print_json_value(*value), 
str_error.str());
                 _counter->num_rows_filtered++;
                 *valid = false; // current row is invalid
                 break;
             }
         } else {
-            RETURN_IF_ERROR(write_data_to_tuple(value, v, tuple, tuple_pool));
+            _write_data_to_tuple(value, v, tuple, tuple_pool, valid);
+            if (!(*valid)) {
+                return Status::OK();
+            }
         }
     }
     *valid = true;
     return Status::OK();
 }
 
-Status JsonReader::handle_nest_complex_json(Tuple* tuple, const 
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
+// _json_doc should be an object
+Status JsonReader::_handle_nested_complex_json(Tuple* tuple, const 
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
     do {
         bool valid = false;
         if (_next_line >= _total_lines) {
-            RETURN_IF_ERROR(parse_json_doc(eof));
-            if (*eof) {
-                return Status::OK();
+            Status st = _parse_json_doc(eof);
+            if (st.is_data_quality_error()) {
+                continue; // continue to read next
             }
-            _total_lines = get_data_by_jsonpath(slot_descs);
-            if (_total_lines == -1) {
-                return Status::InternalError("Parse json data is failed.");
-            } else if (_total_lines == 0) {
-                *eof = true;
+            RETURN_IF_ERROR(st); // terminate if encounter other errors

Review comment:
       I think this code can be deleted.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to