morningman commented on a change in pull request #4020:
URL: https://github.com/apache/incubator-doris/pull/4020#discussion_r450582767
##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -406,54 +459,60 @@ Status JsonReader::handle_simple_json(Tuple* tuple, const
std::vector<SlotDescri
return Status::OK();
}
-Status JsonReader::set_tuple_value_from_map(Tuple* tuple, const
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
+// for complex format json with strip_outer_array = false
+Status JsonReader::_set_tuple_value_from_jmap(Tuple* tuple, const
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool *valid) {
std::unordered_map<std::string, JsonDataInternal>::iterator it_map;
for (auto v : slot_descs) {
it_map = _jmap.find(v->col_name());
if (it_map == _jmap.end()) {
- return Status::RuntimeError("The column name of table is not foud
in jsonpath.");
+ return Status::RuntimeError("The column name of table is not foud
in jsonpath: " + v->col_name());
}
- rapidjson::Value::ConstValueIterator value = it_map->second.get_next();
+ rapidjson::Value* value = it_map->second.get_value();
if (value == nullptr) {
if (v->is_nullable()) {
tuple->set_null(v->null_indicator_offset());
} else {
std::stringstream str_error;
str_error << "The column `" << it_map->first << "` is not
nullable, but it's not found in jsondata.";
- _state->append_error_msg_to_file("", str_error.str());
+ _state->append_error_msg_to_file(_print_json_value(*value),
str_error.str());
_counter->num_rows_filtered++;
*valid = false; // current row is invalid
break;
}
} else {
- RETURN_IF_ERROR(write_data_to_tuple(value, v, tuple, tuple_pool));
+ _write_data_to_tuple(value, v, tuple, tuple_pool, valid);
+ if (!(*valid)) {
+ return Status::OK();
+ }
}
}
*valid = true;
return Status::OK();
}
-Status JsonReader::handle_nest_complex_json(Tuple* tuple, const
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
+// _json_doc should be an object
+Status JsonReader::_handle_nested_complex_json(Tuple* tuple, const
std::vector<SlotDescriptor*>& slot_descs, MemPool* tuple_pool, bool* eof) {
do {
bool valid = false;
if (_next_line >= _total_lines) {
- RETURN_IF_ERROR(parse_json_doc(eof));
- if (*eof) {
- return Status::OK();
+ Status st = _parse_json_doc(eof);
+ if (st.is_data_quality_error()) {
+ continue; // continue to read next
}
- _total_lines = get_data_by_jsonpath(slot_descs);
- if (_total_lines == -1) {
- return Status::InternalError("Parse json data is failed.");
- } else if (_total_lines == 0) {
- *eof = true;
+ RETURN_IF_ERROR(st); // terminate if encounter other errors
Review comment:
The file reader may return other errors.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]