yangzhg commented on a change in pull request #8599:
URL: https://github.com/apache/incubator-doris/pull/8599#discussion_r832023482
##########
File path: be/src/exec/json_scanner.cpp
##########
@@ -360,6 +365,79 @@ void JsonReader::_close() {
_closed = true;
}
+Status JsonReader::_simdjson_parse_json_doc(size_t* size, bool* eof) {
+ // read a whole message
+ SCOPED_TIMER(_file_read_timer);
+ const uint8_t* json_str = nullptr;
+ std::unique_ptr<uint8_t[]> json_str_ptr;
+ if (_line_reader != nullptr) {
+ RETURN_IF_ERROR(_line_reader->read_line(&json_str, size, eof));
+ } else {
+ int64_t length = 0;
+ RETURN_IF_ERROR(_file_reader->read_one_message(&json_str_ptr,
&length));
+ json_str = json_str_ptr.get();
+ *size = length;
+ if (length == 0) {
+ *eof = true;
+ }
+ }
+
+ _bytes_read_counter += *size;
+ if (*eof) {
+ return Status::OK();
+ }
+ _json_doc = nullptr;
+
+ auto document = _parser.parse((char*)json_str, *size);
+ if (document.error() != 0) {
+ LOG(INFO) << "json_str: " << std::string((char*)json_str, *size) << ",
error code: " << document.error();
+ fmt::memory_buffer error_msg;
+ fmt::format_to(error_msg, "Simdjson Parse json data for JsonDoc
failed. code: {}", document.error());
+ RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string
{ return std::string((char*)json_str, *size); },
+ [&]() -> std::string { return error_msg.data(); },
_scanner_eof));
+ _counter->num_rows_filtered++;
+ if (*_scanner_eof) {
+ // Case A: if _scanner_eof is set to true in
"append_error_msg_to_file", which means
+ // we meet enough invalid rows and the scanner should be stopped.
+ // So we set eof to true and return OK, the caller will stop the
process as we meet the end of file.
+ *eof = true;
+ return Status::OK();
+ }
+ return Status::DataQualityError(error_msg.data());
+ }
+ _element = document.value_unsafe();
+
+ if (_element.is_array() && !_strip_outer_array) {
+ fmt::memory_buffer error_msg;
+ fmt::format_to(error_msg, "{}", "JSON data is array-object,
`strip_outer_array` must be TRUE.");
+ RETURN_IF_ERROR(_state->append_error_msg_to_file([&]() -> std::string
{ return _print_json_value(_origin_json_doc); },
+ [&]() -> std::string { return error_msg.data(); },
_scanner_eof));
Review comment:
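Use `fmt::to_string(error_msg)` instead of `error_msg.data()`. A `fmt::memory_buffer` is not null-terminated, so constructing a `std::string` from `data()` can read past the end of the formatted message.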
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]