xiaokang commented on code in PR #16903:
URL: https://github.com/apache/doris/pull/16903#discussion_r1111645505
##########
be/src/exprs/json_functions.cpp:
##########
@@ -282,4 +282,67 @@ void JsonFunctions::get_parsed_paths(const
std::vector<std::string>& path_exprs,
}
}
+Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
+ const std::vector<JsonPath>&
jsonpath,
+ simdjson::ondemand::value* value)
noexcept {
+#define HANDLE_SIMDJSON_ERROR(err, msg)
\
+ do {
\
+ const simdjson::error_code& _err = err;
\
+ const std::string& _msg = msg;
\
+ if (UNLIKELY(_err)) {
\
+ if (_err == simdjson::NO_SUCH_FIELD || _err ==
simdjson::INDEX_OUT_OF_BOUNDS) { \
+ return Status::NotFound(
\
+ fmt::format("err: {}, msg: {}",
simdjson::error_message(_err), _msg)); \
+ }
\
+ return Status::DataQualityError(
\
+ fmt::format("err: {}, msg: {}",
simdjson::error_message(_err), _msg)); \
+ }
\
+ } while (false);
+
+ if (jsonpath.size() <= 1) {
+ // The first elem of json path should be '$'.
+ // A valid json path's size is >= 2.
+ return Status::DataQualityError("empty json path");
+ }
+
+ simdjson::ondemand::value tvalue;
+
+ // Skip the first $.
+ for (int i = 1; i < jsonpath.size(); i++) {
+ if (UNLIKELY(!jsonpath[i].is_valid)) {
+ return Status::DataQualityError(fmt::format("invalid json path:
{}", jsonpath[i].key));
+ }
+
+ const std::string& col = jsonpath[i].key;
+ int index = jsonpath[i].idx;
+
+ // Since the simdjson::ondemand::object cannot be converted to
simdjson::ondemand::value,
+ // we have to do some special treatment for the second elem of json
path.
+ // If the key is not found in json object, simdjson::NO_SUCH_FIELD
would be returned.
+ if (i == 1) {
+ HANDLE_SIMDJSON_ERROR(obj.find_field_unordered(col).get(tvalue),
+ fmt::format("unable to find field: {}",
col));
+ } else {
+ HANDLE_SIMDJSON_ERROR(tvalue.find_field_unordered(col).get(tvalue),
+ fmt::format("unable to find field: {}",
col));
+ }
+
+ if (index != -1) {
+ // try to access tvalue as array.
+ // If the index is beyond the length of array,
simdjson::INDEX_OUT_OF_BOUNDS would be returned.
+ simdjson::ondemand::array arr;
+ HANDLE_SIMDJSON_ERROR(tvalue.get_array().get(arr),
+ fmt::format("failed to access field as
array, field: {}", col));
+
+ HANDLE_SIMDJSON_ERROR(
+ arr.at(index).get(tvalue),
Review Comment:
add a TODO
##########
be/src/exprs/json_functions.cpp:
##########
@@ -282,4 +282,67 @@ void JsonFunctions::get_parsed_paths(const
std::vector<std::string>& path_exprs,
}
}
+Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
+ const std::vector<JsonPath>&
jsonpath,
+ simdjson::ondemand::value* value)
noexcept {
+#define HANDLE_SIMDJSON_ERROR(err, msg)
\
+ do {
\
+ const simdjson::error_code& _err = err;
\
+ const std::string& _msg = msg;
\
+ if (UNLIKELY(_err)) {
\
+ if (_err == simdjson::NO_SUCH_FIELD || _err ==
simdjson::INDEX_OUT_OF_BOUNDS) { \
+ return Status::NotFound(
\
+ fmt::format("err: {}, msg: {}",
simdjson::error_message(_err), _msg)); \
+ }
\
+ return Status::DataQualityError(
\
Review Comment:
Add a comment to explain the principle.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]