This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 981586155c [Improvement][json] optimize performance of json_extract by
reusing json path object (#23430)
981586155c is described below
commit 981586155c024d901802c1271c3403c73b1e7346
Author: Kang <[email protected]>
AuthorDate: Sun Aug 27 17:39:10 2023 +0800
[Improvement][json] optimize performance of json_extract by reusing json
path object (#23430)
* reuse json path to speed up json function
* fix typo
* clang format
* path reentry safe
* fix compile error
* fix bug of continue
---
be/src/vec/functions/function_jsonb.cpp | 93 ++++++++++++++++++---------------
1 file changed, 52 insertions(+), 41 deletions(-)
diff --git a/be/src/vec/functions/function_jsonb.cpp
b/be/src/vec/functions/function_jsonb.cpp
index bd90cbf35a..86988c304b 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -517,6 +517,48 @@ public:
auto writer = std::make_unique<JsonbWriter>();
std::unique_ptr<JsonbToJson> formater;
+ // reusable json path list, especially for const path
+ std::vector<JsonbPath> json_path_list;
+ json_path_list.resize(rdata_columns.size());
+
+ // lambda function to parse json path for row i and path pi
+ auto parse_json_path = [&](size_t i, size_t pi) -> Status {
+ const ColumnString* path_col = rdata_columns[pi];
+ const ColumnString::Chars& rdata = path_col->get_chars();
+ const ColumnString::Offsets& roffsets = path_col->get_offsets();
+ size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1];
+ size_t r_size = roffsets[index_check_const(i, path_const[pi])] -
r_off;
+ const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
+
+ JsonbPath path;
+ if (!path.seek(r_raw, r_size)) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
+
+ // if not valid json path , should return error message to user
+ if (is_invalid_json_path) {
+ return Status::InvalidArgument(
+ "Json path error: {} for value: {}",
+
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+ std::string_view(reinterpret_cast<const
char*>(rdata.data()),
+ rdata.size()));
+ }
+
+ json_path_list[pi] = std::move(path);
+
+ return Status::OK();
+ };
+
+ for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
+ if (path_const[pi]) {
+ RETURN_IF_ERROR(parse_json_path(0, pi));
+ }
+ }
+
for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map[i]) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
@@ -526,61 +568,30 @@ public:
size_t l_size = loffsets[index_check_const(i, json_data_const)] -
l_off;
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
if (rdata_columns.size() == 1) { // just return origin value
- const ColumnString* path_col = rdata_columns[0];
- const ColumnString::Chars& rdata = path_col->get_chars();
- const ColumnString::Offsets& roffsets =
path_col->get_offsets();
- size_t r_off = roffsets[index_check_const(i, path_const[0]) -
1];
- size_t r_size = roffsets[index_check_const(i, path_const[0])]
- r_off;
- const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
-
- JsonbPath path;
- if (!path.seek(r_raw, r_size)) {
- return Status::InvalidArgument(
- "Json path error: {} for value: {}",
-
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
- std::string_view(reinterpret_cast<const
char*>(rdata.data()),
- rdata.size()));
+ if (!path_const[0]) {
+ RETURN_IF_ERROR(parse_json_path(i, 0));
}
-
inner_loop_impl(i, res_data, res_offsets, null_map, writer,
formater, l_raw, l_size,
- path);
+ json_path_list[0]);
} else { // will make array string to user
writer->reset();
writer->writeStartArray();
+
+ // doc is NOT necessary to be deleted since JsonbDocument will
not allocate memory
+ JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
+
for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
- const ColumnString* path_col = rdata_columns[pi];
- const ColumnString::Chars& rdata = path_col->get_chars();
- const ColumnString::Offsets& roffsets =
path_col->get_offsets();
- size_t r_off = roffsets[index_check_const(i,
path_const[pi]) - 1];
- size_t r_size = roffsets[index_check_const(i,
path_const[pi])] - r_off;
- const char* r_raw = reinterpret_cast<const
char*>(&rdata[r_off]);
- // doc is NOT necessary to be deleted since JsonbDocument
will not allocate memory
- JsonbDocument* doc = JsonbDocument::createDocument(l_raw,
l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
writer->writeNull();
continue;
}
- JsonbPath path;
- if (!path.seek(r_raw, r_size)) {
- return Status::InvalidArgument(
- "Json path error: {} for value: {}",
-
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
- std::string_view(reinterpret_cast<const
char*>(rdata.data()),
- rdata.size()));
- }
-
- // if not valid json path , should return error message to
user
- if (is_invalid_json_path) {
- return Status::InvalidArgument(
- "Json path error: {} for value: {}",
-
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
- std::string_view(reinterpret_cast<const
char*>(rdata.data()),
- rdata.size()));
+ if (!path_const[pi]) {
+ RETURN_IF_ERROR(parse_json_path(i, pi));
}
// value is NOT necessary to be deleted since JsonbValue
will not allocate memory
- JsonbValue* value = doc->getValue()->findValue(path,
nullptr);
+ JsonbValue* value =
doc->getValue()->findValue(json_path_list[pi], nullptr);
if (UNLIKELY(!value)) {
writer->writeNull();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]