This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 981586155c [Improvement][json] optimize performance of json_extract by reusing json path object (#23430)
981586155c is described below

commit 981586155c024d901802c1271c3403c73b1e7346
Author: Kang <[email protected]>
AuthorDate: Sun Aug 27 17:39:10 2023 +0800

    [Improvement][json] optimize performance of json_extract by reusing json path object (#23430)
    
    * reuse json path to speed up json function
    
    * fix typo
    
    * clang format
    
    * path reentry safe
    
    * fix compile error
    
    * fix bug of continue
---
 be/src/vec/functions/function_jsonb.cpp | 93 ++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 41 deletions(-)

diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp
index bd90cbf35a..86988c304b 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -517,6 +517,48 @@ public:
         auto writer = std::make_unique<JsonbWriter>();
         std::unique_ptr<JsonbToJson> formater;
 
+        // reuseable json path list, espacially for const path
+        std::vector<JsonbPath> json_path_list;
+        json_path_list.resize(rdata_columns.size());
+
+        // lambda function to parse json path for row i and path pi
+        auto parse_json_path = [&](size_t i, size_t pi) -> Status {
+            const ColumnString* path_col = rdata_columns[pi];
+            const ColumnString::Chars& rdata = path_col->get_chars();
+            const ColumnString::Offsets& roffsets = path_col->get_offsets();
+            size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1];
+            size_t r_size = roffsets[index_check_const(i, path_const[pi])] - 
r_off;
+            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
+
+            JsonbPath path;
+            if (!path.seek(r_raw, r_size)) {
+                return Status::InvalidArgument(
+                        "Json path error: {} for value: {}",
+                        
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                        std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                         rdata.size()));
+            }
+
+            // if not valid json path , should return error message to user
+            if (is_invalid_json_path) {
+                return Status::InvalidArgument(
+                        "Json path error: {} for value: {}",
+                        
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
+                        std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
+                                         rdata.size()));
+            }
+
+            json_path_list[pi] = std::move(path);
+
+            return Status::OK();
+        };
+
+        for (size_t pi = 0; pi < rdata_columns.size(); pi++) {
+            if (path_const[pi]) {
+                RETURN_IF_ERROR(parse_json_path(0, pi));
+            }
+        }
+
         for (size_t i = 0; i < input_rows_count; ++i) {
             if (null_map[i]) {
                 StringOP::push_null_string(i, res_data, res_offsets, null_map);
@@ -526,61 +568,30 @@ public:
             size_t l_size = loffsets[index_check_const(i, json_data_const)] - 
l_off;
             const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
             if (rdata_columns.size() == 1) { // just return origin value
-                const ColumnString* path_col = rdata_columns[0];
-                const ColumnString::Chars& rdata = path_col->get_chars();
-                const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
-                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 
1];
-                size_t r_size = roffsets[index_check_const(i, path_const[0])] 
- r_off;
-                const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
-
-                JsonbPath path;
-                if (!path.seek(r_raw, r_size)) {
-                    return Status::InvalidArgument(
-                            "Json path error: {} for value: {}",
-                            
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
-                            std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
-                                             rdata.size()));
+                if (!path_const[0]) {
+                    RETURN_IF_ERROR(parse_json_path(i, 0));
                 }
-
                 inner_loop_impl(i, res_data, res_offsets, null_map, writer, 
formater, l_raw, l_size,
-                                path);
+                                json_path_list[0]);
             } else { // will make array string to user
                 writer->reset();
                 writer->writeStartArray();
+
+                // doc is NOT necessary to be deleted since JsonbDocument will 
not allocate memory
+                JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
+
                 for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
-                    const ColumnString* path_col = rdata_columns[pi];
-                    const ColumnString::Chars& rdata = path_col->get_chars();
-                    const ColumnString::Offsets& roffsets = 
path_col->get_offsets();
-                    size_t r_off = roffsets[index_check_const(i, 
path_const[pi]) - 1];
-                    size_t r_size = roffsets[index_check_const(i, 
path_const[pi])] - r_off;
-                    const char* r_raw = reinterpret_cast<const 
char*>(&rdata[r_off]);
-                    // doc is NOT necessary to be deleted since JsonbDocument 
will not allocate memory
-                    JsonbDocument* doc = JsonbDocument::createDocument(l_raw, 
l_size);
                     if (UNLIKELY(!doc || !doc->getValue())) {
                         writer->writeNull();
                         continue;
                     }
 
-                    JsonbPath path;
-                    if (!path.seek(r_raw, r_size)) {
-                        return Status::InvalidArgument(
-                                "Json path error: {} for value: {}",
-                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
-                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
-                                                 rdata.size()));
-                    }
-
-                    // if not valid json path , should return error message to 
user
-                    if (is_invalid_json_path) {
-                        return Status::InvalidArgument(
-                                "Json path error: {} for value: {}",
-                                
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
-                                std::string_view(reinterpret_cast<const 
char*>(rdata.data()),
-                                                 rdata.size()));
+                    if (!path_const[pi]) {
+                        RETURN_IF_ERROR(parse_json_path(i, pi));
                     }
 
                     // value is NOT necessary to be deleted since JsonbValue 
will not allocate memory
-                    JsonbValue* value = doc->getValue()->findValue(path, 
nullptr);
+                    JsonbValue* value = 
doc->getValue()->findValue(json_path_list[pi], nullptr);
 
                     if (UNLIKELY(!value)) {
                         writer->writeNull();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to