github-actions[bot] commented on code in PR #63309:
URL: https://github.com/apache/doris/pull/63309#discussion_r3251880082


##########
be/src/exprs/function/function_jsonb.cpp:
##########
@@ -1162,6 +1163,329 @@ struct JsonbExtractStringImpl {
     } //function
 };
 
+struct JsonbExtractStrictString {
+    static constexpr auto name = "jsonb_extract_string";
+    static constexpr auto alias = "json_extract_string";
+
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+private:
+    static ALWAYS_INLINE void inner_loop_impl(size_t i, ColumnString::Chars& res_data,
+                                              ColumnString::Offsets& res_offsets, NullMap& null_map,
+                                              const char* l_raw, size_t l_size, JsonbPath& path) {
+        const JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(l_raw, l_size, &doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
+            StringOP::push_null_string(i, res_data, res_offsets, null_map);
+            return;
+        }
+
+        auto find_result = doc->getValue()->findValue(path);
+        const auto* value = find_result.value;
+        if (UNLIKELY(!value) || !value->isString()) {
+            StringOP::push_null_string(i, res_data, res_offsets, null_map);
+            return;
+        }
+
+        const auto* str_value = value->unpack<JsonbStringVal>();
+        StringOP::push_value_string(std::string_view(str_value->getBlob(), str_value->length()), i,
+                                    res_data, res_offsets);
+    }
+
+public:
+    static Status vector_vector_v2(FunctionContext* context, const ColumnString::Chars& ldata,
+                                   const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
+                                   const bool& json_data_const,
+                                   const std::vector<const ColumnString*>& rdata_columns,
+                                   const std::vector<const NullMap*>& r_null_maps,
+                                   const std::vector<bool>& path_const,
+                                   ColumnString::Chars& res_data,
+                                   ColumnString::Offsets& res_offsets, NullMap& null_map) {
+        DCHECK_EQ(rdata_columns.size(), 1);
+        const size_t input_rows_count = null_map.size();
+        res_offsets.resize(input_rows_count);
+
+        JsonbPath json_path;
+        auto parse_json_path = [&](size_t i) -> Status {
+            const auto index = index_check_const(i, path_const[0]);
+
+            const ColumnString* path_col = rdata_columns[0];
+            const ColumnString::Chars& rdata = path_col->get_chars();
+            const ColumnString::Offsets& roffsets = path_col->get_offsets();
+            size_t r_off = roffsets[index - 1];
+            size_t r_size = roffsets[index] - r_off;
+            const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
+
+            JsonbPath path;
+            if (!path.seek(r_raw, r_size)) {
+                return Status::InvalidArgument("Json path error: Invalid Json Path for value: {}",
+                                               std::string_view(r_raw, r_size));
+            }
+
+            json_path = std::move(path);
+            return Status::OK();
+        };
+
+        if (path_const[0]) {
+            if (r_null_maps[0] && (*r_null_maps[0])[0]) {
+                for (size_t i = 0; i < input_rows_count; ++i) {
+                    StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                }
+                return Status::OK();
+            }
+            RETURN_IF_ERROR(parse_json_path(0));
+        }
+
+        res_data.reserve(ldata.size());
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (null_map[i]) {
+                continue;
+            }
+
+            const auto data_index = index_check_const(i, json_data_const);
+            if (l_null_map && (*l_null_map)[data_index]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+
+            const auto path_index = index_check_const(i, path_const[0]);
+            if (r_null_maps[0] && (*r_null_maps[0])[path_index]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+
+            if (!path_const[0]) {
+                RETURN_IF_ERROR(parse_json_path(i));
+            }
+
+            size_t l_off = loffsets[data_index - 1];
+            size_t l_size = loffsets[data_index] - l_off;
+            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
+            inner_loop_impl(i, res_data, res_offsets, null_map, l_raw, l_size, json_path);
+        }
+        return Status::OK();
+    }
+};
+
+template <typename Derived, typename ResultDataType, typename ResultColumnType>
+struct JsonbExtractTypedNumberImpl {
+    using ReturnType = ResultDataType;
+    using ColumnType = ResultColumnType;
+    using Container = typename ColumnType::Container;
+
+private:
+    static ALWAYS_INLINE void insert_null(size_t i, Container& res, NullMap& null_map) {
+        res[i] = 0;
+        null_map[i] = 1;
+    }
+
+    static ALWAYS_INLINE void inner_loop_impl(size_t i, Container& res, NullMap& null_map,
+                                              const char* l_raw_str, size_t l_str_size,
+                                              JsonbPath& path) {
+        if (null_map[i]) {
+            res[i] = 0;
+            return;
+        }
+
+        const JsonbDocument* doc = nullptr;
+        auto st = JsonbDocument::checkAndCreateDocument(l_raw_str, l_str_size, &doc);
+        if (!st.ok() || !doc || !doc->getValue()) [[unlikely]] {
+            insert_null(i, res, null_map);
+            return;
+        }
+
+        auto find_result = doc->getValue()->findValue(path);
+        const auto* value = find_result.value;
+        if (UNLIKELY(!value) || !Derived::extract_value(value, res[i])) {
+            insert_null(i, res, null_map);
+        }
+    }
+
+public:
+    static Status vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
+                                const ColumnString::Offsets& loffsets, const NullMap* l_null_map,
+                                const ColumnString::Chars& rdata,
+                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
+                                Container& res, NullMap& null_map) {
+        size_t size = loffsets.size();
+        res.resize(size);
+
+        for (size_t i = 0; i < loffsets.size(); i++) {
+            if ((l_null_map && (*l_null_map)[i]) || (r_null_map && (*r_null_map)[i])) {
+                insert_null(i, res, null_map);
+                continue;
+            }
+
+            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+            int l_str_size = loffsets[i] - loffsets[i - 1];
+
+            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+            int r_str_size = roffsets[i] - roffsets[i - 1];
+
+            JsonbPath path;
+            if (!path.seek(r_raw_str, r_str_size)) {
+                return Status::InvalidArgument(
+                        "Json path error: Invalid Json Path for value: {} at row: {}",
+                        std::string_view(r_raw_str, r_str_size), i);
+            }
+
+            inner_loop_impl(i, res, null_map, l_raw_str, l_str_size, path);
+        }
+        return Status::OK();
+    }
+
+    static Status scalar_vector(FunctionContext* context, const StringRef& ldata,
+                                const ColumnString::Chars& rdata,
+                                const ColumnString::Offsets& roffsets, const NullMap* r_null_map,
+                                Container& res, NullMap& null_map) {
+        size_t size = roffsets.size();
+        res.resize(size);

Review Comment:
   This sizes the result from the unwrapped path column, which has only one row
when both the JSON argument and the path argument are `ColumnConst`.
`FunctionJsonbExtract::execute_impl` checks `jsonb_data_const` first and calls
this `scalar_vector` branch even when `path_const[0]` is also true. As a result,
a query that evaluates `jsonb_extract_int(jsonb_parse('{"a":1}'), '$.a')` over a
multi-row block can build a nested result column of size 1 and pair it with a
null map sized to `input_rows_count`. That can either trip column-size
invariants or return only the first row. Please handle the const/const case
explicitly, or pass `input_rows_count` into this branch and fill that many
result rows.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to