This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 21b9fdfbe9f [fix](be) Reject super wildcard path in json keys (#63300)
21b9fdfbe9f is described below
commit 21b9fdfbe9f3becb018f4978ccb003a51e0f2a70
Author: Jerry Hu <[email protected]>
AuthorDate: Mon May 18 20:49:57 2026 +0800
[fix](be) Reject super wildcard path in json keys (#63300)
### What problem does this PR solve?
Issue Number: None
Related PR: None
Problem Summary: `json_keys`/`jsonb_keys` rejected ordinary wildcard
paths but allowed super wildcard paths such as `$**.a` to fall through
and return NULL. The function only supports reading keys from a single
object, so super wildcard paths should fail with the same
INVALID_JSON_PATH error as other wildcard paths.
### Release note
Reject unsupported super wildcard JSON paths in json_keys/jsonb_keys
instead of returning NULL.
### Check List (For Author)
- Test: Unit Test / Regression test / Static check
- Unit Test: ./run-be-ut.sh --run --filter=FunctionJsonbTEST.JsonbKeysRejectSuperWildcardPath
- Regression test: Added
regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy
(not run locally; no FE/BE cluster was listening on the configured
regression ports)
- Static check: build-support/check-format.sh
- Static check: build-support/run-clang-tidy.sh --build-dir be/ut_build_ASAN
(failed due to pre-existing function_jsonb.cpp complexity diagnostics
and toolchain header/NOLINTEND errors)
- Behavior changed: Yes (json_keys/jsonb_keys now return
INVALID_JSON_PATH for `$**` paths instead of NULL)
- Does this need documentation: No
---
be/src/exprs/function/function_jsonb.cpp | 4 +-
be/test/exprs/function/function_jsonb_test.cpp | 74 +++++++++++++++++++++-
.../jsonb_p0/test_jsonb_keys_invalid_path.groovy | 35 ++++++++++
3 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/be/src/exprs/function/function_jsonb.cpp
b/be/src/exprs/function/function_jsonb.cpp
index 40c10cb4e61..1f11e496877 100644
--- a/be/src/exprs/function/function_jsonb.cpp
+++ b/be/src/exprs/function/function_jsonb.cpp
@@ -568,7 +568,7 @@ private:
r_raw_ref.to_string());
}
- if (const_path.is_wildcard()) {
+ if (const_path.is_wildcard() || const_path.is_supper_wildcard()) {
return Status::InvalidJsonPath(
"In this situation, path expressions may not contain
the * and ** tokens "
"or an array range.");
@@ -610,7 +610,7 @@ private:
std::string_view(data.data, data.size), i);
}
- if (path.is_wildcard()) {
+ if (path.is_wildcard() || path.is_supper_wildcard()) {
return Status::InvalidJsonPath(
"In this situation, path expressions may not
contain the * and ** "
"tokens "
diff --git a/be/test/exprs/function/function_jsonb_test.cpp
b/be/test/exprs/function/function_jsonb_test.cpp
index c321fffb4f4..25377a0bce9 100644
--- a/be/test/exprs/function/function_jsonb_test.cpp
+++ b/be/test/exprs/function/function_jsonb_test.cpp
@@ -16,13 +16,15 @@
// under the License.
#include <gtest/gtest.h>
-#include <stdint.h>
+#include <cstdint>
#include <memory>
#include <string>
+#include <vector>
#include "common/status.h"
#include "core/column/column_const.h"
+#include "core/data_type/data_type_array.h"
#include "core/data_type/data_type_jsonb.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
@@ -41,6 +43,67 @@
namespace doris {
using namespace ut_type;
+namespace {
+
+ColumnPtr create_jsonb_column(size_t rows) {
+ auto jsonb_type = std::make_shared<DataTypeJsonb>();
+ auto jsonb_column = jsonb_type->create_column();
+ for (size_t i = 0; i < rows; ++i) {
+ EXPECT_TRUE(insert_cell(jsonb_column, jsonb_type,
STRING(R"({"a":{"b":1}})")));
+ }
+ return jsonb_column;
+}
+
+ColumnPtr create_path_column(const std::vector<std::string>& paths) {
+ auto path_type = std::make_shared<DataTypeString>();
+ auto path_column = path_type->create_column();
+ for (const auto& path : paths) {
+ EXPECT_TRUE(insert_cell(path_column, path_type, STRING(path)));
+ }
+ return path_column;
+}
+
+ColumnPtr create_const_path_column(const std::string& path, size_t rows) {
+ return ColumnConst::create(create_path_column({path}), rows);
+}
+
+Status execute_json_keys_with_path(ColumnPtr path_column, size_t rows) {
+ auto jsonb_type = std::make_shared<DataTypeJsonb>();
+ auto path_type = std::make_shared<DataTypeString>();
+ auto return_type = make_nullable(
+
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeString>())));
+
+ Block block;
+ block.insert({create_jsonb_column(rows), jsonb_type, "jsonb"});
+ block.insert({std::move(path_column), path_type, "path"});
+
+ FunctionBasePtr func = SimpleFunctionFactory::instance().get_function(
+ "json_keys", block.get_columns_with_type_and_name(), return_type);
+ DORIS_CHECK(func != nullptr);
+
+ FunctionUtils fn_utils(return_type, {jsonb_type, path_type}, false);
+ auto* fn_ctx = fn_utils.get_fn_ctx();
+ RETURN_IF_ERROR(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
+ RETURN_IF_ERROR(func->open(fn_ctx, FunctionContext::THREAD_LOCAL));
+
+ block.insert({nullptr, return_type, "result"});
+ auto st = func->execute(fn_ctx, block, {0, 1}, block.columns() - 1, rows);
+
+ static_cast<void>(func->close(fn_ctx, FunctionContext::THREAD_LOCAL));
+ static_cast<void>(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
+ return st;
+}
+
+void expect_invalid_json_keys_super_wildcard_path(const Status& st) {
+ EXPECT_EQ(st.code(), ErrorCode::INVALID_JSON_PATH) << st.to_string();
+ EXPECT_NE(st.to_string().find(
+ "path expressions may not contain the * and ** tokens or
an array range"),
+ std::string::npos)
+ << st.to_string();
+}
+
+} // namespace
+
TEST(FunctionJsonbTEST, JsonbParseTest) {
std::string func_name = "json_parse";
InputTypeSet input_types = {Nullable {PrimitiveType::TYPE_VARCHAR}};
@@ -186,6 +249,15 @@ TEST(FunctionJsonbTEST, JsonbParseErrorToValueTest) {
ASSERT_EQ(st.code(), ErrorCode::INVALID_ARGUMENT) << st.to_string();
}
+TEST(FunctionJsonbTEST, JsonbKeysRejectSuperWildcardPath) {
+ auto const_path_status =
execute_json_keys_with_path(create_const_path_column("$**.a", 1), 1);
+ expect_invalid_json_keys_super_wildcard_path(const_path_status);
+
+ auto non_const_path_status =
+ execute_json_keys_with_path(create_path_column({"$.a", "$**.a"}),
2);
+ expect_invalid_json_keys_super_wildcard_path(non_const_path_status);
+}
+
TEST(FunctionJsonbTEST, JsonbExtractTest) {
std::string func_name = "jsonb_extract";
InputTypeSet input_types = {PrimitiveType::TYPE_JSONB,
PrimitiveType::TYPE_VARCHAR};
diff --git
a/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy
b/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy
new file mode 100644
index 00000000000..8ba7d2f9c1b
--- /dev/null
+++ b/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_jsonb_keys_invalid_path", "p0") {
+ test {
+ sql """
+ SELECT json_keys(CAST('{"a":{"b":1}}' AS JSONB), '\$**.a');
+ """
+ exception "In this situation, path expressions may not contain the *
and ** tokens or an array range."
+ }
+
+ test {
+ sql """
+ SELECT json_keys(j, p)
+ FROM (
+ SELECT CAST('{"a":{"b":1}}' AS JSONB) AS j, '\$**.a' AS p
+ ) t;
+ """
+ exception "In this situation, path expressions may not contain the *
and ** tokens or an array range."
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]