This is an automated email from the ASF dual-hosted git repository. mrhhsg pushed a commit to branch fix-json-strict-extract in repository https://gitbox.apache.org/repos/asf/doris.git
commit ab4c47c9aa8ca22563434743b7dbd9d2a194fd88 Author: Hu Shenggang <[email protected]> AuthorDate: Sat May 16 22:41:45 2026 +0800 [fix](json) Address strict JSON review comments ### What problem does this PR solve? Issue Number: None Related PR: #63309 Problem Summary: Avoid the extra raw_json pre-pass in strict JSONB parsing, and keep const JSON plus const path extract results sized to the input block. ### Release note None ### Check List (For Author) - Test: Unit Test - ./run-be-ut.sh --run --filter=JsonBinaryValueTest.TestValidation:FunctionJsonbTEST.JsonbParseTest:FunctionJsonbTEST.JsonbParseErrorToNullTest:FunctionJsonbTEST.JsonValidStrictTest:FunctionJsonbTEST.JsonExtractConstConstMultiRow - build-support/check-format.sh - build-support/run-clang-tidy.sh --build-dir be/ut_build_ASAN (failed due to existing complexity/NOLINT diagnostics and local toolchain stddef.h lookup) - Behavior changed: Yes. Const JSON plus const path non-string JSONB extract returns a correctly sized repeated result. 
- Does this need documentation: No --- be/src/exprs/function/function_jsonb.cpp | 14 +++++++ be/src/util/jsonb_parser_simd.h | 23 ----------- be/test/exprs/function/function_jsonb_test.cpp | 55 +++++++++++++++++++++++++- 3 files changed, 68 insertions(+), 24 deletions(-) diff --git a/be/src/exprs/function/function_jsonb.cpp b/be/src/exprs/function/function_jsonb.cpp index 40c10cb4e61..ce1cbfc3c37 100644 --- a/be/src/exprs/function/function_jsonb.cpp +++ b/be/src/exprs/function/function_jsonb.cpp @@ -450,6 +450,20 @@ public: return create_all_null_result(); } + if (path_const[0]) { + auto const_null_map = ColumnUInt8::create(1, 0); + auto const_res = Impl::ColumnType::create(); + RETURN_IF_ERROR(Impl::scalar_vector( + context, jsonb_data_column->get_data_at(0), rdata, roffsets, + path_null_maps[0], const_res->get_data(), const_null_map->get_data())); + DCHECK_EQ(const_res->size(), 1); + auto nullable_column = + ColumnNullable::create(std::move(const_res), std::move(const_null_map)); + block.get_by_position(result).column = + ColumnConst::create(std::move(nullable_column), input_rows_count); + return Status::OK(); + } + RETURN_IF_ERROR(Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets, path_null_maps[0], res->get_data(), null_map->get_data())); diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h index 7632eb45fba..202ecb8d8b0 100644 --- a/be/src/util/jsonb_parser_simd.h +++ b/be/src/util/jsonb_parser_simd.h @@ -93,29 +93,6 @@ struct JsonbParser { simdjson::padded_string json_str {pch, len}; simdjson::ondemand::document doc = simdjson_parser.iterate(json_str); - auto is_json_whitespace = [](char c) { - return c == ' ' || c == '\t' || c == '\n' || c == '\r'; - }; - const char* json_begin = json_str.data(); - const char* json_end = json_str.data() + len; - while (json_begin < json_end && is_json_whitespace(*json_begin)) { - ++json_begin; - } - while (json_end > json_begin && is_json_whitespace(*(json_end - 1))) { 
- --json_end; - } - - std::string_view raw_json; - simdjson::error_code raw_res = doc.raw_json().get(raw_json); - if (raw_res != simdjson::SUCCESS) { - return Status::InvalidArgument(fmt::format("simdjson raw_json failed: {}", - simdjson::error_message(raw_res))); - } - if (raw_json.data() != json_begin || raw_json.data() + raw_json.size() != json_end) { - return Status::InvalidArgument("simdjson parse exception: trailing content"); - } - doc.rewind(); - // simdjson process top level primitive types specially // so some repeated code here switch (doc.type()) { diff --git a/be/test/exprs/function/function_jsonb_test.cpp b/be/test/exprs/function/function_jsonb_test.cpp index 7db6b9febbd..9d7b96f02e4 100644 --- a/be/test/exprs/function/function_jsonb_test.cpp +++ b/be/test/exprs/function/function_jsonb_test.cpp @@ -16,8 +16,8 @@ // under the License. #include <gtest/gtest.h> -#include <stdint.h> +#include <cstdint> #include <memory> #include <string> @@ -384,6 +384,59 @@ TEST(FunctionJsonbTEST, JsonExtractCheckArg) { ASSERT_EQ(st.code(), ErrorCode::INVALID_ARGUMENT); } +TEST(FunctionJsonbTEST, JsonExtractConstConstMultiRow) { + constexpr size_t input_rows_count = 3; + auto json_data_type = std::make_shared<DataTypeJsonb>(); + auto path_data_type = std::make_shared<DataTypeString>(); + auto return_type = make_nullable(std::make_shared<DataTypeUInt8>()); + + JsonbWriter writer; + ASSERT_TRUE(writer.writeStartObject()); + ASSERT_TRUE(writer.writeKey("a")); + ASSERT_TRUE(writer.writeNull()); + ASSERT_TRUE(writer.writeEndObject()); + + auto json_column = json_data_type->create_column(); + json_column->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); + + auto path_column = path_data_type->create_column(); + path_column->insert_data("$.a", 3); + + Block block; + block.insert({ColumnConst::create(std::move(json_column), input_rows_count), json_data_type, + "json_col"}); + block.insert({ColumnConst::create(std::move(path_column), input_rows_count), 
path_data_type, + "path_col"}); + + FunctionBasePtr func = SimpleFunctionFactory::instance().get_function( + "json_extract_isnull", block.get_columns_with_type_and_name(), return_type); + ASSERT_TRUE(func != nullptr); + + FunctionUtils fn_utils(return_type, {json_data_type, path_data_type}, 0); + auto* fn_ctx = fn_utils.get_fn_ctx(); + auto st = func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL); + ASSERT_TRUE(st.ok()) << "open failed: " << st.to_string(); + st = func->open(fn_ctx, FunctionContext::THREAD_LOCAL); + ASSERT_TRUE(st.ok()) << "open failed: " << st.to_string(); + + block.insert({nullptr, return_type, "result"}); + auto result = block.columns() - 1; + st = func->execute(fn_ctx, block, {0, 1}, result, input_rows_count); + ASSERT_TRUE(st.ok()) << "execute failed: " << st.to_string(); + + auto result_column = block.get_by_position(result).column->convert_to_full_column_if_const(); + ASSERT_EQ(result_column->size(), input_rows_count); + const auto& result_nullable = assert_cast<const ColumnNullable&>(*result_column); + const auto& result_data = assert_cast<const ColumnUInt8&>(result_nullable.get_nested_column()); + for (size_t i = 0; i < input_rows_count; ++i) { + EXPECT_FALSE(result_nullable.is_null_at(i)); + EXPECT_EQ(result_data.get_data()[i], 1); + } + + static_cast<void>(func->close(fn_ctx, FunctionContext::THREAD_LOCAL)); + static_cast<void>(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); +} + TEST(FunctionJsonbTEST, JsonParseCheckArg) { ColumnsWithTypeAndName args; args.emplace_back( --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
