This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 52579cbbdcc [refactor](be) Remove unused JSON helpers (#64002)
52579cbbdcc is described below
commit 52579cbbdcc71fe88fcb294af931672cead78aad
Author: Jerry Hu <[email protected]>
AuthorDate: Wed Jun 3 11:04:39 2026 +0800
[refactor](be) Remove unused JSON helpers (#64002)
### What problem does this PR solve?
Issue Number: None
Problem Summary:
Remove dead helper code from BE JSON-related implementations:
- Remove the unused `ExecuteReducer` template and its `JsonParser`/path
parsing helper chain from `function_json.cpp`.
- Remove the unused `convert_jsonb_to_rapidjson` declaration/definition
after its only remaining caller was removed.
- Remove the commented-out test helper that referenced the deleted
conversion helper.
- Clean up now-unused includes and make small style cleanups around the
touched code.
This is an internal cleanup only and does not change JSON function
behavior.
### Release note
None
### Check List (For Author)
- Test: Manual test
- `ninja -C be/ut_build_ASAN
src/core/CMakeFiles/Core.dir/data_type_serde/data_type_jsonb_serde.cpp.o
src/exprs/CMakeFiles/Exprs.dir/function/function_json.cpp.o
test/CMakeFiles/doris_be_test.dir/core/column/column_variant_test.cpp.o`
- `build-support/clang-format.sh`
- `build-support/check-format.sh`
- `git diff --check`
- Behavior changed: No
- Does this need documentation: No
---
.../core/data_type_serde/data_type_jsonb_serde.cpp | 75 ------
.../core/data_type_serde/data_type_jsonb_serde.h | 3 -
be/src/exprs/function/function_json.cpp | 289 +--------------------
be/test/core/column/column_variant_test.cpp | 16 --
4 files changed, 5 insertions(+), 378 deletions(-)
diff --git a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
index 13aec081feb..bd8dfdfd312 100644
--- a/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_jsonb_serde.cpp
@@ -17,10 +17,6 @@
#include "core/data_type_serde/data_type_jsonb_serde.h"
-#include <rapidjson/document.h>
-#include <rapidjson/stringbuffer.h>
-#include <rapidjson/writer.h>
-
#include <cstddef>
#include <cstdint>
#include <memory>
@@ -265,77 +261,6 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn&
column, const PValues& a
return Status::OK();
}
-void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value&
target,
- rapidjson::Document::AllocatorType& allocator)
{
- // convert type of jsonb to rapidjson::Value
- switch (val.type) {
- case JsonbType::T_True:
- target.SetBool(true);
- break;
- case JsonbType::T_False:
- target.SetBool(false);
- break;
- case JsonbType::T_Null:
- target.SetNull();
- break;
- case JsonbType::T_Float:
- target.SetFloat(val.unpack<JsonbFloatVal>()->val());
- break;
- case JsonbType::T_Double:
- target.SetDouble(val.unpack<JsonbDoubleVal>()->val());
- break;
- case JsonbType::T_Int64:
- target.SetInt64(val.unpack<JsonbInt64Val>()->val());
- break;
- case JsonbType::T_Int32:
- target.SetInt(val.unpack<JsonbInt32Val>()->val());
- break;
- case JsonbType::T_Int16:
- target.SetInt(val.unpack<JsonbInt16Val>()->val());
- break;
- case JsonbType::T_Int8:
- target.SetInt(val.unpack<JsonbInt8Val>()->val());
- break;
- case JsonbType::T_String:
- target.SetString(val.unpack<JsonbStringVal>()->getBlob(),
- val.unpack<JsonbStringVal>()->getBlobLen());
- break;
- case JsonbType::T_Array: {
- target.SetArray();
- const ArrayVal& array = *val.unpack<ArrayVal>();
- if (array.numElem() == 0) {
- target.SetNull();
- break;
- }
- target.Reserve(array.numElem(), allocator);
- for (auto it = array.begin(); it != array.end(); ++it) {
- rapidjson::Value array_val;
- convert_jsonb_to_rapidjson(*static_cast<const JsonbValue*>(it),
array_val, allocator);
- target.PushBack(array_val, allocator);
- }
- break;
- }
- case JsonbType::T_Object: {
- target.SetObject();
- const ObjectVal& obj = *val.unpack<ObjectVal>();
- for (auto it = obj.begin(); it != obj.end(); ++it) {
- rapidjson::Value obj_val;
- convert_jsonb_to_rapidjson(*it->value(), obj_val, allocator);
- target.AddMember(rapidjson::GenericStringRef(it->getKeyStr(),
it->klen()), obj_val,
- allocator);
- }
- break;
- }
- case JsonbType::T_Int128: {
-
target.SetUint64(static_cast<uint64_t>(val.unpack<JsonbInt128Val>()->val()));
- break;
- }
- default:
- CHECK(false) << "unkown type " << static_cast<int>(val.type);
- break;
- }
-}
-
Status DataTypeJsonbSerDe::serialize_column_to_jsonb(const IColumn&
from_column, int64_t row_num,
JsonbWriter& writer)
const {
const auto& jsonb_binary = assert_cast<const
ColumnString&>(from_column).get_data_at(row_num);
diff --git a/be/src/core/data_type_serde/data_type_jsonb_serde.h
b/be/src/core/data_type_serde/data_type_jsonb_serde.h
index 3a243a1c1a8..8d14c6d254a 100644
--- a/be/src/core/data_type_serde/data_type_jsonb_serde.h
+++ b/be/src/core/data_type_serde/data_type_jsonb_serde.h
@@ -90,7 +90,4 @@ public:
void to_string(const IColumn& column, size_t row_num, BufferWritable& bw,
const FormatOptions& options) const override;
};
-
-void convert_jsonb_to_rapidjson(const JsonbValue& val, rapidjson::Value&
target,
- rapidjson::Document::AllocatorType& allocator);
} // namespace doris
diff --git a/be/src/exprs/function/function_json.cpp
b/be/src/exprs/function/function_json.cpp
index bb2ea13b7b1..133cb3fb392 100644
--- a/be/src/exprs/function/function_json.cpp
+++ b/be/src/exprs/function/function_json.cpp
@@ -18,24 +18,12 @@
#include <glog/logging.h>
#include <rapidjson/allocators.h>
#include <rapidjson/document.h>
-#include <rapidjson/encodings.h>
-#include <rapidjson/pointer.h>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
-#include <re2/re2.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <algorithm>
-#include <boost/iterator/iterator_facade.hpp>
-#include <boost/token_functions.hpp>
-#include <boost/tokenizer.hpp>
+
#include <memory>
-#include <string>
#include <string_view>
-#include <type_traits>
#include <utility>
#include <vector>
@@ -57,281 +45,14 @@
#include "core/string_ref.h"
#include "core/types.h"
#include "core/value/jsonb_value.h"
-#include "exec/common/stringop_substring.h"
-#include "exec/common/template_helpers.hpp"
-#include "exprs/aggregate/aggregate_function.h"
#include "exprs/function/function.h"
-#include "exprs/function/function_totype.h"
#include "exprs/function/simple_function_factory.h"
-#include "exprs/json_functions.h"
-#include "util/string_parser.hpp"
-#include "util/string_util.h"
namespace doris {
class FunctionContext;
} // namespace doris
namespace doris {
-static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?");
-
-template <typename T, typename U>
-void char_split(std::vector<T>& res, const U& var, char p) {
- int start = 0;
- int pos = start;
- int end = var.length();
- while (pos < end) {
- while (var[pos] != p && pos < end) {
- pos++;
- }
- res.emplace_back(&var[start], pos - start);
- pos++;
- start = pos;
- }
-}
-
-// T = std::vector<std::string>
-// TODO: update RE2 to support std::vector<std::string_view>
-template <typename T>
-void get_parsed_paths(const T& path_exprs, std::vector<JsonPath>*
parsed_paths) {
- if (path_exprs.empty()) {
- return;
- }
-
- if (path_exprs[0] != "$") {
- parsed_paths->emplace_back("", -1, false);
- } else {
- parsed_paths->emplace_back("$", -1, true);
- }
-
- for (int i = 1; i < path_exprs.size(); i++) {
- std::string col;
- std::string index;
- if (UNLIKELY(!RE2::FullMatch(path_exprs[i], JSON_PATTERN, &col,
&index))) {
- parsed_paths->emplace_back("", -1, false);
- } else {
- int idx = -1;
- if (!index.empty()) {
- if (index == "*") {
- idx = -2;
- } else {
- idx = atoi(index.c_str());
- }
- }
- parsed_paths->emplace_back(col, idx, true);
- }
- }
-}
-
-rapidjson::Value* NO_SANITIZE_UNDEFINED
-match_value(const std::vector<JsonPath>& parsed_paths, rapidjson::Value*
document,
- rapidjson::Document::AllocatorType& mem_allocator, bool
is_insert_null = false) {
- rapidjson::Value* root = document;
- rapidjson::Value* array_obj = nullptr;
- for (int i = 1; i < parsed_paths.size(); i++) {
- if (root == nullptr || root->IsNull()) {
- return nullptr;
- }
-
- if (UNLIKELY(!parsed_paths[i].is_valid)) {
- return nullptr;
- }
-
- const std::string& col = parsed_paths[i].key;
- int index = parsed_paths[i].idx;
- if (LIKELY(!col.empty())) {
- if (root->IsObject()) {
- if (!root->HasMember(col.c_str())) {
- return nullptr;
- } else {
- root = &((*root)[col.c_str()]);
- }
- } else {
- // root is not a nested type, return NULL
- return nullptr;
- }
- }
-
- if (UNLIKELY(index != -1)) {
- // judge the rapidjson:Value, which base the top's result,
- // if not array return NULL;else get the index value from the array
- if (root->IsArray()) {
- if (root->IsNull()) {
- return nullptr;
- } else if (index == -2) {
- // [*]
- array_obj = static_cast<rapidjson::Value*>(
- mem_allocator.Malloc(sizeof(rapidjson::Value)));
- array_obj->SetArray();
-
- for (int j = 0; j < root->Size(); j++) {
- rapidjson::Value v;
- v.CopyFrom((*root)[j], mem_allocator);
- array_obj->PushBack(v, mem_allocator);
- }
- root = array_obj;
- } else if (index >= root->Size()) {
- return nullptr;
- } else {
- root = &((*root)[index]);
- }
- } else {
- return nullptr;
- }
- }
- }
- return root;
-}
-
-template <JsonFunctionType fntype>
-rapidjson::Value* get_json_object(std::string_view json_string,
std::string_view path_string,
- rapidjson::Document* document) {
- std::vector<JsonPath>* parsed_paths;
- std::vector<JsonPath> tmp_parsed_paths;
-
- //Cannot use '\' as the last character, return NULL
- if (path_string.back() == '\\') {
- return nullptr;
- }
-
- std::string fixed_string;
- if (path_string.size() >= 2 && path_string[0] == '$' && path_string[1] !=
'.') {
- // Boost tokenizer requires explicit "." after "$" to correctly
extract JSON path tokens.
- // Without this, expressions like "$[0].key" cannot be properly split.
- // This commit ensures a "." is automatically added after "$" to
maintain consistent token parsing behavior.
- fixed_string = "$.";
- fixed_string += path_string.substr(1);
- path_string = fixed_string;
- }
-
- try {
-#ifdef USE_LIBCPP
- std::string s(path_string);
- auto tok = get_json_token(s);
-#else
- auto tok = get_json_token(path_string);
-#endif
- std::vector<std::string> paths(tok.begin(), tok.end());
- get_parsed_paths(paths, &tmp_parsed_paths);
- if (tmp_parsed_paths.empty()) {
- return document;
- }
- } catch (boost::escaped_list_error&) {
- // meet unknown escape sequence, example '$.name\k'
- return nullptr;
- }
-
- parsed_paths = &tmp_parsed_paths;
-
- if (!(*parsed_paths)[0].is_valid) {
- return nullptr;
- }
-
- if (UNLIKELY((*parsed_paths).size() == 1)) {
- if (fntype == JSON_FUN_STRING) {
- document->SetString(json_string.data(),
-
cast_set<rapidjson::SizeType>(json_string.size()),
- document->GetAllocator());
- } else {
- return document;
- }
- }
-
- document->Parse(json_string.data(), json_string.size());
- if (UNLIKELY(document->HasParseError())) {
- // VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset()
<< ": "
- // << GetParseError_En(document->GetParseError());
- return nullptr;
- }
-
- return match_value(*parsed_paths, document, document->GetAllocator());
-}
-
-template <int flag>
-struct JsonParser {
- //string
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- value.SetString(data.data, cast_set<rapidjson::SizeType>(data.size),
allocator);
- }
-};
-
-template <>
-struct JsonParser<'0'> {
- // null
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- value.SetNull();
- }
-};
-
-template <>
-struct JsonParser<'1'> {
- // bool
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- DCHECK(data.size == 1 || strncmp(data.data, "true", 4) == 0 ||
- strncmp(data.data, "false", 5) == 0);
- value.SetBool(*data.data == '1' || *data.data == 't');
- }
-};
-
-template <>
-struct JsonParser<'2'> {
- // int
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- value.SetInt(StringParser::string_to_int<int32_t>(data.data,
data.size, &result));
- }
-};
-
-template <>
-struct JsonParser<'3'> {
- // double
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- value.SetDouble(StringParser::string_to_float<double>(data.data,
data.size, &result));
- }
-};
-
-template <>
-struct JsonParser<'4'> {
- // time
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- // remove double quotes, "xxx" -> xxx
- value.SetString(data.data + 1, cast_set<rapidjson::SizeType>(data.size
- 2), allocator);
- }
-};
-
-template <>
-struct JsonParser<'5'> {
- // bigint
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- value.SetInt64(StringParser::string_to_int<int64_t>(data.data,
data.size, &result));
- }
-};
-
-template <>
-struct JsonParser<'7'> {
- // json string
- static void update_value(StringParser::ParseResult& result,
rapidjson::Value& value,
- StringRef data,
rapidjson::Document::AllocatorType& allocator) {
- rapidjson::Document document;
- const JsonbValue* json_val = JsonbDocument::createValue(data.data,
data.size);
- convert_jsonb_to_rapidjson(*json_val, document, allocator);
- value.CopyFrom(document, allocator);
- }
-};
-
-template <int flag, typename Impl>
-struct ExecuteReducer {
- template <typename... TArgs>
- static void run(TArgs&&... args) {
- Impl::template
execute_type<JsonParser<flag>>(std::forward<TArgs>(args)...);
- }
-};
-
struct FunctionJsonQuoteImpl {
static constexpr auto name = "json_quote";
@@ -385,9 +106,9 @@ public:
std::vector<ColumnPtr> column_ptrs; // prevent converted column
destruct
std::vector<const ColumnString*> data_columns;
- for (int i = 0; i < arguments.size(); i++) {
+ for (unsigned int argument : arguments) {
column_ptrs.push_back(
-
block.get_by_position(arguments[i]).column->convert_to_full_column_if_const());
+
block.get_by_position(argument).column->convert_to_full_column_if_const());
data_columns.push_back(assert_cast<const
ColumnString*>(column_ptrs.back().get()));
}
@@ -515,8 +236,8 @@ public:
auto null_map = ColumnUInt8::create(input_rows_count, 0);
- const ColumnString* col_from_string =
check_and_get_column<ColumnString>(col_from);
- if (auto* nullable = check_and_get_column<ColumnNullable>(col_from)) {
+ const auto* col_from_string =
check_and_get_column<ColumnString>(col_from);
+ if (const auto* nullable =
check_and_get_column<ColumnNullable>(col_from)) {
col_from_string =
check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
}
diff --git a/be/test/core/column/column_variant_test.cpp
b/be/test/core/column/column_variant_test.cpp
index c35f0d52790..dff9e2c0ae5 100644
--- a/be/test/core/column/column_variant_test.cpp
+++ b/be/test/core/column/column_variant_test.cpp
@@ -474,22 +474,6 @@ doris::Field get_jsonb_field(std::string_view type) {
return field_map[type];
}
-// std::string convert_jsonb_field_to_string(doris::Field jsonb) {
-// const auto& val = jsonb.get<JsonbField>();
-// const JsonbValue* json_val =
JsonbDocument::createValue(val.get_value(), val.get_size());
-
-// rapidjson::Document doc;
-// doc.SetObject();
-// rapidjson::Document::AllocatorType& allocator = doc.GetAllocator();
-// rapidjson::Value json_value;
-// convert_jsonb_to_rapidjson(*json_val, json_value, allocator);
-// doc.AddMember("value", json_value, allocator);
-// rapidjson::StringBuffer buffer;
-// rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(buffer);
-// doc.Accept(writer);
-// return std::string(buffer.GetString());
-// }
-
std::string convert_field_to_string(doris::Field array) {
rapidjson::Document doc;
doc.SetObject();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]