This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 87ec82713f8 [opt](function) Optimize the performance of the
json_extract function and remove some unnecessary code (#55779)
87ec82713f8 is described below
commit 87ec82713f82e1297d690005d09a7585aa0f2aa6
Author: Jerry Hu <[email protected]>
AuthorDate: Wed Sep 10 21:41:18 2025 +0800
[opt](function) Optimize the performance of the json_extract function and
remove some unnecessary code (#55779)
---
be/src/util/jsonb_document.cpp | 3 +-
be/src/util/jsonb_writer.h | 17 +++++--
be/src/vec/functions/function_jsonb.cpp | 55 ++--------------------
.../expression/rules/JsonFunctionRewrite.java | 3 +-
4 files changed, 20 insertions(+), 58 deletions(-)
diff --git a/be/src/util/jsonb_document.cpp b/be/src/util/jsonb_document.cpp
index ef17722e5fa..3e79835c109 100644
--- a/be/src/util/jsonb_document.cpp
+++ b/be/src/util/jsonb_document.cpp
@@ -52,8 +52,7 @@ JsonbFindResult JsonbValue::findValue(JsonbPath& path) const {
}
for (size_t i = 0; i < path.get_leg_vector_size(); ++i) {
- values.assign(results.begin(), results.end());
- results.clear();
+ values = std::move(results);
for (const auto* pval : values) {
switch (path.get_leg_from_leg_vector(i)->type) {
case MEMBER_CODE: {
diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h
index 46aa36af35b..2f059525fa0 100644
--- a/be/src/util/jsonb_writer.h
+++ b/be/src/util/jsonb_writer.h
@@ -36,17 +36,16 @@
#ifndef JSONB_JSONBWRITER_H
#define JSONB_JSONBWRITER_H
+#include <glog/logging.h>
+
#include <cstdint>
#include <limits>
#include <stack>
#include <string>
-#include "common/exception.h"
#include "common/status.h"
#include "jsonb_document.h"
#include "jsonb_stream.h"
-#include "runtime/define_primitive_type.h"
-#include "runtime/primitive_type.h"
#include "vec/core/types.h"
namespace doris {
@@ -137,6 +136,18 @@ public:
return false;
}
+ bool writeValueSimple(const JsonbValue* value) { // Writes the header, then copies `value`'s packed bytes verbatim into os_; always returns true.
+ DCHECK(value) << "value should not be nullptr"; // precondition: caller must pass a non-null value
+ DCHECK(first_) << "only called at the beginning"; // precondition: first_ is still set
+ DCHECK(stack_.empty()) << "only called at the beginning"; // precondition: no entries on stack_
+ DCHECK(!hasHdr_) << "only called at the beginning"; // precondition: hasHdr_ is not set (presumably: header not yet written -- confirm against writeHeader())
+ first_ = false; // clear the flag asserted above, so a second call would trip the DCHECK
+ writeHeader();
+ os_->write((char*)value, value->numPackedBytes()); // raw copy of numPackedBytes() bytes starting at the address of *value; no per-type dispatch here
+ kvState_ = WS_Value; // NOTE(review): presumably records that a value was the last thing written -- confirm against the kvState_ state machine
+ return true; // unconditional: this method has no failure path
+ }
+
// write a key id
bool writeKey(JsonbKeyValue::keyid_type idx) {
if (!stack_.empty() && verifyKeyState()) {
diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp
index dfc0101c475..562d0268568 100644
--- a/be/src/vec/functions/function_jsonb.cpp
+++ b/be/src/vec/functions/function_jsonb.cpp
@@ -808,7 +808,6 @@ template <typename ValueType>
struct JsonbExtractStringImpl {
using ReturnType = typename ValueType::ReturnType;
using ColumnType = typename ValueType::ColumnType;
- static const bool only_check_exists = ValueType::only_check_exists;
private:
static ALWAYS_INLINE void inner_loop_impl(JsonbWriter* writer, size_t i,
@@ -836,9 +835,8 @@ private:
StringOP::push_value_string(std::string_view(find_result.value->typeName()), i,
res_data, res_offsets);
return;
- }
-
- if constexpr (std::is_same_v<DataTypeJsonb, ReturnType>) {
+ } else {
+ static_assert(std::is_same_v<DataTypeJsonb, ReturnType>);
if constexpr (ValueType::no_quotes) {
if (find_result.value->isString()) {
const auto* str_value =
find_result.value->unpack<JsonbStringVal>();
@@ -856,46 +854,10 @@ private:
}
}
}
-
- writer->writeValue(find_result.value);
+ writer->writeValueSimple(find_result.value);
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
writer->getOutput()->getSize()),
i, res_data, res_offsets);
- } else {
- if (LIKELY(find_result.value->isString())) {
- const auto* str_value =
find_result.value->unpack<JsonbStringVal>();
- StringOP::push_value_string(
- std::string_view(str_value->getBlob(),
str_value->length()), i, res_data,
- res_offsets);
- } else if (find_result.value->isNull()) {
- StringOP::push_null_string(i, res_data, res_offsets, null_map);
- } else if (find_result.value->isTrue()) {
- StringOP::push_value_string("true", i, res_data, res_offsets);
- } else if (find_result.value->isFalse()) {
- StringOP::push_value_string("false", i, res_data, res_offsets);
- } else if (find_result.value->isInt8()) {
- StringOP::push_value_string(
-
std::to_string(find_result.value->unpack<JsonbInt8Val>()->val()), i,
- res_data, res_offsets);
- } else if (find_result.value->isInt16()) {
- StringOP::push_value_string(
-
std::to_string(find_result.value->unpack<JsonbInt16Val>()->val()), i,
- res_data, res_offsets);
- } else if (find_result.value->isInt32()) {
- StringOP::push_value_string(
-
std::to_string(find_result.value->unpack<JsonbInt32Val>()->val()), i,
- res_data, res_offsets);
- } else if (find_result.value->isInt64()) {
- StringOP::push_value_string(
-
std::to_string(find_result.value->unpack<JsonbInt64Val>()->val()), i,
- res_data, res_offsets);
- } else {
- if (!formater) {
- formater.reset(new JsonbToJson());
- }
-
StringOP::push_value_string(formater->to_json_string(find_result.value), i,
- res_data, res_offsets);
- }
}
}
@@ -949,6 +911,7 @@ public:
}
}
+ res_data.reserve(ldata.size());
for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map[i]) {
continue;
@@ -1274,18 +1237,10 @@ public:
} //function
};
-struct JsonbTypeExists {
- using T = uint8_t;
- using ReturnType = DataTypeUInt8;
- using ColumnType = ColumnUInt8;
- static const bool only_check_exists = true;
-};
-
struct JsonbTypeJson {
using T = std::string;
using ReturnType = DataTypeJsonb;
using ColumnType = ColumnString;
- static const bool only_check_exists = false;
static const bool only_get_type = false;
static const bool no_quotes = false;
};
@@ -1294,7 +1249,6 @@ struct JsonbTypeJsonNoQuotes {
using T = std::string;
using ReturnType = DataTypeJsonb;
using ColumnType = ColumnString;
- static const bool only_check_exists = false;
static const bool only_get_type = false;
static const bool no_quotes = true;
};
@@ -1303,7 +1257,6 @@ struct JsonbTypeType {
using T = std::string;
using ReturnType = DataTypeString;
using ColumnType = ColumnString;
- static const bool only_check_exists = false;
static const bool only_get_type = true;
static const bool no_quotes = false;
};
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/JsonFunctionRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/JsonFunctionRewrite.java
index 723ae036fb7..8ddf423755c 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/JsonFunctionRewrite.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/JsonFunctionRewrite.java
@@ -28,7 +28,6 @@ import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonInsert;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonObject;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonReplace;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonSet;
-import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonUnQuote;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonbExtract;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonbExtractBigint;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.JsonbExtractBool;
@@ -154,7 +153,7 @@ public class JsonFunctionRewrite implements
ExpressionPatternRuleFactory {
} else if (function instanceof JsonbExtractDouble) {
return new Cast(jsonExtract, DoubleType.INSTANCE, false);
} else if (function instanceof JsonbExtractString) {
- return new JsonUnQuote(new Cast(jsonExtract, StringType.INSTANCE,
false));
+ return new Cast(jsonExtract, StringType.INSTANCE, false);
} else {
return function;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]