This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 158e6dedb3c [Opt](Serde) optimize serialization to string on variant
type (#43237)
158e6dedb3c is described below
commit 158e6dedb3c18327cd64805238060cf31410c95c
Author: lihangyu <[email protected]>
AuthorDate: Wed Nov 6 13:25:01 2024 +0800
[Opt](Serde) optimize serialization to string on variant type (#43237)
1. avoid sanitizing the type every time a single row is serialized
2. compare type ids instead of comparing type names

`select count(cast(payload["issue"] as string)) from gharchive`
Query time before: 101s
Query time after: 15s
---
be/src/vec/columns/column_object.cpp | 34 +++++++++++++++++++---------------
1 file changed, 19 insertions(+), 15 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index acf27cac848..3d392e2addb 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1311,14 +1311,14 @@ rapidjson::Value*
find_leaf_node_by_path(rapidjson::Value& json, const PathInDat
// 2. nested array with only nulls, eg. [null. null],todo: think a better way
to deal distinguish array null value and real null value.
// 3. empty root jsonb value(not null)
// 4. type is nothing
-bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type,
int row,
- const PathInData& path) {
+bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type,
+ TypeIndex base_type_id, int row, const PathInData& path) {
// skip nulls
if (nullable && nullable->is_null_at(row)) {
return true;
}
// check if it is empty nested json array, then skip
- if (type->equals(*ColumnObject::NESTED_TYPE)) {
+ if (base_type_id == TypeIndex::VARIANT &&
type->equals(*ColumnObject::NESTED_TYPE)) {
Field field = (*nullable)[row];
if (field.get_type() == Field::Types::Array) {
const auto& array = field.get<Array>();
@@ -1338,7 +1338,7 @@ bool skip_empty_json(const ColumnNullable* nullable,
const DataTypePtr& type, in
return true;
}
// skip nothing type
- if
(WhichDataType(remove_nullable(get_base_type_of_array(type))).is_nothing()) {
+ if (base_type_id == TypeIndex::Nothing) {
return true;
}
return false;
@@ -1346,17 +1346,19 @@ bool skip_empty_json(const ColumnNullable* nullable,
const DataTypePtr& type, in
Status find_and_set_leave_value(const IColumn* column, const PathInData& path,
const DataTypeSerDeSPtr& type_serde, const
DataTypePtr& type,
- rapidjson::Value& root,
+ TypeIndex base_type_index, rapidjson::Value&
root,
rapidjson::Document::AllocatorType& allocator,
Arena& mem_pool,
int row) {
+#ifndef NDEBUG
// sanitize type and column
if (column->get_name() != type->create_column()->get_name()) {
return Status::InternalError(
"failed to set value for path {}, expected type {}, but got {}
at row {}",
path.get_path(), type->get_name(), column->get_name(), row);
}
+#endif
const auto* nullable = check_and_get_column<ColumnNullable>(column);
- if (skip_empty_json(nullable, type, row, path)) {
+ if (skip_empty_json(nullable, type, base_type_index, row, path)) {
return Status::OK();
}
// TODO could cache the result of leaf nodes with it's path info
@@ -1476,11 +1478,12 @@ Status
ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String
VLOG_DEBUG << "dump structure " <<
JsonFunctions::print_json_value(*doc_structure);
#endif
for (const auto& subcolumn : subcolumns) {
-
RETURN_IF_ERROR(find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(),
- subcolumn->path,
-
subcolumn->data.get_least_common_type_serde(),
-
subcolumn->data.get_least_common_type(), root,
-
doc_structure->GetAllocator(), mem_pool, row));
+ RETURN_IF_ERROR(find_and_set_leave_value(
+ subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
+ subcolumn->data.get_least_common_type_serde(),
+ subcolumn->data.get_least_common_type(),
+ subcolumn->data.least_common_type.get_base_type_id(), root,
+ doc_structure->GetAllocator(), mem_pool, row));
if (subcolumn->path.empty() && !root.IsObject()) {
// root was modified, only handle root node
break;
@@ -1549,10 +1552,11 @@ Status ColumnObject::merge_sparse_to_root_column() {
++null_count;
continue;
}
- bool succ = find_and_set_leave_value(column, subcolumn->path,
-
subcolumn->data.get_least_common_type_serde(),
-
subcolumn->data.get_least_common_type(), root,
-
doc_structure->GetAllocator(), mem_pool, i);
+ bool succ = find_and_set_leave_value(
+ column, subcolumn->path,
subcolumn->data.get_least_common_type_serde(),
+ subcolumn->data.get_least_common_type(),
+ subcolumn->data.least_common_type.get_base_type_id(), root,
+ doc_structure->GetAllocator(), mem_pool, i);
if (succ && subcolumn->path.empty() && !root.IsObject()) {
// root was modified, only handle root node
break;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]