This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 4d8e5f3c8dd [Opt](Variant) merge schema in sync_rowsets to prevents
from CPU overhead each time describe table(#42856) (#43062)
4d8e5f3c8dd is described below
commit 4d8e5f3c8dd52deab25813e31670b4e19f8949f1
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Nov 4 16:49:41 2024 +0800
[Opt](Variant) merge schema in sync_rowsets to prevents from CPU overhead
each time describe table(#42856) (#43062)
PR Body: We should avoid merging schemas on every call to
`merged_tablet_schema`, so this PR moves the merge logic into the
`sync_rowsets` stage.
Cherry-picked from #42856
---
be/src/cloud/cloud_tablet.cpp | 45 ++++++++++++++++++++++++++++---------
be/src/cloud/cloud_tablet.h | 6 +++++
be/src/service/internal_service.cpp | 5 ++++-
3 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index 576f1da7262..86893dc38a2 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -108,6 +108,36 @@ Status CloudTablet::capture_rs_readers(const Version&
spec_version,
return capture_rs_readers_unlocked(version_path, rs_splits);
}
+Status CloudTablet::merge_rowsets_schema() {
+ // Find the rowset with the max version
+ auto max_version_rowset =
+ std::max_element(
+ _rs_version_map.begin(), _rs_version_map.end(),
+ [](const auto& a, const auto& b) {
+ return !a.second->tablet_schema()
+ ? true
+ : (!b.second->tablet_schema()
+ ? false
+ :
a.second->tablet_schema()->schema_version() <
+
b.second->tablet_schema()
+
->schema_version());
+ })
+ ->second;
+ TabletSchemaSPtr max_version_schema = max_version_rowset->tablet_schema();
+ // If the schema has variant columns, perform a merge to create a wide
tablet schema
+ if (max_version_schema->num_variant_columns() > 0) {
+ std::vector<TabletSchemaSPtr> schemas;
+ std::transform(_rs_version_map.begin(), _rs_version_map.end(),
std::back_inserter(schemas),
+ [](const auto& rs_meta) { return
rs_meta.second->tablet_schema(); });
+ // Merge the collected schemas to obtain the least common schema
+
RETURN_IF_ERROR(vectorized::schema_util::get_least_common_schema(schemas,
nullptr,
+
max_version_schema));
+ VLOG_DEBUG << "dump schema: " <<
max_version_schema->dump_full_schema();
+ _merged_tablet_schema = max_version_schema;
+ }
+ return Status::OK();
+}
+
// There are only two tablet_states RUNNING and NOT_READY in cloud mode
// This function will erase the tablet from `CloudTabletMgr` when it can't
find this tablet in MS.
Status CloudTablet::sync_rowsets(int64_t query_version, bool
warmup_delta_data) {
@@ -133,6 +163,10 @@ Status CloudTablet::sync_rowsets(int64_t query_version,
bool warmup_delta_data)
if (st.is<ErrorCode::NOT_FOUND>()) {
clear_cache();
}
+
+ // Merge all rowset schemas within a CloudTablet
+ RETURN_IF_ERROR(merge_rowsets_schema());
+
return st;
}
@@ -188,16 +222,7 @@ Status CloudTablet::sync_if_not_running() {
}
TabletSchemaSPtr CloudTablet::merged_tablet_schema() const {
- std::shared_lock rdlock(_meta_lock);
- TabletSchemaSPtr target_schema;
- std::vector<TabletSchemaSPtr> schemas;
- for (const auto& [_, rowset] : _rs_version_map) {
- schemas.push_back(rowset->tablet_schema());
- }
- // get the max version schema and merge all schema
- static_cast<void>(
- vectorized::schema_util::get_least_common_schema(schemas, nullptr,
target_schema));
- return target_schema;
+ return _merged_tablet_schema;
}
void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool
version_overlap,
diff --git a/be/src/cloud/cloud_tablet.h b/be/src/cloud/cloud_tablet.h
index 2bd1ce47502..5f10211ef53 100644
--- a/be/src/cloud/cloud_tablet.h
+++ b/be/src/cloud/cloud_tablet.h
@@ -208,6 +208,9 @@ private:
Status sync_if_not_running();
+ // Merge all rowset schemas within a CloudTablet
+ Status merge_rowsets_schema();
+
CloudStorageEngine& _engine;
// this mutex MUST ONLY be used when sync meta
@@ -246,6 +249,9 @@ private:
std::mutex _base_compaction_lock;
std::mutex _cumulative_compaction_lock;
mutable std::mutex _rowset_update_lock;
+
+ // Schema will be merged from all rowsets when sync_rowsets
+ TabletSchemaSPtr _merged_tablet_schema;
};
using CloudTabletSPtr = std::shared_ptr<CloudTablet>;
diff --git a/be/src/service/internal_service.cpp
b/be/src/service/internal_service.cpp
index 8217bd11bb9..c23cc057584 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -1159,7 +1159,10 @@ void
PInternalService::fetch_remote_tablet_schema(google::protobuf::RpcControlle
LOG(WARNING) << "tablet does not exist, tablet id is "
<< tablet_id;
continue;
}
-
tablet_schemas.push_back(res.value()->merged_tablet_schema());
+ auto schema = res.value()->merged_tablet_schema();
+ if (schema != nullptr) {
+ tablet_schemas.push_back(schema);
+ }
}
if (!tablet_schemas.empty()) {
// merge all
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]