This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 4a91b2d46bc [pick](SchemaCache) remove redundant Schema cache (#40257)
4a91b2d46bc is described below
commit 4a91b2d46bc5e309c126ffb9264196f65cd0947c
Author: lihangyu <[email protected]>
AuthorDate: Mon Sep 2 19:24:03 2024 +0800
[pick](SchemaCache) remove redundant Schema cache (#40257)
#40091
---
be/src/olap/rowset/beta_rowset_reader.cpp | 16 +------------
be/src/olap/schema_cache.cpp | 21 ++----------------
be/src/olap/schema_cache.h | 37 +++++--------------------------
be/src/vec/exec/scan/new_olap_scanner.cpp | 8 +++----
4 files changed, 13 insertions(+), 69 deletions(-)
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index e2ff07e349b..38fb5f124c0 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -134,21 +134,7 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context
}
}
VLOG_NOTICE << "read columns size: " << read_columns.size();
- std::string schema_key = SchemaCache::get_schema_key(
- _read_options.tablet_id, _read_context->tablet_schema,
read_columns,
- _read_context->tablet_schema->schema_version(),
SchemaCache::Type::SCHEMA);
- // It is necessary to ensure that there is a schema version when using a
cache
- // because the absence of a schema version can result in reading a stale
version
- // of the schema after a schema change.
- // For table contains variants, it's schema is unstable and variable so we
could not use schema cache here
- if (_read_context->tablet_schema->schema_version() < 0 ||
- _read_context->tablet_schema->num_variant_columns() > 0 ||
- (_input_schema =
SchemaCache::instance()->get_schema<SchemaSPtr>(schema_key)) == nullptr) {
- _input_schema =
-
std::make_shared<Schema>(_read_context->tablet_schema->columns(), read_columns);
- SchemaCache::instance()->insert_schema(schema_key, _input_schema);
- }
-
+ _input_schema =
std::make_shared<Schema>(_read_context->tablet_schema->columns(), read_columns);
if (_read_context->predicates != nullptr) {
_read_options.column_predicates.insert(_read_options.column_predicates.end(),
_read_context->predicates->begin(),
diff --git a/be/src/olap/schema_cache.cpp b/be/src/olap/schema_cache.cpp
index 765e95cdc31..33c128e1cc7 100644
--- a/be/src/olap/schema_cache.cpp
+++ b/be/src/olap/schema_cache.cpp
@@ -39,26 +39,9 @@ SchemaCache* SchemaCache::instance() {
return ExecEnv::GetInstance()->schema_cache();
}
-// format: tabletId-unique_id1-uniqueid2...-version-type
-std::string SchemaCache::get_schema_key(int64_t tablet_id, const
TabletSchemaSPtr& schema,
- const std::vector<uint32_t>&
column_ids, int32_t version,
- Type type) {
- if (column_ids.empty() || schema->column(column_ids[0]).unique_id() < 0) {
- return "";
- }
- std::string key = fmt::format("{}-", tablet_id);
- std::for_each(column_ids.begin(), column_ids.end(), [&](const ColumnId&
cid) {
- uint32_t col_unique_id = schema->column(cid).unique_id();
- key.append(fmt::format("{}", col_unique_id));
- key.append("-");
- });
- key.append(fmt::format("{}-{}", version, type));
- return key;
-}
-
// format: tabletId-unique_id1-uniqueid2...-version-type
std::string SchemaCache::get_schema_key(int64_t tablet_id, const
std::vector<TColumn>& columns,
- int32_t version, Type type) {
+ int32_t version) {
if (columns.empty() || columns[0].col_unique_id < 0) {
return "";
}
@@ -67,7 +50,7 @@ std::string SchemaCache::get_schema_key(int64_t tablet_id, const std::vector<TCo
key.append(fmt::format("{}", col.col_unique_id));
key.append("-");
});
- key.append(fmt::format("{}-{}", version, type));
+ key.append(fmt::format("{}", version));
return key;
}
diff --git a/be/src/olap/schema_cache.h b/be/src/olap/schema_cache.h
index 233d40ede77..7bb18a59c34 100644
--- a/be/src/olap/schema_cache.h
+++ b/be/src/olap/schema_cache.h
@@ -46,22 +46,15 @@ using SegmentIteratorUPtr = std::unique_ptr<SegmentIterator>;
// with high concurrency, where queries are executed simultaneously.
class SchemaCache : public LRUCachePolicyTrackingManual {
public:
- enum class Type { TABLET_SCHEMA = 0, SCHEMA = 1 };
-
static SchemaCache* instance();
static void create_global_instance(size_t capacity);
- // get cache schema key, delimiter with SCHEMA_DELIMITER
- static std::string get_schema_key(int64_t tablet_id, const
TabletSchemaSPtr& schema,
- const std::vector<uint32_t>& column_ids,
int32_t version,
- Type type);
static std::string get_schema_key(int64_t tablet_id, const
std::vector<TColumn>& columns,
- int32_t version, Type type);
+ int32_t version);
// Get a shared cached schema from cache, schema_key is a subset of column
unique ids
- template <typename SchemaType>
- SchemaType get_schema(const std::string& schema_key) {
+ TabletSchemaSPtr get_schema(const std::string& schema_key) {
if (!instance() || schema_key.empty()) {
return {};
}
@@ -70,44 +63,26 @@ public:
Defer release([cache = this, lru_handle] {
cache->release(lru_handle); });
auto* value = (CacheValue*)LRUCachePolicy::value(lru_handle);
VLOG_DEBUG << "use cache schema";
- if constexpr (std::is_same_v<SchemaType, TabletSchemaSPtr>) {
- return value->tablet_schema;
- }
- if constexpr (std::is_same_v<SchemaType, SchemaSPtr>) {
- return value->schema;
- }
+ return value->tablet_schema;
}
return {};
}
// Insert a shared Schema into cache, schema_key is full column unique ids
- template <typename SchemaType>
- void insert_schema(const std::string& key, SchemaType schema) {
+ void insert_schema(const std::string& key, TabletSchemaSPtr schema) {
if (!instance() || key.empty()) {
return;
}
auto* value = new CacheValue;
- if constexpr (std::is_same_v<SchemaType, TabletSchemaSPtr>) {
- value->type = Type::TABLET_SCHEMA;
- value->tablet_schema = schema;
- } else if constexpr (std::is_same_v<SchemaType, SchemaSPtr>) {
- value->type = Type::SCHEMA;
- value->schema = schema;
- }
+ value->tablet_schema = schema;
- auto lru_handle = insert(key, value, 1, schema->mem_size(),
CachePriority::NORMAL);
+ auto* lru_handle = insert(key, value, 1, schema->mem_size(),
CachePriority::NORMAL);
release(lru_handle);
}
- // Try to prune the cache if expired.
- Status prune();
-
class CacheValue : public LRUCacheValueBase {
public:
- Type type;
- // either tablet_schema or schema
TabletSchemaSPtr tablet_schema = nullptr;
- SchemaSPtr schema = nullptr;
};
SchemaCache(size_t capacity)
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp
index d978e21437c..71de189cf45 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -147,10 +147,10 @@ Status NewOlapScanner::init() {
!olap_scan_node.columns_desc.empty() &&
olap_scan_node.columns_desc[0].col_unique_id >= 0 &&
tablet->tablet_schema()->num_variant_columns() == 0) {
- schema_key = SchemaCache::get_schema_key(
- tablet->tablet_id(), olap_scan_node.columns_desc,
olap_scan_node.schema_version,
- SchemaCache::Type::TABLET_SCHEMA);
- cached_schema =
SchemaCache::instance()->get_schema<TabletSchemaSPtr>(schema_key);
+ schema_key =
+ SchemaCache::get_schema_key(tablet->tablet_id(),
olap_scan_node.columns_desc,
+ olap_scan_node.schema_version);
+ cached_schema = SchemaCache::instance()->get_schema(schema_key);
}
if (cached_schema) {
tablet_schema = cached_schema;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]