This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 9c672fb1856 [Refactor](inverted index) refactor inverted index
interface in tablet schema (#43003) (#43280) (#43669)
9c672fb1856 is described below
commit 9c672fb1856369a5d0d742e146ccbf6923a43728
Author: Sun Chenyang <[email protected]>
AuthorDate: Tue Nov 12 17:54:44 2024 +0800
[Refactor](inverted index) refactor inverted index interface in tablet
schema (#43003) (#43280) (#43669)
pick from master #43003,#43280
---
be/src/cloud/cloud_meta_mgr.cpp | 8 +-
be/src/cloud/cloud_tablet.cpp | 14 ++-
be/src/cloud/cloud_warm_up_manager.cpp | 14 ++-
be/src/olap/compaction.cpp | 19 ++--
be/src/olap/delta_writer.cpp | 2 +-
be/src/olap/rowset/beta_rowset.cpp | 41 +++-----
be/src/olap/rowset/beta_rowset_writer.cpp | 4 +-
be/src/olap/rowset/segcompaction.cpp | 3 +-
be/src/olap/rowset/segment_v2/column_writer.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 17 ++--
be/src/olap/rowset/segment_v2/segment_writer.cpp | 21 ++--
.../rowset/segment_v2/vertical_segment_writer.cpp | 19 ++--
be/src/olap/schema_change.cpp | 9 +-
be/src/olap/snapshot_manager.cpp | 7 +-
be/src/olap/tablet.cpp | 17 +---
be/src/olap/tablet_schema.cpp | 77 ++-------------
be/src/olap/tablet_schema.h | 40 +++++---
be/src/olap/task/engine_storage_migration_task.cpp | 7 +-
be/src/olap/task/index_builder.cpp | 20 ++--
be/src/service/backend_service.cpp | 9 +-
be/src/vec/common/schema_util.cpp | 25 ++++-
be/src/vec/common/schema_util.h | 3 +
.../compaction/index_compaction_test.cpp | 8 +-
.../index_compaction_with_deleted_term.cpp | 8 +-
be/test/olap/tablet_index_test.cpp | 108 +++++++++++++++++++++
25 files changed, 266 insertions(+), 236 deletions(-)
diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp
index fefdb65e44b..0beeea42bd6 100644
--- a/be/src/cloud/cloud_meta_mgr.cpp
+++ b/be/src/cloud/cloud_meta_mgr.cpp
@@ -1225,12 +1225,8 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const
RowsetMeta& rs_meta) {
}
if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- auto indices = rs_meta.tablet_schema()->indexes();
+ const auto& indices = rs_meta.tablet_schema()->inverted_indexes();
for (auto& index : indices) {
- // only get file_size for inverted index
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
std::string segment_path =
StorageResource().remote_segment_path(
rs_meta.tablet_id(), rs_meta.rowset_id().to_string(),
seg_id);
@@ -1239,7 +1235,7 @@ int64_t CloudMetaMgr::get_inverted_index_file_szie(const
RowsetMeta& rs_meta) {
std::string inverted_index_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
- index.index_id(), index.get_index_suffix());
+ index->index_id(), index->get_index_suffix());
auto st = fs->file_size(inverted_index_file_path, &file_size);
if (!st.ok()) {
file_size = 0;
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index a8b5fcde662..c7a85fc820d 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -285,15 +285,13 @@ void
CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> to_add, bool version_
auto schema_ptr = rowset_meta->tablet_schema();
auto idx_version =
schema_ptr->get_inverted_index_storage_format();
if (idx_version == InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : schema_ptr->indexes()) {
- if (index.index_type() == IndexType::INVERTED) {
- auto idx_path =
storage_resource.value()->remote_idx_v1_path(
- *rowset_meta, seg_id, index.index_id(),
- index.get_index_suffix());
- download_idx_file(idx_path);
- }
+ for (const auto& index :
schema_ptr->inverted_indexes()) {
+ auto idx_path =
storage_resource.value()->remote_idx_v1_path(
+ *rowset_meta, seg_id, index->index_id(),
+ index->get_index_suffix());
+ download_idx_file(idx_path);
}
- } else if (idx_version ==
InvertedIndexStorageFormatPB::V2) {
+ } else {
if (schema_ptr->has_inverted_index()) {
auto idx_path =
storage_resource.value()->remote_idx_v2_path(
*rowset_meta, seg_id);
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp
b/be/src/cloud/cloud_warm_up_manager.cpp
index 96a25fe8fe5..06d6df11dc4 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -147,15 +147,13 @@ void CloudWarmUpManager::handle_jobs() {
auto schema_ptr = rs->tablet_schema();
auto idx_version =
schema_ptr->get_inverted_index_storage_format();
if (idx_version == InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : schema_ptr->indexes()) {
- if (index.index_type() == IndexType::INVERTED) {
- wait->add_count();
- auto idx_path =
storage_resource.value()->remote_idx_v1_path(
- *rs, seg_id, index.index_id(),
index.get_index_suffix());
- download_idx_file(idx_path);
- }
+ for (const auto& index :
schema_ptr->inverted_indexes()) {
+ wait->add_count();
+ auto idx_path =
storage_resource.value()->remote_idx_v1_path(
+ *rs, seg_id, index->index_id(),
index->get_index_suffix());
+ download_idx_file(idx_path);
}
- } else if (idx_version ==
InvertedIndexStorageFormatPB::V2) {
+ } else {
if (schema_ptr->has_inverted_index()) {
wait->add_count();
auto idx_path =
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 7226a986905..6a8c88d99df 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -638,7 +638,7 @@ Status Compaction::do_inverted_index_compaction() {
Status status = Status::OK();
for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) {
auto col = _cur_tablet_schema->column_by_uid(column_uniq_id);
- const auto* index_meta = _cur_tablet_schema->get_inverted_index(col);
+ const auto* index_meta = _cur_tablet_schema->inverted_index(col);
std::vector<lucene::store::Directory*>
dest_index_dirs(dest_segment_num);
try {
@@ -676,15 +676,11 @@ Status Compaction::do_inverted_index_compaction() {
}
void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
- for (const auto& index : _cur_tablet_schema->indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
-
- auto col_unique_ids = index.col_unique_ids();
+ for (const auto& index : _cur_tablet_schema->inverted_indexes()) {
+ auto col_unique_ids = index->col_unique_ids();
// check if column unique ids is empty to avoid crash
if (col_unique_ids.empty()) {
- LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" <<
index.index_id()
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] index[" <<
index->index_id()
<< "] has no column unique id, will skip index
compaction."
<< " tablet_schema=" <<
_cur_tablet_schema->dump_full_schema();
continue;
@@ -699,10 +695,9 @@ void
Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
bool is_continue = false;
std::optional<std::map<std::string, std::string>> first_properties;
for (const auto& rowset : _input_rowsets) {
- const auto* tablet_index =
- rowset->tablet_schema()->get_inverted_index(col_unique_id,
"");
+ const auto* tablet_index =
rowset->tablet_schema()->inverted_index(col_unique_id);
// no inverted index or index id is different from current index id
- if (tablet_index == nullptr || tablet_index->index_id() !=
index.index_id()) {
+ if (tablet_index == nullptr || tablet_index->index_id() !=
index->index_id()) {
is_continue = true;
break;
}
@@ -735,7 +730,7 @@ void
Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
return false;
}
- const auto* index_meta =
rowset->tablet_schema()->get_inverted_index(col_unique_id, "");
+ const auto* index_meta =
rowset->tablet_schema()->inverted_index(col_unique_id);
if (index_meta == nullptr) {
LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "]
column_unique_id["
<< col_unique_id << "] index meta is null, will
skip index compaction";
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index 00c622df59f..f42253a7884 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -249,7 +249,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const
PNodeInfo& node_info)
auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema();
if (!tablet_schema->skip_write_index_on_load()) {
for (auto& column : tablet_schema->columns()) {
- const TabletIndex* index_meta =
tablet_schema->get_inverted_index(*column);
+ const TabletIndex* index_meta =
tablet_schema->inverted_index(*column);
if (index_meta) {
indices_ids.emplace_back(index_meta->index_id(),
index_meta->get_index_suffix());
}
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index 4b51dcc3530..bbb2ca72b4a 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -81,12 +81,7 @@ Status BetaRowset::get_inverted_index_size(size_t*
index_size) {
}
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- auto indices = _schema->indexes();
- for (auto& index : indices) {
- // only get file_size for inverted index
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
+ for (const auto& index : _schema->inverted_indexes()) {
for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
auto seg_path = DORIS_TRY(segment_path(seg_id));
int64_t file_size = 0;
@@ -94,7 +89,7 @@ Status BetaRowset::get_inverted_index_size(size_t*
index_size) {
std::string inverted_index_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(seg_path),
- index.index_id(), index.get_index_suffix());
+ index->index_id(), index->get_index_suffix());
RETURN_IF_ERROR(fs->file_size(inverted_index_file_path,
&file_size));
*index_size += file_size;
}
@@ -122,7 +117,7 @@ void BetaRowset::clear_inverted_index_cache() {
auto index_path_prefix =
InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path);
for (const auto& column : tablet_schema()->columns()) {
- const TabletIndex* index_meta =
tablet_schema()->get_inverted_index(*column);
+ const TabletIndex* index_meta =
tablet_schema()->inverted_index(*column);
if (index_meta) {
auto inverted_index_file_cache_key =
InvertedIndexDescriptor::get_index_file_cache_key(
@@ -227,7 +222,7 @@ Status BetaRowset::remove() {
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
+ const TabletIndex* index_meta =
_schema->inverted_index(*column);
if (index_meta) {
std::string inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
@@ -311,22 +306,19 @@ Status BetaRowset::link_files_to(const std::string& dir,
RowsetId new_rowset_id,
return status;
});
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : _schema->indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
- auto index_id = index.index_id();
+ for (const auto& index : _schema->inverted_indexes()) {
+ auto index_id = index->index_id();
if (without_index_uids != nullptr &&
without_index_uids->count(index_id)) {
continue;
}
std::string inverted_index_src_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(src_path),
- index_id, index.get_index_suffix());
+ index_id, index->get_index_suffix());
std::string inverted_index_dst_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(dst_path),
- index_id, index.get_index_suffix());
+ index_id, index->get_index_suffix());
bool index_file_exists = true;
RETURN_IF_ERROR(local_fs->exists(inverted_index_src_file_path,
&index_file_exists));
if (index_file_exists) {
@@ -405,7 +397,7 @@ Status BetaRowset::copy_files_to(const std::string& dir,
const RowsetId& new_row
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
+ const TabletIndex* index_meta =
_schema->inverted_index(*column);
if (index_meta) {
std::string inverted_index_src_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
@@ -464,7 +456,7 @@ Status BetaRowset::upload_to(const StorageResource&
dest_fs, const RowsetId& new
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
+ const TabletIndex* index_meta =
_schema->inverted_index(*column);
if (index_meta) {
std::string remote_inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
@@ -613,14 +605,11 @@ Status BetaRowset::add_to_binlog() {
linked_success_files.push_back(binlog_file);
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : _schema->indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
- auto index_id = index.index_id();
+ for (const auto& index : _schema->inverted_indexes()) {
+ auto index_id = index->index_id();
auto index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(seg_file), index_id,
- index.get_index_suffix());
+ index->get_index_suffix());
auto binlog_index_file = (std::filesystem::path(binlog_dir) /
std::filesystem::path(index_file).filename())
.string();
@@ -661,7 +650,7 @@ Status BetaRowset::calc_file_crc(uint32_t* crc_value,
int64_t* file_count) {
file_paths.emplace_back(seg_path);
if (_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
for (auto& column : _schema->columns()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
+ const TabletIndex* index_meta =
_schema->inverted_index(*column);
if (index_meta) {
std::string inverted_index_file =
InvertedIndexDescriptor::get_index_file_path_v1(
@@ -805,7 +794,7 @@ Status BetaRowset::show_nested_index_file(rapidjson::Value*
rowset_value,
} else {
rapidjson::Value indices(rapidjson::kArrayType);
for (auto column : _rowset_meta->tablet_schema()->columns()) {
- const auto* index_meta =
_rowset_meta->tablet_schema()->get_inverted_index(*column);
+ const auto* index_meta =
_rowset_meta->tablet_schema()->inverted_index(*column);
if (index_meta == nullptr) {
continue;
}
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 9cd634ec5ac..97e3bcb1deb 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -540,8 +540,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t
begin, int64_t end, u
}
// rename remaining inverted index files
for (auto column : _context.tablet_schema->columns()) {
- if (_context.tablet_schema->has_inverted_index(*column)) {
- const auto* index_info =
_context.tablet_schema->get_inverted_index(*column);
+ if (const auto& index_info =
_context.tablet_schema->inverted_index(*column);
+ index_info != nullptr) {
auto index_id = index_info->index_id();
if (_context.tablet_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segcompaction.cpp
b/be/src/olap/rowset/segcompaction.cpp
index 8f4184b7175..413e6150336 100644
--- a/be/src/olap/rowset/segcompaction.cpp
+++ b/be/src/olap/rowset/segcompaction.cpp
@@ -152,8 +152,7 @@ Status
SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e
}
// Delete inverted index files
for (auto&& column : schema->columns()) {
- if (schema->has_inverted_index(*column)) {
- const auto* index_info = schema->get_inverted_index(*column);
+ if (const auto* index_info = schema->inverted_index(*column);
index_info != nullptr) {
auto index_id = index_info->index_id();
if (schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
diff --git a/be/src/olap/rowset/segment_v2/column_writer.h
b/be/src/olap/rowset/segment_v2/column_writer.h
index 62f209db5ad..2d66b940a38 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.h
+++ b/be/src/olap/rowset/segment_v2/column_writer.h
@@ -63,7 +63,7 @@ struct ColumnWriterOptions {
bool need_inverted_index = false;
uint8_t gram_size;
uint16_t gram_bf_size;
- std::vector<const TabletIndex*> indexes;
+ std::vector<const TabletIndex*> indexes; // unused
const TabletIndex* inverted_index = nullptr;
InvertedIndexFileWriter* inverted_index_file_writer;
std::string to_string() const {
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index f43ccf37e78..000c1d0c300 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1055,16 +1055,17 @@ Status
SegmentIterator::_init_inverted_index_iterators() {
return Status::OK();
}
for (auto cid : _schema->column_ids()) {
+ // Use segment’s own index_meta, for compatibility with future
indexing needs to default to lowercase.
if (_inverted_index_iterators[cid] == nullptr) {
- // Not check type valid, since we need to get inverted index for
related variant type when reading the segment.
- // If check type valid, we can not get inverted index for variant
type, and result nullptr.The result for calling
- // get_inverted_index with variant suffix should return
corresponding inverted index meta.
- bool check_inverted_index_by_type = false;
- // Use segment’s own index_meta, for compatibility with future
indexing needs to default to lowercase.
+ // In the _opts.tablet_schema, the sub-column type information for
the variant is FieldType::OLAP_FIELD_TYPE_VARIANT.
+ // This is because the sub-column is created in
create_materialized_variant_column.
+ // We use this column to locate the metadata for the inverted
index, which requires a unique_id and path.
+ const auto& column = _opts.tablet_schema->column(cid);
+ int32_t col_unique_id =
+ column.is_extracted_column() ? column.parent_unique_id() :
column.unique_id();
RETURN_IF_ERROR(_segment->new_inverted_index_iterator(
- _opts.tablet_schema->column(cid),
-
_segment->_tablet_schema->get_inverted_index(_opts.tablet_schema->column(cid),
-
check_inverted_index_by_type),
+ column,
+ _segment->_tablet_schema->inverted_index(col_unique_id,
column.suffix_path()),
_opts, &_inverted_index_iterators[cid]));
}
}
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index c532969baa4..5c702df3334 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -218,22 +218,21 @@ Status SegmentWriter::_create_column_writer(uint32_t cid,
const TabletColumn& co
if (_opts.write_type == DataWriteType::TYPE_DIRECT &&
schema->skip_write_index_on_load()) {
skip_inverted_index = true;
}
- // indexes for this column
- opts.indexes = schema->get_indexes_for_column(column);
+
if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
opts.need_zone_map = false;
opts.need_bloom_filter = false;
opts.need_bitmap_index = false;
}
- opts.inverted_index_file_writer = _inverted_index_file_writer;
- for (const auto* index : opts.indexes) {
- if (!skip_inverted_index && index->index_type() ==
IndexType::INVERTED) {
- opts.inverted_index = index;
- opts.need_inverted_index = true;
- DCHECK(_inverted_index_file_writer != nullptr);
- // TODO support multiple inverted index
- break;
- }
+
+ // indexes for this column
+ if (const auto& index = schema->inverted_index(column);
+ index != nullptr && !skip_inverted_index) {
+ opts.inverted_index = index;
+ opts.need_inverted_index = true;
+ DCHECK(_inverted_index_file_writer != nullptr);
+ opts.inverted_index_file_writer = _inverted_index_file_writer;
+ // TODO support multiple inverted index
}
#define CHECK_FIELD_TYPE(TYPE, type_name)
\
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) {
\
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index 2cea4c86c09..a0fcd58bfb7 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -210,23 +210,20 @@ Status
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
tablet_schema->skip_write_index_on_load()) {
skip_inverted_index = true;
}
- // indexes for this column
- opts.indexes = tablet_schema->get_indexes_for_column(column);
+
if (!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
opts.need_zone_map = false;
opts.need_bloom_filter = false;
opts.need_bitmap_index = false;
}
- for (const auto* index : opts.indexes) {
- if (!skip_inverted_index && index->index_type() ==
IndexType::INVERTED) {
- opts.inverted_index = index;
- opts.need_inverted_index = true;
- DCHECK(_inverted_index_file_writer != nullptr);
- // TODO support multiple inverted index
- break;
- }
+ if (const auto& index = tablet_schema->inverted_index(column);
+ index != nullptr && !skip_inverted_index) {
+ opts.inverted_index = index;
+ opts.need_inverted_index = true;
+ DCHECK(_inverted_index_file_writer != nullptr);
+ opts.inverted_index_file_writer = _inverted_index_file_writer;
+ // TODO support multiple inverted index
}
- opts.inverted_index_file_writer = _inverted_index_file_writer;
#define CHECK_FIELD_TYPE(TYPE, type_name)
\
if (column.type() == FieldType::OLAP_FIELD_TYPE_##TYPE) {
\
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 2125b508d24..1c06f3a405a 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -81,6 +81,7 @@
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/common/assert_cast.h"
+#include "vec/common/schema_util.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/exprs/vexpr.h"
@@ -1367,13 +1368,9 @@ Status SchemaChangeJob::parse_request(const
SchemaChangeParams& sc_params,
*sc_directly = true;
return Status::OK();
} else if (column_mapping->ref_column_idx >= 0) {
- const auto& column_new = new_tablet_schema->column(i);
- const auto& column_old =
base_tablet_schema->column(column_mapping->ref_column_idx);
// index changed
- if (column_new.is_bf_column() != column_old.is_bf_column() ||
- column_new.has_bitmap_index() != column_old.has_bitmap_index()
||
- new_tablet_schema->has_inverted_index(column_new) !=
- base_tablet_schema->has_inverted_index(column_old)) {
+ if (vectorized::schema_util::has_schema_index_diff(
+ new_tablet_schema, base_tablet_schema, i,
column_mapping->ref_column_idx)) {
*sc_directly = true;
return Status::OK();
}
diff --git a/be/src/olap/snapshot_manager.cpp b/be/src/olap/snapshot_manager.cpp
index 2cfa9a8e8b7..67205835b53 100644
--- a/be/src/olap/snapshot_manager.cpp
+++ b/be/src/olap/snapshot_manager.cpp
@@ -698,11 +698,8 @@ Status SnapshotManager::_create_snapshot_files(const
TabletSharedPtr& ref_tablet
if (tablet_schema.get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : tablet_schema.indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
- auto index_id = index.index_id();
+ for (const auto& index : tablet_schema.inverted_indexes())
{
+ auto index_id = index->index_id();
auto index_file =
ref_tablet->get_segment_index_filepath(
rowset_id, segment_index, index_id);
auto snapshot_segment_index_file_path =
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index ed9ffaa0664..f97f153a860 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -1234,7 +1234,7 @@ std::vector<RowsetSharedPtr>
Tablet::pick_candidate_rowsets_to_build_inverted_in
std::shared_lock rlock(_meta_lock);
auto has_alter_inverted_index = [&](RowsetSharedPtr rowset) -> bool {
for (const auto& index_id : alter_index_uids) {
- if
(rowset->tablet_schema()->has_inverted_index_with_index_id(index_id, "")) {
+ if
(rowset->tablet_schema()->has_inverted_index_with_index_id(index_id)) {
return true;
}
}
@@ -2611,12 +2611,9 @@ void Tablet::gc_binlogs(int64_t version) {
// add binlog segment files and index files
for (int64_t i = 0; i < num_segments; ++i) {
wait_for_deleted_binlog_files.emplace_back(get_segment_filepath(rowset_id, i));
- for (const auto& index : this->tablet_schema()->indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
+ for (const auto& index :
this->tablet_schema()->inverted_indexes()) {
wait_for_deleted_binlog_files.emplace_back(
- get_segment_index_filepath(rowset_id, i,
index.index_id()));
+ get_segment_index_filepath(rowset_id, i,
index->index_id()));
}
}
};
@@ -2765,12 +2762,8 @@ int64_t Tablet::get_inverted_index_file_szie(const
RowsetMetaSharedPtr& rs_meta)
if (rs_meta->tablet_schema()->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- auto indices = rs_meta->tablet_schema()->indexes();
+ const auto& indices = rs_meta->tablet_schema()->inverted_indexes();
for (auto& index : indices) {
- // only get file_size for inverted index
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) {
std::string segment_path = get_segment_path(rs_meta, seg_id);
int64_t file_size = 0;
@@ -2778,7 +2771,7 @@ int64_t Tablet::get_inverted_index_file_szie(const
RowsetMetaSharedPtr& rs_meta)
std::string inverted_index_file_path =
InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
- index.index_id(), index.get_index_suffix());
+ index->index_id(), index->get_index_suffix());
auto st = fs->file_size(inverted_index_file_path, &file_size);
if (!st.ok()) {
file_size = 0;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index ef48c06aa3f..b5479d34016 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -905,14 +905,13 @@ void TabletColumn::append_sparse_column(TabletColumn
column) {
_num_sparse_columns++;
}
-void TabletSchema::append_index(TabletIndex index) {
+void TabletSchema::append_index(TabletIndex&& index) {
_indexes.push_back(std::move(index));
}
void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) {
int32_t col_unique_id = col.unique_id();
- const std::string& suffix_path =
- col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
+ const std::string& suffix_path = escape_for_path_name(col.suffix_path());
for (size_t i = 0; i < _indexes.size(); i++) {
for (int32_t id : _indexes[i].col_unique_ids()) {
if (id == col_unique_id && _indexes[i].get_index_suffix() ==
suffix_path) {
@@ -1352,28 +1351,6 @@ Result<const TabletColumn*> TabletSchema::column(const
std::string& field_name)
return _cols[it->second].get();
}
-std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column(
- const TabletColumn& col) const {
- std::vector<const TabletIndex*> indexes_for_column;
- // Some columns (Float, Double, JSONB ...) from the variant do not support
index, but they are listed in TabltetIndex.
- if
(!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
- return indexes_for_column;
- }
- int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id()
: col.unique_id();
- const std::string& suffix_path =
- col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
- // TODO use more efficient impl
- for (size_t i = 0; i < _indexes.size(); i++) {
- for (int32_t id : _indexes[i].col_unique_ids()) {
- if (id == col_unique_id && _indexes[i].get_index_suffix() ==
suffix_path) {
- indexes_for_column.push_back(&(_indexes[i]));
- }
- }
- }
-
- return indexes_for_column;
-}
-
void TabletSchema::update_tablet_columns(const TabletSchema& tablet_schema,
const std::vector<TColumn>&
t_columns) {
copy_from(tablet_schema);
@@ -1385,49 +1362,17 @@ void TabletSchema::update_tablet_columns(const
TabletSchema& tablet_schema,
}
}
-bool TabletSchema::has_inverted_index(const TabletColumn& col) const {
- // TODO use more efficient impl
- int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id()
: col.unique_id();
- const std::string& suffix_path =
- col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
+bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id) const {
for (size_t i = 0; i < _indexes.size(); i++) {
- if (_indexes[i].index_type() == IndexType::INVERTED) {
- for (int32_t id : _indexes[i].col_unique_ids()) {
- if (id == col_unique_id && _indexes[i].get_index_suffix() ==
suffix_path) {
- return true;
- }
- }
- }
- }
-
- return false;
-}
-
-bool TabletSchema::has_inverted_index_with_index_id(int64_t index_id,
- const std::string&
suffix_name) const {
- for (size_t i = 0; i < _indexes.size(); i++) {
- if (_indexes[i].index_type() == IndexType::INVERTED &&
- _indexes[i].get_index_suffix() == suffix_name &&
_indexes[i].index_id() == index_id) {
+ if (_indexes[i].index_type() == IndexType::INVERTED &&
_indexes[i].index_id() == index_id) {
return true;
}
}
return false;
}
-const TabletIndex* TabletSchema::get_inverted_index_with_index_id(
- int64_t index_id, const std::string& suffix_name) const {
- for (size_t i = 0; i < _indexes.size(); i++) {
- if (_indexes[i].index_type() == IndexType::INVERTED &&
- _indexes[i].get_index_suffix() == suffix_name &&
_indexes[i].index_id() == index_id) {
- return &(_indexes[i]);
- }
- }
-
- return nullptr;
-}
-
-const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id,
- const std::string&
suffix_path) const {
+const TabletIndex* TabletSchema::inverted_index(int32_t col_unique_id,
+ const std::string&
suffix_path) const {
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i].index_type() == IndexType::INVERTED) {
for (int32_t id : _indexes[i].col_unique_ids()) {
@@ -1441,19 +1386,15 @@ const TabletIndex*
TabletSchema::get_inverted_index(int32_t col_unique_id,
return nullptr;
}
-const TabletIndex* TabletSchema::get_inverted_index(const TabletColumn& col,
- bool check_valid) const {
- // With check_valid set to true by default
+const TabletIndex* TabletSchema::inverted_index(const TabletColumn& col) const
{
// Some columns(Float, Double, JSONB ...) from the variant do not support
inverted index
- if (check_valid &&
!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
+ if
(!segment_v2::InvertedIndexColumnWriter::check_support_inverted_index(col)) {
return nullptr;
}
// TODO use more efficient impl
// Use parent id if unique not assigned, this could happend when accessing
subcolumns of variants
int32_t col_unique_id = col.is_extracted_column() ? col.parent_unique_id()
: col.unique_id();
- const std::string& suffix_path =
- col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
- return get_inverted_index(col_unique_id, suffix_path);
+ return inverted_index(col_unique_id,
escape_for_path_name(col.suffix_path()));
}
bool TabletSchema::has_ngram_bf_index(int32_t col_unique_id) const {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index e2f90e2716f..f3ff0d694b4 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -163,6 +163,9 @@ public:
bool is_extracted_column() const {
return _column_path != nullptr && !_column_path->empty() &&
_parent_col_unique_id > 0;
};
+ std::string suffix_path() const {
+ return is_extracted_column() ? _column_path->get_path() : "";
+ }
bool is_nested_subcolumn() const {
return _column_path != nullptr && _column_path->has_nested_part();
}
@@ -223,13 +226,16 @@ private:
bool _has_bitmap_index = false;
bool _visible = true;
- int32_t _parent_col_unique_id = -1;
+
std::vector<TabletColumnPtr> _sub_columns;
uint32_t _sub_column_count = 0;
bool _result_is_nullable = false;
int _be_exec_version = -1;
- vectorized::PathInDataPtr _column_path;
+
+ // The extracted sub-columns from "variant" contain the following
information:
+ int32_t _parent_col_unique_id = -1; // "variant" -> col_unique_id
+ vectorized::PathInDataPtr _column_path; // the path of the sub-columns
themselves
// Record information about columns merged into a sparse column within a
variant
// `{"id": 100, "name" : "jack", "point" : 3.9}`
@@ -315,7 +321,7 @@ public:
}
void to_schema_pb(TabletSchemaPB* tablet_meta_pb) const;
void append_column(TabletColumn column, ColumnType col_type =
ColumnType::NORMAL);
- void append_index(TabletIndex index);
+ void append_index(TabletIndex&& index);
void update_index(const TabletColumn& column, TabletIndex index);
void remove_index(int64_t index_id);
void clear_index();
@@ -385,7 +391,15 @@ public:
void set_row_store_page_size(long page_size) { _row_store_page_size =
page_size; }
long row_store_page_size() const { return _row_store_page_size; }
- const std::vector<TabletIndex>& indexes() const { return _indexes; }
+ const std::vector<const TabletIndex*> inverted_indexes() const {
+ std::vector<const TabletIndex*> inverted_indexes;
+ for (const auto& index : _indexes) {
+ if (index.index_type() == IndexType::INVERTED) {
+ inverted_indexes.emplace_back(&index);
+ }
+ }
+ return inverted_indexes;
+ }
bool has_inverted_index() const {
for (const auto& index : _indexes) {
if (index.index_type() == IndexType::INVERTED) {
@@ -394,17 +408,15 @@ public:
}
return false;
}
- std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn&
col) const;
- bool has_inverted_index(const TabletColumn& col) const;
- bool has_inverted_index_with_index_id(int64_t index_id, const std::string&
suffix_path) const;
- const TabletIndex* get_inverted_index_with_index_id(int64_t index_id,
- const std::string&
suffix_name) const;
- // check_valid: check if this column supports inverted index
+ bool has_inverted_index_with_index_id(int64_t index_id) const;
+ // Check whether this column supports inverted index
// Some columns (Float, Double, JSONB ...) from the variant do not support
index, but they are listed in TabletIndex.
- // If returned, the index file will not be found.
- const TabletIndex* get_inverted_index(const TabletColumn& col, bool
check_valid = true) const;
- const TabletIndex* get_inverted_index(int32_t col_unique_id,
- const std::string& suffix_path)
const;
+ const TabletIndex* inverted_index(const TabletColumn& col) const;
+
+ // Regardless of whether this column supports inverted index,
+ // TabletIndex information will be returned as long as it exists.
+ const TabletIndex* inverted_index(int32_t col_unique_id,
+ const std::string& suffix_path = "")
const;
bool has_ngram_bf_index(int32_t col_unique_id) const;
const TabletIndex* get_ngram_bf_index(int32_t col_unique_id) const;
void update_indexes_from_thrift(const std::vector<doris::TOlapTableIndex>&
indexes);
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp
b/be/src/olap/task/engine_storage_migration_task.cpp
index 21be34a334d..a300e6e0f09 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -407,11 +407,8 @@ Status
EngineStorageMigrationTask::_copy_index_and_data_files(
if (tablet_schema.get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : tablet_schema.indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
- auto index_id = index.index_id();
+ for (const auto& index : tablet_schema.inverted_indexes()) {
+ auto index_id = index->index_id();
auto index_file =
_tablet->get_segment_index_filepath(rowset_id,
segment_index, index_id);
auto snapshot_segment_index_file_path =
diff --git a/be/src/olap/task/index_builder.cpp
b/be/src/olap/task/index_builder.cpp
index 69ab95770f7..975920a437e 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -106,7 +106,7 @@ Status IndexBuilder::update_inverted_index_info() {
}
}
auto column = output_rs_tablet_schema->column(column_idx);
- const auto* index_meta =
output_rs_tablet_schema->get_inverted_index(column);
+ const auto* index_meta =
output_rs_tablet_schema->inverted_index(column);
if (index_meta == nullptr) {
LOG(ERROR) << "failed to find column: " << column_name
<< " index_id: " << t_inverted_index.index_id;
@@ -142,12 +142,7 @@ Status IndexBuilder::update_inverted_index_info() {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"indexes count cannot be negative");
}
- int32_t indexes_size = 0;
- for (auto index : output_rs_tablet_schema->indexes()) {
- if (index.index_type() == IndexType::INVERTED) {
- indexes_size++;
- }
- }
+ int32_t indexes_size =
output_rs_tablet_schema->inverted_indexes().size();
if (indexes_count != indexes_size) {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"indexes count not equal to expected");
@@ -165,11 +160,11 @@ Status IndexBuilder::update_inverted_index_info() {
LOG(WARNING) << "referenced column was missing. "
<< "[column=" << t_inverted_index.columns[0]
<< " referenced_column=" << column_uid << "]";
- output_rs_tablet_schema->append_index(index);
+ output_rs_tablet_schema->append_index(std::move(index));
continue;
}
const TabletColumn& col =
output_rs_tablet_schema->column_by_uid(column_uid);
- const TabletIndex* exist_index =
output_rs_tablet_schema->get_inverted_index(col);
+ const TabletIndex* exist_index =
output_rs_tablet_schema->inverted_index(col);
if (exist_index && exist_index->index_id() !=
index.index_id()) {
LOG(WARNING) << fmt::format(
"column: {} has a exist inverted index, but the
index id not equal "
@@ -179,7 +174,7 @@ Status IndexBuilder::update_inverted_index_info() {
without_index_uids.insert(exist_index->index_id());
output_rs_tablet_schema->remove_index(exist_index->index_id());
}
- output_rs_tablet_schema->append_index(index);
+ output_rs_tablet_schema->append_index(std::move(index));
}
}
// construct input rowset reader
@@ -399,14 +394,15 @@ Status
IndexBuilder::handle_single_rowset(RowsetMetaSharedPtr output_rowset_meta
}
}
auto column = output_rowset_schema->column(column_idx);
+ // variant column is not supported for building index
if
(!InvertedIndexColumnWriter::check_support_inverted_index(column)) {
continue;
}
-
DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id, ""));
+
DCHECK(output_rowset_schema->has_inverted_index_with_index_id(index_id));
_olap_data_convertor->add_column_data_convertor(column);
return_columns.emplace_back(column_idx);
std::unique_ptr<Field> field(FieldFactory::create(column));
- const auto* index_meta =
output_rowset_schema->get_inverted_index(column);
+ const auto* index_meta =
output_rowset_schema->inverted_index(column);
std::unique_ptr<segment_v2::InvertedIndexColumnWriter>
inverted_index_builder;
try {
RETURN_IF_ERROR(segment_v2::InvertedIndexColumnWriter::create(
diff --git a/be/src/service/backend_service.cpp
b/be/src/service/backend_service.cpp
index abdef513296..d74a9cd2e0b 100644
--- a/be/src/service/backend_service.cpp
+++ b/be/src/service/backend_service.cpp
@@ -353,11 +353,8 @@ void _ingest_binlog(StorageEngine& engine,
IngestBinlogArg* arg) {
std::vector<std::string> segment_index_file_names;
auto tablet_schema = rowset_meta->tablet_schema();
if (tablet_schema->get_inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
- for (const auto& index : tablet_schema->indexes()) {
- if (index.index_type() != IndexType::INVERTED) {
- continue;
- }
- auto index_id = index.index_id();
+ for (const auto& index : tablet_schema->inverted_indexes()) {
+ auto index_id = index->index_id();
for (int64_t segment_index = 0; segment_index < num_segments;
++segment_index) {
auto get_segment_index_file_size_url = fmt::format(
"{}?method={}&tablet_id={}&rowset_id={}&segment_index={}&segment_index_id={"
@@ -379,7 +376,7 @@ void _ingest_binlog(StorageEngine& engine, IngestBinlogArg*
arg) {
rowset_meta->rowset_id().to_string(), segment_index);
segment_index_file_names.push_back(InvertedIndexDescriptor::get_index_file_path_v1(
InvertedIndexDescriptor::get_index_file_path_prefix(segment_path), index_id,
- index.get_index_suffix()));
+ index->get_index_suffix()));
status = HttpClient::execute_with_retry(max_retry, 1,
get_segment_index_file_size_cb);
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index adaee6e9fe6..9692c02fda3 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -360,6 +360,7 @@ void update_least_sparse_column(const
std::vector<TabletSchemaSPtr>& schemas,
void inherit_column_attributes(const TabletColumn& source, TabletColumn&
target,
TabletSchemaSPtr& target_schema) {
+ DCHECK(target.is_extracted_column());
if (target.type() != FieldType::OLAP_FIELD_TYPE_TINYINT &&
target.type() != FieldType::OLAP_FIELD_TYPE_ARRAY &&
target.type() != FieldType::OLAP_FIELD_TYPE_DOUBLE &&
@@ -368,18 +369,18 @@ void inherit_column_attributes(const TabletColumn&
source, TabletColumn& target,
target.set_is_bf_column(source.is_bf_column());
}
target.set_aggregation_method(source.aggregation());
- const auto* source_index_meta =
target_schema->get_inverted_index(source.unique_id(), "");
+ const auto* source_index_meta =
target_schema->inverted_index(source.unique_id());
if (source_index_meta != nullptr) {
// add index meta
TabletIndex index_info = *source_index_meta;
index_info.set_escaped_escaped_index_suffix_path(target.path_info_ptr()->get_path());
- // get_inverted_index: No need to check, just inherit directly
- const auto* target_index_meta =
target_schema->get_inverted_index(target, false);
+ const auto* target_index_meta = target_schema->inverted_index(
+ target.parent_unique_id(), target.path_info_ptr()->get_path());
if (target_index_meta != nullptr) {
// already exist
target_schema->update_index(target, index_info);
} else {
- target_schema->append_index(index_info);
+ target_schema->append_index(std::move(index_info));
}
}
}
@@ -591,4 +592,20 @@ Status extract(ColumnPtr source, const PathInData& path,
MutableColumnPtr& dst)
return Status::OK();
}
+bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema*
old_schema,
+ int32_t new_col_idx, int32_t old_col_idx) {
+ const auto& column_new = new_schema->column(new_col_idx);
+ const auto& column_old = old_schema->column(old_col_idx);
+
+ if (column_new.is_bf_column() != column_old.is_bf_column() ||
+ column_new.has_bitmap_index() != column_old.has_bitmap_index()) {
+ return true;
+ }
+
+ bool new_schema_has_inverted_index =
new_schema->inverted_index(column_new);
+ bool old_schema_has_inverted_index =
old_schema->inverted_index(column_old);
+
+ return new_schema_has_inverted_index != old_schema_has_inverted_index;
+}
+
} // namespace doris::vectorized::schema_util
diff --git a/be/src/vec/common/schema_util.h b/be/src/vec/common/schema_util.h
index 080e6331dc1..8ceb97a9156 100644
--- a/be/src/vec/common/schema_util.h
+++ b/be/src/vec/common/schema_util.h
@@ -121,4 +121,7 @@ Status extract(ColumnPtr source, const PathInData& path,
MutableColumnPtr& dst);
std::string dump_column(DataTypePtr type, const ColumnPtr& col);
+bool has_schema_index_diff(const TabletSchema* new_schema, const TabletSchema*
old_schema,
+ int32_t new_col_idx, int32_t old_col_idx);
+
} // namespace doris::vectorized::schema_util
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
index 922a77fcaa4..5e3370847e9 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp
@@ -406,7 +406,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
// read col key
const auto& key = _tablet_schema->column_by_uid(0);
- const auto* key_index = _tablet_schema->get_inverted_index(key);
+ const auto* key_index = _tablet_schema->inverted_index(key);
EXPECT_TRUE(key_index != nullptr);
std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000};
std::vector<int> query_result {21, 25, 22, 18, 14, 18, 0};
@@ -414,7 +414,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
// read col v3
const auto& v3_column = _tablet_schema->column_by_uid(3);
- const auto* v3_index = _tablet_schema->get_inverted_index(v3_column);
+ const auto* v3_index = _tablet_schema->inverted_index(v3_column);
EXPECT_TRUE(v3_index != nullptr);
std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000};
std::vector<int> query_result3 {12, 20, 25, 23, 16, 24, 0};
@@ -422,7 +422,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
// read col v1
const auto& v1_column = _tablet_schema->column_by_uid(1);
- const auto* v1_index = _tablet_schema->get_inverted_index(v1_column);
+ const auto* v1_index = _tablet_schema->inverted_index(v1_column);
EXPECT_TRUE(v1_index != nullptr);
std::vector<std::string> query_data1 {"good", "maybe", "great",
"null"};
std::vector<int> query_result1 {197, 191, 194, 0};
@@ -431,7 +431,7 @@ TEST_F(IndexCompactionTest, write_index_test) {
// read col v2
const auto& v2_column = _tablet_schema->column_by_uid(2);
- const auto* v2_index = _tablet_schema->get_inverted_index(v2_column);
+ const auto* v2_index = _tablet_schema->inverted_index(v2_column);
EXPECT_TRUE(v2_index != nullptr);
std::vector<std::string> query_data2 {"musicstream.com", "http",
"https", "null"};
std::vector<int> query_result2 {191, 799, 1201, 0};
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
index 4c77b71d66d..321d43fa872 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp
@@ -498,7 +498,7 @@ protected:
// read col key
const auto& key = _tablet_schema->column_by_uid(0);
- const auto* key_index = _tablet_schema->get_inverted_index(key);
+ const auto* key_index = _tablet_schema->inverted_index(key);
EXPECT_TRUE(key_index != nullptr);
std::vector<int> query_data {99, 66, 56, 87, 85, 96, 20000};
std::vector<int> query_result {19, 21, 21, 16, 14, 18, 0};
@@ -506,7 +506,7 @@ protected:
// read col v3
const auto& v3_column = _tablet_schema->column_by_uid(3);
- const auto* v3_index = _tablet_schema->get_inverted_index(v3_column);
+ const auto* v3_index = _tablet_schema->inverted_index(v3_column);
EXPECT_TRUE(v3_index != nullptr);
std::vector<int> query_data3 {99, 66, 56, 87, 85, 96, 10000};
std::vector<int> query_result3 {12, 18, 22, 21, 16, 20, 0};
@@ -514,7 +514,7 @@ protected:
// read col v1
const auto& v1_column = _tablet_schema->column_by_uid(1);
- const auto* v1_index = _tablet_schema->get_inverted_index(v1_column);
+ const auto* v1_index = _tablet_schema->inverted_index(v1_column);
EXPECT_TRUE(v1_index != nullptr);
std::vector<std::string> query_data1 {"good", "maybe", "great",
"null"};
std::vector<int> query_result1 {197, 191, 0, 0};
@@ -523,7 +523,7 @@ protected:
// read col v2
const auto& v2_column = _tablet_schema->column_by_uid(2);
- const auto* v2_index = _tablet_schema->get_inverted_index(v2_column);
+ const auto* v2_index = _tablet_schema->inverted_index(v2_column);
EXPECT_TRUE(v2_index != nullptr);
std::vector<std::string> query_data2 {"musicstream.com", "http",
"https", "null"};
std::vector<int> query_result2 {176, 719, 1087, 0};
diff --git a/be/test/olap/tablet_index_test.cpp
b/be/test/olap/tablet_index_test.cpp
new file mode 100644
index 00000000000..7842f9af18d
--- /dev/null
+++ b/be/test/olap/tablet_index_test.cpp
@@ -0,0 +1,108 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "olap/tablet_schema.h"
+#include "vec/common/schema_util.h"
+
+namespace doris {
+
+class TabletIndexTest : public testing::Test {};
+
+void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index,
int64_t index_id,
+ const std::string& index_name, int32_t col_unique_id,
+ const std::string& column_type, const std::string&
column_name,
+ const IndexType& index_type, bool is_bf_column) {
+ column_pb->set_unique_id(col_unique_id);
+ column_pb->set_name(column_name);
+ column_pb->set_type(column_type);
+ column_pb->set_is_nullable(true);
+ column_pb->set_is_bf_column(is_bf_column);
+ tablet_index->set_index_id(index_id);
+ tablet_index->set_index_name(index_name);
+ tablet_index->set_index_type(index_type);
+ tablet_index->add_col_unique_id(col_unique_id);
+ if (index_type == IndexType::NGRAM_BF) {
+ auto* properties = tablet_index->mutable_properties();
+ (*properties)["gram_size"] = "5";
+ (*properties)["bf_size"] = "1024";
+ }
+}
+
+TEST_F(TabletIndexTest, test_inverted_index) {
+ TabletSchemaPB schema_pb;
+ schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+ construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000,
"key_index", 0, "INT",
+ "key", IndexType::INVERTED, true);
+ construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001,
"v1_index", 1, "STRING",
+ "v1", IndexType::INVERTED, false);
+ construct_column(schema_pb.add_column(), schema_pb.add_index(), 10002,
"v2_index", 2, "STRING",
+ "v2", IndexType::NGRAM_BF, true);
+
+ TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+ tablet_schema->init_from_pb(schema_pb);
+
+ EXPECT_TRUE(tablet_schema->has_inverted_index());
+ EXPECT_EQ(tablet_schema->inverted_indexes().size(), 2);
+ EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(0))
!= nullptr);
+ EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(1))
!= nullptr);
+ EXPECT_TRUE(tablet_schema->inverted_index(tablet_schema->column_by_uid(2))
== nullptr);
+ EXPECT_TRUE(tablet_schema->inverted_index(3) == nullptr);
+ EXPECT_TRUE(tablet_schema->inverted_index(4, "v1.a") == nullptr);
+}
+
+TEST_F(TabletIndexTest, test_schema_index_diff) {
+ TabletSchemaPB new_schema_pb;
+ new_schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
new_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+ construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(),
10000, "key_index", 0,
+ "INT", "key", IndexType::INVERTED, true);
+ construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(),
10001, "v1_index", 1,
+ "STRING", "v1", IndexType::INVERTED, false);
+ construct_column(new_schema_pb.add_column(), new_schema_pb.add_index(),
10002, "v2_index", 2,
+ "STRING", "v2", IndexType::NGRAM_BF, true);
+
+ TabletSchemaSPtr new_tablet_schema = std::make_shared<TabletSchema>();
+ new_tablet_schema->init_from_pb(new_schema_pb);
+
+ TabletSchemaPB old_schema_pb;
+ old_schema_pb.set_keys_type(KeysType::DUP_KEYS);
+
old_schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+ construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(),
10000, "key_index", 0,
+ "INT", "key", IndexType::INVERTED, true);
+ construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(),
10001, "v1_index", 1,
+ "STRING", "v1", IndexType::INVERTED, true);
+ construct_column(old_schema_pb.add_column(), old_schema_pb.add_index(),
10002, "v2_index", 2,
+ "STRING", "v2", IndexType::INVERTED, true);
+
+ TabletSchemaSPtr old_tablet_schema = std::make_shared<TabletSchema>();
+ old_tablet_schema->init_from_pb(old_schema_pb);
+
+
EXPECT_FALSE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+
old_tablet_schema.get(), 0, 0));
+
EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+
old_tablet_schema.get(), 1, 1));
+
EXPECT_TRUE(vectorized::schema_util::has_schema_index_diff(new_tablet_schema.get(),
+
old_tablet_schema.get(), 2, 2));
+}
+
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]