This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new bc847342910 [opt](index compaction)Optimize logic of picking columns
for index compaction(#42051) (#42287)
bc847342910 is described below
commit bc847342910481a6ca7c1fc899837f99c916dab0
Author: qiye <[email protected]>
AuthorDate: Wed Oct 23 15:21:23 2024 +0800
[opt](index compaction)Optimize logic of picking columns for index
compaction(#42051) (#42287)
## Proposed changes
Backport (bp) of #42051 to branch-2.1.
---
be/src/olap/compaction.cpp | 62 +++++++++-------------
be/src/olap/rowset/rowset_writer_context.h | 4 +-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 2 +-
.../rowset/segment_v2/vertical_segment_writer.cpp | 3 +-
4 files changed, 29 insertions(+), 42 deletions(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fa2d89352be..1c5b52dca0e 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -42,6 +42,7 @@
#include "olap/cumulative_compaction_policy.h"
#include "olap/cumulative_compaction_time_series_policy.h"
#include "olap/data_dir.h"
+#include "olap/olap_common.h"
#include "olap/olap_define.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/rowset.h"
@@ -370,10 +371,11 @@ Status Compaction::do_compaction_impl(int64_t permits) {
// 2. write merged rows to output rowset
// The test results show that merger is low-memory-footprint, there is no
need to tracker its mem pool
Merger::Statistics stats;
- // if ctx.skip_inverted_index.size() > 0, it means we need to do inverted
index compaction.
+ // if ctx.columns_to_do_index_compaction.size() > 0, it means we need to
do inverted index compaction.
// the row ID conversion matrix needs to be used for inverted index
compaction.
- if (ctx.skip_inverted_index.size() > 0 || (_tablet->keys_type() ==
KeysType::UNIQUE_KEYS &&
-
_tablet->enable_unique_key_merge_on_write())) {
+ if (!ctx.columns_to_do_index_compaction.empty() ||
+ (_tablet->keys_type() == KeysType::UNIQUE_KEYS &&
+ _tablet->enable_unique_key_merge_on_write())) {
stats.rowid_conversion = &_rowid_conversion;
}
int64_t way_num = merge_way_num();
@@ -436,37 +438,9 @@ Status Compaction::do_compaction_impl(int64_t permits) {
RETURN_IF_ERROR(check_correctness(stats));
if (_input_row_num > 0 && stats.rowid_conversion &&
config::inverted_index_compaction_enable &&
- !ctx.skip_inverted_index.empty()) {
+ !ctx.columns_to_do_index_compaction.empty()) {
OlapStopWatch inverted_watch;
- // check rowid_conversion correctness
- Version version = _tablet->max_version();
- DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
- std::set<RowLocation> missed_rows;
- std::map<RowsetSharedPtr, std::list<std::pair<RowLocation,
RowLocation>>> location_map;
- // Convert the delete bitmap of the input rowsets to output rowset.
- std::size_t missed_rows_size = 0;
- _tablet->calc_compaction_output_rowset_delete_bitmap(
- _input_rowsets, _rowid_conversion, 0, version.second + 1,
&missed_rows,
- &location_map, _tablet->tablet_meta()->delete_bitmap(),
- &output_rowset_delete_bitmap);
- if (!allow_delete_in_cumu_compaction()) {
- missed_rows_size = missed_rows.size();
- if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION
&&
- _tablet->tablet_state() == TABLET_RUNNING &&
- stats.merged_rows != missed_rows_size) {
- std::string err_msg = fmt::format(
- "cumulative compaction: the merged rows({}) is not
equal to missed "
- "rows({}) in rowid conversion, tablet_id: {},
table_id:{}",
- stats.merged_rows, missed_rows_size,
_tablet->tablet_id(),
- _tablet->table_id());
- DCHECK(false) << err_msg;
- LOG(WARNING) << err_msg;
- }
- }
-
- RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset,
location_map));
-
// translation vec
// <<dest_idx_num, dest_docId>>
// the first level vector: index indicates src segment.
@@ -649,7 +623,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
};
Status status = Status::OK();
- for (auto&& column_uniq_id : ctx.skip_inverted_index) {
+ for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) {
auto col = _cur_tablet_schema->column_by_uid(column_uniq_id);
const auto* index_meta =
_cur_tablet_schema->get_inverted_index(col);
@@ -808,7 +782,20 @@ Status
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
InvertedIndexStorageFormatPB::V1) {
for (const auto& index : _cur_tablet_schema->indexes()) {
if (index.index_type() == IndexType::INVERTED) {
- auto col_unique_id = index.col_unique_ids()[0];
+ auto col_unique_ids = index.col_unique_ids();
+ // check if column unique ids is empty to avoid crash
+ if (col_unique_ids.empty()) {
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "]
index["
+ << index.index_id()
+ << "] has no column unique id, will skip
index compaction."
+ << " tablet_schema=" <<
_cur_tablet_schema->dump_full_schema();
+ continue;
+ }
+ auto col_unique_id = col_unique_ids[0];
+ // Avoid doing inverted index compaction on non-slice type
columns
+ if
(!field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type()))
{
+ continue;
+ }
//NOTE: here src_rs may be in building index progress, so it
would not contain inverted index info.
bool all_have_inverted_index = std::all_of(
_input_rowsets.begin(), _input_rowsets.end(),
[&](const auto& src_rs) {
@@ -892,7 +879,7 @@ Status
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
reader->close();
// why is 3?
- // bkd index will write at least 3 files
+ // slice type index file at least has 3 files:
null_bitmap, segments_N, segments.gen
if (files.size() < 3) {
LOG(WARNING) << "tablet[" <<
_tablet->tablet_id()
<< "] column_unique_id[" <<
col_unique_id << "],"
@@ -905,9 +892,8 @@ Status
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
return true;
return true;
});
- if (all_have_inverted_index &&
-
field_is_slice_type(_cur_tablet_schema->column_by_uid(col_unique_id).type())) {
- ctx.skip_inverted_index.insert(col_unique_id);
+ if (all_have_inverted_index) {
+ ctx.columns_to_do_index_compaction.insert(col_unique_id);
}
}
}
diff --git a/be/src/olap/rowset/rowset_writer_context.h
b/be/src/olap/rowset/rowset_writer_context.h
index ad82f6c491e..b5fa21960da 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -90,8 +90,8 @@ struct RowsetWriterContext {
int64_t newest_write_timestamp = -1;
bool enable_unique_key_merge_on_write = false;
- // store column_unique_id to skip write inverted index
- std::set<int32_t> skip_inverted_index;
+ // store column_unique_id to do index compaction
+ std::set<int32_t> columns_to_do_index_compaction;
DataWriteType write_type = DataWriteType::TYPE_DEFAULT;
BaseTabletSPtr tablet = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 72dd5d70ce7..7690a330ae4 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -210,7 +210,7 @@ Status SegmentWriter::init(const std::vector<uint32_t>&
col_ids, bool has_key) {
if (_opts.rowset_ctx != nullptr) {
// skip write inverted index for index compaction
skip_inverted_index =
-
_opts.rowset_ctx->skip_inverted_index.count(column.unique_id()) > 0;
+
_opts.rowset_ctx->columns_to_do_index_compaction.count(column.unique_id()) > 0;
}
// skip write inverted index on load if skip_write_index_on_load is
true
if (_opts.write_type == DataWriteType::TYPE_DIRECT &&
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index c49a827e259..549ad41d2cc 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -164,7 +164,8 @@ Status
VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
bool skip_inverted_index = false;
if (_opts.rowset_ctx != nullptr) {
// skip write inverted index for index compaction
- skip_inverted_index =
_opts.rowset_ctx->skip_inverted_index.count(column.unique_id()) > 0;
+ skip_inverted_index =
+
_opts.rowset_ctx->columns_to_do_index_compaction.count(column.unique_id()) > 0;
}
// skip write inverted index on load if skip_write_index_on_load is true
if (_opts.write_type == DataWriteType::TYPE_DIRECT &&
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]