This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0-var by this push:
new b169a650c70 [fix](index compaction)Support compact multi segment in one index and add trans_vec check (#28545)
b169a650c70 is described below
commit b169a650c70766a018427e8a83ac82dc84c9da62
Author: qiye <[email protected]>
AuthorDate: Mon Dec 18 11:56:34 2023 +0800
[fix](index compaction)Support compact multi segment in one index and add trans_vec check (#28545)
---
be/src/clucene | 2 +-
be/src/olap/compaction.cpp | 38 +++++++++++++++++++---
.../segment_v2/inverted_index_compaction.cpp | 1 +
3 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index d20200ed36d..0ab22b4f4fa 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit d20200ed36dda4087489d49457a4da0c44ad4d09
+Subproject commit 0ab22b4f4fa704e9040c8e0c4694c3bfc77769b0
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 635ca0eaa26..4529d275284 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -402,6 +402,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
if (_input_row_num > 0 && stats.rowid_conversion &&
config::inverted_index_compaction_enable) {
OlapStopWatch inverted_watch;
+
+ // check rowid_conversion correctness
+ Version version = _tablet->max_version();
+ DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+ std::set<RowLocation> missed_rows;
+ std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map;
+ // Convert the delete bitmap of the input rowsets to output rowset.
+ std::size_t missed_rows_size = 0;
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+ &location_map, _tablet->tablet_meta()->delete_bitmap(),
+ &output_rowset_delete_bitmap);
+ if (!allow_delete_in_cumu_compaction()) {
+ missed_rows_size = missed_rows.size();
+ if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
+ stats.merged_rows != missed_rows_size) {
+ std::string err_msg = fmt::format(
+ "cumulative compaction: the merged rows({}) is not equal to missed "
+ "rows({}) in rowid conversion, tablet_id: {}, table_id:{}",
+ stats.merged_rows, missed_rows_size, _tablet->tablet_id(),
+ _tablet->table_id());
+ DCHECK(false) << err_msg;
+ LOG(WARNING) << err_msg;
+ }
+ }
+
+ RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map));
+
// translation vec
// <<dest_idx_num, dest_docId>>
// the first level vector: index indicates src segment.
@@ -426,7 +454,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
// src index files
// format: rowsetId_segmentId
std::vector<std::string> src_index_files(src_segment_num);
- for (auto m : src_seg_to_id_map) {
+ for (const auto& m : src_seg_to_id_map) {
std::pair<RowsetId, uint32_t> p = m.first;
src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second);
}
@@ -675,11 +703,11 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
// of incremental data later.
// TODO(LiaoXin): check if there are duplicate keys
std::size_t missed_rows_size = 0;
+ _tablet->calc_compaction_output_rowset_delete_bitmap(
+ _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+ &location_map, _tablet->tablet_meta()->delete_bitmap(),
+ &output_rowset_delete_bitmap);
if (!allow_delete_in_cumu_compaction()) {
- _tablet->calc_compaction_output_rowset_delete_bitmap(
- _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
- &location_map, _tablet->tablet_meta()->delete_bitmap(),
- &output_rowset_delete_bitmap);
missed_rows_size = missed_rows.size();
if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION && stats != nullptr &&
stats->merged_rows != missed_rows_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 7f653a93591..b3a28c6ebfc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -56,6 +56,7 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
dest_index_dirs[i] = DorisCompoundDirectory::getDirectory(fs, path.c_str(), true);
}
+ DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
dest_segment_num_rows);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]