This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0-var by this push:
     new b169a650c70 [fix](index compaction)Support compact multi segment in one index and add trans_vec check (#28545)
b169a650c70 is described below

commit b169a650c70766a018427e8a83ac82dc84c9da62
Author: qiye <[email protected]>
AuthorDate: Mon Dec 18 11:56:34 2023 +0800

    [fix](index compaction)Support compact multi segment in one index and add trans_vec check (#28545)
---
 be/src/clucene                                     |  2 +-
 be/src/olap/compaction.cpp                         | 38 +++++++++++++++++++---
 .../segment_v2/inverted_index_compaction.cpp       |  1 +
 3 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index d20200ed36d..0ab22b4f4fa 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit d20200ed36dda4087489d49457a4da0c44ad4d09
+Subproject commit 0ab22b4f4fa704e9040c8e0c4694c3bfc77769b0
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 635ca0eaa26..4529d275284 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -402,6 +402,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
 
     if (_input_row_num > 0 && stats.rowid_conversion && 
config::inverted_index_compaction_enable) {
         OlapStopWatch inverted_watch;
+
+        // check rowid_conversion correctness
+        Version version = _tablet->max_version();
+        DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+        std::set<RowLocation> missed_rows;
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, 
RowLocation>>> location_map;
+        // Convert the delete bitmap of the input rowsets to output rowset.
+        std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
+        if (!allow_delete_in_cumu_compaction()) {
+            missed_rows_size = missed_rows.size();
+            if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION 
&&
+                stats.merged_rows != missed_rows_size) {
+                std::string err_msg = fmt::format(
+                        "cumulative compaction: the merged rows({}) is not 
equal to missed "
+                        "rows({}) in rowid conversion, tablet_id: {}, 
table_id:{}",
+                        stats.merged_rows, missed_rows_size, 
_tablet->tablet_id(),
+                        _tablet->table_id());
+                DCHECK(false) << err_msg;
+                LOG(WARNING) << err_msg;
+            }
+        }
+
+        RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, 
location_map));
+
         // translation vec
         // <<dest_idx_num, dest_docId>>
         // the first level vector: index indicates src segment.
@@ -426,7 +454,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
             // src index files
             // format: rowsetId_segmentId
             std::vector<std::string> src_index_files(src_segment_num);
-            for (auto m : src_seg_to_id_map) {
+            for (const auto& m : src_seg_to_id_map) {
                 std::pair<RowsetId, uint32_t> p = m.first;
                 src_index_files[m.second] = p.first.to_string() + "_" + 
std::to_string(p.second);
             }
@@ -675,11 +703,11 @@ Status Compaction::modify_rowsets(const 
Merger::Statistics* stats) {
         // of incremental data later.
         // TODO(LiaoXin): check if there are duplicate keys
         std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
         if (!allow_delete_in_cumu_compaction()) {
-            _tablet->calc_compaction_output_rowset_delete_bitmap(
-                    _input_rowsets, _rowid_conversion, 0, version.second + 1, 
&missed_rows,
-                    &location_map, _tablet->tablet_meta()->delete_bitmap(),
-                    &output_rowset_delete_bitmap);
             missed_rows_size = missed_rows.size();
             if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION 
&& stats != nullptr &&
                 stats->merged_rows != missed_rows_size) {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 7f653a93591..b3a28c6ebfc 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -56,6 +56,7 @@ Status compact_column(int32_t index_id, int src_segment_num, 
int dest_segment_nu
         dest_index_dirs[i] = DorisCompoundDirectory::getDirectory(fs, 
path.c_str(), true);
     }
 
+    DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
     index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
                                   dest_segment_num_rows);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to