This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch revert-42280-index_compaction_test_api
in repository https://gitbox.apache.org/repos/asf/doris.git

commit fdfe8e503348618adc180ce5ad110f668d8ce789
Author: qiye <[email protected]>
AuthorDate: Fri Oct 25 15:08:40 2024 +0800

    Revert "[opt](index compaction)Add dual write inverted index file switch (#42…"
    
    This reverts commit e9a14d700e87a995a1d4cd84aa3457b61997ece8.
---
 be/src/common/config.cpp                           |   2 -
 be/src/common/config.h                             |   4 -
 be/src/olap/compaction.cpp                         | 151 +--------------------
 be/src/olap/compaction.h                           |   3 -
 be/src/olap/rowset/segment_v2/segment_writer.cpp   |   3 +-
 .../rowset/segment_v2/vertical_segment_writer.cpp  |   3 +-
 6 files changed, 3 insertions(+), 163 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index d031189141e..31170b731f4 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1042,8 +1042,6 @@ DEFINE_Int32(max_depth_in_bkd_tree, "32");
 DEFINE_mBool(inverted_index_compaction_enable, "false");
 // Only for debug, do not use in production
 DEFINE_mBool(debug_inverted_index_compaction, "false");
-// Only for debug, do not use in production
-DEFINE_mBool(dual_write_inverted_index_enable, "false");
 // index by RAM directory
 DEFINE_mBool(inverted_index_ram_dir_enable, "true");
 // use num_broadcast_buffer blocks as buffer to do broadcast
diff --git a/be/src/common/config.h b/be/src/common/config.h
index d5daf0c6924..585c4dc45cc 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1090,11 +1090,7 @@ DECLARE_Int32(max_depth_in_bkd_tree);
 // index compaction
 DECLARE_mBool(inverted_index_compaction_enable);
 // Only for debug, do not use in production
-// Debug switch for collecting intermediate data in inverted index compaction
 DECLARE_mBool(debug_inverted_index_compaction);
-// Only for debug, do not use in production
-// Debug switch for writing inverted index both in compaction process and index compaction process
-DECLARE_mBool(dual_write_inverted_index_enable);
 // index by RAM directory
 DECLARE_mBool(inverted_index_ram_dir_enable);
 // use num_broadcast_buffer blocks as buffer to do broadcast
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index d23510d373d..dee06a8a79b 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -18,7 +18,6 @@
 #include "olap/compaction.h"
 
 #include <fmt/format.h>
-#include <gen_cpp/olap_common.pb.h>
 #include <gen_cpp/olap_file.pb.h>
 #include <glog/logging.h>
 
@@ -36,8 +35,6 @@
 #include <shared_mutex>
 #include <utility>
 
-#include "CLucene/config/repl_wchar.h"
-#include "CLucene/index/Terms.h"
 #include "cloud/cloud_meta_mgr.h"
 #include "cloud/cloud_storage_engine.h"
 #include "common/config.h"
@@ -622,7 +619,6 @@ Status Compaction::do_inverted_index_compaction() {
     // Some columns have already been indexed
     // key: seg_id, value: inverted index file size
     std::unordered_map<int, int64_t> compacted_idx_file_size;
-    auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir();
     for (int seg_id = 0; seg_id < dest_segment_num; ++seg_id) {
         std::string index_path_prefix {
                 InvertedIndexDescriptor::get_index_file_path_prefix(ctx.segment_path(seg_id))};
@@ -648,12 +644,6 @@ Status Compaction::do_inverted_index_compaction() {
                 }
                 compacted_idx_file_size[seg_id] = fsize;
             }
-            // if dual_write_inverted_index_enable is true, we need to write 
inverted index to tmp dir
-            if (config::dual_write_inverted_index_enable) {
-                auto tmp_index_path_prefix =
-                        tmp_file_dir / (dest_rowset_id.to_string() + "_" + 
std::to_string(seg_id));
-                index_path_prefix = tmp_index_path_prefix;
-            }
             auto inverted_index_file_writer = std::make_unique<InvertedIndexFileWriter>(
                     ctx.fs(), index_path_prefix, ctx.rowset_id.to_string(), seg_id,
                     _cur_tablet_schema->get_inverted_index_storage_format());
@@ -677,6 +667,7 @@ Status Compaction::do_inverted_index_compaction() {
     }
 
     // use tmp file dir to store index files
+    auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir();
     auto index_tmp_path = tmp_file_dir / dest_rowset_id.to_string();
     LOG(INFO) << "start index compaction"
               << ". tablet=" << _tablet->tablet_id() << ", source index size=" 
<< src_segment_num
@@ -763,70 +754,6 @@ Status Compaction::do_inverted_index_compaction() {
         return status;
     }
 
-    // check idx file correctness only when dual_write_inverted_index_enable 
is true
-    if (config::dual_write_inverted_index_enable) {
-        for (auto&& column_uniq_id : ctx.columns_to_do_index_compaction) {
-            auto col = _cur_tablet_schema->column_by_uid(column_uniq_id);
-            const auto* index_meta = 
_cur_tablet_schema->get_inverted_index(col);
-            for (int dest_segment_id = 0; dest_segment_id < dest_segment_num; 
dest_segment_id++) {
-                // create index file reader for normal compaction index file
-                std::string index_path_prefix 
{InvertedIndexDescriptor::get_index_file_path_prefix(
-                        ctx.segment_path(dest_segment_id))};
-                io::Path cfs_path;
-                if (_cur_tablet_schema->get_inverted_index_storage_format() !=
-                    doris::InvertedIndexStorageFormatPB::V1) {
-                    cfs_path = 
InvertedIndexDescriptor::get_index_file_path_v2(index_path_prefix);
-                } else {
-                    cfs_path = InvertedIndexDescriptor::get_index_file_path_v1(
-                            index_path_prefix, index_meta->index_id(),
-                            index_meta->get_index_suffix());
-                }
-                auto inverted_index_file_reader = 
std::make_unique<InvertedIndexFileReader>(
-                        ctx.fs(), index_path_prefix,
-                        
_cur_tablet_schema->get_inverted_index_storage_format());
-                bool open_idx_file_cache = false;
-                auto st = 
inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
-                                                           
open_idx_file_cache);
-                if (!st.ok()) {
-                    LOG(FATAL) << "inverted_index_file_reader init failed in 
index compaction "
-                                  "correctness check, error:"
-                               << st;
-                }
-                auto index_reader = 
DORIS_TRY(inverted_index_file_reader->open(index_meta));
-
-                // create index file reader for tmp index compaction index file
-                auto tmp_index_path_prefix = tmp_file_dir / 
(dest_rowset_id.to_string() + "_" +
-                                                             
std::to_string(dest_segment_id));
-                auto tmp_inverted_index_file_reader = 
std::make_unique<InvertedIndexFileReader>(
-                        doris::io::global_local_filesystem(), 
tmp_index_path_prefix,
-                        
_cur_tablet_schema->get_inverted_index_storage_format());
-                st = 
tmp_inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
-                                                          open_idx_file_cache);
-                if (!st.ok()) {
-                    LOG(FATAL) << "tmp_inverted_index_file_reader init failed 
in index compaction "
-                                  "correctness check, error:"
-                               << st;
-                }
-                auto tmp_index_reader = 
DORIS_TRY(tmp_inverted_index_file_reader->open(index_meta));
-
-                st = check_idx_file_correctness(*index_reader, 
*tmp_index_reader);
-                if (!st.ok()) {
-                    LOG(FATAL) << "index compaction correctness check failed"
-                               << ", tablet=" << _tablet->tablet_id() << ", 
index_path=" << cfs_path
-                               << ", tmp_index_path="
-                               << (tmp_index_path_prefix.string() + "_" +
-                                   std::to_string(index_meta->index_id()) + 
".idx")
-                               << ", error=" << st.msg();
-                }
-                LOG(INFO) << "index compaction correctness check succeed"
-                          << ", tablet=" << _tablet->tablet_id() << ", 
index_path=" << cfs_path
-                          << ", tmp_index_path="
-                          << (tmp_index_path_prefix.string() + "_" +
-                              std::to_string(index_meta->index_id()) + ".idx");
-            }
-        }
-    }
-
     // index compaction should update total disk size and index disk size
     
_output_rowset->rowset_meta()->set_data_disk_size(_output_rowset->data_disk_size()
 +
                                                       
inverted_index_file_size);
@@ -849,82 +776,6 @@ Status Compaction::do_inverted_index_compaction() {
     return Status::OK();
 }
 
-Status Compaction::check_idx_file_correctness(DorisCompoundReader& 
index_reader,
-                                              DorisCompoundReader& 
tmp_index_reader) {
-    lucene::index::IndexReader* idx_reader = 
lucene::index::IndexReader::open(&index_reader);
-    lucene::index::IndexReader* tmp_idx_reader =
-            lucene::index::IndexReader::open(&tmp_index_reader);
-
-    // compare numDocs
-    if (idx_reader->numDocs() != tmp_idx_reader->numDocs()) {
-        return Status::InternalError(
-                "index compaction correctness check failed, numDocs not equal, 
idx_numDocs={}, "
-                "tmp_idx_numDocs={}",
-                idx_reader->numDocs(), tmp_idx_reader->numDocs());
-    }
-
-    lucene::index::TermEnum* term_enum = idx_reader->terms();
-    lucene::index::TermEnum* tmp_term_enum = tmp_idx_reader->terms();
-
-    // iterate TermEnum
-    while (term_enum->next() && tmp_term_enum->next()) {
-        std::string token = 
lucene_wcstoutf8string(term_enum->term(false)->text(),
-                                                   
term_enum->term(false)->textLength());
-        std::string field = 
lucene_wcstoutf8string(term_enum->term(false)->field(),
-                                                   
lenOfString(term_enum->term(false)->field()));
-        std::string tmp_token = 
lucene_wcstoutf8string(tmp_term_enum->term(false)->text(),
-                                                       
tmp_term_enum->term(false)->textLength());
-        std::string tmp_field =
-                lucene_wcstoutf8string(tmp_term_enum->term(false)->field(),
-                                       
lenOfString(tmp_term_enum->term(false)->field()));
-        // compare token and field
-        if (field != tmp_field) {
-            return Status::InternalError(
-                    "index compaction correctness check failed, fields not 
equal, field={}, "
-                    "tmp_field={}",
-                    field, field);
-        }
-        if (token != tmp_token) {
-            return Status::InternalError(
-                    "index compaction correctness check failed, tokens not 
equal, token={}, "
-                    "tmp_token={}",
-                    token, tmp_token);
-        }
-
-        // get term's docId and freq
-        lucene::index::TermDocs* term_docs = 
idx_reader->termDocs(term_enum->term());
-        lucene::index::TermDocs* tmp_term_docs = 
tmp_idx_reader->termDocs(tmp_term_enum->term());
-
-        // compare term's docId and freq
-        while (term_docs->next() && tmp_term_docs->next()) {
-            if (term_docs->doc() != tmp_term_docs->doc() ||
-                term_docs->freq() != tmp_term_docs->freq()) {
-                return Status::InternalError(
-                        "index compaction correctness check failed, docId or 
freq not equal, "
-                        "docId={}, tmp_docId={}, freq={}, tmp_freq={}",
-                        term_docs->doc(), tmp_term_docs->doc(), 
term_docs->freq(),
-                        tmp_term_docs->freq());
-            }
-        }
-
-        // check if there are remaining docs
-        if (term_docs->next() || tmp_term_docs->next()) {
-            return Status::InternalError(
-                    "index compaction correctness check failed, number of docs 
not equal for "
-                    "term={}, tmp_term={}",
-                    token, tmp_token);
-        }
-    }
-
-    // check if there are remaining terms
-    if (term_enum->next() || tmp_term_enum->next()) {
-        return Status::InternalError(
-                "index compaction correctness check failed, number of terms 
not equal");
-    }
-
-    return Status::OK();
-}
-
 void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) {
     for (const auto& index : _cur_tablet_schema->indexes()) {
         if (index.index_type() != IndexType::INVERTED) {
diff --git a/be/src/olap/compaction.h b/be/src/olap/compaction.h
index 99de5573a91..08afe840280 100644
--- a/be/src/olap/compaction.h
+++ b/be/src/olap/compaction.h
@@ -69,9 +69,6 @@ protected:
 
     Status do_inverted_index_compaction();
 
-    Status check_idx_file_correctness(DorisCompoundReader& index_reader,
-                                      DorisCompoundReader& tmp_index_reader);
-
     void construct_index_compaction_columns(RowsetWriterContext& ctx);
 
     virtual Status construct_output_rowset_writer(RowsetWriterContext& ctx) = 0;
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index f18f5d5d641..4301303dac9 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -208,8 +208,7 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
 
     opts.need_bitmap_index = column.has_bitmap_index();
     bool skip_inverted_index = false;
-    // if dual_write_inverted_index_enable is true, do not skip write inverted index on index compaction columns
-    if (_opts.rowset_ctx != nullptr && !config::dual_write_inverted_index_enable) {
+    if (_opts.rowset_ctx != nullptr) {
         // skip write inverted index for index compaction column
         skip_inverted_index =
                 _opts.rowset_ctx->columns_to_do_index_compaction.count(column.unique_id()) > 0;
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index ff9c694d1b4..ce033cdd002 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -201,8 +201,7 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
 
     opts.need_bitmap_index = column.has_bitmap_index();
     bool skip_inverted_index = false;
-    // if dual_write_inverted_index_enable is true, do not skip write inverted index on index compaction columns
-    if (_opts.rowset_ctx != nullptr && !config::dual_write_inverted_index_enable) {
+    if (_opts.rowset_ctx != nullptr) {
         // skip write inverted index for index compaction column
         skip_inverted_index =
                 _opts.rowset_ctx->columns_to_do_index_compaction.contains(column.unique_id());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to