This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new e6a7a0128a5 [opt](index compaction) optimize checks before index compaction (#25486)
e6a7a0128a5 is described below
commit e6a7a0128a5f84ffd89121723b87a4342fa8edb4
Author: qiye <[email protected]>
AuthorDate: Thu Oct 26 11:21:46 2023 +0800
[opt](index compaction) optimize checks before index compaction (#25486)
---
be/src/olap/compaction.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 45 insertions(+), 1 deletion(-)
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index a03b0e3fd17..f0d024ae665 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -47,6 +47,7 @@
#include "olap/rowset/rowset_writer.h"
#include "olap/rowset/rowset_writer_context.h"
#include "olap/rowset/segment_v2/inverted_index_compaction.h"
+#include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
#include "olap/storage_engine.h"
#include "olap/storage_policy.h"
#include "olap/tablet.h"
@@ -541,6 +542,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(src_rs);
if (rowset == nullptr) {
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id()
+ << "] rowset is null, will skip index compaction";
return false;
}
auto fs = rowset->rowset_meta()->fs();
@@ -548,6 +551,9 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
auto index_meta = rowset->tablet_schema()->get_inverted_index(unique_id);
if (index_meta == nullptr) {
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id()
+ << "] index_unique_id[" << unique_id
+ << "] index meta is null, will skip index compaction";
return false;
}
for (auto i = 0; i < rowset->num_segments(); i++) {
@@ -563,10 +569,48 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
return false;
}
if (!exists) {
- LOG(WARNING) << inverted_index_src_file_path
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id()
+ << "] index_unique_id[" << unique_id << "],"
+ << inverted_index_src_file_path
<< " is not exists, will skip index compaction";
return false;
}
+
+ // check idx file size
+ int64_t file_size = 0;
+ if (fs->file_size(inverted_index_src_file_path, &file_size) !=
+ Status::OK()) {
+ LOG(ERROR) << inverted_index_src_file_path
+ << " fs->file_size error";
+ return false;
+ }
+ if (file_size == 0) {
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id()
+ << "] index_unique_id[" << unique_id << "],"
+ << inverted_index_src_file_path
+ << " is empty file, will skip index compaction";
+ return false;
+ }
+
+ // check index meta
+ std::filesystem::path p(inverted_index_src_file_path);
+ std::string dir_str = p.parent_path().string();
+ std::string file_str = p.filename().string();
+ lucene::store::Directory* dir =
+ DorisCompoundDirectory::getDirectory(fs, dir_str.c_str());
+ auto reader = new DorisCompoundReader(dir, file_str.c_str());
+ std::vector<std::string> files;
+ reader->list(&files);
+
+ // why is 3?
+ // bkd index will write at least 3 files
+ if (files.size() < 3) {
+ LOG(WARNING) << "tablet[" << _tablet->tablet_id()
+ << "] index_unique_id[" << unique_id << "],"
+ << inverted_index_src_file_path
+ << " is corrupted, will skip index compaction";
+ return false;
+ }
}
return true;
});
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]