This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new eac4f7f80d1 [fix](checker) Fix checking inverted index file
incorrectly (#53876)
eac4f7f80d1 is described below
commit eac4f7f80d1ade4c70a60484ada7fe6142eea931
Author: Uniqueyou <[email protected]>
AuthorDate: Sat Aug 2 23:25:54 2025 +0800
[fix](checker) Fix checking inverted index file incorrectly (#53876)
---
cloud/src/recycler/checker.cpp | 343 +++++++++++++++-----
cloud/src/recycler/checker.h | 20 ++
cloud/test/recycler_test.cpp | 702 +++++++++++++++++++++++++++++++++++++++--
3 files changed, 964 insertions(+), 101 deletions(-)
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 67e358cfa35..024ba9fa52a 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -39,6 +39,7 @@
#include "common/bvars.h"
#include "common/config.h"
+#include "common/defer.h"
#include "common/encryption_util.h"
#include "common/logging.h"
#include "common/util.h"
@@ -504,12 +505,24 @@ int InstanceChecker::do_check() {
};
TabletFiles tablet_files_cache;
- auto check_rowset_objects = [&, this](const doris::RowsetMetaCloudPB&
rs_meta,
- std::string_view key) {
+ auto check_rowset_objects = [&, this](doris::RowsetMetaCloudPB& rs_meta,
std::string_view key) {
if (rs_meta.num_segments() == 0) {
return;
}
+ bool data_loss = false;
+ bool segment_file_loss = false;
+ bool index_file_loss = false;
+
+ DORIS_CLOUD_DEFER {
+ if (data_loss) {
+ LOG(INFO) << "segment file is" << (segment_file_loss ? "" : "
not") << " loss, "
+ << "index file is" << (index_file_loss ? "" : "
not") << " loss, "
+ << "rowset.tablet_id = " << rs_meta.tablet_id();
+ num_rowset_loss++;
+ }
+ };
+
++num_scanned_with_segment;
if (tablet_files_cache.tablet_id != rs_meta.tablet_id()) {
long tablet_volume = 0;
@@ -543,7 +556,6 @@ int InstanceChecker::do_check() {
instance_volume += tablet_volume;
}
- bool data_loss = false;
for (int i = 0; i < rs_meta.num_segments(); ++i) {
auto path = segment_path(rs_meta.tablet_id(),
rs_meta.rowset_id_v2(), i);
@@ -556,11 +568,36 @@ int InstanceChecker::do_check() {
break;
}
data_loss = true;
+ segment_file_loss = true;
TEST_SYNC_POINT_CALLBACK("InstanceChecker.do_check1", &path);
LOG(WARNING) << "object not exist, path=" << path
<< ", rs_meta=" << rs_meta.ShortDebugString() << "
key=" << hex(key);
}
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv_->create_txn(&txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG(WARNING) << "failed to init txn, err=" << err;
+ return;
+ }
+
+ TabletIndexPB tablet_index;
+ if (get_tablet_idx(txn_kv_.get(), instance_id_, rs_meta.tablet_id(),
tablet_index) == -1) {
+ LOG(WARNING) << "failedt to get tablet index, tablet_id= " <<
rs_meta.tablet_id();
+ return;
+ }
+
+ auto tablet_schema_key =
+ meta_schema_key({instance_id_, tablet_index.index_id(),
rs_meta.schema_version()});
+ std::string tablet_schema_val;
+ err = txn->get(tablet_schema_key, &tablet_schema_val);
+ if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+ // rowset don't have tablet schema key means no index
+ return;
+ }
+ auto* schema = rs_meta.mutable_tablet_schema();
+ schema->ParseFromString(tablet_schema_val);
+
std::vector<std::pair<int64_t, std::string>> index_ids;
for (const auto& i : rs_meta.tablet_schema().index()) {
if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
@@ -571,7 +608,6 @@ int InstanceChecker::do_check() {
if (!key_exist(txn_kv_.get(), tablet_idx_key)) {
for (int i = 0; i < rs_meta.num_segments(); ++i) {
std::vector<std::string> index_path_v;
- std::vector<std::string> loss_file_path;
if (rs_meta.tablet_schema().inverted_index_storage_format() ==
InvertedIndexStorageFormatPB::V1) {
for (const auto& index_id : index_ids) {
@@ -589,32 +625,17 @@ int InstanceChecker::do_check() {
}
if (!index_path_v.empty()) {
- if (std::all_of(index_path_v.begin(), index_path_v.end(),
- [&](const auto& idx_file_path) {
- if
(!tablet_files_cache.files.contains(idx_file_path)) {
-
loss_file_path.emplace_back(idx_file_path);
- return false;
- }
- return true;
- })) {
+ if (std::ranges::all_of(index_path_v, [&](const auto&
idx_file_path) {
+ return
tablet_files_cache.files.contains(idx_file_path);
+ })) {
continue;
}
}
-
+ index_file_loss = true;
data_loss = true;
- LOG(WARNING) << "object not exist, path="
- << std::accumulate(loss_file_path.begin(),
loss_file_path.end(),
- std::string(),
- [](const auto& a, const auto&
b) {
- return a.empty() ? b : a +
", " + b;
- })
- << " key=" << hex(tablet_idx_key);
+ LOG(WARNING) << "object not exist, key=" <<
hex(tablet_idx_key);
}
}
-
- if (data_loss) {
- ++num_rowset_loss;
- }
};
// scan visible rowsets
@@ -639,7 +660,9 @@ int InstanceChecker::do_check() {
while (it->has_next() && !stopped()) {
auto [k, v] = it->next();
- if (!it->has_next()) start_key = k;
+ if (!it->has_next()) {
+ start_key = k;
+ }
doris::RowsetMetaCloudPB rs_meta;
if (!rs_meta.ParseFromArray(v.data(), v.size())) {
@@ -710,11 +733,8 @@ int InstanceChecker::do_inverted_check() {
};
TabletRowsets tablet_rowsets_cache;
- struct TabletIndexes {
- int64_t tablet_id {0};
- std::unordered_set<int64_t> index_ids;
- };
- TabletIndexes tablet_indexes_cache;
+ RowsetIndexesFormatV1 rowset_index_cache_v1;
+ RowsetIndexesFormatV2 rowset_index_cache_v2;
// Return 0 if check success, return 1 if file is garbage data, negative
if error occurred
auto check_segment_file = [&](const std::string& obj_key) {
@@ -793,10 +813,12 @@ int InstanceChecker::do_inverted_check() {
return 0;
};
+
auto check_inverted_index_file = [&](const std::string& obj_key) {
std::vector<std::string> str;
butil::SplitString(obj_key, '/', &str);
- // data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+ // format v1:
data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+ // format v2: data/{tablet_id}/{rowset_id}_{seg_num}.idx
if (str.size() < 3) {
return -1;
}
@@ -807,62 +829,31 @@ int InstanceChecker::do_inverted_check() {
return -1;
}
- if (!str.back().ends_with(".idx")) {
+ // v1: {rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+ // v2: {rowset_id}_{seg_num}.idx
+ std::string rowset_info = str.back();
+
+ if (!rowset_info.ends_with(".idx")) {
return 0; // Not an index file
}
- int64_t index_id;
+ InvertedIndexStorageFormatPB inverted_index_storage_format =
+ std::count(rowset_info.begin(), rowset_info.end(), '_') > 1
+ ? InvertedIndexStorageFormatPB::V1
+ : InvertedIndexStorageFormatPB::V2;
- size_t pos = str.back().find_last_of('_');
+ size_t pos = rowset_info.find_last_of('_');
if (pos == std::string::npos || pos + 1 >= str.back().size() - 4) {
LOG(WARNING) << "Invalid index_id format, key=" << obj_key;
return -1;
}
- index_id = atol(str.back().substr(pos + 1, str.back().size() -
4).c_str());
-
- if (tablet_indexes_cache.tablet_id == tablet_id) {
- if (tablet_indexes_cache.index_ids.contains(index_id)) {
- return 0;
- } else {
- LOG(WARNING) << "index not exists, key=" << obj_key;
- return -1;
- }
- }
- // Get all index id of this tablet
- tablet_indexes_cache.tablet_id = tablet_id;
- tablet_indexes_cache.index_ids.clear();
- std::unique_ptr<Transaction> txn;
- TxnErrorCode err = txn_kv_->create_txn(&txn);
- if (err != TxnErrorCode::TXN_OK) {
- LOG(WARNING) << "failed to create txn";
- return -1;
- }
- auto tablet_idx_key = meta_tablet_idx_key({instance_id_, tablet_id});
- std::string tablet_idx_val;
- err = txn->get(tablet_idx_key, &tablet_idx_val);
- if (err != TxnErrorCode::TXN_OK) {
- LOG(WARNING) << "failed to get tablet idx,"
- << " key=" << hex(tablet_idx_key) << " err=" << err;
- return -1;
- }
-
- TabletIndexPB tablet_idx_pb;
- if (!tablet_idx_pb.ParseFromArray(tablet_idx_val.data(),
tablet_idx_val.size())) {
- LOG(WARNING) << "malformed index meta value, key=" <<
hex(tablet_idx_key);
- return -1;
- }
- if (!tablet_idx_pb.has_index_id()) {
- LOG(WARNING) << "tablet index meta does not have index_id, key="
<< hex(tablet_idx_key);
- return -1;
- }
- tablet_indexes_cache.index_ids.insert(tablet_idx_pb.index_id());
-
- if (!tablet_indexes_cache.index_ids.contains(index_id)) {
- LOG(WARNING) << "index should be recycled, key=" << obj_key;
- return 1;
+ if (inverted_index_storage_format == InvertedIndexStorageFormatPB::V1)
{
+ return check_inverted_index_file_storage_format_v1(tablet_id,
obj_key, rowset_info,
+
rowset_index_cache_v1);
+ } else {
+ return check_inverted_index_file_storage_format_v2(tablet_id,
obj_key, rowset_info,
+
rowset_index_cache_v2);
}
-
- return 0;
};
// so we choose to skip here.
TEST_SYNC_POINT_RETURN_WITH_VALUE("InstanceChecker::do_inverted_check",
(int)0);
@@ -1243,6 +1234,202 @@ int InstanceChecker::get_pending_delete_bitmap_keys(
return 0;
}
+// Checks one inverted index file of storage format V1 against the rowset metas of its tablet.
+//
+// File name layout: data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+//
+// tablet_id             - tablet whose rowset metas are scanned
+// file_path             - full object key, only used in log messages
+// rowset_info           - last path component; callers are expected to pass a name that
+//                         ends with ".idx"
+// rowset_index_cache_v1 - result of the most recent metadata scan; reused while consecutive
+//                         files carry the same rowset id
+//
+// Returns 0 if the file is covered by metadata, 1 if it looks like leaked garbage and -1 on
+// malformed names, KV errors, or when the cached scan of the same rowset does not know the
+// segment / index.
+int InstanceChecker::check_inverted_index_file_storage_format_v1(
+        int64_t tablet_id, const std::string& file_path, const std::string& rowset_info,
+        RowsetIndexesFormatV1& rowset_index_cache_v1) {
+    // {rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx -> drop ".idx" and split on '_'
+    std::vector<std::string> str;
+    butil::SplitString(rowset_info.substr(0, rowset_info.size() - 4), '_', &str);
+    if (str.size() < 3) {
+        LOG(WARNING) << "Split rowset info with '_' error, str size < 3, rowset_info = "
+                     << rowset_info;
+        return -1;
+    }
+    const std::string& rowset_id = str[0];
+    const int64_t segment_id = std::atoll(str[1].c_str());
+    const std::string& index_id_with_suffix_name = str[2];
+
+    // Fast path: the previous call already scanned the metadata for this rowset id.
+    if (rowset_index_cache_v1.rowset_id == rowset_id) {
+        if (!rowset_index_cache_v1.segment_ids.contains(segment_id)) {
+            LOG(WARNING) << fmt::format("segment id not found, rowset_info = {}, obj_key = {}",
+                                        rowset_info, file_path);
+            return -1;
+        }
+        if (!rowset_index_cache_v1.index_ids.contains(index_id_with_suffix_name)) {
+            LOG(WARNING) << fmt::format(
+                    "index_id with suffix name not found, rowset_info = {}, obj_key = {}",
+                    rowset_info, file_path);
+            return -1;
+        }
+        // Both the segment and the index are known: nothing else to verify.
+        return 0;
+    }
+
+    // Rebuild the cache. The rowset id is only published once the scan has finished, so an
+    // early return below can never leave a half-filled cache that later calls would trust.
+    rowset_index_cache_v1.rowset_id.clear();
+    rowset_index_cache_v1.segment_ids.clear();
+    rowset_index_cache_v1.index_ids.clear();
+
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_rowset_key({instance_id_, tablet_id, 0});
+    auto end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
+    do {
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next()) {
+            auto [k, v] = it->next();
+            doris::RowsetMetaCloudPB rs_meta;
+            if (!rs_meta.ParseFromArray(v.data(), v.size())) {
+                LOG(WARNING) << "malformed rowset meta value, key=" << hex(k);
+                return -1;
+            }
+
+            // NOTE(review): segment ordinals of every rowset of the tablet are merged here,
+            // not only those of `rowset_id` -- confirm this is the intended granularity.
+            for (int64_t seg = 0; seg < rs_meta.num_segments(); ++seg) {
+                rowset_index_cache_v1.segment_ids.insert(seg);
+            }
+
+            TabletIndexPB tablet_index;
+            if (get_tablet_idx(txn_kv_.get(), instance_id_, rs_meta.tablet_id(), tablet_index) ==
+                -1) {
+                LOG(WARNING) << "failedt to get tablet index, tablet_id= " << rs_meta.tablet_id();
+                return -1;
+            }
+
+            auto tablet_schema_key = meta_schema_key(
+                    {instance_id_, tablet_index.index_id(), rs_meta.schema_version()});
+            std::string tablet_schema_val;
+            err = txn->get(tablet_schema_key, &tablet_schema_val);
+            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+                // rowset without a tablet schema key is treated as having no index
+                return 0;
+            }
+            if (err != TxnErrorCode::TXN_OK) {
+                // An unreadable schema must not be mistaken for "no index declared".
+                LOG(WARNING) << "failed to get tablet schema, err=" << err
+                             << " key=" << hex(tablet_schema_key);
+                return -1;
+            }
+            if (!rs_meta.mutable_tablet_schema()->ParseFromString(tablet_schema_val)) {
+                LOG(WARNING) << "malformed tablet schema value, key=" << hex(tablet_schema_key);
+                return -1;
+            }
+
+            for (const auto& index : rs_meta.tablet_schema().index()) {
+                if (index.has_index_type() && index.index_type() == IndexType::INVERTED) {
+                    // The file name carries the numeric index id followed by the suffix, so
+                    // the cache key has to be built from index_id(), not index_name().
+                    // NOTE(review): assumes the suffix is appended verbatim in the file
+                    // name -- confirm against inverted_index_path_v1().
+                    rowset_index_cache_v1.index_ids.insert(
+                            fmt::format("{}{}", index.index_id(), index.index_suffix_name()));
+                }
+            }
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for iteration
+                break;
+            }
+        }
+    } while (it->more() && !stopped());
+
+    // The scan ran to its end: publish the cache for subsequent files of the same rowset.
+    rowset_index_cache_v1.rowset_id = rowset_id;
+
+    if (!rowset_index_cache_v1.segment_ids.contains(segment_id)) {
+        // Garbage data leak
+        LOG(WARNING) << "rowset should be recycled, key=" << file_path;
+        return 1;
+    }
+
+    if (!rowset_index_cache_v1.index_ids.contains(index_id_with_suffix_name)) {
+        // Garbage data leak
+        LOG(WARNING) << "rowset with inde meta should be recycled, key=" << file_path;
+        return 1;
+    }
+
+    return 0;
+}
+
+// Checks one inverted index file of storage format V2 against the rowset metas of its tablet.
+//
+// File name layout: data/{tablet_id}/{rowset_id}_{seg_num}.idx
+//
+// tablet_id             - tablet whose rowset metas are scanned
+// file_path             - full object key, only used in log messages
+// rowset_info           - last path component; callers are expected to pass a name that
+//                         ends with ".idx"
+// rowset_index_cache_v2 - result of the most recent metadata scan; reused while consecutive
+//                         files carry the same rowset id
+//
+// Returns 0 if the file is covered by metadata, 1 if it looks like leaked garbage and -1 on
+// malformed names, KV errors, or when the cached scan of the same rowset does not know the
+// segment.
+int InstanceChecker::check_inverted_index_file_storage_format_v2(
+        int64_t tablet_id, const std::string& file_path, const std::string& rowset_info,
+        RowsetIndexesFormatV2& rowset_index_cache_v2) {
+    // {rowset_id}_{seg_num}.idx -> drop ".idx" and split on '_'
+    std::vector<std::string> str;
+    butil::SplitString(rowset_info.substr(0, rowset_info.size() - 4), '_', &str);
+    if (str.size() < 2) {
+        LOG(WARNING) << "Split rowset info with '_' error, str size < 2, rowset_info = "
+                     << rowset_info;
+        return -1;
+    }
+    const std::string& rowset_id = str[0];
+    const int64_t segment_id = std::atoll(str[1].c_str());
+
+    // Fast path: the previous call already scanned the metadata for this rowset id.
+    if (rowset_index_cache_v2.rowset_id == rowset_id) {
+        if (!rowset_index_cache_v2.segment_ids.contains(segment_id)) {
+            LOG(WARNING) << fmt::format("index file not found, rowset_info = {}, obj_key = {}",
+                                        rowset_info, file_path);
+            return -1;
+        }
+        // The segment is known: nothing else to verify.
+        return 0;
+    }
+
+    // Rebuild the cache. The rowset id is only published once the scan has finished, so an
+    // early return below can never leave a half-filled cache that later calls would trust.
+    rowset_index_cache_v2.rowset_id.clear();
+    rowset_index_cache_v2.segment_ids.clear();
+
+    std::unique_ptr<Transaction> txn;
+    TxnErrorCode err = txn_kv_->create_txn(&txn);
+    if (err != TxnErrorCode::TXN_OK) {
+        LOG(WARNING) << "failed to create txn";
+        return -1;
+    }
+
+    std::unique_ptr<RangeGetIterator> it;
+    auto begin = meta_rowset_key({instance_id_, tablet_id, 0});
+    auto end = meta_rowset_key({instance_id_, tablet_id, INT64_MAX});
+    do {
+        err = txn->get(begin, end, &it);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get rowset kv, err=" << err;
+            return -1;
+        }
+        if (!it->has_next()) {
+            break;
+        }
+        while (it->has_next()) {
+            auto [k, v] = it->next();
+            doris::RowsetMetaCloudPB rs_meta;
+            if (!rs_meta.ParseFromArray(v.data(), v.size())) {
+                LOG(WARNING) << "malformed rowset meta value, key=" << hex(k);
+                return -1;
+            }
+
+            // NOTE(review): segment ordinals of every rowset of the tablet are merged here,
+            // not only those of `rowset_id` -- confirm this is the intended granularity.
+            for (int64_t seg = 0; seg < rs_meta.num_segments(); ++seg) {
+                rowset_index_cache_v2.segment_ids.insert(seg);
+            }
+
+            if (!it->has_next()) {
+                begin = k;
+                begin.push_back('\x00'); // Update to next smallest key for iteration
+                break;
+            }
+        }
+    } while (it->more() && !stopped());
+
+    // The scan ran to its end: publish the cache for subsequent files of the same rowset.
+    rowset_index_cache_v2.rowset_id = rowset_id;
+
+    if (!rowset_index_cache_v2.segment_ids.contains(segment_id)) {
+        // Garbage data leak
+        LOG(WARNING) << "rowset with index meta should be recycled, key=" << file_path;
+        return 1;
+    }
+
+    return 0;
+}
+
int InstanceChecker::check_delete_bitmap_storage_optimize_v2(
int64_t tablet_id, int64_t&
rowsets_with_useless_delete_bitmap_version) {
// end_version: create_time
diff --git a/cloud/src/recycler/checker.h b/cloud/src/recycler/checker.h
index afa96ca5ae4..b9658aaf60d 100644
--- a/cloud/src/recycler/checker.h
+++ b/cloud/src/recycler/checker.h
@@ -114,6 +114,18 @@ public:
void stop() { stopped_.store(true, std::memory_order_release); }
bool stopped() const { return stopped_.load(std::memory_order_acquire); }
+private:
+ // Scratch cache handed to check_inverted_index_file_storage_format_v1(); it holds what
+ // the most recent rowset-meta scan found so that files can be compared against it.
+ struct RowsetIndexesFormatV1 {
+ // rowset id taken from the name of the index file that triggered the latest scan
+ std::string rowset_id;
+ // segment ordinals (0 .. num_segments - 1) collected from the scanned rowset metas
+ std::unordered_set<int64_t> segment_ids;
+ // identifiers of the inverted indexes declared by the scanned tablet schemas; compared
+ // with the `{idx_id}{idx_suffix}` part of a V1 index file name
+ std::unordered_set<std::string> index_ids;
+ };
+
+ // Scratch cache handed to check_inverted_index_file_storage_format_v2(); it holds what
+ // the most recent rowset-meta scan found so that files can be compared against it.
+ struct RowsetIndexesFormatV2 {
+ // rowset id taken from the name of the index file that triggered the latest scan
+ std::string rowset_id;
+ // segment ordinals (0 .. num_segments - 1) collected from the scanned rowset metas
+ std::unordered_set<int64_t> segment_ids;
+ };
+
private:
// returns 0 for success otherwise error
int init_obj_store_accessors(const InstanceInfoPB& instance);
@@ -132,6 +144,14 @@ private:
std::unordered_set<std::string>&
pending_delete_bitmaps);
int check_delete_bitmap_storage_optimize_v2(int64_t tablet_id, int64_t&
abnormal_rowsets_num);
+ int check_inverted_index_file_storage_format_v1(int64_t tablet_id, const
std::string& file_path,
+ const std::string&
rowset_info,
+ RowsetIndexesFormatV1&
rowset_index_cache_v1);
+
+ int check_inverted_index_file_storage_format_v2(int64_t tablet_id, const
std::string& file_path,
+ const std::string&
rowset_info,
+ RowsetIndexesFormatV2&
rowset_index_cache_v2);
+
std::atomic_bool stopped_ {false};
std::shared_ptr<TxnKv> txn_kv_;
std::string instance_id_;
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index b4401c6a68c..bbf75c555c6 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -21,10 +21,12 @@
#include <fmt/core.h>
#include <gen_cpp/cloud.pb.h>
#include <gen_cpp/olap_file.pb.h>
+#include <glog/logging.h>
#include <gtest/gtest.h>
#include <chrono>
#include <cstdint>
+#include <cstdlib>
#include <future>
#include <memory>
#include <random>
@@ -57,6 +59,94 @@ int64_t current_time = 0;
static constexpr int64_t db_id = 1000;
static RecyclerMetricsContext ctx;
+std::vector<std::string> index_v2_file_path = {
+
"data/1753202639945/0200000000001a5c92f4e7d9j8f2b4c8a3e6f8b1c9d2e5f8_0.idx",
+
"data/1753202639947/0200000000001b8d45a74r6c7sf3e9c2b6d4a8e1f7c3d9e2_0.idx",
+
"data/1753202639951/0200000000001c9e56b8g4f0x8s7g2f0d3c7e5b9f2e8d4f0_0.idx",
+
"data/1753202639953/0200000000001d0f67c9h5g8a3e6f8b1e4d8f6c0g3f9e5g1_0.idx",
+
"data/1753202639955/0200000000001e1g78d067c9h5g8i6h2f5e9g7d1h4g0f6h2_0.idx",
+
"data/1753202639957/0200000000001f2h89e1jg7d1h4g07i3g6f0h8e2i5h1g7i3_0.idx",
+
"data/1753202639959/020000000000208i90f2k0h8e2i5h8j4h7g1i9f3j6i2h8j4_0.idx",
+
"data/1753202639961/02000000000021aj01g3l9k5i8h2j8e2i5h8j0g4k7j3i9k5_0.idx",
+
"data/1753202639963/02000000000022bk12h4m0lk0h8e2i56j9i3k1h5l8k4j0l6_0.idx",
+
"data/1753202639965/02000000000023cl23i5n1m7g3l9k5i8k0j4l2i6m9l5k1m7_0.idx",
+
"data/1753202639967/02000000000024dm34j1m7g3l9k6o2n8l1k5m3j7n0m6l2n8_0.idx",
+
"data/1753202639969/02000000000025en45k7p3o9m2l6n4k34j1m7g38o1n7m3o9_0.idx",
+
"data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.idx",
+
"data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.idx",
+
"data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.idx",
+
"data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.idx",
+
"data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.idx",
+
"data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.idx",
+
"data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.idx",
+
"data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.idx"};
+
+std::vector<std::string> segment_v2_file_path = {
+
"data/1753202639945/0200000000001a5c92f4e7d9j8f2b4c8a3e6f8b1c9d2e5f8_0.dat",
+
"data/1753202639947/0200000000001b8d45a74r6c7sf3e9c2b6d4a8e1f7c3d9e2_0.dat",
+
"data/1753202639951/0200000000001c9e56b8g4f0x8s7g2f0d3c7e5b9f2e8d4f0_0.dat",
+
"data/1753202639953/0200000000001d0f67c9h5g8a3e6f8b1e4d8f6c0g3f9e5g1_0.dat",
+
"data/1753202639955/0200000000001e1g78d067c9h5g8i6h2f5e9g7d1h4g0f6h2_0.dat",
+
"data/1753202639957/0200000000001f2h89e1jg7d1h4g07i3g6f0h8e2i5h1g7i3_0.dat",
+
"data/1753202639959/020000000000208i90f2k0h8e2i5h8j4h7g1i9f3j6i2h8j4_0.dat",
+
"data/1753202639961/02000000000021aj01g3l9k5i8h2j8e2i5h8j0g4k7j3i9k5_0.dat",
+
"data/1753202639963/02000000000022bk12h4m0lk0h8e2i56j9i3k1h5l8k4j0l6_0.dat",
+
"data/1753202639965/02000000000023cl23i5n1m7g3l9k5i8k0j4l2i6m9l5k1m7_0.dat",
+
"data/1753202639967/02000000000024dm34j1m7g3l9k6o2n8l1k5m3j7n0m6l2n8_0.dat",
+
"data/1753202639969/02000000000025en45k7p3o9m2l6n4k34j1m7g38o1n7m3o9_0.dat",
+
"data/1753202639971/02000000000026fo56l8q4p0n2l6n4k343m7o5l9p2o8n4p0_0.dat",
+
"data/1753202639973/02000000000027gp67m9r5q8q4p0n2l1o4n8p6m0q3p9o5q1_0.dat",
+
"data/1753202639975/02000000000028hq78n0s6rm9r5q8q42p5o9q7n1r4q0p6r2_0.dat",
+
"data/1753202639977/02000000000029ir89o1t7s78n0s6rm3q6p0r8o2s5r1q7s3_4.dat",
+
"data/1753202639979/0200000000002ajs90p2u8t4m3q6p0r8r7q1s9p3t6s2r8t4_0.dat",
+
"data/1753202639981/0200000000002bkt01q3v9u2u8t4m3q5s8r2t0q4u7t3s9u5_0.dat",
+
"data/1753202639983/0200000000002clu12r4w1q3v9u2u0v6t9s3u1r5v8u4t0v6_0.dat",
+
"data/1753202639985/0200000000002dmv23s5x1w7u0t4t9s3u1r5v2s6w9v5u1w7_0.dat"};
+
+// clang-format off
+std::vector<std::string> index_v1_file_path = {
+
"data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0_1753202846943.idx",
+
"data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0_1753202841593.idx",
+
"data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0_1753202846943.idx",
+
"data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx",
+
"data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0_1753202846943.idx",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202844931.idx",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202846410.idx",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0_1753202847011.idx",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202844931.idx",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202846410.idx",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202847011.idx",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0_1753202858543.idx",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202844931.idx",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202846410.idx",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202847011.idx",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0_1753202858543.idx",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202844931.idx",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202846410.idx",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0_1753202847011.idx"};
+// clang-format on
+
+std::vector<std::string> segment_v1_file_path = {
+
"data/1753202846974/0200000000007864994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202845724/020000000000786635407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202846984/020000000000788bdd40fcf18bcaa1bbd4058ef92606e79a_0.dat",
+
"data/1753202846986/02000000000078e635407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202846986/02000000000078ec35407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202847030/020000000000791335407b55b72242ac167cf83cd4c598a2_0.dat",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007aed994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858558/0200000000007afc994f6aa97288842758c2e89b03e65682_0.dat",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat",
+
"data/1753202858552/0200000000007b7add40fcf18bcaa1bbd4058ef92606e79a_0.dat"};
+
doris::cloud::RecyclerThreadPoolGroup thread_group;
int main(int argc, char** argv) {
@@ -247,6 +337,311 @@ static int create_tmp_rowset(TxnKv* txn_kv,
StorageVaultAccessor* accessor,
return 0;
}
+// Test helper: stores a committed rowset meta, the tablet index record it belongs to and,
+// when `num_inverted_indexes` is non-zero, a tablet schema declaring that many inverted
+// indexes. Afterwards it writes empty segment files plus the matching inverted index files
+// (one per index for format V1, one per segment for format V2) through `accessor`.
+//
+// Returns 0 on success and -1 if a transaction cannot be created or committed.
+static int create_committed_rowset_with_tablet_schema(
+        TxnKv* txn_kv, StorageVaultAccessor* accessor, const std::string& resource_id,
+        int64_t tablet_id, int64_t version, int num_segments = 1, size_t num_inverted_indexes = 1,
+        bool use_inverted_index_storage_format_v1 = true) {
+    const int64_t tablet_index_id = 123;
+    const int64_t schema_version = 456;
+
+    // Writes a single key/value pair inside its own transaction.
+    auto put_kv = [txn_kv](const std::string& key, const std::string& value) {
+        std::unique_ptr<Transaction> txn;
+        if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+            return false;
+        }
+        txn->put(key, value);
+        return txn->commit() == TxnErrorCode::TXN_OK;
+    };
+
+    auto rowset_id = next_rowset_id();
+    MetaRowsetKeyInfo key_info {instance_id, tablet_id, version};
+    std::string rowset_meta_key = meta_rowset_key(key_info);
+
+    doris::RowsetMetaCloudPB rowset_pb;
+    rowset_pb.set_rowset_id(0); // useless but required
+    rowset_pb.set_rowset_id_v2(rowset_id);
+    rowset_pb.set_num_segments(num_segments);
+    rowset_pb.set_tablet_id(tablet_id);
+    rowset_pb.set_resource_id(resource_id);
+    rowset_pb.set_creation_time(current_time);
+    rowset_pb.set_schema_version(schema_version);
+    std::string rowset_val;
+    rowset_pb.SerializeToString(&rowset_val);
+    if (!put_kv(rowset_meta_key, rowset_val)) {
+        return -1;
+    }
+
+    TabletIndexPB tablet_index;
+    tablet_index.set_index_id(tablet_index_id);
+    tablet_index.set_tablet_id(tablet_id);
+    std::string tablet_index_val;
+    tablet_index.SerializeToString(&tablet_index_val);
+    if (!put_kv(meta_tablet_idx_key({instance_id, tablet_id}), tablet_index_val)) {
+        return -1;
+    }
+
+    if (num_inverted_indexes) {
+        doris::TabletSchemaCloudPB tablet_schema;
+        tablet_schema.set_inverted_index_storage_format(use_inverted_index_storage_format_v1
+                                                                ? InvertedIndexStorageFormatPB::V1
+                                                                : InvertedIndexStorageFormatPB::V2);
+        tablet_schema.set_schema_version(schema_version);
+        for (size_t i = 0; i < num_inverted_indexes; i++) {
+            auto index = tablet_schema.add_index();
+            index->set_index_id(i);
+            index->set_index_type(IndexType::INVERTED);
+        }
+        std::string tablet_schema_val;
+        tablet_schema.SerializeToString(&tablet_schema_val);
+        if (!put_kv(meta_schema_key({instance_id, tablet_index_id, schema_version}),
+                    tablet_schema_val)) {
+            return -1;
+        }
+    }
+
+    for (int i = 0; i < num_segments; ++i) {
+        accessor->put_file(segment_path(tablet_id, rowset_id, i), "");
+        if (use_inverted_index_storage_format_v1) {
+            // size_t counter: `num_inverted_indexes` is unsigned, avoid a mixed-sign compare
+            for (size_t j = 0; j < num_inverted_indexes; ++j) {
+                accessor->put_file(inverted_index_path_v1(tablet_id, rowset_id, i, j, ""), "");
+            }
+        } else {
+            accessor->put_file(inverted_index_path_v2(tablet_id, rowset_id, i), "");
+        }
+    }
+    return 0;
+}
+
+// Test helper: derives tablet id, rowset id and segment ordinal from a real object path of
+// the form data/{tablet_id}/{rowset_id}_{segment_id}.{ext}, stores the matching rowset meta
+// and tablet index record, adds a V2 tablet schema with one inverted index when the file is
+// an ".idx" file, and finally writes an empty file at `file_path` through `accessor`.
+//
+// Returns 0 on success and -1 on an unparsable path or a failed transaction.
+static int create_committed_rowset_by_real_index_v2_file(TxnKv* txn_kv,
+                                                         StorageVaultAccessor* accessor,
+                                                         const std::string& resource_id,
+                                                         const std::string& file_path,
+                                                         int64_t version = 1) {
+    // Writes a single key/value pair inside its own transaction.
+    auto store = [txn_kv](const std::string& key, const std::string& value) -> bool {
+        std::unique_ptr<Transaction> txn;
+        if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+            return false;
+        }
+        txn->put(key, value);
+        return txn->commit() == TxnErrorCode::TXN_OK;
+    };
+
+    // data/{tablet_id}/{rowset_id}_{segment_id}.{ext}
+    std::vector<std::string> components;
+    butil::SplitString(file_path, '/', &components);
+    if (components.size() < 3 || components[0] != "data") {
+        LOG(WARNING) << "Invalid file path format: " << file_path;
+        return -1;
+    }
+
+    int64_t tablet_id = std::stoll(components[1]);
+    std::string filename = components[2];
+
+    // The segment ordinal sits between the last '_' and the last '.'.
+    const size_t sep = filename.find_last_of('_');
+    const size_t dot = filename.find_last_of('.');
+    const bool well_formed =
+            sep != std::string::npos && dot != std::string::npos && sep < dot;
+    if (!well_formed) {
+        LOG(WARNING) << "Invalid filename format: " << filename;
+        return -1;
+    }
+
+    std::string rowset_id = filename.substr(0, sep);
+    std::string extension = filename.substr(dot + 1);
+    int segment_id = stoll(filename.substr(sep + 1, dot - sep - 1));
+    int64_t tablet_index_id = 123; // Default index id
+    int64_t schema_version = 456;  // Default schema version
+
+    // Rowset meta
+    std::string payload;
+    doris::RowsetMetaCloudPB rowset_pb;
+    rowset_pb.set_rowset_id(0); // useless but required
+    rowset_pb.set_rowset_id_v2(rowset_id);
+    rowset_pb.set_num_segments(segment_id + 1); // segment_id is 0-based
+    rowset_pb.set_tablet_id(tablet_id);
+    rowset_pb.set_resource_id(resource_id);
+    rowset_pb.set_creation_time(current_time);
+    rowset_pb.set_schema_version(schema_version);
+    rowset_pb.SerializeToString(&payload);
+    if (!store(meta_rowset_key({instance_id, tablet_id, version}), payload)) {
+        return -1;
+    }
+
+    // Tablet index record
+    TabletIndexPB tablet_index;
+    tablet_index.set_index_id(tablet_index_id);
+    tablet_index.set_tablet_id(tablet_id);
+    tablet_index.SerializeToString(&payload);
+    if (!store(meta_tablet_idx_key({instance_id, tablet_id}), payload)) {
+        return -1;
+    }
+
+    // Only index files need a schema that declares an inverted index.
+    if (extension == "idx") {
+        doris::TabletSchemaCloudPB tablet_schema;
+        tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+        tablet_schema.set_schema_version(schema_version);
+
+        auto* inverted = tablet_schema.add_index();
+        inverted->set_index_id(0);
+        inverted->set_index_type(IndexType::INVERTED);
+
+        tablet_schema.SerializeToString(&payload);
+        if (!store(meta_schema_key({instance_id, tablet_index_id, schema_version}), payload)) {
+            return -1;
+        }
+    }
+
+    accessor->put_file(file_path, "");
+    return 0;
+}
+
+// Test helper: derives tablet id, rowset id, segment ordinal, index id and index suffix from
+// a real object path of the form
+//   data/{tablet_id}/{rowset_id}_{segment_id}_{index_id}{suffix}.idx
+// stores the matching rowset meta and tablet index record, adds a V1 tablet schema declaring
+// that inverted index when the file is an ".idx" file, and finally writes an empty file at
+// `file_path` through `accessor`.
+//
+// Returns 0 on success and -1 on an unparsable path or a failed transaction.
+static int create_committed_rowset_by_real_index_v1_file(TxnKv* txn_kv,
+                                                         StorageVaultAccessor* accessor,
+                                                         const std::string& resource_id,
+                                                         const std::string& file_path,
+                                                         int64_t version = 1) {
+    // Writes a single key/value pair inside its own transaction.
+    auto put_kv = [txn_kv](const std::string& key, const std::string& value) {
+        std::unique_ptr<Transaction> txn;
+        if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+            return false;
+        }
+        txn->put(key, value);
+        return txn->commit() == TxnErrorCode::TXN_OK;
+    };
+
+    std::vector<std::string> path_parts;
+    butil::SplitString(file_path, '/', &path_parts);
+
+    if (path_parts.size() < 3 || path_parts[0] != "data") {
+        LOG(WARNING) << "Invalid file path format: " << file_path;
+        return -1;
+    }
+
+    int64_t tablet_id = std::stoll(path_parts[1]);
+    std::string filename = path_parts[2];
+
+    // Format: {rowset_id}_{segment_id}_{index_id}{suffix}.idx
+    size_t first_underscore_pos = filename.find('_');
+    size_t second_underscore_pos = filename.find('_', first_underscore_pos + 1);
+    size_t dot_pos = filename.find_last_of('.');
+
+    if (first_underscore_pos == std::string::npos || second_underscore_pos == std::string::npos ||
+        dot_pos == std::string::npos || first_underscore_pos >= second_underscore_pos ||
+        second_underscore_pos >= dot_pos) {
+        LOG(WARNING) << "Invalid filename format: " << filename;
+        return -1;
+    }
+
+    std::string rowset_id = filename.substr(0, first_underscore_pos);
+    std::string segment_str = filename.substr(first_underscore_pos + 1,
+                                              second_underscore_pos - first_underscore_pos - 1);
+    std::string remaining =
+            filename.substr(second_underscore_pos + 1, dot_pos - second_underscore_pos - 1);
+    std::string extension = filename.substr(dot_pos + 1);
+
+    // `remaining` is {index_id}{suffix}: std::stoll reports how many characters formed the
+    // numeric id, everything after that is kept as the suffix instead of being dropped.
+    size_t id_len = 0;
+    const int64_t index_id = std::stoll(remaining, &id_len);
+    const std::string index_suffix = remaining.substr(id_len);
+
+    const int64_t segment_id = std::stoll(segment_str);
+    const int64_t tablet_index_id = 123; // Default tablet index id
+    const int64_t schema_version = 456;  // Default schema version
+
+    // Create rowset meta data
+    std::string val;
+    doris::RowsetMetaCloudPB rowset_pb;
+    rowset_pb.set_rowset_id(0); // useless but required
+    rowset_pb.set_rowset_id_v2(rowset_id);
+    rowset_pb.set_num_segments(segment_id + 1); // segment_id is 0-based
+    rowset_pb.set_tablet_id(tablet_id);
+    rowset_pb.set_resource_id(resource_id);
+    rowset_pb.set_creation_time(current_time);
+    rowset_pb.set_schema_version(schema_version);
+    rowset_pb.SerializeToString(&val);
+    if (!put_kv(meta_rowset_key({instance_id, tablet_id, version}), val)) {
+        return -1;
+    }
+
+    // Create tablet index meta data
+    TabletIndexPB tablet_index;
+    tablet_index.set_index_id(tablet_index_id);
+    tablet_index.set_tablet_id(tablet_id);
+    tablet_index.SerializeToString(&val);
+    if (!put_kv(meta_tablet_idx_key({instance_id, tablet_id}), val)) {
+        return -1;
+    }
+
+    // Create tablet schema if dealing with index files
+    if (extension == "idx") {
+        doris::TabletSchemaCloudPB tablet_schema;
+        tablet_schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
+        tablet_schema.set_schema_version(schema_version);
+
+        auto index = tablet_schema.add_index();
+        index->set_index_id(index_id);
+        index->set_index_type(IndexType::INVERTED);
+        if (!index_suffix.empty()) {
+            index->set_index_suffix_name(index_suffix);
+        }
+
+        tablet_schema.SerializeToString(&val);
+        if (!put_kv(meta_schema_key({instance_id, tablet_index_id, schema_version}), val)) {
+            return -1;
+        }
+    }
+
+    accessor->put_file(file_path, "");
+
+    return 0;
+}
+
static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor*
accessor,
const std::string& resource_id, int64_t
tablet_id,
int64_t version, int num_segments = 1,
@@ -3113,7 +3508,85 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) {
ASSERT_NE(checker.do_inverted_check(), 0);
}
-TEST(CheckerTest, inverted_check_recycle_idx_file) {
+TEST(CheckerTest, normal_check_index_file) {
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ auto sp = SyncPoint::get_instance();
+ SyncPoint::CallbackGuard guard;
+ sp->set_call_back(
+ "InstanceChecker::do_inverted_check",
+ [](auto&& args) {
+ auto* ret = try_any_cast_ret<int>(args);
+ ret->first = 0;
+ ret->second = true;
+ },
+ &guard);
+ sp->enable_processing();
+ DORIS_CLOUD_DEFER {
+ SyncPoint::get_instance()->disable_processing();
+ };
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Create committed rowsets for every v2 index/segment file path, then expect the check to return 0.
+ // NOTE(review): despite the test name this calls do_inverted_check(), not do_check() - confirm intent.
+ auto accessor = checker.accessor_map_.begin()->second;
+ for (const auto& file : index_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+ ASSERT_EQ(checker.do_inverted_check(), 0);
+}
+
+TEST(CheckerTest, normal_inverted_check_index_file) {
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+
+ auto sp = SyncPoint::get_instance();
+ SyncPoint::CallbackGuard guard;
+ sp->set_call_back(
+ "InstanceChecker::do_inverted_check",
+ [](auto&& args) {
+ auto* ret = try_any_cast_ret<int>(args);
+ ret->first = 0;
+ ret->second = true;
+ },
+ &guard);
+ sp->enable_processing();
+ DORIS_CLOUD_DEFER {
+ SyncPoint::get_instance()->disable_processing();
+ };
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Create committed rowsets for every v2 index file path and every v2 segment file path,
+ // then expect do_inverted_check() to return 0.
+ auto accessor = checker.accessor_map_.begin()->second;
+ for (const auto& file : index_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+ ASSERT_EQ(checker.do_inverted_check(), 0);
+}
+
+TEST(CheckerTest, inverted_check_recycle_idx_file_v1) {
auto* sp = SyncPoint::get_instance();
std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
[&sp](int*) {
sp->clear_all_call_backs();
@@ -3152,34 +3625,207 @@ TEST(CheckerTest, inverted_check_recycle_idx_file) {
});
sp->enable_processing();
- for (int t = 10001; t <= 10100; ++t) {
- for (int v = 0; v < 10; ++v) {
- int ret = create_committed_rowset(txn_kv.get(), accessor.get(),
"1", t, v, 1, 3);
- ASSERT_EQ(ret, 0) << "Failed to create committed rs: " << ret;
+ for (const auto& file : index_v1_file_path) {
+ create_committed_rowset_by_real_index_v1_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v1_file_path) {
+ create_committed_rowset_by_real_index_v1_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ size_t delete_kv_num = 5;
+ std::string meta_rowset_key_begin, meta_rowset_key_end;
+ meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin);
+ meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end);
+ std::vector<std::string> rowset_key_to_delete;
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv->create_txn(&txn);
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
+
+ std::unique_ptr<RangeGetIterator> it;
+ do {
+ err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it);
+ while (it->has_next()) {
+ auto [k, v] = it->next();
+ if (rowset_key_to_delete.size() < delete_kv_num) {
+ rowset_key_to_delete.emplace_back(k);
+ }
+ if (!it->has_next()) {
+ meta_rowset_key_begin = k;
+ }
}
+ meta_rowset_key_begin.push_back('\x00');
+ } while (it->more());
+
+ for (const auto& key : rowset_key_to_delete) {
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv->create_txn(&txn);
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
+ txn->remove(key);
+ err = txn->commit();
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
}
+
std::unique_ptr<ListIterator> list_iter;
int ret = accessor->list_directory("data", &list_iter);
ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
- int64_t tablet_id_to_delete_index = -1;
+ ASSERT_EQ(checker.do_inverted_check(), 1);
+}
+
+TEST(CheckerTest, inverted_check_recycle_idx_file_v2) {
+ auto* sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
[&sp](int*) {
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ });
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("CheckerTest");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Create committed rowsets from the v2 index/segment file paths, then remove up to
+ // `delete_kv_num` rowset meta keys and expect do_inverted_check() to return 1.
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ sp->set_call_back(
+ "InstanceRecycler::init_storage_vault_accessors.mock_vault",
[&accessor](auto&& args) {
+ auto* map = try_any_cast<
+ std::unordered_map<std::string,
std::shared_ptr<StorageVaultAccessor>>*>(
+ args[0]);
+ auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+ if (vault->name() == "test_success_hdfs_vault") {
+ map->emplace(vault->id(), accessor);
+ }
+ });
+ sp->enable_processing();
+
+ for (const auto& file : index_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ size_t delete_kv_num = 5;
+ std::string meta_rowset_key_begin, meta_rowset_key_end;
+ meta_rowset_key({instance_id, 0, 1}, &meta_rowset_key_begin);
+ meta_rowset_key({instance_id, INT64_MAX, 1}, &meta_rowset_key_end);
+ std::vector<std::string> rowset_key_to_delete;
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv->create_txn(&txn);
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
+
+ std::unique_ptr<RangeGetIterator> it;
+ do {
+ err = txn->get(meta_rowset_key_begin, meta_rowset_key_end, &it);
+ while (it->has_next()) {
+ auto [k, v] = it->next();
+ if (rowset_key_to_delete.size() < delete_kv_num) {
+ rowset_key_to_delete.emplace_back(k);
+ }
+ if (!it->has_next()) {
+ meta_rowset_key_begin = k;
+ }
+ }
+ meta_rowset_key_begin.push_back('\x00');
+ } while (it->more());
+
+ for (const auto& key : rowset_key_to_delete) {
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv->create_txn(&txn);
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
+ txn->remove(key);
+ err = txn->commit();
+ DCHECK_EQ(err, TxnErrorCode::TXN_OK) << err;
+ }
+
+ std::unique_ptr<ListIterator> list_iter;
+ int ret = accessor->list_directory("data", &list_iter);
+ ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+ ASSERT_EQ(checker.do_inverted_check(), 1);
+}
+
+TEST(CheckerTest, forward_check_recycle_idx_file_v1) {
+ auto* sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
[&sp](int*) {
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ });
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("CheckerTest");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Create committed rowsets from the v1 index/segment file paths, then delete some .idx files
+ // through the accessor and expect do_check() to return 1.
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ sp->set_call_back(
+ "InstanceRecycler::init_storage_vault_accessors.mock_vault",
[&accessor](auto&& args) {
+ auto* map = try_any_cast<
+ std::unordered_map<std::string,
std::shared_ptr<StorageVaultAccessor>>*>(
+ args[0]);
+ auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+ if (vault->name() == "test_success_hdfs_vault") {
+ map->emplace(vault->id(), accessor);
+ }
+ });
+ sp->enable_processing();
+
+ for (const auto& file : index_v1_file_path) {
+ create_committed_rowset_by_real_index_v1_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v1_file_path) {
+ create_committed_rowset_by_real_index_v1_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+ std::unique_ptr<ListIterator> list_iter;
+ int ret = accessor->list_directory("data", &list_iter);
+ ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+ int64_t tablet_to_delete = -1;
for (auto file = list_iter->next(); file.has_value(); file =
list_iter->next()) {
std::vector<std::string> str;
butil::SplitString(file->path, '/', &str);
int64_t tablet_id = atol(str[1].c_str());
- // only delete one index files of ever tablet for mock recycle
- // The reason for not select "delete all idx file" is that inverted
checking cannot handle this case
- // forward checking is required.
- if (file->path.ends_with(".idx") && tablet_id_to_delete_index !=
tablet_id) {
+ // delete an .idx file whenever its tablet differs from the previously deleted one, to mock missing index files
+ if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) {
+ tablet_to_delete = tablet_id;
accessor->delete_file(file->path);
- tablet_id_to_delete_index = tablet_id;
}
}
- ASSERT_EQ(checker.do_inverted_check(), 1);
+ ASSERT_EQ(checker.do_check(), 1);
}
-TEST(CheckerTest, forward_check_recycle_idx_file) {
+TEST(CheckerTest, forward_check_recycle_idx_file_v2) {
auto* sp = SyncPoint::get_instance();
std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01,
[&sp](int*) {
sp->clear_all_call_backs();
@@ -3218,19 +3864,27 @@ TEST(CheckerTest, forward_check_recycle_idx_file) {
});
sp->enable_processing();
- for (int t = 10001; t <= 10100; ++t) {
- for (int v = 0; v < 10; ++v) {
- create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
1, 3);
- }
+ for (const auto& file : index_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
+ }
+
+ for (const auto& file : segment_v2_file_path) {
+ create_committed_rowset_by_real_index_v2_file(txn_kv.get(),
accessor.get(), "1", file);
}
std::unique_ptr<ListIterator> list_iter;
int ret = accessor->list_directory("data", &list_iter);
ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+ int64_t tablet_to_delete = -1;
for (auto file = list_iter->next(); file.has_value(); file =
list_iter->next()) {
- // delete all index files of ever tablet for mock recycle
- if (file->path.ends_with(".idx")) {
+ std::vector<std::string> str;
+ butil::SplitString(file->path, '/', &str);
+ int64_t tablet_id = atol(str[1].c_str());
+
+ // delete an .idx file whenever its tablet differs from the previously deleted one, to mock missing index files
+ if (file->path.ends_with(".idx") && tablet_to_delete != tablet_id) {
accessor->delete_file(file->path);
+ tablet_to_delete = tablet_id;
}
}
ASSERT_EQ(checker.do_check(), 1);
@@ -3250,12 +3904,12 @@ TEST(CheckerTest, normal) {
auto accessor = checker.accessor_map_.begin()->second;
for (int t = 10001; t <= 10100; ++t) {
for (int v = 0; v < 10; ++v) {
- create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
1);
+ create_committed_rowset_with_tablet_schema(txn_kv.get(),
accessor.get(), "1", t, v, 1);
}
}
for (int t = 10101; t <= 10200; ++t) {
for (int v = 0; v < 10; ++v) {
- create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
5);
+ create_committed_rowset_with_tablet_schema(txn_kv.get(),
accessor.get(), "1", t, v, 5);
}
}
ASSERT_EQ(checker.do_check(), 0);
@@ -3276,12 +3930,14 @@ TEST(CheckerTest, abnormal) {
auto accessor = checker.accessor_map_.begin()->second;
for (int t = 10001; t <= 10100; ++t) {
for (int v = 0; v < 10; ++v) {
- create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
1, 0);
+ create_committed_rowset_with_tablet_schema(txn_kv.get(),
accessor.get(), "1", t, v, 1,
+ 0);
}
}
for (int t = 10101; t <= 10200; ++t) {
for (int v = 0; v < 10; ++v) {
- create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
5, 0);
+ create_committed_rowset_with_tablet_schema(txn_kv.get(),
accessor.get(), "1", t, v, 5,
+ 0);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]