This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 81ccfe0eec6 branch-3.0: [opt](checker) Add inverted index file check 
for checker #51591 (#52318)
81ccfe0eec6 is described below

commit 81ccfe0eec668408644b3556afb21ffeb6fd10c7
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Jun 26 09:50:00 2025 +0800

    branch-3.0: [opt](checker) Add inverted index file check for checker #51591 
(#52318)
    
    Cherry-picked from #51591
    
    Co-authored-by: Uniqueyou <[email protected]>
---
 cloud/src/recycler/checker.cpp | 144 +++++++++++++++++++++++++++++++-
 cloud/test/CMakeLists.txt      |   4 +-
 cloud/test/mock_accessor.cpp   | 183 -----------------------------------------
 cloud/test/mock_accessor.h     | 158 +++++++++++++++++++++++++++++++++++
 cloud/test/recycler_test.cpp   | 147 ++++++++++++++++++++++++++++++++-
 5 files changed, 448 insertions(+), 188 deletions(-)

diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 60b6b7fc5ee..6a191b021b2 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -26,10 +26,12 @@
 #include <gen_cpp/olap_file.pb.h>
 #include <glog/logging.h>
 
+#include <algorithm>
 #include <chrono>
 #include <cstdint>
 #include <memory>
 #include <mutex>
+#include <numeric>
 #include <sstream>
 #include <string_view>
 #include <unordered_set>
@@ -422,7 +424,8 @@ int InstanceChecker::init_storage_vault_accessors(const 
InstanceInfoPB& instance
             LOG(WARNING) << "malformed storage vault, unable to deserialize 
key=" << hex(k);
             return -1;
         }
-
+        
TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
+                                 &accessor_map_, &vault);
         if (vault.has_hdfs_info()) {
             auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
             int ret = accessor->init();
@@ -536,6 +539,7 @@ int InstanceChecker::do_check() {
         bool data_loss = false;
         for (int i = 0; i < rs_meta.num_segments(); ++i) {
             auto path = segment_path(rs_meta.tablet_id(), 
rs_meta.rowset_id_v2(), i);
+
             if (tablet_files_cache.files.contains(path)) {
                 continue;
             }
@@ -549,6 +553,57 @@ int InstanceChecker::do_check() {
             LOG(WARNING) << "object not exist, path=" << path << " key=" << 
hex(key);
         }
 
+        std::vector<std::pair<int64_t, std::string>> index_ids;
+        for (const auto& i : rs_meta.tablet_schema().index()) {
+            if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
+                index_ids.emplace_back(i.index_id(), i.index_suffix_name());
+            }
+        }
+        std::string tablet_idx_key = meta_tablet_idx_key({instance_id_, 
rs_meta.tablet_id()});
+        if (!key_exist(txn_kv_.get(), tablet_idx_key)) {
+            for (int i = 0; i < rs_meta.num_segments(); ++i) {
+                std::vector<std::string> index_path_v;
+                std::vector<std::string> loss_file_path;
+                if (rs_meta.tablet_schema().inverted_index_storage_format() ==
+                    InvertedIndexStorageFormatPB::V1) {
+                    for (const auto& index_id : index_ids) {
+                        LOG(INFO) << "check inverted index, tablet_id=" << 
rs_meta.tablet_id()
+                                  << " rowset_id=" << rs_meta.rowset_id_v2()
+                                  << " segment_index=" << i << " index_id=" << 
index_id.first
+                                  << " index_suffix_name=" << index_id.second;
+                        index_path_v.emplace_back(
+                                inverted_index_path_v1(rs_meta.tablet_id(), 
rs_meta.rowset_id_v2(),
+                                                       i, index_id.first, 
index_id.second));
+                    }
+                } else {
+                    index_path_v.emplace_back(
+                            inverted_index_path_v2(rs_meta.tablet_id(), 
rs_meta.rowset_id_v2(), i));
+                }
+
+                if (!index_path_v.empty()) {
+                    if (std::all_of(index_path_v.begin(), index_path_v.end(),
+                                    [&](const auto& idx_file_path) {
+                                        if 
(!tablet_files_cache.files.contains(idx_file_path)) {
+                                            
loss_file_path.emplace_back(idx_file_path);
+                                            return false;
+                                        }
+                                        return true;
+                                    })) {
+                        continue;
+                    }
+                }
+
+                data_loss = true;
+                LOG(WARNING) << "object not exist, path="
+                             << std::accumulate(loss_file_path.begin(), 
loss_file_path.end(),
+                                                std::string(),
+                                                [](const auto& a, const auto& 
b) {
+                                                    return a.empty() ? b : a + 
", " + b;
+                                                })
+                             << " key=" << hex(tablet_idx_key);
+            }
+        }
+
         if (data_loss) {
             ++num_rowset_loss;
         }
@@ -647,6 +702,12 @@ int InstanceChecker::do_inverted_check() {
     };
     TabletRowsets tablet_rowsets_cache;
 
+    struct TabletIndexes {
+        int64_t tablet_id {0};
+        std::unordered_set<int64_t> index_ids;
+    };
+    TabletIndexes tablet_indexes_cache;
+
     // Return 0 if check success, return 1 if file is garbage data, negative 
if error occurred
     auto check_segment_file = [&](const std::string& obj_key) {
         std::vector<std::string> str;
@@ -724,8 +785,77 @@ int InstanceChecker::do_inverted_check() {
 
         return 0;
     };
+    auto check_inverted_index_file = [&](const std::string& obj_key) {
+        std::vector<std::string> str;
+        butil::SplitString(obj_key, '/', &str);
+        // data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+        if (str.size() < 3) {
+            return -1;
+        }
+
+        int64_t tablet_id = atol(str[1].c_str());
+        if (tablet_id <= 0) {
+            LOG(WARNING) << "failed to parse tablet_id, key=" << obj_key;
+            return -1;
+        }
+
+        if (!str.back().ends_with(".idx")) {
+            return 0; // Not an index file
+        }
+
+        int64_t index_id;
+
+        size_t pos = str.back().find_last_of('_');
+        if (pos == std::string::npos || pos + 1 >= str.back().size() - 4) {
+            LOG(WARNING) << "Invalid index_id format, key=" << obj_key;
+            return -1;
+        }
+        index_id = atol(str.back().substr(pos + 1, str.back().size() - 
4).c_str());
 
-    // TODO(Xiaocc): Currently we haven't implemented one generator-like s3 
accessor list function
+        if (tablet_indexes_cache.tablet_id == tablet_id) {
+            if (tablet_indexes_cache.index_ids.contains(index_id)) {
+                return 0;
+            } else {
+                LOG(WARNING) << "index not exists, key=" << obj_key;
+                return -1;
+            }
+        }
+        // Get all index id of this tablet
+        tablet_indexes_cache.tablet_id = tablet_id;
+        tablet_indexes_cache.index_ids.clear();
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to create txn";
+            return -1;
+        }
+        auto tablet_idx_key = meta_tablet_idx_key({instance_id_, tablet_id});
+        std::string tablet_idx_val;
+        err = txn->get(tablet_idx_key, &tablet_idx_val);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to get tablet idx,"
+                         << " key=" << hex(tablet_idx_key) << " err=" << err;
+            return -1;
+        }
+
+        TabletIndexPB tablet_idx_pb;
+        if (!tablet_idx_pb.ParseFromArray(tablet_idx_val.data(), 
tablet_idx_val.size())) {
+            LOG(WARNING) << "malformed index meta value, key=" << 
hex(tablet_idx_key);
+            return -1;
+        }
+        if (!tablet_idx_pb.has_index_id()) {
+            LOG(WARNING) << "tablet index meta does not have index_id, key=" 
<< hex(tablet_idx_key);
+            return -1;
+        }
+        tablet_indexes_cache.index_ids.insert(tablet_idx_pb.index_id());
+
+        if (!tablet_indexes_cache.index_ids.contains(index_id)) {
+            LOG(WARNING) << "index should be recycled, key=" << obj_key;
+            return 1;
+        }
+
+        return 0;
+    };
     // so we choose to skip here.
     TEST_SYNC_POINT_RETURN_WITH_VALUE("InstanceChecker::do_inverted_check", 
(int)0);
 
@@ -748,6 +878,16 @@ int InstanceChecker::do_inverted_check() {
                     check_ret = -1;
                 }
             }
+            ret = check_inverted_index_file(file->path);
+            if (ret != 0) {
+                LOG(WARNING) << "failed to check index file, uri=" << 
accessor->uri()
+                             << " path=" << file->path;
+                if (ret == 1) {
+                    ++num_file_leak;
+                } else {
+                    check_ret = -1;
+                }
+            }
         }
 
         if (!list_iter->is_valid()) {
diff --git a/cloud/test/CMakeLists.txt b/cloud/test/CMakeLists.txt
index 65c9cde561b..8378a33ddfd 100644
--- a/cloud/test/CMakeLists.txt
+++ b/cloud/test/CMakeLists.txt
@@ -17,7 +17,7 @@ add_executable(doris_txn_test doris_txn_test.cpp)
 add_executable(txn_kv_test txn_kv_test.cpp)
 set_target_properties(txn_kv_test PROPERTIES COMPILE_FLAGS 
"-fno-access-control")
 
-add_executable(recycler_test recycler_test.cpp mock_accessor.cpp)
+add_executable(recycler_test recycler_test.cpp)
 
 add_executable(mem_txn_kv_test mem_txn_kv_test.cpp)
 
@@ -59,7 +59,7 @@ add_executable(util_test util_test.cpp)
 
 add_executable(network_util_test network_util_test.cpp)
 
-add_executable(txn_lazy_commit_test txn_lazy_commit_test.cpp mock_accessor.cpp)
+add_executable(txn_lazy_commit_test txn_lazy_commit_test.cpp)
 
 message("Meta-service test dependencies: ${TEST_LINK_LIBS}")
 #target_link_libraries(sync_point_test ${TEST_LINK_LIBS})
diff --git a/cloud/test/mock_accessor.cpp b/cloud/test/mock_accessor.cpp
deleted file mode 100644
index f11c5969321..00000000000
--- a/cloud/test/mock_accessor.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "mock_accessor.h"
-
-#include <glog/logging.h>
-
-#include <iterator>
-#include <ranges>
-#include <vector>
-
-#include "common/logging.h"
-#include "common/string_util.h"
-#include "cpp/sync_point.h"
-#include "recycler/storage_vault_accessor.h"
-
-namespace doris::cloud {
-
-class MockListIterator final : public ListIterator {
-public:
-    MockListIterator(std::vector<std::string> entries) : 
entries_(std::move(entries)) {}
-    ~MockListIterator() override = default;
-
-    bool is_valid() override { return true; }
-
-    bool has_next() override { return !entries_.empty(); }
-
-    std::optional<FileMeta> next() override {
-        std::optional<FileMeta> ret;
-        if (has_next()) {
-            ret = FileMeta {.path = std::move(entries_.back())};
-            entries_.pop_back();
-        }
-
-        return ret;
-    }
-
-private:
-    std::vector<std::string> entries_;
-};
-
-MockAccessor::MockAccessor() : StorageVaultAccessor(AccessorType::MOCK) {
-    uri_ = "mock";
-}
-
-MockAccessor::~MockAccessor() = default;
-
-auto MockAccessor::get_prefix_range(const std::string& path_prefix) {
-    auto begin = objects_.lower_bound(path_prefix);
-    if (begin == objects_.end()) {
-        return std::make_pair(begin, begin);
-    }
-
-    auto path1 = path_prefix;
-    path1.back() += 1;
-    auto end = objects_.lower_bound(path1);
-    return std::make_pair(begin, end);
-}
-
-int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
-    TEST_SYNC_POINT("MockAccessor::delete_prefix");
-    LOG(INFO) << "delete object of prefix=" << path_prefix;
-    std::lock_guard lock(mtx_);
-
-    auto [begin, end] = get_prefix_range(path_prefix);
-    if (begin == end) {
-        return 0;
-    }
-
-    objects_.erase(begin, end);
-    return 0;
-}
-
-int MockAccessor::delete_prefix(const std::string& path_prefix, int64_t 
expiration_time) {
-    auto norm_path_prefix = path_prefix;
-    strip_leading(norm_path_prefix, "/");
-    if (norm_path_prefix.empty()) {
-        LOG_WARNING("invalid dir_path {}", path_prefix);
-        return -1;
-    }
-
-    return delete_prefix_impl(norm_path_prefix);
-}
-
-int MockAccessor::delete_directory(const std::string& dir_path) {
-    auto norm_dir_path = dir_path;
-    strip_leading(norm_dir_path, "/");
-    if (norm_dir_path.empty()) {
-        LOG_WARNING("invalid dir_path {}", dir_path);
-        return -1;
-    }
-
-    return delete_prefix_impl(!norm_dir_path.ends_with('/') ? norm_dir_path + 
'/' : norm_dir_path);
-}
-
-int MockAccessor::delete_all(int64_t expiration_time) {
-    std::lock_guard lock(mtx_);
-    objects_.clear();
-    return 0;
-}
-
-int MockAccessor::delete_files(const std::vector<std::string>& paths) {
-    TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_files", (int)0);
-
-    for (auto&& path : paths) {
-        delete_file(path);
-    }
-    return 0;
-}
-
-int MockAccessor::delete_file(const std::string& path) {
-    LOG(INFO) << "delete object path=" << path;
-    std::lock_guard lock(mtx_);
-    objects_.erase(path);
-    return 0;
-}
-
-int MockAccessor::put_file(const std::string& path, const std::string& 
content) {
-    std::lock_guard lock(mtx_);
-    objects_.insert(path);
-    return 0;
-}
-
-int MockAccessor::list_all(std::unique_ptr<ListIterator>* res) {
-    std::vector<std::string> entries;
-
-    {
-        std::lock_guard lock(mtx_);
-        entries.reserve(objects_.size());
-        entries.assign(objects_.rbegin(), objects_.rend());
-    }
-
-    *res = std::make_unique<MockListIterator>(std::move(entries));
-
-    return 0;
-}
-
-int MockAccessor::list_directory(const std::string& dir_path, 
std::unique_ptr<ListIterator>* res) {
-    auto norm_dir_path = dir_path;
-    strip_leading(norm_dir_path, "/");
-    if (norm_dir_path.empty()) {
-        LOG_WARNING("invalid dir_path {}", dir_path);
-        return -1;
-    }
-
-    std::vector<std::string> entries;
-
-    {
-        std::lock_guard lock(mtx_);
-        auto [begin, end] = get_prefix_range(norm_dir_path);
-        if (begin != end) {
-            entries.reserve(std::distance(begin, end));
-            std::ranges::copy(std::ranges::subrange(begin, end) | 
std::ranges::views::reverse,
-                              std::back_inserter(entries));
-        }
-    }
-
-    *res = std::make_unique<MockListIterator>(std::move(entries));
-
-    return 0;
-}
-
-int MockAccessor::exists(const std::string& path) {
-    std::lock_guard lock(mtx_);
-    return !objects_.contains(path);
-}
-
-} // namespace doris::cloud
diff --git a/cloud/test/mock_accessor.h b/cloud/test/mock_accessor.h
index ba8ede32462..4e209d93261 100644
--- a/cloud/test/mock_accessor.h
+++ b/cloud/test/mock_accessor.h
@@ -17,12 +17,43 @@
 
 #pragma once
 
+#include <glog/logging.h>
+
+#include <iterator>
 #include <mutex>
+#include <ranges>
 #include <set>
+#include <vector>
 
+#include "common/logging.h"
+#include "common/string_util.h"
+#include "cpp/sync_point.h"
+#include "mock_accessor.h"
 #include "recycler/storage_vault_accessor.h"
 
 namespace doris::cloud {
+class MockListIterator final : public ListIterator {
+public:
+    MockListIterator(std::vector<std::string> entries) : 
entries_(std::move(entries)) {}
+    ~MockListIterator() override = default;
+
+    bool is_valid() override { return true; }
+
+    bool has_next() override { return !entries_.empty(); }
+
+    std::optional<FileMeta> next() override {
+        std::optional<FileMeta> ret;
+        if (has_next()) {
+            ret = FileMeta {.path = std::move(entries_.back())};
+            entries_.pop_back();
+        }
+
+        return ret;
+    }
+
+private:
+    std::vector<std::string> entries_;
+};
 
 class MockAccessor final : public StorageVaultAccessor {
 public:
@@ -57,4 +88,131 @@ private:
     std::set<std::string> objects_;
 };
 
+inline MockAccessor::MockAccessor() : StorageVaultAccessor(AccessorType::MOCK) 
{
+    uri_ = "mock";
+}
+
+inline MockAccessor::~MockAccessor() = default;
+
+inline auto MockAccessor::get_prefix_range(const std::string& path_prefix) {
+    auto begin = objects_.lower_bound(path_prefix);
+    if (begin == objects_.end()) {
+        return std::make_pair(begin, begin);
+    }
+
+    auto path1 = path_prefix;
+    path1.back() += 1;
+    auto end = objects_.lower_bound(path1);
+    return std::make_pair(begin, end);
+}
+
+inline int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
+    TEST_SYNC_POINT("MockAccessor::delete_prefix");
+    LOG(INFO) << "delete object of prefix=" << path_prefix;
+    std::lock_guard lock(mtx_);
+
+    auto [begin, end] = get_prefix_range(path_prefix);
+    if (begin == end) {
+        return 0;
+    }
+
+    objects_.erase(begin, end);
+    return 0;
+}
+
+inline int MockAccessor::delete_prefix(const std::string& path_prefix, int64_t 
expiration_time) {
+    auto norm_path_prefix = path_prefix;
+    strip_leading(norm_path_prefix, "/");
+    if (norm_path_prefix.empty()) {
+        LOG_WARNING("invalid dir_path {}", path_prefix);
+        return -1;
+    }
+
+    return delete_prefix_impl(norm_path_prefix);
+}
+
+inline int MockAccessor::delete_directory(const std::string& dir_path) {
+    auto norm_dir_path = dir_path;
+    strip_leading(norm_dir_path, "/");
+    if (norm_dir_path.empty()) {
+        LOG_WARNING("invalid dir_path {}", dir_path);
+        return -1;
+    }
+
+    return delete_prefix_impl(!norm_dir_path.ends_with('/') ? norm_dir_path + 
'/' : norm_dir_path);
+}
+
+inline int MockAccessor::delete_all(int64_t expiration_time) {
+    std::lock_guard lock(mtx_);
+    objects_.clear();
+    return 0;
+}
+
+inline int MockAccessor::delete_files(const std::vector<std::string>& paths) {
+    TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_files", (int)0);
+
+    for (auto&& path : paths) {
+        delete_file(path);
+    }
+    return 0;
+}
+
+inline int MockAccessor::delete_file(const std::string& path) {
+    LOG(INFO) << "delete object path=" << path;
+    std::lock_guard lock(mtx_);
+    objects_.erase(path);
+    return 0;
+}
+
+inline int MockAccessor::put_file(const std::string& path, const std::string& 
content) {
+    std::lock_guard lock(mtx_);
+    objects_.insert(path);
+    return 0;
+}
+
+inline int MockAccessor::list_all(std::unique_ptr<ListIterator>* res) {
+    std::vector<std::string> entries;
+
+    {
+        std::lock_guard lock(mtx_);
+        entries.reserve(objects_.size());
+        entries.assign(objects_.rbegin(), objects_.rend());
+    }
+
+    *res = std::make_unique<MockListIterator>(std::move(entries));
+
+    return 0;
+}
+
+inline int MockAccessor::list_directory(const std::string& dir_path,
+                                        std::unique_ptr<ListIterator>* res) {
+    auto norm_dir_path = dir_path;
+    strip_leading(norm_dir_path, "/");
+    if (norm_dir_path.empty()) {
+        LOG_WARNING("invalid dir_path {}", dir_path);
+        return -1;
+    }
+
+    std::vector<std::string> entries;
+
+    {
+        std::lock_guard lock(mtx_);
+        auto [begin, end] = get_prefix_range(norm_dir_path);
+        if (begin != end) {
+            entries.reserve(std::distance(begin, end));
+            std::ranges::copy(std::ranges::subrange(begin, end) | 
std::ranges::views::reverse,
+                              std::back_inserter(entries));
+        }
+    }
+
+    *res = std::make_unique<MockListIterator>(std::move(entries));
+
+    return 0;
+}
+
+inline int MockAccessor::exists(const std::string& path) {
+    std::lock_guard lock(mtx_);
+    return !objects_.contains(path);
+}
+
 } // namespace doris::cloud
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index bcd7dd39160..da5dcc1556b 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -17,6 +17,7 @@
 
 #include "recycler/recycler.h"
 
+#include <butil/strings/string_split.h>
 #include <fmt/core.h>
 #include <gen_cpp/cloud.pb.h>
 #include <gen_cpp/olap_file.pb.h>
@@ -258,8 +259,11 @@ static int create_committed_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor
     rowset_pb.set_creation_time(current_time);
     if (num_inverted_indexes > 0) {
         auto schema = rowset_pb.mutable_tablet_schema();
+        
schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int i = 0; i < num_inverted_indexes; ++i) {
-            schema->add_index()->set_index_id(i);
+            auto index = schema->add_index();
+            index->set_index_id(i);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
     rowset_pb.SerializeToString(&val);
@@ -277,6 +281,24 @@ static int create_committed_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor
         auto path = segment_path(tablet_id, rowset_id, i);
         accessor->put_file(path, "");
         for (int j = 0; j < num_inverted_indexes; ++j) {
+            std::string key1;
+            std::string val1;
+            MetaTabletIdxKeyInfo key_info1 {instance_id, tablet_id};
+            meta_tablet_idx_key(key_info1, &key1);
+            TabletIndexPB tablet_table;
+            tablet_table.set_db_id(db_id);
+            tablet_table.set_index_id(j);
+            tablet_table.set_tablet_id(tablet_id);
+            if (!tablet_table.SerializeToString(&val1)) {
+                return -1;
+            }
+            if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+                return -1;
+            }
+            txn->put(key1, val1);
+            if (txn->commit() != TxnErrorCode::TXN_OK) {
+                return -1;
+            }
             auto path = inverted_index_path_v1(tablet_id, rowset_id, i, j, "");
             accessor->put_file(path, "");
         }
@@ -2470,6 +2492,129 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) {
     ASSERT_NE(checker.do_inverted_check(), 0);
 }
 
+TEST(CheckerTest, inverted_check_recycle_idx_file) {
+    auto* sp = SyncPoint::get_instance();
+    std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, 
[&sp](int*) {
+        sp->clear_all_call_backs();
+        sp->disable_processing();
+    });
+
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+    obj_info->set_ak(config::test_s3_ak);
+    obj_info->set_sk(config::test_s3_sk);
+    obj_info->set_endpoint(config::test_s3_endpoint);
+    obj_info->set_region(config::test_s3_region);
+    obj_info->set_bucket(config::test_s3_bucket);
+    obj_info->set_prefix("CheckerTest");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    // Add some visible rowsets along with some rowsets that should be recycled
+    // call inverted check after do recycle which would sweep all the rowsets 
not visible
+    auto accessor = checker.accessor_map_.begin()->second;
+
+    sp->set_call_back(
+            "InstanceRecycler::init_storage_vault_accessors.mock_vault", 
[&accessor](auto&& args) {
+                auto* map = try_any_cast<
+                        std::unordered_map<std::string, 
std::shared_ptr<StorageVaultAccessor>>*>(
+                        args[0]);
+                auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+                if (vault->name() == "test_success_hdfs_vault") {
+                    map->emplace(vault->id(), accessor);
+                }
+            });
+    sp->enable_processing();
+
+    for (int t = 10001; t <= 10100; ++t) {
+        for (int v = 0; v < 10; ++v) {
+            int ret = create_committed_rowset(txn_kv.get(), accessor.get(), 
"1", t, v, 1, 3);
+            ASSERT_EQ(ret, 0) << "Failed to create committed rs: " << ret;
+        }
+    }
+    std::unique_ptr<ListIterator> list_iter;
+    int ret = accessor->list_directory("data", &list_iter);
+    ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+    int64_t tablet_id_to_delete_index = -1;
+    for (auto file = list_iter->next(); file.has_value(); file = 
list_iter->next()) {
+        std::vector<std::string> str;
+        butil::SplitString(file->path, '/', &str);
+        int64_t tablet_id = atol(str[1].c_str());
+
+        // Only delete one index file of every tablet to mock recycling.
+        // "Delete all idx files" is not used here because the inverted 
check cannot handle that case;
+        // the forward check is required for it.
+        if (file->path.ends_with(".idx") && tablet_id_to_delete_index != 
tablet_id) {
+            accessor->delete_file(file->path);
+            tablet_id_to_delete_index = tablet_id;
+        }
+    }
+    ASSERT_EQ(checker.do_inverted_check(), 1);
+}
+
+TEST(CheckerTest, forward_check_recycle_idx_file) {
+    auto* sp = SyncPoint::get_instance();
+    std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, 
[&sp](int*) {
+        sp->clear_all_call_backs();
+        sp->disable_processing();
+    });
+
+    auto txn_kv = std::make_shared<MemTxnKv>();
+    ASSERT_EQ(txn_kv->init(), 0);
+
+    InstanceInfoPB instance;
+    instance.set_instance_id(instance_id);
+    auto obj_info = instance.add_obj_info();
+    obj_info->set_id("1");
+    obj_info->set_ak(config::test_s3_ak);
+    obj_info->set_sk(config::test_s3_sk);
+    obj_info->set_endpoint(config::test_s3_endpoint);
+    obj_info->set_region(config::test_s3_region);
+    obj_info->set_bucket(config::test_s3_bucket);
+    obj_info->set_prefix("CheckerTest");
+
+    InstanceChecker checker(txn_kv, instance_id);
+    ASSERT_EQ(checker.init(instance), 0);
+    // Create committed rowsets with inverted index files, delete every index file,
+    // then run the forward check (do_check), which is expected to return 1 
for this case
+    auto accessor = checker.accessor_map_.begin()->second;
+
+    sp->set_call_back(
+            "InstanceRecycler::init_storage_vault_accessors.mock_vault", 
[&accessor](auto&& args) {
+                auto* map = try_any_cast<
+                        std::unordered_map<std::string, 
std::shared_ptr<StorageVaultAccessor>>*>(
+                        args[0]);
+                auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+                if (vault->name() == "test_success_hdfs_vault") {
+                    map->emplace(vault->id(), accessor);
+                }
+            });
+    sp->enable_processing();
+
+    for (int t = 10001; t <= 10100; ++t) {
+        for (int v = 0; v < 10; ++v) {
+            create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 
1, 3);
+        }
+    }
+    std::unique_ptr<ListIterator> list_iter;
+    int ret = accessor->list_directory("data", &list_iter);
+    ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+    for (auto file = list_iter->next(); file.has_value(); file = 
list_iter->next()) {
+        // Delete all index files of every tablet to mock recycling.
+        if (file->path.ends_with(".idx")) {
+            accessor->delete_file(file->path);
+        }
+    }
+    ASSERT_EQ(checker.do_check(), 1);
+}
+
 TEST(CheckerTest, normal) {
     auto txn_kv = std::make_shared<MemTxnKv>();
     ASSERT_EQ(txn_kv->init(), 0);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to