This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 12939129707 [Fix](recycler) recycler need index_suffix and inverted 
index storage format to delete idx file (#38306)
12939129707 is described below

commit 12939129707fcd9a46f14627c58125991d3f95cd
Author: airborne12 <[email protected]>
AuthorDate: Thu Jul 25 16:33:34 2024 +0800

    [Fix](recycler) recycler need index_suffix and inverted index storage 
format to delete idx file (#38306)
    
    ## Proposed changes
    
    Recycler did not correctly delete idx file for inverted index format V2
    and variant data type.
---
 cloud/src/recycler/recycler.cpp | 74 ++++++++++++++++++++++++++++-------------
 cloud/src/recycler/recycler.h   |  2 ++
 cloud/src/recycler/util.h       | 15 +++++++--
 cloud/test/recycler_test.cpp    | 52 +++++++++++++++++++++--------
 4 files changed, 103 insertions(+), 40 deletions(-)

diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp
index f2d21e0a1b9..0b2267e601d 100644
--- a/cloud/src/recycler/recycler.cpp
+++ b/cloud/src/recycler/recycler.cpp
@@ -375,7 +375,7 @@ public:
             : instance_id_(std::move(instance_id)), txn_kv_(std::move(txn_kv)) 
{}
 
     // Return 0 if success, 1 if schema kv not found, negative for error
-    int get(int64_t index_id, int32_t schema_version, std::vector<int64_t>& 
res) {
+    int get(int64_t index_id, int32_t schema_version, InvertedIndexInfo& res) {
         {
             std::lock_guard lock(mtx_);
             if (schemas_without_inverted_index_.count({index_id, 
schema_version})) {
@@ -407,10 +407,13 @@ public:
             LOG(WARNING) << "malformed schema value, key=" << hex(schema_key);
             return -1;
         }
-        if (schema.index_size() > 0) {
-            res.reserve(schema.index_size());
+        if (schema.index_size() > 0 && 
schema.has_inverted_index_storage_format()) {
+            res.first = schema.inverted_index_storage_format();
+            res.second.reserve(schema.index_size());
             for (auto& i : schema.index()) {
-                res.push_back(i.index_id());
+                if (i.has_index_type() && i.index_type() == 
IndexType::INVERTED) {
+                    res.second.push_back(std::make_pair(i.index_id(), 
i.index_suffix_name()));
+                }
             }
         }
         insert(index_id, schema_version, res);
@@ -418,15 +421,15 @@ public:
     }
 
     // Empty `ids` means this schema has no inverted index
-    void insert(int64_t index_id, int32_t schema_version, const 
std::vector<int64_t>& ids) {
-        if (ids.empty()) {
+    void insert(int64_t index_id, int32_t schema_version, const 
InvertedIndexInfo& index_info) {
+        if (index_info.second.empty()) {
             TEST_SYNC_POINT("InvertedIndexIdCache::insert1");
             std::lock_guard lock(mtx_);
             schemas_without_inverted_index_.emplace(index_id, schema_version);
         } else {
             TEST_SYNC_POINT("InvertedIndexIdCache::insert2");
             std::lock_guard lock(mtx_);
-            inverted_index_id_map_.try_emplace({index_id, schema_version}, 
ids);
+            inverted_index_id_map_.try_emplace({index_id, schema_version}, 
index_info);
         }
     }
 
@@ -445,7 +448,7 @@ private:
         }
     };
     // <index_id, schema_version> -> inverted_index_ids
-    std::unordered_map<Key, std::vector<int64_t>, HashOfKey> 
inverted_index_id_map_;
+    std::unordered_map<Key, InvertedIndexInfo, HashOfKey> 
inverted_index_id_map_;
     // Store <index_id, schema_version> of schema which doesn't have inverted 
index
     std::unordered_set<Key, HashOfKey> schemas_without_inverted_index_;
 };
@@ -1273,17 +1276,26 @@ int InstanceRecycler::delete_rowset_data(const 
doris::RowsetMetaCloudPB& rs_meta
     const auto& rowset_id = rs_meta_pb.rowset_id_v2();
     int64_t tablet_id = rs_meta_pb.tablet_id();
     // process inverted indexes
-    std::vector<int64_t> index_ids;
+    std::vector<std::pair<int64_t, std::string>> index_ids;
     index_ids.reserve(rs_meta_pb.tablet_schema().index_size());
     for (auto& i : rs_meta_pb.tablet_schema().index()) {
-        index_ids.push_back(i.index_id());
+        if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
+            index_ids.push_back(std::make_pair(i.index_id(), 
i.index_suffix_name()));
+        }
     }
     std::vector<std::string> file_paths;
-    file_paths.reserve(num_segments * (1 + index_ids.size()));
+    auto tablet_schema = rs_meta_pb.tablet_schema();
     for (int64_t i = 0; i < num_segments; ++i) {
         file_paths.push_back(segment_path(tablet_id, rowset_id, i));
-        for (int64_t index_id : index_ids) {
-            file_paths.push_back(inverted_index_path(tablet_id, rowset_id, i, 
index_id));
+        if (tablet_schema.has_inverted_index_storage_format()) {
+            if (tablet_schema.inverted_index_storage_format() == 
InvertedIndexStorageFormatPB::V1) {
+                for (const auto& index_id : index_ids) {
+                    file_paths.push_back(inverted_index_path_v1(tablet_id, 
rowset_id, i,
+                                                                
index_id.first, index_id.second));
+                }
+            } else if (!index_ids.empty()) {
+                file_paths.push_back(inverted_index_path_v2(tablet_id, 
rowset_id, i));
+            }
         }
     }
     // TODO(AlexYue): seems could do do batch
@@ -1294,7 +1306,8 @@ int InstanceRecycler::delete_rowset_data(const 
std::vector<doris::RowsetMetaClou
     int ret = 0;
     // resource_id -> file_paths
     std::map<std::string, std::vector<std::string>> resource_file_paths;
-    for (auto& rs : rowsets) {
+
+    for (const auto& rs : rowsets) {
         {
             std::lock_guard lock(recycled_tablets_mtx_);
             if (recycled_tablets_.count(rs.tablet_id())) {
@@ -1317,14 +1330,21 @@ int InstanceRecycler::delete_rowset_data(const 
std::vector<doris::RowsetMetaClou
         int64_t num_segments = rs.num_segments();
         if (num_segments <= 0) continue;
 
-        // process inverted indexes
-        std::vector<int64_t> index_ids;
+        // Process inverted indexes
+        std::vector<std::pair<int64_t, std::string>> index_ids;
+        // default format as v2.
+        InvertedIndexStorageFormatPB index_format = 
InvertedIndexStorageFormatPB::V2;
+
         if (rs.has_tablet_schema()) {
-            index_ids.reserve(rs.tablet_schema().index().size());
-            for (auto& index_pb : rs.tablet_schema().index()) {
-                index_ids.push_back(index_pb.index_id());
+            for (const auto& index : rs.tablet_schema().index()) {
+                if (index.has_index_type() && index.index_type() == 
IndexType::INVERTED) {
+                    index_ids.emplace_back(index.index_id(), 
index.index_suffix_name());
+                }
+            }
+            if (rs.tablet_schema().has_inverted_index_storage_format()) {
+                index_format = 
rs.tablet_schema().inverted_index_storage_format();
             }
-        } else { // Detached schema
+        } else {
             if (!rs.has_index_id() || !rs.has_schema_version()) {
                 LOG(WARNING) << "rowset must have either schema or 
schema_version and index_id, "
                                 "instance_id="
@@ -1333,8 +1353,9 @@ int InstanceRecycler::delete_rowset_data(const 
std::vector<doris::RowsetMetaClou
                 ret = -1;
                 continue;
             }
+            InvertedIndexInfo index_info;
             int get_ret =
-                    inverted_index_id_cache_->get(rs.index_id(), 
rs.schema_version(), index_ids);
+                    inverted_index_id_cache_->get(rs.index_id(), 
rs.schema_version(), index_info);
             if (get_ret != 0) {
                 if (get_ret == 1) { // Schema kv not found
                     // Check tablet existence
@@ -1352,11 +1373,18 @@ int InstanceRecycler::delete_rowset_data(const 
std::vector<doris::RowsetMetaClou
                 ret = -1;
                 continue;
             }
+            index_format = index_info.first;
+            index_ids = std::move(index_info.second);
         }
         for (int64_t i = 0; i < num_segments; ++i) {
             file_paths.push_back(segment_path(tablet_id, rowset_id, i));
-            for (int64_t index_id : index_ids) {
-                file_paths.push_back(inverted_index_path(tablet_id, rowset_id, 
i, index_id));
+            if (index_format == InvertedIndexStorageFormatPB::V1) {
+                for (const auto& index_id : index_ids) {
+                    file_paths.push_back(inverted_index_path_v1(tablet_id, 
rowset_id, i,
+                                                                
index_id.first, index_id.second));
+                }
+            } else if (!index_ids.empty()) {
+                file_paths.push_back(inverted_index_path_v2(tablet_id, 
rowset_id, i));
             }
         }
     }
diff --git a/cloud/src/recycler/recycler.h b/cloud/src/recycler/recycler.h
index 9f6a238877b..950c0193f77 100644
--- a/cloud/src/recycler/recycler.h
+++ b/cloud/src/recycler/recycler.h
@@ -226,6 +226,8 @@ private:
 
     // TODO(plat1ko): Add new accessor to map in runtime for new created 
storage vaults
     std::unordered_map<std::string, std::shared_ptr<StorageVaultAccessor>> 
accessor_map_;
+    using InvertedIndexInfo =
+            std::pair<InvertedIndexStorageFormatPB, 
std::vector<std::pair<int64_t, std::string>>>;
 
     class InvertedIndexIdCache;
     std::unique_ptr<InvertedIndexIdCache> inverted_index_id_cache_;
diff --git a/cloud/src/recycler/util.h b/cloud/src/recycler/util.h
index be81561b75b..b63090062bf 100644
--- a/cloud/src/recycler/util.h
+++ b/cloud/src/recycler/util.h
@@ -57,9 +57,18 @@ inline std::string segment_path(int64_t tablet_id, const 
std::string& rowset_id,
     return fmt::format("data/{}/{}_{}.dat", tablet_id, rowset_id, segment_id);
 }
 
-inline std::string inverted_index_path(int64_t tablet_id, const std::string& 
rowset_id,
-                                       int64_t segment_id, int64_t index_id) {
-    return fmt::format("data/{}/{}_{}_{}.idx", tablet_id, rowset_id, 
segment_id, index_id);
+inline std::string inverted_index_path_v2(int64_t tablet_id, const 
std::string& rowset_id,
+                                          int64_t segment_id) {
+    return fmt::format("data/{}/{}_{}.idx", tablet_id, rowset_id, segment_id);
+}
+
+inline std::string inverted_index_path_v1(int64_t tablet_id, const 
std::string& rowset_id,
+                                          int64_t segment_id, int64_t index_id,
+                                          std::string_view index_path_suffix) {
+    std::string suffix =
+            index_path_suffix.empty() ? "" : std::string {"@"} + 
index_path_suffix.data();
+    return fmt::format("data/{}/{}_{}_{}{}.idx", tablet_id, rowset_id, 
segment_id, index_id,
+                       suffix);
 }
 
 inline std::string rowset_path_prefix(int64_t tablet_id, const std::string& 
rowset_id) {
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index 4409b8a2c41..ca8ffbcee61 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -162,8 +162,8 @@ static int create_recycle_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor,
         auto path = segment_path(rowset.tablet_id(), rowset.rowset_id_v2(), i);
         accessor->put_file(path, "");
         for (auto& index : rowset.tablet_schema().index()) {
-            auto path = inverted_index_path(rowset.tablet_id(), 
rowset.rowset_id_v2(), i,
-                                            index.index_id());
+            auto path = inverted_index_path_v1(rowset.tablet_id(), 
rowset.rowset_id_v2(), i,
+                                               index.index_id(), 
index.index_suffix_name());
             accessor->put_file(path, "");
         }
     }
@@ -200,8 +200,8 @@ static int create_tmp_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor,
         auto path = segment_path(rowset.tablet_id(), rowset.rowset_id_v2(), i);
         accessor->put_file(path, path);
         for (auto& index : rowset.tablet_schema().index()) {
-            auto path = inverted_index_path(rowset.tablet_id(), 
rowset.rowset_id_v2(), i,
-                                            index.index_id());
+            auto path = inverted_index_path_v1(rowset.tablet_id(), 
rowset.rowset_id_v2(), i,
+                                               index.index_id(), 
index.index_suffix_name());
             accessor->put_file(path, path);
         }
     }
@@ -247,7 +247,7 @@ static int create_committed_rowset(TxnKv* txn_kv, 
StorageVaultAccessor* accessor
         auto path = segment_path(tablet_id, rowset_id, i);
         accessor->put_file(path, "");
         for (int j = 0; j < num_inverted_indexes; ++j) {
-            auto path = inverted_index_path(tablet_id, rowset_id, i, j);
+            auto path = inverted_index_path_v1(tablet_id, rowset_id, i, j, "");
             accessor->put_file(path, "");
         }
     }
@@ -671,8 +671,11 @@ TEST(RecyclerTest, recycle_rowsets) {
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -750,8 +753,11 @@ TEST(RecyclerTest, bench_recycle_rowsets) {
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -812,9 +818,12 @@ TEST(RecyclerTest, recycle_tmp_rowsets) {
     std::vector<doris::TabletSchemaCloudPB> schemas;
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         schema.set_schema_version(i);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -873,7 +882,9 @@ TEST(RecyclerTest, recycle_tablet) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -945,8 +956,11 @@ TEST(RecyclerTest, recycle_indexes) {
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -1054,8 +1068,11 @@ TEST(RecyclerTest, recycle_partitions) {
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -1986,7 +2003,9 @@ TEST(RecyclerTest, recycle_deleted_instance) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -2254,7 +2273,9 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 
@@ -2538,8 +2559,11 @@ TEST(RecyclerTest, delete_rowset_data) {
     for (int i = 0; i < 5; ++i) {
         auto& schema = schemas.emplace_back();
         schema.set_schema_version(i);
+        
schema.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
         for (int j = 0; j < i; ++j) {
-            schema.add_index()->set_index_id(j);
+            auto index = schema.add_index();
+            index->set_index_id(j);
+            index->set_index_type(IndexType::INVERTED);
         }
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to