This is an automated email from the ASF dual-hosted git repository.

wangdan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/master by this push:
     new f5e1b8130 fix(duplication): failed to start duplication after source 
replica servers exit due to missing duplicating status in .app-info (#1608)
f5e1b8130 is described below

commit f5e1b81302c4fe85566f7c7872ac2007d0f16d91
Author: ninsmiracle <[email protected]>
AuthorDate: Wed Apr 3 11:11:42 2024 +0800

    fix(duplication): failed to start duplication after source replica servers 
exit due to missing duplicating status in .app-info (#1608)
    
    https://github.com/apache/incubator-pegasus/issues/1595
    
    Currently, after a duplication was added, `duplicating` status would be 
only updated
    for in-memory state. However, once some replica servers exited and were 
started again,
    `duplicating` status would be lost since it had not been persisted into 
`.app-info` before.
    Consequently, plog files would be removed by GC, which lead to failed 
assertion for decree
    in `replica_duplicator::verify_start_decree()`.
    
    To resolve this problem, `duplicating` should be updated for both in-memory 
and on-disk
    states. Then, `duplicating` status would be loaded correctly from 
`.app-info` file at the loading
    stage of replicas.
---
 src/replica/duplication/duplication_sync_timer.cpp | 29 +++++++++++++++++-----
 .../duplication/replica_duplicator_manager.h       |  2 +-
 src/replica/replica.h                              |  2 ++
 src/replica/replica_config.cpp                     | 19 ++++++++++++++
 4 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/src/replica/duplication/duplication_sync_timer.cpp 
b/src/replica/duplication/duplication_sync_timer.cpp
index 58d4e2ab7..d7254c939 100644
--- a/src/replica/duplication/duplication_sync_timer.cpp
+++ b/src/replica/duplication/duplication_sync_timer.cpp
@@ -116,19 +116,37 @@ void 
duplication_sync_timer::on_duplication_sync_reply(error_code err,
 void duplication_sync_timer::update_duplication_map(
     const std::map<int32_t, std::map<int32_t, duplication_entry>> &dup_map)
 {
-    for (const replica_ptr &r : _stub->get_all_replicas()) {
+    for (replica_ptr &r : _stub->get_all_replicas()) {
         auto dup_mgr = r->get_duplication_manager();
         if (!dup_mgr) {
             continue;
         }
 
         const auto &it = dup_map.find(r->get_gpid().get_app_id());
-        if (it == dup_map.end()) {
-            // no duplication is assigned to this app
+
+        // TODO(wangdan): at meta server side, an app is considered duplicating
+        // as long as any duplication of this app has valid status(i.e.
+        // duplication_info::is_invalid_status() returned false, see
+        // meta_duplication_service::refresh_duplicating_no_lock()). And 
duplications
+        // in duplication_sync_response returned by meta server could also be
+        // considered duplicating according to 
meta_duplication_service::duplication_sync().
+        // Thus we could update `duplicating` in both memory and 
file(.app-info).
+        //
+        // However, most of properties of an app(struct `app_info`) are 
written to .app-info
+        // file in replica::on_config_sync(), such as max_replica_count; on 
the other hand,
+        // in-memory `duplicating` is also updated in 
replica::on_config_proposal(). Thus we'd
+        // better think about a unique entrance to update `duplicating`(in 
both memory and disk),
+        // rather than update them at anywhere.
+        const auto duplicating = it != dup_map.end();
+        r->update_app_duplication_status(duplicating);
+
+        if (!duplicating) {
+            // No duplication is assigned to this app.
             dup_mgr->update_duplication_map({});
-        } else {
-            dup_mgr->update_duplication_map(it->second);
+            continue;
         }
+
+        dup_mgr->update_duplication_map(it->second);
     }
 }
 
@@ -198,6 +216,5 @@ duplication_sync_timer::get_dup_states(int app_id, /*out*/ 
bool *app_found)
     }
     return result;
 }
-
 } // namespace replication
 } // namespace dsn
diff --git a/src/replica/duplication/replica_duplicator_manager.h 
b/src/replica/duplication/replica_duplicator_manager.h
index 55fc92313..51bcbd1e1 100644
--- a/src/replica/duplication/replica_duplicator_manager.h
+++ b/src/replica/duplication/replica_duplicator_manager.h
@@ -53,7 +53,7 @@ public:
     // - the app is not assigned with duplication (dup_map.empty())
     void update_duplication_map(const std::map<int32_t, duplication_entry> 
&new_dup_map)
     {
-        if (_replica->status() != partition_status::PS_PRIMARY || 
new_dup_map.empty()) {
+        if (new_dup_map.empty() || _replica->status() != 
partition_status::PS_PRIMARY) {
             remove_all_duplications();
             return;
         }
diff --git a/src/replica/replica.h b/src/replica/replica.h
index 8b6fe6b16..ab9e451a4 100644
--- a/src/replica/replica.h
+++ b/src/replica/replica.h
@@ -251,6 +251,8 @@ public:
         _is_duplication_plog_checking.store(checking);
     }
 
+    void update_app_duplication_status(bool duplicating);
+
     //
     // Backup
     //
diff --git a/src/replica/replica_config.cpp b/src/replica/replica_config.cpp
index 580d82bbc..386a03e72 100644
--- a/src/replica/replica_config.cpp
+++ b/src/replica/replica_config.cpp
@@ -1176,5 +1176,24 @@ error_code replica::update_init_info_ballot_and_decree()
     return _app->update_init_info_ballot_and_decree(this);
 }
 
+void replica::update_app_duplication_status(bool duplicating)
+{
+    if (duplicating == _app_info.duplicating) {
+        return;
+    }
+
+    auto old_duplicating = _app_info.duplicating;
+    _app_info.__set_duplicating(duplicating);
+
+    CHECK_EQ_PREFIX_MSG(store_app_info(_app_info),
+                        ERR_OK,
+                        "store_app_info for duplicating failed: app_name={}, "
+                        "app_id={}, old_duplicating={}, new_duplicating={}",
+                        _app_info.app_name,
+                        _app_info.app_id,
+                        old_duplicating,
+                        _app_info.duplicating);
+}
+
 } // namespace replication
 } // namespace dsn


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to