This is an automated email from the ASF dual-hosted git repository.

laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git


The following commit(s) were added to refs/heads/master by this push:
     new 1c727a048 fix(balance): Distinguish cluster balanced or collecting 
balance info interrupted (#1951)
1c727a048 is described below

commit 1c727a0489299aa1c41cd40fe5fac716a179ba2c
Author: Samunroyu <[email protected]>
AuthorDate: Tue Mar 26 17:16:01 2024 +0800

    fix(balance): Distinguish cluster balanced or collecting balance info 
interrupted (#1951)
    
    After scaling out, Pegasus determines whether the shards are balanced by 
checking if the
    balance operation counts are equal to 0. However, since the meta server 
returns counts = 0
    when it cannot collect all replicas information, this leads to the 
premature termination of
    balancing. Therefore, in such cases, the counts should be equal to -1.
    
    A new bool member variable is introduced into class greedy_load_balancer to 
control
    whether the balance operation count is reported as -1.
---
 src/meta/greedy_load_balancer.cpp | 11 ++++++++++-
 src/meta/greedy_load_balancer.h   |  1 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/meta/greedy_load_balancer.cpp 
b/src/meta/greedy_load_balancer.cpp
index 6fb7aaadb..0905be033 100644
--- a/src/meta/greedy_load_balancer.cpp
+++ b/src/meta/greedy_load_balancer.cpp
@@ -63,6 +63,7 @@ greedy_load_balancer::greedy_load_balancer(meta_service 
*_svc) : server_load_bal
 {
     _app_balance_policy = std::make_unique<app_balance_policy>(_svc);
     _cluster_balance_policy = std::make_unique<cluster_balance_policy>(_svc);
+    _all_replca_infos_collected = false;
 
     ::memset(t_operation_counters, 0, sizeof(t_operation_counters));
 }
@@ -162,7 +163,8 @@ void greedy_load_balancer::greedy_balancer(const bool 
balance_checker)
 
     for (auto &kv : *(t_global_view->nodes)) {
         node_state &ns = kv.second;
-        if (!all_replica_infos_collected(ns)) {
+        _all_replca_infos_collected = all_replica_infos_collected(ns);
+        if (!_all_replca_infos_collected) {
             return;
         }
     }
@@ -235,6 +237,13 @@ void greedy_load_balancer::report(const 
dsn::replication::migration_list &list,
         }
     }
 
+    if (!_all_replca_infos_collected) {
+        counters[ALL_COUNT] = -1;
+        LOG_DEBUG(
+            "balance checker operation count = {}, due to meta server hasn't 
collected all replica",
+            counters[ALL_COUNT]);
+    }
+
     ::memcpy(t_operation_counters, counters, sizeof(counters));
     
METRIC_SET_GREEDY_BALANCE_STATS(_svc->get_server_state()->get_table_metric_entities(),
                                     balance_stats);
diff --git a/src/meta/greedy_load_balancer.h b/src/meta/greedy_load_balancer.h
index 45710963a..b6baf0aaa 100644
--- a/src/meta/greedy_load_balancer.h
+++ b/src/meta/greedy_load_balancer.h
@@ -74,6 +74,7 @@ private:
     migration_list *t_migration_result;
     int t_alive_nodes;
     int t_operation_counters[MAX_COUNT];
+    bool _all_replca_infos_collected;
 
     std::unique_ptr<load_balance_policy> _app_balance_policy;
     std::unique_ptr<load_balance_policy> _cluster_balance_policy;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to