This is an automated email from the ASF dual-hosted git repository.
laiyingchun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pegasus.git
The following commit(s) were added to refs/heads/master by this push:
new 1c727a048 fix(balance): Distinguish cluster balanced or collecting
balance info interrupted (#1951)
1c727a048 is described below
commit 1c727a0489299aa1c41cd40fe5fac716a179ba2c
Author: Samunroyu <[email protected]>
AuthorDate: Tue Mar 26 17:16:01 2024 +0800
fix(balance): Distinguish cluster balanced or collecting balance info
interrupted (#1951)
After scaling out, Pegasus determines whether the shards are balanced by
checking whether the balance operation count is equal to 0. However, the
meta server also returns a count of 0 when it cannot collect all replica
information, which leads to the premature termination of balancing.
Therefore, in such cases, the count should be equal to -1 instead.
A new bool member variable is introduced into class greedy_load_balancer
to control whether the balance operation count is reported as -1.
---
src/meta/greedy_load_balancer.cpp | 11 ++++++++++-
src/meta/greedy_load_balancer.h | 1 +
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/meta/greedy_load_balancer.cpp
b/src/meta/greedy_load_balancer.cpp
index 6fb7aaadb..0905be033 100644
--- a/src/meta/greedy_load_balancer.cpp
+++ b/src/meta/greedy_load_balancer.cpp
@@ -63,6 +63,7 @@ greedy_load_balancer::greedy_load_balancer(meta_service
*_svc) : server_load_bal
{
_app_balance_policy = std::make_unique<app_balance_policy>(_svc);
_cluster_balance_policy = std::make_unique<cluster_balance_policy>(_svc);
+ _all_replca_infos_collected = false;
::memset(t_operation_counters, 0, sizeof(t_operation_counters));
}
@@ -162,7 +163,8 @@ void greedy_load_balancer::greedy_balancer(const bool
balance_checker)
for (auto &kv : *(t_global_view->nodes)) {
node_state &ns = kv.second;
- if (!all_replica_infos_collected(ns)) {
+ _all_replca_infos_collected = all_replica_infos_collected(ns);
+ if (!_all_replca_infos_collected) {
return;
}
}
@@ -235,6 +237,13 @@ void greedy_load_balancer::report(const
dsn::replication::migration_list &list,
}
}
+ if (!_all_replca_infos_collected) {
+ counters[ALL_COUNT] = -1;
+ LOG_DEBUG(
+ "balance checker operation count = {}, due to meta server hasn't
collected all replica",
+ counters[ALL_COUNT]);
+ }
+
::memcpy(t_operation_counters, counters, sizeof(counters));
METRIC_SET_GREEDY_BALANCE_STATS(_svc->get_server_state()->get_table_metric_entities(),
balance_stats);
diff --git a/src/meta/greedy_load_balancer.h b/src/meta/greedy_load_balancer.h
index 45710963a..b6baf0aaa 100644
--- a/src/meta/greedy_load_balancer.h
+++ b/src/meta/greedy_load_balancer.h
@@ -74,6 +74,7 @@ private:
migration_list *t_migration_result;
int t_alive_nodes;
int t_operation_counters[MAX_COUNT];
+ bool _all_replca_infos_collected;
std::unique_ptr<load_balance_policy> _app_balance_policy;
std::unique_ptr<load_balance_policy> _cluster_balance_policy;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]