This is an automated email from the ASF dual-hosted git repository.
tanxinyu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new 46e47672c40 [IOTDB-6309] Larger interval for auto ratis leader balance
(#12116)
46e47672c40 is described below
commit 46e47672c403bc8d363588a8856586e56b1dd2ad
Author: Yongzao <[email protected]>
AuthorDate: Fri Mar 8 16:32:11 2024 +0800
[IOTDB-6309] Larger interval for auto ratis leader balance (#12116)
---
.../manager/load/balancer/RouteBalancer.java | 24 ++++++++++++++++++++--
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
index 463a5df0dc7..1f3fb4efb55 100644
---
a/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
+++
b/iotdb-core/confignode/src/main/java/org/apache/iotdb/confignode/manager/load/balancer/RouteBalancer.java
@@ -86,6 +86,9 @@ public class RouteBalancer {
// The simple consensus protocol will always automatically designate
itself as the leader
||
ConsensusFactory.SIMPLE_CONSENSUS.equals(SCHEMA_REGION_CONSENSUS_PROTOCOL_CLASS);
+ // Minimum wait (in ms) before retrying ratis leader balance after a failure
+ private static final long BALANCE_RATIS_LEADER_FAILED_INTERVAL = 60 * 1000L;
+
private final IManager configManager;
/** RegionRouteMap */
@@ -94,6 +97,9 @@ public class RouteBalancer {
// For generating optimal RegionPriorityMap
private final IPriorityBalancer priorityRouter;
+ private long lastFailedTimeForBalanceRatisSchemaLeader = 0;
+ private long lastFailedTimeForBalanceRatisDataLeader = 0;
+
public RouteBalancer(IManager configManager) {
this.configManager = configManager;
@@ -127,10 +133,16 @@ public class RouteBalancer {
public synchronized Map<TConsensusGroupId, Pair<Integer, Integer>>
balanceRegionLeader() {
Map<TConsensusGroupId, Pair<Integer, Integer>> differentRegionLeaderMap =
new ConcurrentHashMap<>();
- if (IS_ENABLE_AUTO_LEADER_BALANCE_FOR_SCHEMA_REGION) {
+ if (IS_ENABLE_AUTO_LEADER_BALANCE_FOR_SCHEMA_REGION
+ &&
(!ConsensusFactory.RATIS_CONSENSUS.equals(SCHEMA_REGION_CONSENSUS_PROTOCOL_CLASS)
+ || System.currentTimeMillis() -
lastFailedTimeForBalanceRatisSchemaLeader
+ > BALANCE_RATIS_LEADER_FAILED_INTERVAL)) {
differentRegionLeaderMap.putAll(balanceRegionLeader(TConsensusGroupType.SchemaRegion));
}
- if (IS_ENABLE_AUTO_LEADER_BALANCE_FOR_DATA_REGION) {
+ if (IS_ENABLE_AUTO_LEADER_BALANCE_FOR_DATA_REGION
+ &&
(!ConsensusFactory.RATIS_CONSENSUS.equals(DATA_REGION_CONSENSUS_PROTOCOL_CLASS)
+ || System.currentTimeMillis() -
lastFailedTimeForBalanceRatisDataLeader
+ > BALANCE_RATIS_LEADER_FAILED_INTERVAL)) {
differentRegionLeaderMap.putAll(balanceRegionLeader(TConsensusGroupType.DataRegion));
}
@@ -199,6 +211,14 @@ public class RouteBalancer {
clientHandler.getRequest(i).getNewLeaderNode().getDataNodeId());
} else {
differentRegionLeaderMap.remove(clientHandler.getRequest(i).getRegionId());
+ if (TConsensusGroupType.SchemaRegion.equals(regionGroupType)
+ &&
ConsensusFactory.RATIS_CONSENSUS.equals(SCHEMA_REGION_CONSENSUS_PROTOCOL_CLASS))
{
+ lastFailedTimeForBalanceRatisSchemaLeader =
System.currentTimeMillis();
+ }
+ if (TConsensusGroupType.DataRegion.equals(regionGroupType)
+ &&
ConsensusFactory.RATIS_CONSENSUS.equals(DATA_REGION_CONSENSUS_PROTOCOL_CLASS)) {
+ lastFailedTimeForBalanceRatisDataLeader =
System.currentTimeMillis();
+ }
LOGGER.error(
"[LeaderBalancer] Failed to change the leader of Region: {} to
DataNode: {}",
clientHandler.getRequest(i).getRegionId(),