This is an automated email from the ASF dual-hosted git repository.
jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new c8aa3e3 In TableRebalancer, allow increasing from 1 replica without
downtime (#7532)
c8aa3e3 is described below
commit c8aa3e39ccec1971dd71863c10b07254fe2cbc78
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Thu Oct 7 11:27:07 2021 -0700
In TableRebalancer, allow increasing from 1 replica without downtime (#7532)
Currently when rebalancing a table without downtime, the min available
replicas check is performed on the min replica of current and target
assignment. This will prevent user from increasing the replica of the table
from 1 without downtime. This PR fixes that by only rely on target assignment
to decide the min available replicas.
---
.../helix/core/rebalance/TableRebalancer.java | 21 +++++++++++------
.../helix/core/rebalance/TableRebalancerTest.java | 27 ++++++++++++++++++++++
2 files changed, 41 insertions(+), 7 deletions(-)
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
index 2e1fef0..def5209 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
@@ -322,18 +322,23 @@ public class TableRebalancer {
}
// Calculate the min available replicas for no-downtime rebalance
- int numCurrentReplicas =
currentAssignment.values().iterator().next().size();
- int numTargetReplicas = targetAssignment.values().iterator().next().size();
- // Use the smaller one to determine the min available replicas
- int numReplicas = Math.min(numCurrentReplicas, numTargetReplicas);
+ // NOTE: The calculation is based on the number of replicas of the target
assignment. In case of increasing the
+ // number of replicas for the current assignment, the current
instance state map might not have enough
+ // replicas to reach the minimum available replicas requirement. In
this scenario we don't want to fail the
+ // check, but keep all the current instances as this is the best we
can do, and can help the table get out of
+ // this state.
+ int numReplicas = Integer.MAX_VALUE;
+ for (Map<String, String> instanceStateMap : targetAssignment.values()) {
+ numReplicas = Math.min(instanceStateMap.size(), numReplicas);
+ }
int minAvailableReplicas;
if (minReplicasToKeepUpForNoDowntime >= 0) {
// For non-negative value, use it as min available replicas
if (minReplicasToKeepUpForNoDowntime >= numReplicas) {
LOGGER.warn(
"Illegal config for minReplicasToKeepUpForNoDowntime: {} for
table: {}, must be less than number of "
- + "replicas (current: {}, target: {}), aborting the
rebalance", minReplicasToKeepUpForNoDowntime,
- tableNameWithType, numCurrentReplicas, numTargetReplicas);
+ + "replicas: {}, aborting the rebalance",
+ minReplicasToKeepUpForNoDowntime, tableNameWithType, numReplicas);
return new RebalanceResult(RebalanceResult.Status.FAILED, "Illegal min
available replicas config",
instancePartitionsMap, targetAssignment);
}
@@ -624,7 +629,9 @@ public class TableRebalancer {
/**
* Returns the next assignment for a segment based on the current instance
state map and the target instance state map
- * with regards to the minimum available replicas requirement.
+ * with regard to the minimum available replicas requirement.
+ * It is possible that the current instance state map does not have enough
replicas to reach the minimum available
+ * replicas requirement, and in this scenario we will keep all the current
instances as this is the best we can do.
*/
@VisibleForTesting
static SingleSegmentAssignment getNextSingleSegmentAssignment(Map<String,
String> currentInstanceStateMap,
diff --git
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
index 58bd878..dbef89a 100644
---
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
+++
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
@@ -128,6 +128,19 @@ public class TableRebalancerTest {
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap,
targetInstanceStateMap, 1);
assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
assertEquals(assignment._availableInstances, new
TreeSet<>(Arrays.asList("host5", "host6")));
+
+ // With increasing from 1 replica, next assignment should have 1 common
instances with current assignment
+ currentInstanceStateMap =
SegmentAssignmentUtils.getInstanceStateMap(Collections.singletonList("host1"),
ONLINE);
+ targetInstanceStateMap =
+ SegmentAssignmentUtils.getInstanceStateMap(Arrays.asList("host2",
"host3", "host4"), ONLINE);
+ // [host1, host2, host3]
+ assignment =
TableRebalancer.getNextSingleSegmentAssignment(currentInstanceStateMap,
targetInstanceStateMap, 1);
+ assertEquals(assignment._availableInstances,
Collections.singleton("host1"));
+ // Next round should make the assignment the same as target assignment
+ assignment =
+
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap,
targetInstanceStateMap, 1);
+ assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
+ assertEquals(assignment._availableInstances, new
TreeSet<>(Arrays.asList("host2", "host3")));
}
@Test
@@ -201,6 +214,20 @@ public class TableRebalancerTest {
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap,
targetInstanceStateMap, 2);
assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
assertEquals(assignment._availableInstances, new
TreeSet<>(Arrays.asList("host5", "host6")));
+
+ // With increasing from 1 replica, next assignment should have 1 common
instances with current assignment
+ // NOTE: This is the best we can do because we don't have 2 replicas
available
+ currentInstanceStateMap =
SegmentAssignmentUtils.getInstanceStateMap(Collections.singletonList("host1"),
ONLINE);
+ targetInstanceStateMap =
+ SegmentAssignmentUtils.getInstanceStateMap(Arrays.asList("host2",
"host3", "host4"), ONLINE);
+ // [host1, host2, host3]
+ assignment =
TableRebalancer.getNextSingleSegmentAssignment(currentInstanceStateMap,
targetInstanceStateMap, 2);
+ assertEquals(assignment._availableInstances,
Collections.singleton("host1"));
+ // Next round should make the assignment the same as target assignment
+ assignment =
+
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap,
targetInstanceStateMap, 2);
+ assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
+ assertEquals(assignment._availableInstances, new
TreeSet<>(Arrays.asList("host2", "host3")));
}
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]