This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new c8aa3e3  In TableRebalancer, allow increasing from 1 replica without 
downtime (#7532)
c8aa3e3 is described below

commit c8aa3e39ccec1971dd71863c10b07254fe2cbc78
Author: Xiaotian (Jackie) Jiang <[email protected]>
AuthorDate: Thu Oct 7 11:27:07 2021 -0700

    In TableRebalancer, allow increasing from 1 replica without downtime (#7532)
    
    Currently when rebalancing a table without downtime, the min available 
replicas check is performed on the min replica of current and target 
assignment. This will prevent user from increasing the replica of the table 
from 1 without downtime. This PR fixes that by only rely on target assignment 
to decide the min available replicas.
---
 .../helix/core/rebalance/TableRebalancer.java      | 21 +++++++++++------
 .../helix/core/rebalance/TableRebalancerTest.java  | 27 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
index 2e1fef0..def5209 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancer.java
@@ -322,18 +322,23 @@ public class TableRebalancer {
     }
 
     // Calculate the min available replicas for no-downtime rebalance
-    int numCurrentReplicas = 
currentAssignment.values().iterator().next().size();
-    int numTargetReplicas = targetAssignment.values().iterator().next().size();
-    // Use the smaller one to determine the min available replicas
-    int numReplicas = Math.min(numCurrentReplicas, numTargetReplicas);
+    // NOTE: The calculation is based on the number of replicas of the target 
assignment. In case of increasing the
+    //       number of replicas for the current assignment, the current 
instance state map might not have enough
+    //       replicas to reach the minimum available replicas requirement. In 
this scenario we don't want to fail the
+    //       check, but keep all the current instances as this is the best we 
can do, and can help the table get out of
+    //       this state.
+    int numReplicas = Integer.MAX_VALUE;
+    for (Map<String, String> instanceStateMap : targetAssignment.values()) {
+      numReplicas = Math.min(instanceStateMap.size(), numReplicas);
+    }
     int minAvailableReplicas;
     if (minReplicasToKeepUpForNoDowntime >= 0) {
       // For non-negative value, use it as min available replicas
       if (minReplicasToKeepUpForNoDowntime >= numReplicas) {
         LOGGER.warn(
             "Illegal config for minReplicasToKeepUpForNoDowntime: {} for 
table: {}, must be less than number of "
-                + "replicas (current: {}, target: {}), aborting the 
rebalance", minReplicasToKeepUpForNoDowntime,
-            tableNameWithType, numCurrentReplicas, numTargetReplicas);
+                + "replicas: {}, aborting the rebalance",
+            minReplicasToKeepUpForNoDowntime, tableNameWithType, numReplicas);
         return new RebalanceResult(RebalanceResult.Status.FAILED, "Illegal min 
available replicas config",
             instancePartitionsMap, targetAssignment);
       }
@@ -624,7 +629,9 @@ public class TableRebalancer {
 
   /**
    * Returns the next assignment for a segment based on the current instance 
state map and the target instance state map
-   * with regards to the minimum available replicas requirement.
+   * with regard to the minimum available replicas requirement.
+   * It is possible that the current instance state map does not have enough 
replicas to reach the minimum available
+   * replicas requirement, and in this scenario we will keep all the current 
instances as this is the best we can do.
    */
   @VisibleForTesting
   static SingleSegmentAssignment getNextSingleSegmentAssignment(Map<String, 
String> currentInstanceStateMap,
diff --git 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
index 58bd878..dbef89a 100644
--- 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
+++ 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/rebalance/TableRebalancerTest.java
@@ -128,6 +128,19 @@ public class TableRebalancerTest {
         
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap, 
targetInstanceStateMap, 1);
     assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
     assertEquals(assignment._availableInstances, new 
TreeSet<>(Arrays.asList("host5", "host6")));
+
+    // With increasing from 1 replica, next assignment should have 1 common 
instances with current assignment
+    currentInstanceStateMap = 
SegmentAssignmentUtils.getInstanceStateMap(Collections.singletonList("host1"), 
ONLINE);
+    targetInstanceStateMap =
+        SegmentAssignmentUtils.getInstanceStateMap(Arrays.asList("host2", 
"host3", "host4"), ONLINE);
+    // [host1, host2, host3]
+    assignment = 
TableRebalancer.getNextSingleSegmentAssignment(currentInstanceStateMap, 
targetInstanceStateMap, 1);
+    assertEquals(assignment._availableInstances, 
Collections.singleton("host1"));
+    // Next round should make the assignment the same as target assignment
+    assignment =
+        
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap, 
targetInstanceStateMap, 1);
+    assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
+    assertEquals(assignment._availableInstances, new 
TreeSet<>(Arrays.asList("host2", "host3")));
   }
 
   @Test
@@ -201,6 +214,20 @@ public class TableRebalancerTest {
         
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap, 
targetInstanceStateMap, 2);
     assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
     assertEquals(assignment._availableInstances, new 
TreeSet<>(Arrays.asList("host5", "host6")));
+
+    // With increasing from 1 replica, next assignment should have 1 common 
instances with current assignment
+    // NOTE: This is the best we can do because we don't have 2 replicas 
available
+    currentInstanceStateMap = 
SegmentAssignmentUtils.getInstanceStateMap(Collections.singletonList("host1"), 
ONLINE);
+    targetInstanceStateMap =
+        SegmentAssignmentUtils.getInstanceStateMap(Arrays.asList("host2", 
"host3", "host4"), ONLINE);
+    // [host1, host2, host3]
+    assignment = 
TableRebalancer.getNextSingleSegmentAssignment(currentInstanceStateMap, 
targetInstanceStateMap, 2);
+    assertEquals(assignment._availableInstances, 
Collections.singleton("host1"));
+    // Next round should make the assignment the same as target assignment
+    assignment =
+        
TableRebalancer.getNextSingleSegmentAssignment(assignment._instanceStateMap, 
targetInstanceStateMap, 2);
+    assertEquals(assignment._instanceStateMap, targetInstanceStateMap);
+    assertEquals(assignment._availableInstances, new 
TreeSet<>(Arrays.asList("host2", "host3")));
   }
 
   @Test

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to