This is an automated email from the ASF dual-hosted git repository.

jlli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 8a80760  Refactor toggleInstanceState API (#4560)
8a80760 is described below

commit 8a80760a20483b878510d39a31e020400c5ee06c
Author: Jialiang Li <[email protected]>
AuthorDate: Tue Oct 1 10:10:30 2019 -0700

    Refactor toggleInstanceState API (#4560)
    
    * Fix toggle instance API
    
    * Fix ControllerInstanceToggleTest
---
 .../resources/PinotInstanceRestletResource.java    | 12 ++--
 .../helix/core/PinotHelixResourceManager.java      | 79 ++++++++++++++--------
 .../helix/ControllerInstanceToggleTest.java        | 26 +++++--
 3 files changed, 78 insertions(+), 39 deletions(-)

diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
index 79788c1..e6c0553 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
@@ -130,13 +130,17 @@ public class PinotInstanceRestletResource {
     }
 
     if (StateType.ENABLE.name().equalsIgnoreCase(state)) {
-      if 
(!pinotHelixResourceManager.enableInstance(instanceName).isSuccessful()) {
-        throw new ControllerApplicationException(LOGGER, "Failed to enable 
instance " + instanceName,
+      PinotResourceManagerResponse response = 
pinotHelixResourceManager.enableInstance(instanceName);
+      if (!response.isSuccessful()) {
+        throw new ControllerApplicationException(LOGGER,
+            "Failed to enable instance " + instanceName + " - " + 
response.getMessage(),
             Response.Status.INTERNAL_SERVER_ERROR);
       }
     } else if (StateType.DISABLE.name().equalsIgnoreCase(state)) {
-      if 
(!pinotHelixResourceManager.disableInstance(instanceName).isSuccessful()) {
-        throw new ControllerApplicationException(LOGGER, "Failed to disable 
instance " + instanceName,
+      PinotResourceManagerResponse response = 
pinotHelixResourceManager.disableInstance(instanceName);
+      if (!response.isSuccessful()) {
+        throw new ControllerApplicationException(LOGGER,
+            "Failed to disable instance " + instanceName + " - " + 
response.getMessage(),
             Response.Status.INTERNAL_SERVER_ERROR);
       }
     } else if (StateType.DROP.name().equalsIgnoreCase(state)) {
diff --git 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index 9afd643..81ebe7f 100644
--- 
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++ 
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -2084,11 +2084,11 @@ public class PinotHelixResourceManager {
   }
 
   public PinotResourceManagerResponse enableInstance(String instanceName) {
-    return toggleInstance(instanceName, true, 10);
+    return enableInstance(instanceName, true, 10_000L);
   }
 
   public PinotResourceManagerResponse disableInstance(String instanceName) {
-    return toggleInstance(instanceName, false, 10);
+    return enableInstance(instanceName, false, 10_000L);
   }
 
   /**
@@ -2140,52 +2140,71 @@ public class PinotHelixResourceManager {
    * Keeps checking until ideal-state is successfully updated or times out.
    *
    * @param instanceName: Name of Instance for which the status needs to be 
toggled.
-   * @param toggle: 'True' for ONLINE 'False' for OFFLINE.
-   * @param timeOutInSeconds: Time-out for setting ideal-state.
+   * @param enableInstance: 'True' for enabling the instance and 'False' for 
disabling the instance.
+   * @param timeOutMs: Time-out for setting ideal-state.
    * @return
    */
-  public PinotResourceManagerResponse toggleInstance(String instanceName, 
boolean toggle, int timeOutInSeconds) {
+  private PinotResourceManagerResponse enableInstance(String instanceName, 
boolean enableInstance, long timeOutMs) {
     if (!instanceExists(instanceName)) {
       return PinotResourceManagerResponse.failure("Instance " + instanceName + 
" not found");
     }
 
-    _helixAdmin.enableInstance(_helixClusterName, instanceName, toggle);
-    long deadline = System.currentTimeMillis() + 1000 * timeOutInSeconds;
-    boolean toggleSucceed = false;
-    String beforeToggleStates =
-        (toggle) ? SegmentOnlineOfflineStateModel.OFFLINE : 
SegmentOnlineOfflineStateModel.ONLINE;
+    _helixAdmin.enableInstance(_helixClusterName, instanceName, 
enableInstance);
+    long intervalWaitTimeMs = 500L;
+    long deadline = System.currentTimeMillis() + timeOutMs;
+    String offlineState = SegmentOnlineOfflineStateModel.OFFLINE;
 
     while (System.currentTimeMillis() < deadline) {
-      toggleSucceed = true;
       PropertyKey liveInstanceKey = _keyBuilder.liveInstance(instanceName);
       LiveInstance liveInstance = 
_helixDataAccessor.getProperty(liveInstanceKey);
       if (liveInstance == null) {
-        return toggle ? PinotResourceManagerResponse.FAILURE : 
PinotResourceManagerResponse.SUCCESS;
-      }
-      PropertyKey instanceCurrentStatesKey = 
_keyBuilder.currentStates(instanceName, liveInstance.getSessionId());
-      List<CurrentState> instanceCurrentStates = 
_helixDataAccessor.getChildValues(instanceCurrentStatesKey);
-      if (instanceCurrentStates == null) {
-        return PinotResourceManagerResponse.SUCCESS;
+        if (!enableInstance) {
+          // If we disable the instance, we actually don't care whether live 
instance being null. Thus, returning success should be good.
+          // Otherwise, wait until timeout.
+          return PinotResourceManagerResponse.SUCCESS;
+        }
       } else {
-        for (CurrentState currentState : instanceCurrentStates) {
-          for (String state : currentState.getPartitionStateMap().values()) {
-            if (beforeToggleStates.equals(state)) {
-              toggleSucceed = false;
+        boolean toggleSucceeded = true;
+        // Checks all the current states fall into the target states
+        PropertyKey instanceCurrentStatesKey = 
_keyBuilder.currentStates(instanceName, liveInstance.getSessionId());
+        List<CurrentState> instanceCurrentStates = 
_helixDataAccessor.getChildValues(instanceCurrentStatesKey);
+        if (instanceCurrentStates.isEmpty()) {
+          return PinotResourceManagerResponse.SUCCESS;
+        } else {
+          for (CurrentState currentState : instanceCurrentStates) {
+            for (String state : currentState.getPartitionStateMap().values()) {
+              // If instance is enabled, all the partitions should not 
eventually be offline.
+              // If instance is disabled, all the partitions should eventually 
be offline.
+              // TODO: Handle the case when realtime segments are in OFFLINE 
state because there're some problem with realtime segment consumption,
+              //  and realtime segment will mark itself as OFFLINE in ideal 
state.
+              //  Issue: https://github.com/apache/incubator-pinot/issues/4653
+              if ((enableInstance && !offlineState.equals(state)) || 
(!enableInstance && offlineState.equals(state))) {
+                toggleSucceeded = false;
+                break;
+              }
+            }
+            if (!toggleSucceeded) {
+              break;
             }
           }
         }
-      }
-      if (toggleSucceed) {
-        return (toggle) ? PinotResourceManagerResponse.success("Instance " + 
instanceName + " enabled")
-            : PinotResourceManagerResponse.success("Instance " + instanceName 
+ " disabled");
-      } else {
-        try {
-          Thread.sleep(500);
-        } catch (InterruptedException e) {
+        if (toggleSucceeded) {
+          return (enableInstance) ? 
PinotResourceManagerResponse.success("Instance " + instanceName + " enabled")
+              : PinotResourceManagerResponse.success("Instance " + 
instanceName + " disabled");
         }
       }
+
+      try {
+        Thread.sleep(intervalWaitTimeMs);
+      } catch (InterruptedException e) {
+        LOGGER.warn("Got interrupted when sleeping for {}ms to wait until the 
current state matched for instance: {}",
+            intervalWaitTimeMs, instanceName);
+        return PinotResourceManagerResponse
+            .failure("Got interrupted when waiting for instance to be " + 
(enableInstance ? "enabled" : "disabled"));
+      }
     }
-    return PinotResourceManagerResponse.failure("Instance enable/disable 
failed, timeout");
+    return PinotResourceManagerResponse
+        .failure("Instance " + (enableInstance ? "enable" : "disable") + " 
failed, timeout");
   }
 
   public RebalanceResult rebalanceTable(String tableNameWithType, 
Configuration rebalanceConfig)
diff --git 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
index da923c6..2ccf9fd 100644
--- 
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
+++ 
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.controller.helix;
 
+import java.io.IOException;
 import java.util.Set;
 import org.apache.helix.model.ExternalView;
 import org.apache.pinot.common.config.TableConfig;
@@ -26,6 +27,7 @@ import org.apache.pinot.common.config.TagNameUtils;
 import org.apache.pinot.common.utils.CommonConstants;
 import org.apache.pinot.common.utils.helix.HelixHelper;
 import org.apache.pinot.controller.utils.SegmentMetadataMockUtils;
+import org.apache.pinot.util.TestUtils;
 import org.testng.Assert;
 import org.testng.annotations.AfterClass;
 import org.testng.annotations.BeforeClass;
@@ -34,6 +36,7 @@ import org.testng.annotations.Test;
 
 public class ControllerInstanceToggleTest extends ControllerTest {
   private static final String RAW_TABLE_NAME = "testTable";
+  private static final long TIMEOUT_MS = 10_000L;
   private static final String OFFLINE_TABLE_NAME = 
TableNameBuilder.OFFLINE.tableNameWithType(RAW_TABLE_NAME);
   private static final String SERVER_TAG_NAME = 
TagNameUtils.getOfflineTagForTenant(null);
   private static final String BROKER_TAG_NAME = 
TagNameUtils.getBrokerTagForTenant(null);
@@ -75,25 +78,25 @@ public class ControllerInstanceToggleTest extends 
ControllerTest {
     // Disable server instances
     int numEnabledInstances = NUM_INSTANCES;
     for (String instanceName : 
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(), 
SERVER_TAG_NAME)) {
-      
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName), 
"disable");
+      toggleInstanceState(instanceName, "disable");
       checkNumOnlineInstancesFromExternalView(OFFLINE_TABLE_NAME, 
--numEnabledInstances);
     }
 
     // Enable server instances
     for (String instanceName : 
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(), 
SERVER_TAG_NAME)) {
-      
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName), 
"ENABLE");
+      toggleInstanceState(instanceName, "ENABLE");
       checkNumOnlineInstancesFromExternalView(OFFLINE_TABLE_NAME, 
++numEnabledInstances);
     }
 
     // Disable broker instances
     for (String instanceName : 
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(), 
BROKER_TAG_NAME)) {
-      
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName), 
"Disable");
+      toggleInstanceState(instanceName, "Disable");
       
checkNumOnlineInstancesFromExternalView(CommonConstants.Helix.BROKER_RESOURCE_INSTANCE,
 --numEnabledInstances);
     }
 
     // Enable broker instances
     for (String instanceName : 
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(), 
BROKER_TAG_NAME)) {
-      
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName), 
"Enable");
+      toggleInstanceState(instanceName, "Enable");
       
checkNumOnlineInstancesFromExternalView(CommonConstants.Helix.BROKER_RESOURCE_INSTANCE,
 ++numEnabledInstances);
     }
 
@@ -104,9 +107,22 @@ public class ControllerInstanceToggleTest extends 
ControllerTest {
             .getPartitionSet().size(), 0);
   }
 
+  private void toggleInstanceState(String instanceName, String state) {
+    // It may take time for an instance to toggle the state.
+    TestUtils.waitForCondition(aVoid -> {
+      try {
+        
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName), 
state);
+      } catch (IOException ioe) {
+        // receive non-200 status code
+        return false;
+      }
+      return true;
+    }, TIMEOUT_MS, "Failed to toggle instance state: '" + state + "' for 
instance: " + instanceName);
+  }
+
   private void checkNumOnlineInstancesFromExternalView(String resourceName, 
int expectedNumOnlineInstances)
       throws InterruptedException {
-    long endTime = System.currentTimeMillis() + 10_000L;
+    long endTime = System.currentTimeMillis() + TIMEOUT_MS;
     while (System.currentTimeMillis() < endTime) {
       ExternalView resourceExternalView = 
_helixAdmin.getResourceExternalView(getHelixClusterName(), resourceName);
       Set<String> instanceSet = 
HelixHelper.getOnlineInstanceFromExternalView(resourceExternalView);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to