This is an automated email from the ASF dual-hosted git repository.
jlli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 8a80760 Refactor toggleInstanceState API (#4560)
8a80760 is described below
commit 8a80760a20483b878510d39a31e020400c5ee06c
Author: Jialiang Li <[email protected]>
AuthorDate: Tue Oct 1 10:10:30 2019 -0700
Refactor toggleInstanceState API (#4560)
* Fix toggle instance API
* Fix ControllerInstanceToggleTest
---
.../resources/PinotInstanceRestletResource.java | 12 ++--
.../helix/core/PinotHelixResourceManager.java | 79 ++++++++++++++--------
.../helix/ControllerInstanceToggleTest.java | 26 +++++--
3 files changed, 78 insertions(+), 39 deletions(-)
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
index 79788c1..e6c0553 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/api/resources/PinotInstanceRestletResource.java
@@ -130,13 +130,17 @@ public class PinotInstanceRestletResource {
}
if (StateType.ENABLE.name().equalsIgnoreCase(state)) {
- if
(!pinotHelixResourceManager.enableInstance(instanceName).isSuccessful()) {
- throw new ControllerApplicationException(LOGGER, "Failed to enable
instance " + instanceName,
+ PinotResourceManagerResponse response =
pinotHelixResourceManager.enableInstance(instanceName);
+ if (!response.isSuccessful()) {
+ throw new ControllerApplicationException(LOGGER,
+ "Failed to enable instance " + instanceName + " - " +
response.getMessage(),
Response.Status.INTERNAL_SERVER_ERROR);
}
} else if (StateType.DISABLE.name().equalsIgnoreCase(state)) {
- if
(!pinotHelixResourceManager.disableInstance(instanceName).isSuccessful()) {
- throw new ControllerApplicationException(LOGGER, "Failed to disable
instance " + instanceName,
+ PinotResourceManagerResponse response =
pinotHelixResourceManager.disableInstance(instanceName);
+ if (!response.isSuccessful()) {
+ throw new ControllerApplicationException(LOGGER,
+ "Failed to disable instance " + instanceName + " - " +
response.getMessage(),
Response.Status.INTERNAL_SERVER_ERROR);
}
} else if (StateType.DROP.name().equalsIgnoreCase(state)) {
diff --git
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
index 9afd643..81ebe7f 100644
---
a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
+++
b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/PinotHelixResourceManager.java
@@ -2084,11 +2084,11 @@ public class PinotHelixResourceManager {
}
public PinotResourceManagerResponse enableInstance(String instanceName) {
- return toggleInstance(instanceName, true, 10);
+ return enableInstance(instanceName, true, 10_000L);
}
public PinotResourceManagerResponse disableInstance(String instanceName) {
- return toggleInstance(instanceName, false, 10);
+ return enableInstance(instanceName, false, 10_000L);
}
/**
@@ -2140,52 +2140,71 @@ public class PinotHelixResourceManager {
* Keeps checking until ideal-state is successfully updated or times out.
*
* @param instanceName: Name of Instance for which the status needs to be
toggled.
- * @param toggle: 'True' for ONLINE 'False' for OFFLINE.
- * @param timeOutInSeconds: Time-out for setting ideal-state.
+ * @param enableInstance: 'True' for enabling the instance and 'False' for
disabling the instance.
+ * @param timeOutMs: Time-out for setting ideal-state.
* @return
*/
- public PinotResourceManagerResponse toggleInstance(String instanceName,
boolean toggle, int timeOutInSeconds) {
+ private PinotResourceManagerResponse enableInstance(String instanceName,
boolean enableInstance, long timeOutMs) {
if (!instanceExists(instanceName)) {
return PinotResourceManagerResponse.failure("Instance " + instanceName +
" not found");
}
- _helixAdmin.enableInstance(_helixClusterName, instanceName, toggle);
- long deadline = System.currentTimeMillis() + 1000 * timeOutInSeconds;
- boolean toggleSucceed = false;
- String beforeToggleStates =
- (toggle) ? SegmentOnlineOfflineStateModel.OFFLINE :
SegmentOnlineOfflineStateModel.ONLINE;
+ _helixAdmin.enableInstance(_helixClusterName, instanceName,
enableInstance);
+ long intervalWaitTimeMs = 500L;
+ long deadline = System.currentTimeMillis() + timeOutMs;
+ String offlineState = SegmentOnlineOfflineStateModel.OFFLINE;
while (System.currentTimeMillis() < deadline) {
- toggleSucceed = true;
PropertyKey liveInstanceKey = _keyBuilder.liveInstance(instanceName);
LiveInstance liveInstance =
_helixDataAccessor.getProperty(liveInstanceKey);
if (liveInstance == null) {
- return toggle ? PinotResourceManagerResponse.FAILURE :
PinotResourceManagerResponse.SUCCESS;
- }
- PropertyKey instanceCurrentStatesKey =
_keyBuilder.currentStates(instanceName, liveInstance.getSessionId());
- List<CurrentState> instanceCurrentStates =
_helixDataAccessor.getChildValues(instanceCurrentStatesKey);
- if (instanceCurrentStates == null) {
- return PinotResourceManagerResponse.SUCCESS;
+ if (!enableInstance) {
+ // If we disable the instance, we actually don't care whether live
instance being null. Thus, returning success should be good.
+ // Otherwise, wait until timeout.
+ return PinotResourceManagerResponse.SUCCESS;
+ }
} else {
- for (CurrentState currentState : instanceCurrentStates) {
- for (String state : currentState.getPartitionStateMap().values()) {
- if (beforeToggleStates.equals(state)) {
- toggleSucceed = false;
+ boolean toggleSucceeded = true;
+ // Checks all the current states fall into the target states
+ PropertyKey instanceCurrentStatesKey =
_keyBuilder.currentStates(instanceName, liveInstance.getSessionId());
+ List<CurrentState> instanceCurrentStates =
_helixDataAccessor.getChildValues(instanceCurrentStatesKey);
+ if (instanceCurrentStates.isEmpty()) {
+ return PinotResourceManagerResponse.SUCCESS;
+ } else {
+ for (CurrentState currentState : instanceCurrentStates) {
+ for (String state : currentState.getPartitionStateMap().values()) {
+ // If instance is enabled, all the partitions should not
eventually be offline.
+ // If instance is disabled, all the partitions should eventually
be offline.
+ // TODO: Handle the case when realtime segments are in OFFLINE
state because there're some problem with realtime segment consumption,
+ // and realtime segment will mark itself as OFFLINE in ideal
state.
+ // Issue: https://github.com/apache/incubator-pinot/issues/4653
+ if ((enableInstance && !offlineState.equals(state)) ||
(!enableInstance && offlineState.equals(state))) {
+ toggleSucceeded = false;
+ break;
+ }
+ }
+ if (!toggleSucceeded) {
+ break;
}
}
}
- }
- if (toggleSucceed) {
- return (toggle) ? PinotResourceManagerResponse.success("Instance " +
instanceName + " enabled")
- : PinotResourceManagerResponse.success("Instance " + instanceName
+ " disabled");
- } else {
- try {
- Thread.sleep(500);
- } catch (InterruptedException e) {
+ if (toggleSucceeded) {
+ return (enableInstance) ?
PinotResourceManagerResponse.success("Instance " + instanceName + " enabled")
+ : PinotResourceManagerResponse.success("Instance " +
instanceName + " disabled");
}
}
+
+ try {
+ Thread.sleep(intervalWaitTimeMs);
+ } catch (InterruptedException e) {
+ LOGGER.warn("Got interrupted when sleeping for {}ms to wait until the
current state matched for instance: {}",
+ intervalWaitTimeMs, instanceName);
+ return PinotResourceManagerResponse
+ .failure("Got interrupted when waiting for instance to be " +
(enableInstance ? "enabled" : "disabled"));
+ }
}
- return PinotResourceManagerResponse.failure("Instance enable/disable
failed, timeout");
+ return PinotResourceManagerResponse
+ .failure("Instance " + (enableInstance ? "enable" : "disable") + "
failed, timeout");
}
public RebalanceResult rebalanceTable(String tableNameWithType,
Configuration rebalanceConfig)
diff --git
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
index da923c6..2ccf9fd 100644
---
a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
+++
b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/ControllerInstanceToggleTest.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.controller.helix;
+import java.io.IOException;
import java.util.Set;
import org.apache.helix.model.ExternalView;
import org.apache.pinot.common.config.TableConfig;
@@ -26,6 +27,7 @@ import org.apache.pinot.common.config.TagNameUtils;
import org.apache.pinot.common.utils.CommonConstants;
import org.apache.pinot.common.utils.helix.HelixHelper;
import org.apache.pinot.controller.utils.SegmentMetadataMockUtils;
+import org.apache.pinot.util.TestUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
@@ -34,6 +36,7 @@ import org.testng.annotations.Test;
public class ControllerInstanceToggleTest extends ControllerTest {
private static final String RAW_TABLE_NAME = "testTable";
+ private static final long TIMEOUT_MS = 10_000L;
private static final String OFFLINE_TABLE_NAME =
TableNameBuilder.OFFLINE.tableNameWithType(RAW_TABLE_NAME);
private static final String SERVER_TAG_NAME =
TagNameUtils.getOfflineTagForTenant(null);
private static final String BROKER_TAG_NAME =
TagNameUtils.getBrokerTagForTenant(null);
@@ -75,25 +78,25 @@ public class ControllerInstanceToggleTest extends
ControllerTest {
// Disable server instances
int numEnabledInstances = NUM_INSTANCES;
for (String instanceName :
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(),
SERVER_TAG_NAME)) {
-
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName),
"disable");
+ toggleInstanceState(instanceName, "disable");
checkNumOnlineInstancesFromExternalView(OFFLINE_TABLE_NAME,
--numEnabledInstances);
}
// Enable server instances
for (String instanceName :
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(),
SERVER_TAG_NAME)) {
-
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName),
"ENABLE");
+ toggleInstanceState(instanceName, "ENABLE");
checkNumOnlineInstancesFromExternalView(OFFLINE_TABLE_NAME,
++numEnabledInstances);
}
// Disable broker instances
for (String instanceName :
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(),
BROKER_TAG_NAME)) {
-
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName),
"Disable");
+ toggleInstanceState(instanceName, "Disable");
checkNumOnlineInstancesFromExternalView(CommonConstants.Helix.BROKER_RESOURCE_INSTANCE,
--numEnabledInstances);
}
// Enable broker instances
for (String instanceName :
_helixAdmin.getInstancesInClusterWithTag(getHelixClusterName(),
BROKER_TAG_NAME)) {
-
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName),
"Enable");
+ toggleInstanceState(instanceName, "Enable");
checkNumOnlineInstancesFromExternalView(CommonConstants.Helix.BROKER_RESOURCE_INSTANCE,
++numEnabledInstances);
}
@@ -104,9 +107,22 @@ public class ControllerInstanceToggleTest extends
ControllerTest {
.getPartitionSet().size(), 0);
}
+ private void toggleInstanceState(String instanceName, String state) {
+ // It may take time for an instance to toggle the state.
+ TestUtils.waitForCondition(aVoid -> {
+ try {
+
sendPostRequest(_controllerRequestURLBuilder.forInstanceState(instanceName),
state);
+ } catch (IOException ioe) {
+ // receive non-200 status code
+ return false;
+ }
+ return true;
+ }, TIMEOUT_MS, "Failed to toggle instance state: '" + state + "' for
instance: " + instanceName);
+ }
+
private void checkNumOnlineInstancesFromExternalView(String resourceName,
int expectedNumOnlineInstances)
throws InterruptedException {
- long endTime = System.currentTimeMillis() + 10_000L;
+ long endTime = System.currentTimeMillis() + TIMEOUT_MS;
while (System.currentTimeMillis() < endTime) {
ExternalView resourceExternalView =
_helixAdmin.getResourceExternalView(getHelixClusterName(), resourceName);
Set<String> instanceSet =
HelixHelper.getOnlineInstanceFromExternalView(resourceExternalView);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]