siddharthteotia commented on a change in pull request #4446: Add support in the
rebalancer for the user to provide minimum number of serving replicas
URL: https://github.com/apache/incubator-pinot/pull/4446#discussion_r307194232
##########
File path:
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/TableRebalancer.java
##########
@@ -257,47 +390,162 @@ private void setTargetState(IdealState idealState,
String segmentId, Map<String,
}
/**
- * Check if IdealState = ExternalView. If its not equal, return the number
of differing segments.
+ * Check if external view has converged to ideal state
+ * @param tableName name of table that we are rebalancing
+ * @return true if external view is same as ideal state, false otherwise
*/
- public int isStable(String tableName) {
+ private boolean isStable(String tableName) {
IdealState idealState =
_helixAdmin.getResourceIdealState(_helixClusterName, tableName);
ExternalView externalView =
_helixAdmin.getResourceExternalView(_helixClusterName, tableName);
Map<String, Map<String, String>> mapFieldsIS =
idealState.getRecord().getMapFields();
Map<String, Map<String, String>> mapFieldsEV =
externalView.getRecord().getMapFields();
- int numDiff = 0;
+
+ LOGGER.info("Checking if ideal state and external view are same for table
{}", tableName);
+
+ boolean stable = true;
for (String segment : mapFieldsIS.keySet()) {
Map<String, String> mapIS = mapFieldsIS.get(segment);
Map<String, String> mapEV = mapFieldsEV.get(segment);
+ if (mapEV == null) {
+ LOGGER.debug("Host-state mapping of segment {} not yet available in
external view", segment);
+ // we have found that external view hasn't yet converged to ideal
state.
+ // still go on to check for other segments just so that we can dump
debug
+ // messages or we can detect error. that's why we don't return
+ // false immediately
+ stable = false;
+ continue;
+ }
+
+ if (LOGGER.isDebugEnabled()) {
+ LOGGER.debug("Hosts and states for segment {} in ideal state",
segment);
+ prettyPrintMapDebug(mapIS);
+ LOGGER.debug("Hosts and states for segment {} in external view",
segment);
+ prettyPrintMapDebug(mapEV);
+ }
+
for (String server : mapIS.keySet()) {
- String state = mapIS.get(server);
- if (mapEV == null || mapEV.get(server) == null ||
!mapEV.get(server).equals(state)) {
- LOGGER.debug("Mismatch: segment" + segment + " server:" + server + "
state:" + state);
- numDiff = numDiff + 1;
+ if (!mapEV.containsKey(server)) {
+ LOGGER.debug("Host-state mapping of segment {} doesn't yet have
server {} in external view",
+ segment, server);
+ // external view not yet converged
+ stable = false;
+ } else if (mapEV.get(server).equalsIgnoreCase("error")) {
+ LOGGER.error("Detected error state for segment {} for server {}",
segment, server);
+ prettyPrintMapError(mapIS);
+ prettyPrintMapError(mapEV);
+ throw new IllegalStateException("External view reports error state
for segment " + segment + " for host " + server,
+ new ExternalViewErrored());
+ } else {
+ final String stateInIdealState = mapIS.get(server);
+ final String stateInExternalView = mapEV.get(server);
+ if (!stateInIdealState.equalsIgnoreCase(stateInExternalView)) {
+ LOGGER.debug("Host-state mapping of segment {} has state {} in
external view and state {} in ideal state",
+ segment, stateInExternalView, stateInIdealState);
+ // external view not yet converged
+ stable = false;
+ }
}
}
}
- return numDiff;
+ return stable;
+ }
+
+ private static void prettyPrintMapDebug(final Map<String, String> map) {
+ final Joiner.MapJoiner mapJoiner =
Joiner.on(",").withKeyValueSeparator(":");
+ LOGGER.debug(mapJoiner.join(map));
+ }
+
+ private static void prettyPrintMapError(final Map<String, String> map) {
+ final Joiner.MapJoiner mapJoiner =
Joiner.on(",").withKeyValueSeparator(":");
+ LOGGER.error(mapJoiner.join(map));
}
/**
* Wait till state has stabilized {@link #isStable(String)}
*/
- private void waitForStable(String resourceName)
- throws InterruptedException {
- int diff;
+ private void waitForStable(final String resourceName) {
int INITIAL_WAIT_MS = 3000;
- Thread.sleep(INITIAL_WAIT_MS);
- do {
- diff = isStable(resourceName);
- if (diff == 0) {
- break;
- } else {
- LOGGER.info(
- "Waiting for externalView to match idealstate for table:" +
resourceName + " Num segments difference:"
- + diff);
- Thread.sleep(EXTERNAL_VIEW_CHECK_INTERVAL_MS);
- }
- } while (diff > 0);
+ try {
+ Thread.sleep(INITIAL_WAIT_MS);
+ while (true) {
+ if (isStable(resourceName)) {
+ break;
+ } else {
+ LOGGER.info("Waiting for externalView to match idealstate for
table:" + resourceName);
+ Thread.sleep(EXTERNAL_VIEW_CHECK_INTERVAL_MS);
Review comment:
Although we now bail out on detecting error in external view, there is still
a possibility of rebalancer waiting indefinitely for external view to converge
simply because the changes haven't yet reflected and there is no error. I feel
after updating ideal state in ZK when we come here to wait for stabilization,
we should spend a fix amount of time in this method if isStable() never returns
true
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]