xyuanlu commented on code in PR #2189:
URL: https://github.com/apache/helix/pull/2189#discussion_r941605098
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/WagedRebalancer.java:
##########
@@ -562,6 +590,57 @@ private Map<String, ResourceAssignment> partialRebalance(
}
_partialRebalanceLatency.endMeasuringLatency();
LOG.info("Finish calculating the new best possible assignment.");
+
+ if (isBestPossibleChanged) {
+ LOG.info("Schedule a new rebalance after the new best possible
calculation has finished.");
+ RebalanceUtil.scheduleOnDemandPipeline(clusterData.getClusterName(), 0L,
false);
+ }
+ }
+
+ private Map<String, ResourceAssignment> emergencyRebalance(
+ ResourceControllerDataProvider clusterData, Map<String, Resource>
resourceMap,
+ Set<String> activeNodes, final CurrentStateOutput currentStateOutput,
+ RebalanceAlgorithm algorithm)
+ throws HelixRebalanceException {
+ Map<String, ResourceAssignment> currentBestPossibleAssignment =
+ getBestPossibleAssignment(_assignmentMetadataStore, currentStateOutput,
+ resourceMap.keySet());
+ ClusterModel clusterModel;
+ try {
+ clusterModel = ClusterModelProvider
+ .generateClusterModelForEmergencyRebalance(clusterData, resourceMap,
activeNodes,
+ currentBestPossibleAssignment);
+ } catch (Exception ex) {
+ throw new HelixRebalanceException("Failed to generate cluster model for
emergency rebalance.",
+ HelixRebalanceException.Type.INVALID_CLUSTER_STATUS, ex);
+ }
+
+ // Only calculate if there are illegal placements, meaning the cluster
model is non null;
+ // otherwise, start partial rebalance and return the current best possible.
+ if (clusterModel == null) {
+ // Perform partial rebalance for a new best possible assignment
+ partialRebalance(clusterData, resourceMap, activeNodes,
currentStateOutput, algorithm);
+ return currentBestPossibleAssignment;
+ }
+
+ _asyncPartialRebalanceResult.cancel(true);
+ partialRebalance(clusterData, resourceMap, activeNodes,
currentStateOutput, algorithm);
+
+ Map<String, ResourceAssignment> newAssignment =
calculateAssignment(clusterModel, algorithm);
+ if (_assignmentMetadataStore != null) {
+ try {
+ _writeLatency.startMeasuringLatency();
+ _assignmentMetadataStore.persistBestPossibleAssignment(newAssignment);
+ _writeLatency.endMeasuringLatency();
+ } catch (Exception ex) {
+ throw new HelixRebalanceException("Failed to persist the new best
possible assignment.",
+ HelixRebalanceException.Type.INVALID_REBALANCER_STATUS, ex);
+ }
+ } else {
+ LOG.debug("Assignment Metadata Store is null. Skip persisting the best
possible assignment.");
Review Comment:
Q: Shouldn't this be an error or at least a warning log? Why do we have debug
here?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]