junkaixue commented on code in PR #2699:
URL: https://github.com/apache/helix/pull/2699#discussion_r1419676466
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java:
##########
@@ -190,16 +224,39 @@ private static int estimateAvgReplicaCount(int
replicaCount, int instanceCount)
return (int) Math.floor((float) replicaCount / instanceCount);
}
+ /**
+ * Estimates the max utilization number from all capacity categories and
their usages.
+ * If the preferredScoringKey is specified then max utilization number is
computed based op the
+ * specified capacity category (key) only.
+ *
+ * For example, if totalCapacity is {CPU: 0.6, MEM: 0.7, DISK: 0.9},
totalUsage is {CPU: 0.1, MEM: 0.2, DISK: 0.3},
+ * preferredScoringKey: CPU. Then this call shall return 0.16. If
preferredScoringKey
+ * is not specified, this call returns 0.33 which would be the max
utilization for the DISK.
+ *
+ * @param totalCapacity Sum total of max capacity of all active nodes
managed by a rebalancer
+ * @param totalUsage Sum total of capacity usage of all partition
replicas that are managed by the rebalancer
+ * @param preferredScoringKey if provided, the max utilization will be
calculated based on
+ * the supplied key only, else across all
capacity categories.
+ * @param clusterName clusterName is used for logging purposes only.
+ * @return The max utilization number from the specified capacity categories.
+ */
+
private static float estimateMaxUtilization(Map<String, Integer>
totalCapacity,
- Map<String, Integer> totalUsage) {
+ Map<String, Integer> totalUsage,
+ String preferredScoringKey,
String clusterName) {
float estimatedMaxUsage = 0;
- for (String capacityKey : totalCapacity.keySet()) {
+ Set<String> capacityKeySet = totalCapacity.keySet();
+ if (preferredScoringKey != null &&
capacityKeySet.contains(preferredScoringKey)) {
+ capacityKeySet = ImmutableSet.of(preferredScoringKey);
+ }
+ for (String capacityKey : capacityKeySet) {
int maxCapacity = totalCapacity.get(capacityKey);
int usage = totalUsage.getOrDefault(capacityKey, 0);
float utilization = (maxCapacity == 0) ? 1 : (float) usage / maxCapacity;
estimatedMaxUsage = Math.max(estimatedMaxUsage, utilization);
}
-
+ LOG.info("[DEPEND-29018] clusterName: {}, totalCapacity: {}, totalUsage:
{}, preferredScoringKey: {}, estimatedMaxUsage: {}",
Review Comment:
And here.
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/TopStateMaxCapacityUsageInstanceConstraint.java:
##########
@@ -41,7 +44,10 @@ protected double getAssignmentScore(AssignableNode node,
AssignableReplica repli
}
float estimatedTopStateMaxUtilization =
clusterContext.getEstimatedTopStateMaxUtilization();
float projectedHighestUtilization =
- node.getTopStateProjectedHighestUtilization(replica.getCapacity());
- return computeUtilizationScore(estimatedTopStateMaxUtilization,
projectedHighestUtilization);
+ node.getTopStateProjectedHighestUtilization(replica.getCapacity(),
clusterContext.getPreferredScoringKey());
+ double utilizationScore =
computeUtilizationScore(estimatedTopStateMaxUtilization,
projectedHighestUtilization);
+ LOG.info("[DEPEND-29018] clusterName: {}, estimatedTopStateMaxUtilization:
{}, projectedHighestUtilization: {}, utilizationScore: {}, preferredScoringKey:
{}",
Review Comment:
Same here. This is not allowed.
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/AssignableNode.java:
##########
@@ -253,18 +276,46 @@ public float
getGeneralProjectedHighestUtilization(Map<String, Integer> newUsage
* @return The highest utilization number of the node among all the capacity
category.
*/
public float getTopStateProjectedHighestUtilization(Map<String, Integer>
newUsage) {
- return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity);
+ return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity, null);
+ }
+
+ /**
+ * Return the most concerning capacity utilization number for evenly
partition assignment.
+ * The method dynamically calculates the projected highest utilization
number among all the
+ * capacity categories assuming the new capacity usage is added to the node.
+ *
+ * If the preferredScoringKey is specified then utilization number is
computed based op the
+ * specified capacity category (key) only.
+ *
+ * For example, if the current node usage is {CPU: 0.9, MEM: 0.4, DISK:
0.6}, preferredScoringKey: CPU
+ * Then this call shall return 0.9.
+ *
+ * This function returns projected highest utilization for only top state
partitions.
+ *
+ * @param newUsage the proposed new additional capacity usage.
+ * @param preferredScoringKey if provided, the capacity utilization will be
calculated based on
+ * the supplied key only, else across all
capacity categories.
+ * @return The highest utilization number of the node among all the capacity
category.
+ */
+ public float getTopStateProjectedHighestUtilization(Map<String, Integer>
newUsage, String preferredScoringKey) {
+ return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity, preferredScoringKey);
}
private float getProjectedHighestUtilization(Map<String, Integer> newUsage,
- Map<String, Integer> remainingCapacity) {
+ Map<String, Integer> remainingCapacity, String preferredScoringKey) {
+ Set<String> capacityKeySet = _maxAllowedCapacity.keySet();
+ if (preferredScoringKey != null &&
capacityKeySet.contains(preferredScoringKey)) {
+ capacityKeySet = ImmutableSet.of(preferredScoringKey);
+ }
float highestCapacityUtilization = 0;
- for (String capacityKey : _maxAllowedCapacity.keySet()) {
+ for (String capacityKey : capacityKeySet) {
float capacityValue = _maxAllowedCapacity.get(capacityKey);
float utilization = (capacityValue - remainingCapacity.get(capacityKey)
+ newUsage
.getOrDefault(capacityKey, 0)) / capacityValue;
highestCapacityUtilization = Math.max(highestCapacityUtilization,
utilization);
}
+ LOG.info("[DEPEND-29018] clusterName: {}, newUsage: {}, remainingCapacity:
{}, preferredScoringKey: {}, highestCapacityUtilization: {}",
Review Comment:
And here.
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/constraints/MaxCapacityUsageInstanceConstraint.java:
##########
@@ -31,13 +33,17 @@
* It is a greedy approach since it evaluates only on the most used capacity
key.
*/
class MaxCapacityUsageInstanceConstraint extends UsageSoftConstraint {
+ private static final Logger LOG =
LoggerFactory.getLogger(MaxCapacityUsageInstanceConstraint.class.getName());
@Override
protected double getAssignmentScore(AssignableNode node, AssignableReplica
replica,
ClusterContext clusterContext) {
float estimatedMaxUtilization =
clusterContext.getEstimatedMaxUtilization();
float projectedHighestUtilization =
- node.getGeneralProjectedHighestUtilization(replica.getCapacity());
- return computeUtilizationScore(estimatedMaxUtilization,
projectedHighestUtilization);
+ node.getGeneralProjectedHighestUtilization(replica.getCapacity(),
clusterContext.getPreferredScoringKey());
+ double utilizationScore = computeUtilizationScore(estimatedMaxUtilization,
projectedHighestUtilization);
+ LOG.info("[DEPEND-29018] clusterName: {}, estimatedMaxUtilization: {},
projectedHighestUtilization: {}, utilizationScore: {}, preferredScoringKey: {}",
Review Comment:
+1. Also let's not have this at info level, debug level should be better.
Otherwise, it will be per replica per partition per resource log. That's too
much.
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/ClusterContext.java:
##########
@@ -85,14 +102,14 @@ public class ClusterContext {
if (replica.isReplicaTopState()) {
totalTopStateReplicas += 1;
replica.getCapacity().forEach(
- (key, value) -> totalTopStateUsage.compute(key, (k, v) -> (v ==
null) ? value : (v + value)));
+ (key, value) -> totalTopStateUsage.compute(key, (k, v) -> (v
== null) ? value : (v + value)));
Review Comment:
Helix has its own formatter. Let's not use other formatter style template.
##########
helix-core/src/main/java/org/apache/helix/controller/rebalancer/waged/model/AssignableNode.java:
##########
@@ -253,18 +276,46 @@ public float
getGeneralProjectedHighestUtilization(Map<String, Integer> newUsage
* @return The highest utilization number of the node among all the capacity
category.
*/
public float getTopStateProjectedHighestUtilization(Map<String, Integer>
newUsage) {
- return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity);
+ return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity, null);
+ }
+
+ /**
+ * Return the most concerning capacity utilization number for evenly
partition assignment.
+ * The method dynamically calculates the projected highest utilization
number among all the
+ * capacity categories assuming the new capacity usage is added to the node.
+ *
+ * If the preferredScoringKey is specified then utilization number is
computed based op the
+ * specified capacity category (key) only.
+ *
+ * For example, if the current node usage is {CPU: 0.9, MEM: 0.4, DISK:
0.6}, preferredScoringKey: CPU
+ * Then this call shall return 0.9.
+ *
+ * This function returns projected highest utilization for only top state
partitions.
+ *
+ * @param newUsage the proposed new additional capacity usage.
+ * @param preferredScoringKey if provided, the capacity utilization will be
calculated based on
+ * the supplied key only, else across all
capacity categories.
+ * @return The highest utilization number of the node among all the capacity
category.
+ */
+ public float getTopStateProjectedHighestUtilization(Map<String, Integer>
newUsage, String preferredScoringKey) {
+ return getProjectedHighestUtilization(newUsage,
_remainingTopStateCapacity, preferredScoringKey);
}
private float getProjectedHighestUtilization(Map<String, Integer> newUsage,
- Map<String, Integer> remainingCapacity) {
+ Map<String, Integer> remainingCapacity, String preferredScoringKey) {
+ Set<String> capacityKeySet = _maxAllowedCapacity.keySet();
+ if (preferredScoringKey != null &&
capacityKeySet.contains(preferredScoringKey)) {
+ capacityKeySet = ImmutableSet.of(preferredScoringKey);
Review Comment:
I don't get this. Does this mean the preferred score will override all
others? Then why we just setup one dimension instead of having other dimension
for computation? That would be easier?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]