somandal commented on code in PR #15317:
URL: https://github.com/apache/pinot/pull/15317#discussion_r2010963378
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/DefaultRebalancePreChecker.java:
##########
@@ -279,6 +299,48 @@ private RebalancePreCheckerResult
checkDiskUtilization(Map<String, Map<String, S
: RebalancePreCheckerResult.error(message.toString());
}
+ private RebalancePreCheckerResult checkRebalanceConfig(RebalanceConfig
rebalanceConfig, TableConfig tableConfig,
+ Map<String, Map<String, String>> currentAssignment, Map<String,
Map<String, String>> targetAssignment) {
+ StringBuilder message = new StringBuilder();
+ boolean pass = true;
+ if (rebalanceConfig.isBestEfforts()) {
+ pass = false;
+ message.append("bestEfforts is enabled, only enable it if you know what
you are doing\n");
+ }
+ List<String> segmentsToMove =
SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment);
+
+ int numReplicas = Integer.MAX_VALUE;
+ for (String segment : segmentsToMove) {
+ numReplicas = Math.min(targetAssignment.get(segment).size(),
numReplicas);
+ }
+
+ if (rebalanceConfig.isDowntime() && !segmentsToMove.isEmpty() &&
numReplicas > 1) {
+ pass = false;
+ message.append("Number of replicas (")
+ .append(numReplicas)
+ .append(") is greater than 1, downtime is not recommended.\n");
+ }
+ if (rebalanceConfig.getMinAvailableReplicas() >= 0) {
+ // For non-negative value, use it as min available replicas
+ if (rebalanceConfig.getMinAvailableReplicas() >= numReplicas) {
+ pass = false;
+ message.append("The number of replicas (")
+ .append(numReplicas)
+ .append(") is less than minAvailableReplicas. The rebalance would
fail.\n");
+ }
+ }
+ if (!rebalanceConfig.isIncludeConsuming() && tableConfig.getTableType() ==
TableType.REALTIME) {
+ pass = false;
+ message.append("includeConsuming is disabled for a realtime table.\n");
+ }
+ if (rebalanceConfig.isBootstrap()) {
+ pass = false;
+ message.append("bootstrap is enabled, only enable it if you know what
you are doing\n");
Review Comment:
nit: let's reword this: "bootstrap is enabled, which can cause a large amount
of data movement; double check if this is intended"
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/DefaultRebalancePreChecker.java:
##########
@@ -279,6 +299,48 @@ private RebalancePreCheckerResult
checkDiskUtilization(Map<String, Map<String, S
: RebalancePreCheckerResult.error(message.toString());
}
+ private RebalancePreCheckerResult checkRebalanceConfig(RebalanceConfig
rebalanceConfig, TableConfig tableConfig,
+ Map<String, Map<String, String>> currentAssignment, Map<String,
Map<String, String>> targetAssignment) {
+ StringBuilder message = new StringBuilder();
+ boolean pass = true;
+ if (rebalanceConfig.isBestEfforts()) {
+ pass = false;
+ message.append("bestEfforts is enabled, only enable it if you know what
you are doing\n");
+ }
+ List<String> segmentsToMove =
SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment);
+
+ int numReplicas = Integer.MAX_VALUE;
+ for (String segment : segmentsToMove) {
+ numReplicas = Math.min(targetAssignment.get(segment).size(),
numReplicas);
+ }
Review Comment:
Can you move all of this into:
```
if (rebalanceConfig.isDowntime()) {
}
```
That way we don't need to compute `segmentsToMove` and `numReplicas` at all when `downtime=false`.
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/DefaultRebalancePreChecker.java:
##########
@@ -279,6 +299,48 @@ private RebalancePreCheckerResult
checkDiskUtilization(Map<String, Map<String, S
: RebalancePreCheckerResult.error(message.toString());
}
+ private RebalancePreCheckerResult checkRebalanceConfig(RebalanceConfig
rebalanceConfig, TableConfig tableConfig,
+ Map<String, Map<String, String>> currentAssignment, Map<String,
Map<String, String>> targetAssignment) {
+ StringBuilder message = new StringBuilder();
+ boolean pass = true;
+ if (rebalanceConfig.isBestEfforts()) {
+ pass = false;
+ message.append("bestEfforts is enabled, only enable it if you know what
you are doing\n");
+ }
+ List<String> segmentsToMove =
SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment);
+
+ int numReplicas = Integer.MAX_VALUE;
+ for (String segment : segmentsToMove) {
+ numReplicas = Math.min(targetAssignment.get(segment).size(),
numReplicas);
+ }
+
+ if (rebalanceConfig.isDowntime() && !segmentsToMove.isEmpty() &&
numReplicas > 1) {
+ pass = false;
+ message.append("Number of replicas (")
+ .append(numReplicas)
+ .append(") is greater than 1, downtime is not recommended.\n");
+ }
+ if (rebalanceConfig.getMinAvailableReplicas() >= 0) {
+ // For non-negative value, use it as min available replicas
+ if (rebalanceConfig.getMinAvailableReplicas() >= numReplicas) {
+ pass = false;
+ message.append("The number of replicas (")
+ .append(numReplicas)
+ .append(") is less than minAvailableReplicas. The rebalance would
fail.\n");
+ }
+ }
+ if (!rebalanceConfig.isIncludeConsuming() && tableConfig.getTableType() ==
TableType.REALTIME) {
+ pass = false;
+ message.append("includeConsuming is disabled for a realtime table.\n");
+ }
+ if (rebalanceConfig.isBootstrap()) {
+ pass = false;
+ message.append("bootstrap is enabled, only enable it if you know what
you are doing\n");
+ }
+
+ return pass ? RebalancePreCheckerResult.pass("") :
RebalancePreCheckerResult.warn(message.toString());
Review Comment:
Can we add a pass message? Even something simple like "No rebalance
parameters to double check found" would do.
##########
pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java:
##########
@@ -862,31 +881,45 @@ public void testRebalancePreChecks()
checkRebalancePreCheckStatus(rebalanceResult, RebalanceResult.Status.NO_OP,
"Instance assignment not allowed, no need for minimizeDataMovement",
RebalancePreCheckerResult.PreCheckStatus.PASS, "Reload needed prior to
running rebalance",
- RebalancePreCheckerResult.PreCheckStatus.WARN);
+ RebalancePreCheckerResult.PreCheckStatus.WARN, "",
RebalancePreCheckerResult.PreCheckStatus.PASS);
// Keep schema change and update table config to add minimizeDataMovement
tableConfig.setInstanceAssignmentConfigMap(createInstanceAssignmentConfigMap(true));
rebalanceResult = _tableRebalancer.rebalance(tableConfig, rebalanceConfig,
null);
checkRebalancePreCheckStatus(rebalanceResult, RebalanceResult.Status.NO_OP,
"minimizeDataMovement is enabled",
RebalancePreCheckerResult.PreCheckStatus.PASS,
- "Reload needed prior to running rebalance",
RebalancePreCheckerResult.PreCheckStatus.WARN);
+ "Reload needed prior to running rebalance",
RebalancePreCheckerResult.PreCheckStatus.WARN, "",
+ RebalancePreCheckerResult.PreCheckStatus.PASS);
// Keep schema change and update table config to add instance config map
with minimizeDataMovement = false
tableConfig.setInstanceAssignmentConfigMap(createInstanceAssignmentConfigMap(false));
rebalanceResult = _tableRebalancer.rebalance(tableConfig, rebalanceConfig,
null);
checkRebalancePreCheckStatus(rebalanceResult, RebalanceResult.Status.NO_OP,
- "minimizeDataMovement is not enabled but instance assignment is
allowed",
- RebalancePreCheckerResult.PreCheckStatus.WARN, "Reload needed prior to
running rebalance",
- RebalancePreCheckerResult.PreCheckStatus.WARN);
+ "minimizeDataMovement is enabled",
+ RebalancePreCheckerResult.PreCheckStatus.PASS, "Reload needed prior to
running rebalance",
+ RebalancePreCheckerResult.PreCheckStatus.WARN, "",
RebalancePreCheckerResult.PreCheckStatus.PASS);
// Add a new server (to force change in instance assignment) and enable
reassignInstances
- BaseServerStarter serverStarter1 = startOneServer(NUM_SERVERS);
+ BaseServerStarter serverStarter1 = startOneServer(NUM_SERVERS + 1);
rebalanceConfig.setReassignInstances(true);
+ rebalanceConfig.setMinAvailableReplicas(-1);
tableConfig.setInstanceAssignmentConfigMap(null);
rebalanceResult = _tableRebalancer.rebalance(tableConfig, rebalanceConfig,
null);
checkRebalancePreCheckStatus(rebalanceResult, RebalanceResult.Status.DONE,
"Instance assignment not allowed, no need for minimizeDataMovement",
RebalancePreCheckerResult.PreCheckStatus.PASS, "Reload needed prior to
running rebalance",
+ RebalancePreCheckerResult.PreCheckStatus.WARN,
+ "", RebalancePreCheckerResult.PreCheckStatus.PASS);
+
+ // Rebalance is expected to fail because there's one available replica.
Rebalance config check should warn
+ rebalanceConfig.setMinAvailableReplicas(getNumReplicas());
+ rebalanceResult = _tableRebalancer.rebalance(tableConfig, rebalanceConfig,
null);
+ checkRebalancePreCheckStatus(rebalanceResult, RebalanceResult.Status.DONE,
+ "Instance assignment not allowed, no need for minimizeDataMovement",
+ RebalancePreCheckerResult.PreCheckStatus.PASS, "Reload needed prior to
running rebalance",
+ RebalancePreCheckerResult.PreCheckStatus.WARN,
+ "The number of replicas (" + getNumReplicas()
Review Comment:
As mentioned, let's not have this replicas check. Instead, can you test flags
like `bestEfforts`, `bootstrap`, and `downtime` when replication > 1?
##########
pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/rebalance/DefaultRebalancePreChecker.java:
##########
@@ -279,6 +299,48 @@ private RebalancePreCheckerResult
checkDiskUtilization(Map<String, Map<String, S
: RebalancePreCheckerResult.error(message.toString());
}
+ private RebalancePreCheckerResult checkRebalanceConfig(RebalanceConfig
rebalanceConfig, TableConfig tableConfig,
+ Map<String, Map<String, String>> currentAssignment, Map<String,
Map<String, String>> targetAssignment) {
+ StringBuilder message = new StringBuilder();
+ boolean pass = true;
+ if (rebalanceConfig.isBestEfforts()) {
+ pass = false;
+ message.append("bestEfforts is enabled, only enable it if you know what
you are doing\n");
+ }
+ List<String> segmentsToMove =
SegmentAssignmentUtils.getSegmentsToMove(currentAssignment, targetAssignment);
+
+ int numReplicas = Integer.MAX_VALUE;
+ for (String segment : segmentsToMove) {
+ numReplicas = Math.min(targetAssignment.get(segment).size(),
numReplicas);
+ }
+
+ if (rebalanceConfig.isDowntime() && !segmentsToMove.isEmpty() &&
numReplicas > 1) {
+ pass = false;
+ message.append("Number of replicas (")
+ .append(numReplicas)
+ .append(") is greater than 1, downtime is not recommended.\n");
+ }
+ if (rebalanceConfig.getMinAvailableReplicas() >= 0) {
+ // For non-negative value, use it as min available replicas
+ if (rebalanceConfig.getMinAvailableReplicas() >= numReplicas) {
+ pass = false;
+ message.append("The number of replicas (")
+ .append(numReplicas)
+ .append(") is less than minAvailableReplicas. The rebalance would
fail.\n");
+ }
+ }
Review Comment:
I think we can skip this check, since the actual rebalance will check and
fail anyway if this happens.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]