siddhantsangwan commented on code in PR #6241:
URL: https://github.com/apache/ozone/pull/6241#discussion_r1524695054
##########
hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java:
##########
@@ -38,49 +38,98 @@ public class ContainerBalancerStartSubcommand extends
ScmSubcommand {
@Option(names = {"-t", "--threshold"},
description = "Percentage deviation from average utilization of " +
- "the cluster after which a datanode will be rebalanced (for " +
- "example, '10' for 10%%).")
+ "the cluster after which a datanode will be rebalanced. Value " +
+ "should be in the range [0.0, 100.0) and default value is 10 " +
+ "(for example, '10' for 10%%).")
private Optional<Double> threshold;
@Option(names = {"-i", "--iterations"},
- description = "Maximum consecutive iterations that" +
- " balancer will run for.")
+ description = "Maximum consecutive iterations that " +
+ "balancer will run for. Value should be positive " +
+ "or -1 and default value is 10 (for example, '10' for 10
iterations).")
private Optional<Integer> iterations;
@Option(names = {"-d", "--max-datanodes-percentage-to-involve-per-iteration",
"--maxDatanodesPercentageToInvolvePerIteration"},
description = "Max percentage of healthy, in service datanodes " +
- "that can be involved in balancing in one iteration (for example, " +
- "'20' for 20%%).")
+ "that can be involved in balancing in one iteration. Value " +
+ "should be in the range [0,100] and default value is 20 (for " +
+ "example, '20' for 20%%).")
private Optional<Integer> maxDatanodesPercentageToInvolvePerIteration;
@Option(names = {"-s", "--max-size-to-move-per-iteration-in-gb",
"--maxSizeToMovePerIterationInGB"},
description = "Maximum size that can be moved per iteration of " +
- "balancing (for example, '500' for 500GB).")
+ "balancing. Value should be positive and default value is 500 " +
+ "(for example, '500' for 500GB).")
private Optional<Long> maxSizeToMovePerIterationInGB;
@Option(names = {"-e", "--max-size-entering-target-in-gb",
"--maxSizeEnteringTargetInGB"},
description = "Maximum size that can enter a target datanode while " +
- "balancing. This is the sum of data from multiple sources (for " +
- "example, '26' for 26GB).")
+ "balancing. This is the sum of data from multiple sources. Value " +
+ "should be positive and default value is 26 (for example, " +
+ "'26' for 26GB).")
private Optional<Long> maxSizeEnteringTargetInGB;
@Option(names = {"-l", "--max-size-leaving-source-in-gb",
"--maxSizeLeavingSourceInGB"},
description = "Maximum size that can leave a source datanode while " +
- "balancing. This is the sum of data moving to multiple targets " +
+ "balancing. This is the sum of data moving to multiple targets. " +
+ "Value should be positive and default value is 26 " +
"(for example, '26' for 26GB).")
private Optional<Long> maxSizeLeavingSourceInGB;
+ @Option(names = {"--balancing-iteration-interval-minutes"},
+ description = "The interval period in minutes between each iteration of
Container Balancer. " +
+ "Value should be greater than '0' and default value is 70 (for
example, '70' for 70 minutes).")
+ private Optional<Long> balancingInterval;
Review Comment:
Since time is being specified in minutes, we don't need to make this a long.
Integer should be enough. Same comment for the other time related variables.
##########
hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java:
##########
@@ -38,49 +38,98 @@ public class ContainerBalancerStartSubcommand extends
ScmSubcommand {
@Option(names = {"-t", "--threshold"},
description = "Percentage deviation from average utilization of " +
- "the cluster after which a datanode will be rebalanced (for " +
- "example, '10' for 10%%).")
+ "the cluster after which a datanode will be rebalanced. Value " +
+ "should be in the range [0.0, 100.0) and default value is 10 " +
+ "(for example, '10' for 10%%).")
private Optional<Double> threshold;
@Option(names = {"-i", "--iterations"},
- description = "Maximum consecutive iterations that" +
- " balancer will run for.")
+ description = "Maximum consecutive iterations that " +
+ "balancer will run for. Value should be positive " +
+ "or -1 and default value is 10 (for example, '10' for 10
iterations).")
private Optional<Integer> iterations;
@Option(names = {"-d", "--max-datanodes-percentage-to-involve-per-iteration",
"--maxDatanodesPercentageToInvolvePerIteration"},
description = "Max percentage of healthy, in service datanodes " +
- "that can be involved in balancing in one iteration (for example, " +
- "'20' for 20%%).")
+ "that can be involved in balancing in one iteration. Value " +
+ "should be in the range [0,100] and default value is 20 (for " +
+ "example, '20' for 20%%).")
private Optional<Integer> maxDatanodesPercentageToInvolvePerIteration;
@Option(names = {"-s", "--max-size-to-move-per-iteration-in-gb",
"--maxSizeToMovePerIterationInGB"},
description = "Maximum size that can be moved per iteration of " +
- "balancing (for example, '500' for 500GB).")
+ "balancing. Value should be positive and default value is 500 " +
+ "(for example, '500' for 500GB).")
private Optional<Long> maxSizeToMovePerIterationInGB;
@Option(names = {"-e", "--max-size-entering-target-in-gb",
"--maxSizeEnteringTargetInGB"},
description = "Maximum size that can enter a target datanode while " +
- "balancing. This is the sum of data from multiple sources (for " +
- "example, '26' for 26GB).")
+ "balancing. This is the sum of data from multiple sources. Value " +
+ "should be positive and default value is 26 (for example, " +
Review Comment:
My concern with mentioning defaults here is maintainability. While it is a
better user experience, what happens when someone changes the default value in
scm and forgets to change that here?
I don't have a strong opinion here though, and I'm fine with mentioning them.
##########
hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/TestContainerBalancerOperations.java:
##########
@@ -115,4 +125,47 @@ public void testContainerBalancerCLIOperations() throws
Exception {
}
//TODO: add more acceptance after container balancer is fully completed
+
+ /**
+ * Test if Container Balancer CLI options override the default
configurations.
+ */
Review Comment:
Let's also test that a CLI option overrides an option specified in the
configs. For example, if balancing interval is 40 in ozone-site.xml but 30 is
passed in through the CLI, 30 should be used.
##########
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java:
##########
@@ -1100,11 +1107,53 @@ public StartContainerBalancerResponseProto
startContainerBalancer(
long msls = maxSizeLeavingSource.get();
auditMap.put("maxSizeLeavingSource", String.valueOf(msls));
Preconditions.checkState(msls > 0,
- "maxSizeLeavingSource must be " +
+ "Max Size Leaving Source must be " +
"greater than zero.");
cbc.setMaxSizeLeavingSource(msls * OzoneConsts.GB);
}
+ if (balancingInterval.isPresent()) {
+ long bi = balancingInterval.get();
+ auditMap.put("balancingInterval", String.valueOf(bi));
+ Preconditions.checkState(bi > 0,
Review Comment:
I think it's fine to have both server and client side validations. Having
validations in the server would protect it from incompatible future changes in
the client.
##########
hadoop-hdds/tools/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerBalancerStartSubcommand.java:
##########
@@ -38,49 +38,98 @@ public class ContainerBalancerStartSubcommand extends
ScmSubcommand {
@Option(names = {"-t", "--threshold"},
description = "Percentage deviation from average utilization of " +
- "the cluster after which a datanode will be rebalanced (for " +
- "example, '10' for 10%%).")
+ "the cluster after which a datanode will be rebalanced. Value " +
+ "should be in the range [0.0, 100.0) and default value is 10 " +
+ "(for example, '10' for 10%%).")
Review Comment:
The text in the brackets doesn't read well now. It's not clear that writing
`10` is how a user can specify the threshold. Let's rephrase this part. Same
for the other configurations.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]