clintropolis commented on a change in pull request #7154: rename maintenance
mode to decommission
URL: https://github.com/apache/incubator-druid/pull/7154#discussion_r262739896
##########
File path:
server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java
##########
@@ -95,37 +95,38 @@ private void balanceTier(
{
if (params.getAvailableSegments().size() == 0) {
- log.info("Metadata segments are not available. Cannot balance.");
+ log.warn("Metadata segments are not available. Cannot balance.");
return;
}
currentlyMovingSegments.computeIfAbsent(tier, t -> new
ConcurrentHashMap<>());
if (!currentlyMovingSegments.get(tier).isEmpty()) {
reduceLifetimes(tier);
- log.info("[%s]: Still waiting on %,d segments to be moved", tier,
currentlyMovingSegments.get(tier).size());
+ log.info(
+ "[%s]: Still waiting on %,d segments to be moved. Skipping balance.",
+ tier,
+ currentlyMovingSegments.get(tier).size()
+ );
return;
}
/*
- Take as much segments from maintenance servers as priority allows and
find the best location for them on
- available servers. After that, balance segments within available servers
pool.
+ Take as many segments from decommissioning servers as velocity allows
and find the best location for them on
+ active servers. After that, balance segments within active servers pool.
*/
Map<Boolean, List<ServerHolder>> partitions =
-
servers.stream().collect(Collectors.partitioningBy(ServerHolder::isInMaintenance));
- final List<ServerHolder> maintenanceServers = partitions.get(true);
- final List<ServerHolder> availableServers = partitions.get(false);
+
servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning));
+ final List<ServerHolder> decommissioningServers = partitions.get(true);
+ final List<ServerHolder> activeServers = partitions.get(false);
log.info(
- "Found %d servers in maintenance, %d available servers servers",
- maintenanceServers.size(),
- availableServers.size()
+ "Found %d active servers, %d decommissioning servers",
+ activeServers.size(),
+ decommissioningServers.size()
);
- if (maintenanceServers.isEmpty()) {
- if (availableServers.size() <= 1) {
- log.info("[%s]: %d available servers servers found. Cannot balance.",
tier, availableServers.size());
- }
- } else if (availableServers.isEmpty()) {
- log.info("[%s]: no available servers servers found during maintenance.
Cannot balance.", tier);
+ if ((decommissioningServers.isEmpty() && activeServers.size() <= 1) ||
activeServers.isEmpty()) {
+ log.warn("[%s]: insufficient active servers. Cannot balance.", tier);
+ return;
Review comment:
>At very minimum, a comment like "not emitting moved and unmoved counts on
purpose here" is needed.
Hmm, currently, all of the return points in `balanceTier` are effectively
suppressing emitting the stats if it can't find anything to do, but I'm unsure
if the return points are to explicitly not emit stats or just an optimization
to exit fast and not do extra work that just overlooked emitting 0 values for
stats. Is this correct behavior? In other words, _should_ `balanceTier` always
emit these stats or is it sensible like it is?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]