clintropolis commented on a change in pull request #7154: rename maintenance
mode to decommission
URL: https://github.com/apache/incubator-druid/pull/7154#discussion_r262287102
##########
File path:
server/src/main/java/org/apache/druid/server/coordinator/helper/DruidCoordinatorBalancer.java
##########
@@ -95,37 +95,38 @@ private void balanceTier(
{
if (params.getAvailableSegments().size() == 0) {
- log.info("Metadata segments are not available. Cannot balance.");
+ log.warn("Metadata segments are not available. Cannot balance.");
return;
}
currentlyMovingSegments.computeIfAbsent(tier, t -> new
ConcurrentHashMap<>());
if (!currentlyMovingSegments.get(tier).isEmpty()) {
reduceLifetimes(tier);
- log.info("[%s]: Still waiting on %,d segments to be moved", tier,
currentlyMovingSegments.get(tier).size());
+ log.info(
+ "[%s]: Still waiting on %,d segments to be moved. Skipping balance.",
+ tier,
+ currentlyMovingSegments.get(tier).size()
+ );
return;
}
/*
- Take as much segments from maintenance servers as priority allows and
find the best location for them on
- available servers. After that, balance segments within available servers
pool.
+ Take as many segments from decommissioning servers as velocity allows
and find the best location for them on
+ active servers. After that, balance segments within active servers pool.
*/
Map<Boolean, List<ServerHolder>> partitions =
-
servers.stream().collect(Collectors.partitioningBy(ServerHolder::isInMaintenance));
- final List<ServerHolder> maintenanceServers = partitions.get(true);
- final List<ServerHolder> availableServers = partitions.get(false);
+
servers.stream().collect(Collectors.partitioningBy(ServerHolder::isDecommissioning));
+ final List<ServerHolder> decommissioningServers = partitions.get(true);
+ final List<ServerHolder> activeServers = partitions.get(false);
log.info(
- "Found %d servers in maintenance, %d available servers servers",
- maintenanceServers.size(),
- availableServers.size()
+ "Found %d active servers, %d decommissioning servers",
+ activeServers.size(),
+ decommissioningServers.size()
);
- if (maintenanceServers.isEmpty()) {
- if (availableServers.size() <= 1) {
- log.info("[%s]: %d available servers servers found. Cannot balance.",
tier, availableServers.size());
- }
- } else if (availableServers.isEmpty()) {
- log.info("[%s]: no available servers servers found during maintenance.
Cannot balance.", tier);
+ if ((decommissioningServers.isEmpty() && activeServers.size() <= 1) ||
activeServers.isEmpty()) {
+ log.warn("[%s]: insufficient active servers. Cannot balance.", tier);
+ return;
Review comment:
> Is it planned that we don't emit stats in this case? If yes, a comment
should be left explicitly stating this.
I think it is pretty consistent with the behavior of the other 'cannot balance'
conditions, such as still having segments moving or not having any segments. It
was changed to `return` here based on this comment
https://github.com/apache/incubator-druid/pull/7154#discussion_r261374035, which
seemed reasonable to me.
> I think it makes sense to include the numbers of active and decommissioning
servers in this logging statement, so that the cases are distinguishable in
logs.
I removed the count here because the log statement immediately before this
one lists the count of 'active' and 'decommissioning' servers. Should these log
messages be consolidated, or just print the count info twice?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]