kfaraz commented on code in PR #14517:
URL: https://github.com/apache/druid/pull/14517#discussion_r1252999110
##########
server/src/main/java/org/apache/druid/server/coordination/ChangeRequestHttpSyncer.java:
##########
@@ -396,35 +397,38 @@ private void addNextSyncToWorkQueue()
}
catch (Throwable th) {
if (executor.isShutdown()) {
- log.warn(
- th,
- "Couldn't schedule next sync. [%s] is not being synced any more, probably because executor is stopped.",
- logIdentity
- );
+ log.warn(th, "Could not schedule sync for server[%s] because executor is stopped.", logIdentity);
} else {
log.makeAlert(
th,
- "Couldn't schedule next sync. [%s] is not being synced any more, restarting Druid process on that "
- + "server might fix the issue.",
+ "Could not schedule sync for server [%s]. Try restarting the Druid process on that server.",
logIdentity
).emit();
}
}
}
}
- private boolean incrementFailedAttemptAndCheckUnstabilityTimeout()
+ private void markServerUnstableAndAlert(Throwable throwable, String action)
{
- if (consecutiveFailedAttemptCount > 0
- && (System.currentTimeMillis() - unstableStartTime) > serverUnstabilityTimeout) {
- return true;
- }
-
if (consecutiveFailedAttemptCount++ == 0) {
- unstableStartTime = System.currentTimeMillis();
+ sinceUnstable.restart();
}
- return false;
+ final long unstableSeconds = getUnstableTimeMillis() / 1000;
+ final String message = StringUtils.format(
Review Comment:
Added an action in case of exceeding the maxUnstableDuration.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]