maedhroz commented on code in PR #2660:
URL: https://github.com/apache/cassandra/pull/2660#discussion_r1320060428
##########
src/java/org/apache/cassandra/repair/messages/RepairMessage.java:
##########
@@ -51,48 +65,165 @@ protected RepairMessage(RepairJobDesc desc)
this.desc = desc;
}
+ public TimeUUID parentRepairSession()
+ {
+ return desc.parentSessionId;
+ }
+
public interface RepairFailureCallback
{
void onFailure(Exception e);
}
- public static void sendMessageWithFailureCB(RepairMessage request, Verb
verb, InetAddressAndPort endpoint, RepairFailureCallback failureCallback)
+ private static Backoff backoff(SharedContext.MessagingContext ctx, Verb
verb)
+ {
+ RepairRetrySpec.Verb configVerb = toConfigVerb(verb);
+ if (configVerb == null)
+ return Backoff.None.INSTANCE;
+ RepairRetrySpec retrySpec = DatabaseDescriptor.getRepairRetrys();
+ RetrySpec spec = retrySpec.get(configVerb);
+ if (!spec.isEnabled())
+ return Backoff.None.INSTANCE;
+ switch (spec.type)
+ {
+ case Expoential:
+ return new Backoff.ExpoentialBackoff(spec.maxAttempts.value,
spec.baseSleepTime.toMilliseconds(), spec.maxSleepTime.toMilliseconds(),
ctx.random().get()::nextDouble);
+ default:
+ throw new IllegalArgumentException("Unknown type: " +
spec.type);
+ }
+ }
+
+ @Nullable
+ private static RepairRetrySpec.Verb toConfigVerb(Verb verb)
{
- RequestCallback<?> callback = new RequestCallback<Object>()
+ switch (verb)
+ {
+ case PREPARE_MSG: return RepairRetrySpec.Verb.PREPARE;
+ case VALIDATION_REQ: return RepairRetrySpec.Verb.VALIDATION_REQ;
+ case VALIDATION_RSP: return RepairRetrySpec.Verb.VALIDATION_RSP;
+ case SYNC_REQ: return RepairRetrySpec.Verb.SYNC_REQ;
+ case SYNC_RSP: return RepairRetrySpec.Verb.SYNC_RSP;
+ case SNAPSHOT_MSG: return RepairRetrySpec.Verb.SNAPSHOT;
+ case CLEANUP_MSG: return RepairRetrySpec.Verb.CLEANUP;
+ default: return null;
+ }
+ }
+
+ public static <T> void
sendMessageWithRetries(SharedContext.MessagingContext ctx, RepairMessage
request, Verb verb, InetAddressAndPort endpoint, RequestCallback<T>
finalCallback)
+ {
+ // 0 means no retries
+ sendMessageWithRetries(ctx, backoff(ctx, verb), request, verb,
endpoint, finalCallback, 0);
+ }
+
+ public static <T> void
sendMessageWithRetries(SharedContext.MessagingContext ctx, RepairMessage
request, Verb verb, InetAddressAndPort endpoint)
+ {
+ sendMessageWithRetries(ctx, backoff(ctx, verb), request, verb,
endpoint, new RequestCallback<>()
{
@Override
public void onResponse(Message<Object> msg)
{
- logger.info("[#{}] {} received by {}",
request.desc.parentSessionId, verb, endpoint);
- // todo: at some point we should make repair messages follow
the normal path, actually using this
+ // TODO (now): define
+ }
+
+ @Override
+ public void onFailure(InetAddressAndPort from,
RequestFailureReason failureReason)
+ {
+ // TODO (now): define
+ }
+ }, 0);
+ }
+
+ private static <T> void
sendMessageWithRetries(SharedContext.MessagingContext ctx, Backoff backoff,
RepairMessage request, Verb verb, InetAddressAndPort endpoint,
RequestCallback<T> finalCallback, int attempt)
+ {
+ // TODO (now, api): expose a way for callers to stop early even if
backoff allows more progress
+ RequestCallback<T> callback = new RequestCallback<>()
+ {
+ @Override
+ public void onResponse(Message<T> msg)
+ {
+ finalCallback.onResponse(msg);
+ }
+
+ @Override
+ public void onFailure(InetAddressAndPort from,
RequestFailureReason failureReason)
+ {
+ ErrorHandling allowed = errorHandlingSupported(ctx, endpoint,
verb, request.parentRepairSession());
+ switch (allowed)
+ {
+ case NONE:
+ logger.error("[#{}] {} failed on {}: {}",
request.parentRepairSession(), verb, from, failureReason);
+ return;
+ case TIMEOUT:
+ finalCallback.onFailure(from, failureReason);
+ return;
+ case RETRY:
+ int maxAttempts = backoff.maxAttempts();
+ if ((maxAttempts < 0 || attempt < maxAttempts) &&
failureReason == RequestFailureReason.TIMEOUT)
+ {
+ ctx.optionalTasks().schedule(() ->
sendMessageWithRetries(ctx, backoff, request, verb, endpoint, finalCallback,
attempt + 1),
Review Comment:
Using the optional task pool seems unlikely to be problematic if all it
competes with is hint maintenance and other similarly low-frequency
activities.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]