This is an automated email from the ASF dual-hosted git repository.
dcapwell pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/trunk by this push:
new 4f49ca5e29 TCM's Retry.Deadline#retryIndefinitely is dangerous if used
with RemoteProcessor as the deadline does not impact message retries
4f49ca5e29 is described below
commit 4f49ca5e29d9c7207654a1f3c4eac9c9f0b84e5e
Author: David Capwell <[email protected]>
AuthorDate: Fri Nov 8 13:43:50 2024 -0800
TCM's Retry.Deadline#retryIndefinitely is dangerous if used with
RemoteProcessor as the deadline does not impact message retries
patch by David Capwell; reviewed by Alex Petrov, Sam Tunnicliffe for
CASSANDRA-20059
---
src/java/org/apache/cassandra/tcm/Processor.java | 37 ++++++++++++++++++++++--
src/java/org/apache/cassandra/tcm/Retry.java | 32 +-------------------
2 files changed, 35 insertions(+), 34 deletions(-)
diff --git a/src/java/org/apache/cassandra/tcm/Processor.java b/src/java/org/apache/cassandra/tcm/Processor.java
index e3f12852c2..fdb4cf23bb 100644
--- a/src/java/org/apache/cassandra/tcm/Processor.java
+++ b/src/java/org/apache/cassandra/tcm/Processor.java
@@ -20,9 +20,11 @@ package org.apache.cassandra.tcm;
import java.util.concurrent.TimeUnit;
+import com.codahale.metrics.Meter;
import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.metrics.TCMMetrics;
import org.apache.cassandra.tcm.log.Entry;
+import org.apache.cassandra.utils.Clock;
public interface Processor
{
@@ -37,9 +39,7 @@ public interface Processor
// submit the STARTUP message. This allows the bounces affecting majority of CMS nodes to finish successfully.
if (transform.kind() == Transformation.Kind.STARTUP)
{
- return commit(entryId, transform, lastKnown,
- Retry.Deadline.retryIndefinitely(DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS),
- TCMMetrics.instance.commitRetries));
+ return commit(entryId, transform, lastKnown, unsafeRetryIndefinitely());
}
return commit(entryId, transform, lastKnown,
@@ -47,6 +47,37 @@ public interface Processor
new Retry.Jitter(TCMMetrics.instance.commitRetries)));
}
+ /**
+ * Since we are using message expiration for communicating timeouts to CMS nodes, we have to be careful not
+ * to overflow the long, since messaging is using only 32 bits for deadlines. To achieve that, we are
+ * giving `timeoutNanos` every time we retry, but will retry indefinitely.
+ */
+ private static Retry.Deadline unsafeRetryIndefinitely()
+ {
+ long timeoutNanos = DatabaseDescriptor.getCmsAwaitTimeout().to(TimeUnit.NANOSECONDS);
+ Meter retryMeter = TCMMetrics.instance.commitRetries;
+ return new Retry.Deadline(Clock.Global.nanoTime() + timeoutNanos,
+ new Retry.Jitter(retryMeter))
+ {
+ @Override
+ public boolean reachedMax()
+ {
+ return false;
+ }
+
+ @Override
+ public long remainingNanos()
+ {
+ return timeoutNanos;
+ }
+
+ public String toString()
+ {
+ return String.format("RetryIndefinitely{tries=%d}", currentTries());
+ }
+ };
+ }
+
Commit.Result commit(Entry.Id entryId, Transformation transform, Epoch lastKnown, Retry.Deadline retryPolicy);
/**
diff --git a/src/java/org/apache/cassandra/tcm/Retry.java b/src/java/org/apache/cassandra/tcm/Retry.java
index 703e590466..3277531444 100644
--- a/src/java/org/apache/cassandra/tcm/Retry.java
+++ b/src/java/org/apache/cassandra/tcm/Retry.java
@@ -27,7 +27,6 @@ import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.utils.Clock;
import static com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
-import static org.apache.cassandra.tcm.Retry.Jitter.MAX_JITTER_MS;
public abstract class Retry
{
@@ -143,7 +142,7 @@ public abstract class Retry
public final long deadlineNanos;
protected final Retry delegate;
- private Deadline(long deadlineNanos, Retry delegate)
+ public Deadline(long deadlineNanos, Retry delegate)
{
super(delegate.maxTries, delegate.retryMeter);
assert deadlineNanos > 0 : String.format("Deadline should be strictly positive but was %d.", deadlineNanos);
@@ -161,35 +160,6 @@ public abstract class Retry
return new Deadline(Clock.Global.nanoTime() + timeoutNanos, delegate);
}
- /**
- * Since we are using message expiration for communicating timeouts to CMS nodes, we have to be careful not
- * to overflow the long, since messaging is using only 32 bits for deadlines. To achieve that, we are
- * giving `timeoutNanos` every time we retry, but will retry indefinitely.
- */
- public static Deadline retryIndefinitely(long timeoutNanos, Meter retryMeter)
- {
- return new Deadline(Clock.Global.nanoTime() + timeoutNanos,
- new Retry.Jitter(Integer.MAX_VALUE, MAX_JITTER_MS, new Random(), retryMeter))
- {
- @Override
- public boolean reachedMax()
- {
- return false;
- }
-
- @Override
- public long remainingNanos()
- {
- return timeoutNanos;
- }
-
- public String toString()
- {
- return String.format("RetryIndefinitely{tries=%d}", currentTries());
- }
- };
- }
-
@Override
public boolean reachedMax()
{
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]