This is an automated email from the ASF dual-hosted git repository.
jmckenzie pushed a commit to branch cassandra-4.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-4.0 by this push:
new f6fce7ab51 Improve debuggability and correctness of ref detection
f6fce7ab51 is described below
commit f6fce7ab51e41dfd8d9584ce4c8dd35e0ed61598
Author: Josh McKenzie <[email protected]>
AuthorDate: Wed Mar 15 11:53:07 2023 -0400
Improve debuggability and correctness of ref detection
This patch is a backport of CASSANDRA-17205 and also raises
logging level to 'error' from 'warn' in StrongLeakDetector.
Patch by jmckenzie; reviewed by smiklosovic for CASSANDRA-18332
---
CHANGES.txt | 1 +
.../cassandra/db/lifecycle/LogTransaction.java | 25 ++++++++++++++++------
.../org/apache/cassandra/utils/concurrent/Ref.java | 2 +-
3 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index d231ded448..c661db958e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
4.0.9
+ * Backport CASSANDRA-17205 to 4.0 branch - Remove self-reference in
SSTableTidier (CASSANDRA-18332)
* Avoid loading the preferred IP for BulkLoader streaming (CASSANDRA-18370)
* Fix BufferPool incorrect memoryInUse when putUnusedPortion is used
(CASSANDRA-18311)
* Improve memtable allocator accounting when updating AtomicBTreePartition
(CASSANDRA-18125)
diff --git a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
index 85df4d64e0..a3c3837dc6 100644
--- a/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
+++ b/src/java/org/apache/cassandra/db/lifecycle/LogTransaction.java
@@ -28,6 +28,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;
+import com.codahale.metrics.Counter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.Runnables;
@@ -35,6 +36,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.concurrent.ScheduledExecutors;
+import org.apache.cassandra.config.DatabaseDescriptor;
import org.apache.cassandra.schema.TableMetadata;
import org.apache.cassandra.db.Directories;
import org.apache.cassandra.db.SystemKeyspace;
@@ -349,29 +351,35 @@ class LogTransaction extends
Transactional.AbstractTransactional implements Tran
// must not retain a reference to the SSTableReader, else leak
detection cannot kick in
private final Descriptor desc;
private final long sizeOnDisk;
- private final Tracker tracker;
private final boolean wasNew;
private final Object lock;
private final Ref<LogTransaction> parentRef;
- private final UUID txnId;
+ private final Counter totalDiskSpaceUsed;
public SSTableTidier(SSTableReader referent, boolean wasNew,
LogTransaction parent)
{
this.desc = referent.descriptor;
this.sizeOnDisk = referent.bytesOnDisk();
- this.tracker = parent.tracker;
this.wasNew = wasNew;
this.lock = parent.lock;
this.parentRef = parent.selfRef.tryRef();
- this.txnId = parent.id();
if (this.parentRef == null)
throw new IllegalStateException("Transaction already
completed");
+
+ // While the parent cfs may be dropped in the interim of us taking
a reference to this and using it, at worst
+ // we'll be updating a metric for a now dropped ColumnFamilyStore.
We do not hold a reference to the tracker or
+ // cfs as that would create a strong ref loop and violate our
ability to do leak detection.
+ totalDiskSpaceUsed = parent.tracker != null &&
parent.tracker.cfstore != null ?
+
parent.tracker.cfstore.metric.totalDiskSpaceUsed :
+ null;
}
public void run()
{
- if (tracker != null && !tracker.isDummy())
+ // While this may be a dummy tracker w/out information in the
metrics table, we attempt to delete regardless
+ // and allow the delete to silently fail if this is an invalid ks
+ cf combination at time of tidy run.
+ if (DatabaseDescriptor.isDaemonInitialized())
SystemKeyspace.clearSSTableReadMeter(desc.ksname, desc.cfname,
desc.generation);
synchronized (lock)
@@ -399,8 +407,11 @@ class LogTransaction extends
Transactional.AbstractTransactional implements Tran
return;
}
- if (tracker != null && tracker.cfstore != null && !wasNew)
- tracker.cfstore.metric.totalDiskSpaceUsed.dec(sizeOnDisk);
+ // It's possible we're the last one's holding a ref to this
metric if it's already been released in the
+ // parent TableMetrics; we run this regardless rather than
holding a ref to that CFS or Tracker and thus
+ // creating a strong ref loop
+ if (DatabaseDescriptor.isDaemonInitialized() &&
totalDiskSpaceUsed != null && !wasNew)
+ totalDiskSpaceUsed.dec(sizeOnDisk);
// release the referent to the parent so that the all
transaction files can be released
parentRef.release();
diff --git a/src/java/org/apache/cassandra/utils/concurrent/Ref.java
b/src/java/org/apache/cassandra/utils/concurrent/Ref.java
index a373347445..121e71d99f 100644
--- a/src/java/org/apache/cassandra/utils/concurrent/Ref.java
+++ b/src/java/org/apache/cassandra/utils/concurrent/Ref.java
@@ -687,7 +687,7 @@ public final class Ref<T> implements RefCounted<T>
List<String> names = new ArrayList<>(this.candidates.size());
for (Tidy tidy : this.candidates)
names.add(tidy.name());
- logger.warn("Strong reference leak candidates detected: {}",
names);
+ logger.error("Strong reference leak candidates detected: {}",
names);
}
this.candidates = candidates;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]