[
https://issues.apache.org/jira/browse/CASSANDRA-21390?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18088124#comment-18088124
]
Cameron Zemek commented on CASSANDRA-21390:
-------------------------------------------
[~bschoeni] can you use the following patch to gather more information please:
{code:java}
diff --git a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
index 139d4a06b2..5246ebc9ed 100644
--- a/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
+++ b/src/java/org/apache/cassandra/utils/memory/MemtableAllocator.java
@@ -115,6 +115,8 @@ public abstract class MemtableAllocator
// the amount of memory we are reporting to collect; this may be inaccurate, but is close
// and is used only to ensure that once we have reclaimed we mark the tracker with the same amount
private volatile long reclaiming;
+ // diagnostic: avoid noisy repeated logs once ownership has gone negative
+ private volatile boolean negativeOwnsLogged;
SubAllocator(MemtablePool.SubPool parent)
{
@@ -150,7 +152,9 @@ public abstract class MemtableAllocator
*/
void releaseAll()
{
- parent.released(ownsUpdater.getAndSet(this, 0));
+ long owned = ownsUpdater.getAndSet(this, 0);
+ maybeLogNegativeOwns("releaseAll", owned, 0);
+ parent.released(owned);
parent.reclaimed(reclaimingUpdater.getAndSet(this, 0));
}
@@ -204,7 +208,8 @@ public abstract class MemtableAllocator
private void allocated(long size)
{
parent.allocated(size);
- ownsUpdater.addAndGet(this, size);
+ long newOwns = ownsUpdater.addAndGet(this, size);
+ maybeLogNegativeOwns("allocated", newOwns, size);
if (state == LifeCycle.DISCARDING)
{
@@ -222,7 +227,8 @@ public abstract class MemtableAllocator
private void acquired(long size)
{
parent.acquired();
- ownsUpdater.addAndGet(this, size);
+ long newOwns = ownsUpdater.addAndGet(this, size);
+ maybeLogNegativeOwns("acquired", newOwns, size);
if (state == LifeCycle.DISCARDING)
{
@@ -246,7 +252,8 @@ public abstract class MemtableAllocator
if (state == LifeCycle.LIVE)
{
parent.released(size);
- ownsUpdater.addAndGet(this, -size);
+ long newOwns = ownsUpdater.addAndGet(this, -size);
+ maybeLogNegativeOwns("released", newOwns, size);
}
else
{
@@ -255,6 +262,23 @@ public abstract class MemtableAllocator
}
}
+ private void maybeLogNegativeOwns(String action, long currentOwns, long size)
+ {
+ if (currentOwns >= 0 || negativeOwnsLogged)
+ return;
+
+ synchronized (this)
+ {
+ if (negativeOwnsLogged)
+ return;
+ negativeOwnsLogged = true;
+ }
+
+ logger.error("Memtable allocator ownership went negative. action={}, size={}, owns={}, state={}, parentAllocated={}, parentReclaiming={}, parentLimit={}",
+ action, size, currentOwns, state, parent.used(), parent.getReclaiming(), parent.limit,
+ new IllegalStateException("Negative memtable ownership transition"));
+ }
+
/**
* Mark what we currently own as reclaiming, both here and in our parent.
* This method is called for the first time when the memtable is scheduled for flushing,
{code}
> TrieMemtable MemtableReclaimMemory AssertionError: Negative released in MemtablePool$SubPool
> --------------------------------------------------------------------------------------------
>
> Key: CASSANDRA-21390
> URL: https://issues.apache.org/jira/browse/CASSANDRA-21390
> Project: Apache Cassandra
> Issue Type: Bug
> Components: Local/Memtable
> Reporter: Praveen Reddy Arra
> Assignee: Dmitry Konstantinov
> Priority: Normal
> Attachments: image-2026-05-21-09-17-49-716.png
>
>
> We have started seeing this fatal exception in Apache Cassandra 5.0.6 on one
> of our clusters.
> {code:java}
> [ERROR] [MemtableReclaimMemory:1] cluster_id=xxx ip_address=xxx.xxx.xxx.xxx JVMStabilityInspector.java:70 - Exception in thread Thread[MemtableReclaimMemory:1,5,MemtableReclaimMemory]
> java.lang.AssertionError: Negative released: -4332
> at org.apache.cassandra.utils.memory.MemtablePool$SubPool.released(MemtablePool.java:194)
> at org.apache.cassandra.utils.memory.MemtableAllocator$SubAllocator.releaseAll(MemtableAllocator.java:153)
> at org.apache.cassandra.utils.memory.MemtableAllocator$SubAllocator.setDiscarded(MemtableAllocator.java:144)
> at org.apache.cassandra.utils.memory.MemtableAllocator.setDiscarded(MemtableAllocator.java:95)
> at org.apache.cassandra.utils.memory.NativeAllocator.setDiscarded(NativeAllocator.java:205)
> at org.apache.cassandra.db.memtable.AbstractAllocatorMemtable.discard(AbstractAllocatorMemtable.java:171)
> at org.apache.cassandra.db.memtable.TrieMemtable.discard(TrieMemtable.java:163)
> at org.apache.cassandra.db.ColumnFamilyStore$Flush$1.runMayThrow(ColumnFamilyStore.java:1398)
> at org.apache.cassandra.utils.WrappedRunnable.run(WrappedRunnable.java:26)
> at org.apache.cassandra.concurrent.ExecutionFailure$1.run(ExecutionFailure.java:133)
> at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
> at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
> at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
> at java.base/java.lang.Thread.run(Thread.java:842)
> {code}
> {code:yaml}
> memtable_allocation_type: heap_buffers
> file_cache_enabled: true
> file_cache_size: 2048MiB
> memtable:
>   configurations:
>     skiplist:
>       class_name: SkipListMemtable
>     trie:
>       class_name: TrieMemtable
>     default:
>       inherits: trie
> {code}
> It looks similar to the open MemtableReclaimMemory assert issue in
> [CASSANDRA-18159|https://issues.apache.org/jira/browse/CASSANDRA-18159].
> Environment is RHEL 8.10 with OpenJDK 17 and 16GB heap, -ea enabled.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]