A simple diagnostic utility I use to detect these problems: import java.lang.ref.WeakReference; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import org.apache.ignite.Ignite; import org.apache.ignite.internal.GridComponent; import org.apache.ignite.internal.IgniteKernal; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger;
/**
 * Test-only diagnostic helper that detects {@link Ignite} instances (and their
 * {@link GridComponent}s) which stay strongly reachable after a test has finished.
 *
 * <p>Each registered node is held only through {@link WeakReference}s, so the tracker
 * itself never keeps a node alive. Call {@link #register(String, Ignite)} when a node is
 * started and {@link #trimCollectedRefs()} once the test has stopped the node and dropped
 * its own references; whatever is still registered afterwards is reported as leaked.
 *
 * <p>This class never triggers a garbage collection itself; a weak reference is only
 * cleared once the collector has actually run.
 *
 * <p>The static entry points synchronize on the class because the registry is a plain
 * static list that may be touched from more than one test thread.
 */
public class IgniteWeakRefTracker {
    private static final Logger LOGGER = LogManager.getLogger(IgniteWeakRefTracker.class);

    /** Trackers of all registered nodes that have not been trimmed yet. */
    private static final LinkedList<IgniteWeakRefTracker> refs = new LinkedList<>();

    /** Canonical class name of the tracked instance, captured at registration. */
    private final String clazz;

    /** Name of the test that registered the instance. */
    private final String testName;

    /** Value of {@code ignite.name()} at registration time. */
    private final String name;

    /** Weak handle to the tracked instance. */
    private final WeakReference<Ignite> innerRef;

    /** Weak handles to the components of the tracked instance (empty unless it is an {@link IgniteKernal}). */
    private final List<WeakReference<GridComponent>> componentRefs = new ArrayList<>(128);

    /**
     * Captures the descriptive data of {@code ignite} and weak references to it and, when it
     * is an {@link IgniteKernal}, to every component returned by its kernal context.
     *
     * @param testName name of the registering test, used only in log and error messages
     * @param ignite instance to track; must not be {@code null}
     */
    private IgniteWeakRefTracker(String testName, Ignite ignite) {
        this.clazz = ignite.getClass().getCanonicalName();
        this.innerRef = new WeakReference<>(ignite);
        this.name = ignite.name();
        this.testName = testName;

        if (ignite instanceof IgniteKernal) {
            IgniteKernal ik = (IgniteKernal) ignite;

            for (GridComponent c : ik.context().components()) {
                componentRefs.add(new WeakReference<>(c));
            }
        }
    }

    /**
     * Starts tracking the given instance.
     *
     * @param testName name of the test that started the instance
     * @param ignite instance to track; must not be {@code null}
     */
    public static synchronized void register(String testName, Ignite ignite) {
        refs.add(new IgniteWeakRefTracker(testName, ignite));
    }

    /**
     * Removes the trackers whose instance has been garbage collected and logs the ones
     * whose instance is still reachable.
     *
     * <p>The whole registry is always processed and pruned before any failure is reported,
     * so a single inconsistent tracker neither leaves stale entries behind nor makes every
     * later call fail again on the same entry.
     *
     * @throws IllegalStateException if an instance was collected while at least one of its
     *     components is still reachable
     */
    public static synchronized void trimCollectedRefs() {
        List<IgniteWeakRefTracker> survivors = new ArrayList<>(refs.size());
        StringBuilder violations = new StringBuilder();

        for (IgniteWeakRefTracker ref : refs) {
            if (ref.isIgniteCollected()) {
                LOGGER.info("Collected ignite: ignite {} from test {}", ref.getIgniteName(), ref.getTestName());

                int uncollected = ref.igniteComponentsNonCollectedCount();

                if (uncollected != 0) {
                    // Remember the problem and keep going; it is thrown after the registry is consistent.
                    if (violations.length() > 0) {
                        violations.append("; ");
                    }

                    violations.append("ignite ").append(ref.getIgniteName())
                        .append(" from test ").append(ref.getTestName())
                        .append(": ").append(uncollected).append(" component(s)");
                }
            } else {
                LOGGER.warn("Leaked ignite: ignite {} from test {}", ref.getIgniteName(), ref.getTestName());
                survivors.add(ref);
            }
        }

        // Rebuild the registry in one pass instead of a quadratic removeAll on a linked list.
        refs.clear();
        refs.addAll(survivors);

        LOGGER.info("Leaked ignites count: {}", refs.size());

        if (violations.length() > 0) {
            throw new IllegalStateException("Non collected components for collected ignite. " + violations);
        }
    }

    /**
     * Returns the number of trackers currently registered.
     *
     * @return registry size; this counts only leaked instances when called right after
     *     {@link #trimCollectedRefs()}
     */
    public static synchronized int getLeakedSize() {
        return refs.size();
    }

    /**
     * Tells whether the tracked instance has been garbage collected.
     *
     * @return {@code true} if the weak reference to the instance has been cleared
     */
    public boolean isIgniteCollected() {
        return innerRef.get() == null;
    }

    /**
     * Counts the tracked components that are still reachable, logging a warning for each.
     *
     * @return number of components whose weak reference has not been cleared
     */
    public int igniteComponentsNonCollectedCount() {
        int res = 0;

        for (WeakReference<GridComponent> cr : componentRefs) {
            GridComponent gridComponent = cr.get();

            if (gridComponent != null) {
                LOGGER.warn("Uncollected component: {}", gridComponent.getClass().getSimpleName());
                res++;
            }
        }

        return res;
    }

    /**
     * Returns the canonical class name of the tracked instance.
     *
     * @return class name captured at registration
     */
    public String getClazz() {
        return clazz;
    }

    /**
     * Returns the name of the registering test.
     *
     * @return test name passed to {@link #register(String, Ignite)}
     */
    public String getTestName() {
        return testName;
    }

    /**
     * Returns the name of the tracked instance.
     *
     * @return value of {@code ignite.name()} captured at registration
     */
    public String getIgniteName() {
        return name;
    }
}
 On Fri, Mar 20, 2020 at 11:51 PM Andrey Davydov <andrey.davy...@gmail.com> wrote: > I found one more way for leak and understand reason: > > > this - value: org.apache.ignite.internal.IgniteKernal #1 > <- grid - class: org.apache.ignite.internal.GridKernalContextImpl, > value: org.apache.ignite.internal.IgniteKernal #1 > <- ctx - class: > org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor, value: > org.apache.ignite.internal.GridKernalContextImpl #3 > <- this$0 - class: > org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask, > value: org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor #1 > <- stmtCleanupTask - class: > org.apache.ignite.internal.processors.query.h2.ConnectionManager, value: > org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask > #11 > <- arg$1 - class: > org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174, > value: org.apache.ignite.internal.processors.query.h2.ConnectionManager #1 > <- recycler - class: > org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool, > value: > org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174 > #1 > <- this$0 - class: > org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable, > value: org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool > #1 > <- value - class: java.lang.ThreadLocal$ThreadLocalMap$Entry, > value: > org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable > #1 > <- [411] - class: > java.lang.ThreadLocal$ThreadLocalMap$Entry[], value: > java.lang.ThreadLocal$ThreadLocalMap$Entry #35 > <- table - class: java.lang.ThreadLocal$ThreadLocalMap, > value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #25 > <- threadLocals (thread object) - class: 
java.lang.Thread, > value: java.lang.ThreadLocal$ThreadLocalMap #2 > > > > Reason: > > > org.apache.ignite.internal.processors.query.h2.ConnectionManager has some > ThreadLocal fields, including connPool, threadConns, threadConn, > detachedConns etc. > > > ConnectionManager stores lambdas in these thread-local storages, so a link to > ConnectionManager leaks into the thread-local context. > > > And it seems that this method is not sufficient: > > private void closeConnections() { > threadConns.values().forEach(set -> > set.keySet().forEach(U::closeQuiet)); > detachedConns.keySet().forEach(U::closeQuiet); > > threadConns.clear(); > detachedConns.clear(); > } > > > So when Ignition.start() and Ignition.stop() are called from different threads, > the caches are not cleared properly and the starter thread keeps a link to > ConnectionManager via its ThreadLocal context. And we get one Ignite instance > leak every time. > > > I'm sure you run "tens of thousands nodes during every suite run." But > the majority of runs may be without Indexing, and start and stop the node in the same > thread. > > > To reproduce the leak, start Ignite with indexing, save a link to it in a weak > reference, stop it asynchronously in another thread, null the local link, > then check the weak ref and inspect the heap dump. > > > > Andrey. > > > > *От: *Andrey Davydov <andrey.davy...@gmail.com> > *Отправлено: *18 марта 2020 г. в 18:37 > *Кому: *user@ignite.apache.org > *Тема: *Ignite memory leaks in 2.8.0 > > > > Hello, > > > > There are at least two ways a link to IgniteKernal leaks to a GC root and makes > it unavailable for GC. > > > > 1. 
The first one: > > > > this - value: org.apache.ignite.internal.IgniteKernal #1 > > <- grid - class: org.apache.ignite.internal.GridKernalContextImpl, > value: org.apache.ignite.internal.IgniteKernal #1 > > <- ctx - class: > org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing, value: > org.apache.ignite.internal.GridKernalContextImpl #2 > > <- this$0 - class: > org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10, value: > org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing #2 > > <- serializer - class: org.h2.util.JdbcUtils, value: > org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10 #1 > > <- [5395] - class: java.lang.Object[], value: > org.h2.util.JdbcUtils class JdbcUtils > > <- elementData - class: java.util.Vector, value: > java.lang.Object[] #37309 > > <- classes - class: sun.misc.Launcher$AppClassLoader, value: > java.util.Vector #31 > > <- contextClassLoader (thread object) - class: > java.lang.Thread, value: sun.misc.Launcher$AppClassLoader #1 > > > > org.h2.util.JdbcUtils has a static field JavaObjectSerializer serializer, which > references IgniteKernal via IgniteH2Indexing. This makes a closed and stopped > IgniteKernal non-collectable by GC. > > If several Ignite instances run in the same JVM, JdbcUtils will always use only one of them, and > this can cause races. > > > > 2. 
The second way: > > > > this - value: org.apache.ignite.internal.IgniteKernal #2 > > <- grid - class: org.apache.ignite.internal.GridKernalContextImpl, > value: org.apache.ignite.internal.IgniteKernal #2 > > <- ctx - class: > org.apache.ignite.internal.processors.cache.GridCacheContext, value: > org.apache.ignite.internal.GridKernalContextImpl #1 > > <- cctx - class: > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry, > value: org.apache.ignite.internal.processors.cache.GridCacheContext #24 > > <- parent - class: > org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate, value: > org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry > #4 > > <- [0] - class: java.lang.Object[], value: > org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate #1 > > <- elements - class: java.util.ArrayDeque, value: > java.lang.Object[] #43259 > > <- value - class: java.lang.ThreadLocal$ThreadLocalMap$Entry, > value: java.util.ArrayDeque #816 > > <- [119] - class: > java.lang.ThreadLocal$ThreadLocalMap$Entry[], value: > java.lang.ThreadLocal$ThreadLocalMap$Entry #51 > > <- table - class: java.lang.ThreadLocal$ThreadLocalMap, > value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #21 > > <- threadLocals (thread object) - class: java.lang.Thread, > value: java.lang.ThreadLocal$ThreadLocalMap #2 > > > > A link to IgniteKernal leaks into a ThreadLocal variable, so when we start/stop > many instances of Ignite in the same JVM during testing, we get many stopped > “zombie” Ignite instances in the ThreadLocal context of the main test thread, which causes > OutOfMemoryError after a few dozen tests. > > > > Andrey. > > > > >