A simple diagnostic utility I use to detect these problems:

import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import org.apache.ignite.Ignite;
import org.apache.ignite.internal.GridComponent;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class IgniteWeakRefTracker {

    private static final Logger LOGGER =
LogManager.getLogger(IgniteWeakRefTracker.class);

    private final String clazz;
    private final String testName;
    private final String name;
    private final WeakReference<Ignite> innerRef;
    private final List<WeakReference<GridComponent>> componentRefs = new
ArrayList<>(128);

    private static final LinkedList<IgniteWeakRefTracker> refs = new
LinkedList<>();

    private IgniteWeakRefTracker(String testName, Ignite ignite) {
        this.clazz = ignite.getClass().getCanonicalName();
        this.innerRef = new WeakReference<>(ignite);
        this.name = ignite.name();
        this.testName = testName;

        if (ignite instanceof IgniteKernal) {
            IgniteKernal ik = (IgniteKernal) ignite;
            List<GridComponent> components = ik.context().components();
            for (GridComponent c : components) {
                componentRefs.add(new WeakReference<>(c));
            }
        }
    }

    public static void register(String testName, Ignite ignite) {
        refs.add(new IgniteWeakRefTracker(testName, ignite));
    }

    public static void trimCollectedRefs() {

        List<IgniteWeakRefTracker> toRemove = new ArrayList<>();

        for (IgniteWeakRefTracker ref : refs) {
            if (ref.isIgniteCollected()) {
                LOGGER.info("Collected ignite: ignite {} from test {}",
ref.getIgniteName(), ref.getTestName());
                toRemove.add(ref);
                if (ref.igniteComponentsNonCollectedCount() != 0) {
                    throw new IllegalStateException("Non collected
components for collected ignite.");
                }
            } else {
                LOGGER.warn("Leaked ignite: ignite {} from test {}",
ref.getIgniteName(), ref.getTestName());
            }
        }

        refs.removeAll(toRemove);

        LOGGER.info("Leaked ignites count:  {}", refs.size());

    }

    public static int getLeakedSize() {
        return refs.size();
    }

    public boolean isIgniteCollected() {
        return innerRef.get() == null;
    }

    public int igniteComponentsNonCollectedCount() {
        int res = 0;

        for (WeakReference<GridComponent> cr : componentRefs) {
            GridComponent gridComponent = cr.get();
            if (gridComponent != null) {
                LOGGER.warn("Uncollected component: {}",
gridComponent.getClass().getSimpleName());
                res++;
            }
        }

        return res;
    }

    public String getClazz() {
        return clazz;
    }

    public String getTestName() {
        return testName;
    }

    public String getIgniteName() {
        return name;
    }

}


On Fri, Mar 20, 2020 at 11:51 PM Andrey Davydov <andrey.davy...@gmail.com>
wrote:

> I found one more leak path, and I understand the reason for it:
>
>
> this     - value: org.apache.ignite.internal.IgniteKernal #1
>  <- grid     - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #1
>   <- ctx     - class:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor, value:
> org.apache.ignite.internal.GridKernalContextImpl #3
>    <- this$0     - class:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask,
> value: org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor #1
>     <- stmtCleanupTask     - class:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager, value:
> org.apache.ignite.internal.processors.timeout.GridTimeoutProcessor$CancelableTask
> #11
>      <- arg$1     - class:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174,
> value: org.apache.ignite.internal.processors.query.h2.ConnectionManager #1
>       <- recycler     - class:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool,
> value:
> org.apache.ignite.internal.processors.query.h2.ConnectionManager$$Lambda$174
> #1
>        <- this$0     - class:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable,
> value: org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool
> #1
>         <- value     - class: java.lang.ThreadLocal$ThreadLocalMap$Entry,
> value:
> org.apache.ignite.internal.processors.query.h2.ThreadLocalObjectPool$Reusable
> #1
>          <- [411]     - class:
> java.lang.ThreadLocal$ThreadLocalMap$Entry[], value:
> java.lang.ThreadLocal$ThreadLocalMap$Entry #35
>           <- table     - class: java.lang.ThreadLocal$ThreadLocalMap,
> value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #25
>            <- threadLocals (thread object)     - class: java.lang.Thread,
> value: java.lang.ThreadLocal$ThreadLocalMap #2
>
>
>
> Reason:
>
>
> org.apache.ignite.internal.processors.query.h2.ConnectionManager has some
> ThreadLocal fields, including connPool, threadConns,  threadConn,
> detachedConns etc.
>
>
> ConnectionManager stores lambdas in these thread-local storages, so a link
> to ConnectionManager leaks into the thread-local context.
>
>
> And it seems that this method does not clean up enough:
>
>     private void closeConnections() {
>         threadConns.values().forEach(set ->
> set.keySet().forEach(U::closeQuiet));
>         detachedConns.keySet().forEach(U::closeQuiet);
>
>         threadConns.clear();
>         detachedConns.clear();
>     }
>
>
> So when Ignition.start() and Ignition.stop() are called from different
> threads, the caches are not cleared properly and the starter thread keeps a
> link to ConnectionManager via its ThreadLocal context. As a result, one
> Ignite instance leaks every time.
>
>
> I'm sure you run "tens of thousands nodes during every suite run." But
> the majority of those runs may be without indexing, and may start and stop
> the node in the same thread.
>
>
> To reproduce the leak: start Ignite with indexing, save a link to it in a
> weak reference, stop it asynchronously in another thread, null the local
> link, then check the weak reference and inspect a heap dump.
>
>
>
> Andrey.
>
>
>
> *От: *Andrey Davydov <andrey.davy...@gmail.com>
> *Отправлено: *18 марта 2020 г. в 18:37
> *Кому: *user@ignite.apache.org
> *Тема: *Ignite memory leaks in 2.8.0
>
>
>
> Hello,
>
>
>
> There are at least two ways in which a link to IgniteKernal leaks to a GC
> root and makes it unavailable for GC.
>
>
>
>    1. The first one:
>
>
>
> this     - value: org.apache.ignite.internal.IgniteKernal #1
>
> <- grid     - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #1
>
>   <- ctx     - class:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing, value:
> org.apache.ignite.internal.GridKernalContextImpl #2
>
>    <- this$0     - class:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10, value:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing #2
>
>     <- serializer     - class: org.h2.util.JdbcUtils, value:
> org.apache.ignite.internal.processors.query.h2.IgniteH2Indexing$10 #1
>
>      <- [5395]     - class: java.lang.Object[], value:
> org.h2.util.JdbcUtils class JdbcUtils
>
>       <- elementData     - class: java.util.Vector, value:
> java.lang.Object[] #37309
>
>        <- classes     - class: sun.misc.Launcher$AppClassLoader, value:
> java.util.Vector #31
>
>         <- contextClassLoader (thread object)     - class:
> java.lang.Thread, value: sun.misc.Launcher$AppClassLoader #1
>
>
>
> org.h2.util.JdbcUtils has a static field JavaObjectSerializer serializer,
> which references IgniteKernal via IgniteH2Indexing. This makes a closed and
> stopped IgniteKernal non-collectable by GC.
>
> If several Ignite instances run in the same JVM, JdbcUtils will always use
> only one of them, and this can cause races.
>
>
>
>    2. The second way:
>
>
>
> this     - value: org.apache.ignite.internal.IgniteKernal #2
>
> <- grid     - class: org.apache.ignite.internal.GridKernalContextImpl,
> value: org.apache.ignite.internal.IgniteKernal #2
>
>   <- ctx     - class:
> org.apache.ignite.internal.processors.cache.GridCacheContext, value:
> org.apache.ignite.internal.GridKernalContextImpl #1
>
>    <- cctx     - class:
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry,
> value: org.apache.ignite.internal.processors.cache.GridCacheContext #24
>
>     <- parent     - class:
> org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate, value:
> org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtCacheEntry
> #4
>
>      <- [0]     - class: java.lang.Object[], value:
> org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate #1
>
>       <- elements     - class: java.util.ArrayDeque, value:
> java.lang.Object[] #43259
>
>        <- value     - class: java.lang.ThreadLocal$ThreadLocalMap$Entry,
> value: java.util.ArrayDeque #816
>
>         <- [119]     - class:
> java.lang.ThreadLocal$ThreadLocalMap$Entry[], value:
> java.lang.ThreadLocal$ThreadLocalMap$Entry #51
>
>          <- table     - class: java.lang.ThreadLocal$ThreadLocalMap,
> value: java.lang.ThreadLocal$ThreadLocalMap$Entry[] #21
>
>           <- threadLocals (thread object)     - class: java.lang.Thread,
> value: java.lang.ThreadLocal$ThreadLocalMap #2
>
>
>
> A link to IgniteKernal leaks into a ThreadLocal variable, so when we
> start/stop many instances of Ignite in the same JVM during testing, we get
> many stopped "zombie" Ignite instances in the ThreadLocal context of the
> main test thread, and this causes OutOfMemory after some dozens of tests.
>
>
>
> Andrey.
>
>
>
>
>

Reply via email to