Repository: hive
Updated Branches:
  refs/heads/branch-2 92a641265 -> 3baae5f4c
HIVE-19642 : add cache dump when LLAP cache is full on branch-2 (and fix retry issues) (Sergey Shelukhin, reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3baae5f4 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3baae5f4 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3baae5f4 Branch: refs/heads/branch-2 Commit: 3baae5f4c7c850e728c9c619f8c6db92dc60b8c9 Parents: 92a6412 Author: sergey <[email protected]> Authored: Tue May 22 15:40:47 2018 -0700 Committer: sergey <[email protected]> Committed: Tue May 22 15:40:47 2018 -0700 ---------------------------------------------------------------------- .../llap/cache/LowLevelCacheMemoryManager.java | 83 +++++++++++++++----- .../hive/llap/io/api/impl/LlapIoImpl.java | 11 +-- 2 files changed, 70 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/3baae5f4/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java index 2132574..e331f1b 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java @@ -18,14 +18,13 @@ package org.apache.hadoop.hive.llap.cache; -import com.google.common.annotations.VisibleForTesting; import java.util.concurrent.atomic.AtomicLong; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; + import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; import 
org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics; +import com.google.common.annotations.VisibleForTesting; + /** * Implementation of memory manager for low level cache. Note that memory is released during * reserve most of the time, by calling the evictor to evict some memory. releaseMemory is @@ -36,6 +35,11 @@ public class LowLevelCacheMemoryManager implements MemoryManager { private final LowLevelCachePolicy evictor; private final LlapDaemonCacheMetrics metrics; private long maxSize; + private LlapOomDebugDump memoryDumpRoot; + + private static final long LOCKING_DEBUG_DUMP_PERIOD_NS = 30 * 1000000000L; // 30 sec. + private static final int LOCKING_DEBUG_DUMP_THRESHOLD = 5; + private static final AtomicLong lastCacheDumpNs = new AtomicLong(0); public LowLevelCacheMemoryManager( long maxSize, LowLevelCachePolicy evictor, LlapDaemonCacheMetrics metrics) { @@ -63,9 +67,10 @@ public class LowLevelCacheMemoryManager implements MemoryManager { public boolean reserveMemory(final long memoryToReserve, boolean waitForEviction) { // TODO: if this cannot evict enough, it will spin infinitely. Terminate at some point? int badCallCount = 0; - int nextLog = 4; long evictedTotalMetric = 0, reservedTotalMetric = 0, remainingToReserve = memoryToReserve; boolean result = true; + int waitTimeMs = 4; + boolean didDumpIoState = false; while (remainingToReserve > 0) { long usedMem = usedMemory.get(), newUsedMem = usedMem + remainingToReserve; if (newUsedMem <= maxSize) { @@ -75,28 +80,29 @@ public class LowLevelCacheMemoryManager implements MemoryManager { } continue; } - if (evictor == null) return false; - // TODO: for one-block case, we could move notification for the last block out of the loop. + if (evictor == null) { + result = false; + break; + } long evicted = evictor.evictSomeBlocks(remainingToReserve); if (evicted == 0) { if (!waitForEviction) { result = false; - break; + break; // Test code path where we don't do more than one attempt. 
} - ++badCallCount; - if (badCallCount == nextLog) { - LlapIoImpl.LOG.warn("Cannot evict blocks for " + badCallCount + " calls; cache full?"); - nextLog <<= 1; - try { - Thread.sleep(Math.min(1000, nextLog)); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - result = false; - break; - } + didDumpIoState = logEvictionIssue(++badCallCount, didDumpIoState); + waitTimeMs = Math.min(1000, waitTimeMs << 1); + assert waitTimeMs > 0; + try { + Thread.sleep(waitTimeMs); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + result = false; + break; } continue; } + evictedTotalMetric += evicted; badCallCount = 0; // Adjust the memory - we have to account for what we have just evicted. @@ -110,6 +116,7 @@ public class LowLevelCacheMemoryManager implements MemoryManager { } usedMem = usedMemory.get(); } + } if (!result) { releaseMemory(reservedTotalMetric); @@ -120,6 +127,39 @@ public class LowLevelCacheMemoryManager implements MemoryManager { } + private boolean logEvictionIssue(int badCallCount, boolean didDumpIoState) { + if (badCallCount <= LOCKING_DEBUG_DUMP_THRESHOLD) return didDumpIoState; + String ioStateDump = maybeDumpIoState(didDumpIoState); + if (ioStateDump == null) { + LlapIoImpl.LOG.warn("Cannot evict blocks for " + badCallCount + " calls; cache full?"); + return didDumpIoState; + } else { + LlapIoImpl.LOG.warn("Cannot evict blocks; IO state:\n " + ioStateDump); + return true; + } + } + + private String maybeDumpIoState(boolean didDumpIoState) { + if (didDumpIoState) return null; // No more than once per reader. + long now = System.nanoTime(), last = lastCacheDumpNs.get(); + while (true) { + if (last != 0 && (now - last) < LOCKING_DEBUG_DUMP_PERIOD_NS) { + return null; // We have recently dumped IO state into log. 
+ } + if (lastCacheDumpNs.compareAndSet(last, now)) break; + now = System.nanoTime(); + last = lastCacheDumpNs.get(); + } + try { + StringBuilder sb = new StringBuilder(); + memoryDumpRoot.debugDumpShort(sb); + return sb.toString(); + } catch (Throwable t) { + return "Failed to dump cache state: " + t.getClass() + " " + t.getMessage(); + } + } + + @Override public long forceReservedMemory(int allocationSize, int count) { if (evictor == null) return 0; @@ -152,4 +192,9 @@ public class LowLevelCacheMemoryManager implements MemoryManager { public void updateMaxSize(long maxSize) { this.maxSize = maxSize; } + + + public void setMemoryDumpRoot(LlapOomDebugDump memoryDumpRoot) { + this.memoryDumpRoot = memoryDumpRoot; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/3baae5f4/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java ---------------------------------------------------------------------- diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java index 58535d7..253532a 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java @@ -84,7 +84,7 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> { private final LlapDaemonIOMetrics ioMetrics; private ObjectName buddyAllocatorMXBean; private final Allocator allocator; - private final LlapOomDebugDump memoryDump; + private final LlapOomDebugDump memoryDumpRoot; private LlapIoImpl(Configuration conf) throws IOException { String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE); @@ -150,7 +150,8 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> { // Cache uses allocator to allocate and deallocate, create allocator and then caches. 
BuddyAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics); this.allocator = allocator; - this.memoryDump = allocator; + this.memoryDumpRoot = allocator; + memManager.setMemoryDumpRoot(this.memoryDumpRoot); // TODO: This should be refactored... LowLevelCacheImpl cacheImpl = new LowLevelCacheImpl( cacheMetrics, cachePolicy, allocator, true); cache = cacheImpl; @@ -174,7 +175,7 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> { bufferManager = cacheImpl; // Cache also serves as buffer manager. } else { this.allocator = new SimpleAllocator(conf); - memoryDump = null; + memoryDumpRoot = null; SimpleBufferManager sbm = new SimpleBufferManager(allocator, cacheMetrics); bufferManager = sbm; cache = sbm; @@ -202,9 +203,9 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> { @Override public String getMemoryInfo() { - if (memoryDump == null) return "\nNot using the allocator"; + if (memoryDumpRoot == null) return "\nNot using the allocator"; StringBuilder sb = new StringBuilder(); - memoryDump.debugDumpShort(sb); + memoryDumpRoot.debugDumpShort(sb); return sb.toString(); }
