Repository: hive
Updated Branches:
  refs/heads/branch-2 92a641265 -> 3baae5f4c


HIVE-19642 : add cache dump when LLAP cache is full on branch-2 (and fix retry issues) (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3baae5f4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3baae5f4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3baae5f4

Branch: refs/heads/branch-2
Commit: 3baae5f4c7c850e728c9c619f8c6db92dc60b8c9
Parents: 92a6412
Author: sergey <[email protected]>
Authored: Tue May 22 15:40:47 2018 -0700
Committer: sergey <[email protected]>
Committed: Tue May 22 15:40:47 2018 -0700

----------------------------------------------------------------------
 .../llap/cache/LowLevelCacheMemoryManager.java  | 83 +++++++++++++++-----
 .../hive/llap/io/api/impl/LlapIoImpl.java       | 11 +--
 2 files changed, 70 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3baae5f4/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java
index 2132574..e331f1b 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheMemoryManager.java
@@ -18,14 +18,13 @@
 
 package org.apache.hadoop.hive.llap.cache;
 
-import com.google.common.annotations.VisibleForTesting;
 import java.util.concurrent.atomic.AtomicLong;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+
 import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;
 import org.apache.hadoop.hive.llap.metrics.LlapDaemonCacheMetrics;
 
+import com.google.common.annotations.VisibleForTesting;
+
 /**
 * Implementation of memory manager for low level cache. Note that memory is released during
 * reserve most of the time, by calling the evictor to evict some memory. releaseMemory is
@@ -36,6 +35,11 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
   private final LowLevelCachePolicy evictor;
   private final LlapDaemonCacheMetrics metrics;
   private long maxSize;
+  private LlapOomDebugDump memoryDumpRoot;
+
+  private static final long LOCKING_DEBUG_DUMP_PERIOD_NS = 30 * 1000000000L; // 30 sec.
+  private static final int LOCKING_DEBUG_DUMP_THRESHOLD = 5;
+  private static final AtomicLong lastCacheDumpNs = new AtomicLong(0);
 
   public LowLevelCacheMemoryManager(
       long maxSize, LowLevelCachePolicy evictor, LlapDaemonCacheMetrics metrics) {
@@ -63,9 +67,10 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
   public boolean reserveMemory(final long memoryToReserve, boolean waitForEviction) {
     // TODO: if this cannot evict enough, it will spin infinitely. Terminate at some point?
     int badCallCount = 0;
-    int nextLog = 4;
     long evictedTotalMetric = 0, reservedTotalMetric = 0, remainingToReserve = memoryToReserve;
     boolean result = true;
+    int waitTimeMs = 4;
+    boolean didDumpIoState = false;
     while (remainingToReserve > 0) {
       long usedMem = usedMemory.get(), newUsedMem = usedMem + remainingToReserve;
       if (newUsedMem <= maxSize) {
@@ -75,28 +80,29 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
         }
         continue;
       }
-      if (evictor == null) return false;
-      // TODO: for one-block case, we could move notification for the last block out of the loop.
+      if (evictor == null) {
+        result = false;
+        break;
+      }
       long evicted = evictor.evictSomeBlocks(remainingToReserve);
       if (evicted == 0) {
         if (!waitForEviction) {
           result = false;
-          break;
+          break; // Test code path where we don't do more than one attempt.
         }
-        ++badCallCount;
-        if (badCallCount == nextLog) {
-          LlapIoImpl.LOG.warn("Cannot evict blocks for " + badCallCount + " calls; cache full?");
-          nextLog <<= 1;
-          try {
-            Thread.sleep(Math.min(1000, nextLog));
-          } catch (InterruptedException e) {
-            Thread.currentThread().interrupt();
-            result = false;
-            break;
-          }
+        didDumpIoState = logEvictionIssue(++badCallCount, didDumpIoState);
+        waitTimeMs = Math.min(1000, waitTimeMs << 1);
+        assert waitTimeMs > 0;
+        try {
+          Thread.sleep(waitTimeMs);
+        } catch (InterruptedException e) {
+          Thread.currentThread().interrupt();
+          result = false;
+          break;
         }
         continue;
       }
+
       evictedTotalMetric += evicted;
       badCallCount = 0;
       // Adjust the memory - we have to account for what we have just evicted.
@@ -110,6 +116,7 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
         }
         usedMem = usedMemory.get();
       }
+
     }
     if (!result) {
       releaseMemory(reservedTotalMetric);
@@ -120,6 +127,39 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
   }
 
 
+  private boolean logEvictionIssue(int badCallCount, boolean didDumpIoState) {
+    if (badCallCount <= LOCKING_DEBUG_DUMP_THRESHOLD) return didDumpIoState;
+    String ioStateDump = maybeDumpIoState(didDumpIoState);
+    if (ioStateDump == null) {
+      LlapIoImpl.LOG.warn("Cannot evict blocks for " + badCallCount + " calls; cache full?");
+      return didDumpIoState;
+    } else {
+      LlapIoImpl.LOG.warn("Cannot evict blocks; IO state:\n " + ioStateDump);
+      return true;
+    }
+  }
+
+  private String maybeDumpIoState(boolean didDumpIoState) {
+    if (didDumpIoState) return null; // No more than once per reader.
+    long now = System.nanoTime(), last = lastCacheDumpNs.get();
+    while (true) {
+      if (last != 0 && (now - last) < LOCKING_DEBUG_DUMP_PERIOD_NS) {
+        return null; // We have recently dumped IO state into log.
+      }
+      if (lastCacheDumpNs.compareAndSet(last, now)) break;
+      now = System.nanoTime();
+      last = lastCacheDumpNs.get();
+    }
+    try {
+      StringBuilder sb = new StringBuilder();
+      memoryDumpRoot.debugDumpShort(sb);
+      return sb.toString();
+    } catch (Throwable t) {
+      return "Failed to dump cache state: " + t.getClass() + " " + t.getMessage();
+    }
+  }
+
+
   @Override
   public long forceReservedMemory(int allocationSize, int count) {
     if (evictor == null) return 0;
@@ -152,4 +192,9 @@ public class LowLevelCacheMemoryManager implements MemoryManager {
   public void updateMaxSize(long maxSize) {
     this.maxSize = maxSize;
   }
+
+
+  public void setMemoryDumpRoot(LlapOomDebugDump memoryDumpRoot) {
+    this.memoryDumpRoot = memoryDumpRoot;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/3baae5f4/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
index 58535d7..253532a 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
@@ -84,7 +84,7 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> {
   private final LlapDaemonIOMetrics ioMetrics;
   private ObjectName buddyAllocatorMXBean;
   private final Allocator allocator;
-  private final LlapOomDebugDump memoryDump;
+  private final LlapOomDebugDump memoryDumpRoot;
 
   private LlapIoImpl(Configuration conf) throws IOException {
     String ioMode = HiveConf.getVar(conf, HiveConf.ConfVars.LLAP_IO_MEMORY_MODE);
@@ -150,7 +150,8 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> {
       // Cache uses allocator to allocate and deallocate, create allocator and then caches.
       BuddyAllocator allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
       this.allocator = allocator;
-      this.memoryDump = allocator;
+      this.memoryDumpRoot = allocator;
+      memManager.setMemoryDumpRoot(this.memoryDumpRoot); // TODO: This should be refactored...
       LowLevelCacheImpl cacheImpl = new LowLevelCacheImpl(
           cacheMetrics, cachePolicy, allocator, true);
       cache = cacheImpl;
@@ -174,7 +175,7 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> {
       bufferManager = cacheImpl; // Cache also serves as buffer manager.
     } else {
       this.allocator = new SimpleAllocator(conf);
-      memoryDump = null;
+      memoryDumpRoot = null;
       SimpleBufferManager sbm = new SimpleBufferManager(allocator, cacheMetrics);
       bufferManager = sbm;
       cache = sbm;
@@ -202,9 +203,9 @@ public class LlapIoImpl implements LlapIo<VectorizedRowBatch> {
 
   @Override
   public String getMemoryInfo() {
-    if (memoryDump == null) return "\nNot using the allocator";
+    if (memoryDumpRoot == null) return "\nNot using the allocator";
     StringBuilder sb = new StringBuilder();
-    memoryDump.debugDumpShort(sb);
+    memoryDumpRoot.debugDumpShort(sb);
     return sb.toString();
   }
 

Reply via email to