This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-7312
in repository https://gitbox.apache.org/repos/asf/geode.git

commit 497f746cfa2714196725273f9c88b667a1872942
Author: Bruce Schuchardt <[email protected]>
AuthorDate: Fri Oct 18 11:04:47 2019 -0700

    GEODE-7312: modify the ThreadMonitor to print the stack of a blocking thread
    
    Log a thread trace for a thread that's blocking a "stuck" thread.
    
    This will help a lot when a user experiences hung operations.  Prior to
    this change we needed to request thread dumps for servers and that was
    usually not possible to obtain because the servers had already been
    terminated & restarted.  This change puts the relevant thread dumps in
    the server's log file, which is much easier for folks to gather after
    the fact.
---
 .../monitoring/executor/AbstractExecutor.java      | 52 +++++++++++------
 .../executor/AbstractExecutorGroupJUnitTest.java   | 65 +++++++++++++++++++++-
 2 files changed, 98 insertions(+), 19 deletions(-)

diff --git 
a/geode-core/src/main/java/org/apache/geode/internal/monitoring/executor/AbstractExecutor.java
 
b/geode-core/src/main/java/org/apache/geode/internal/monitoring/executor/AbstractExecutor.java
index 8f98926..6beab92 100644
--- 
a/geode-core/src/main/java/org/apache/geode/internal/monitoring/executor/AbstractExecutor.java
+++ 
b/geode-core/src/main/java/org/apache/geode/internal/monitoring/executor/AbstractExecutor.java
@@ -28,6 +28,7 @@ public abstract class AbstractExecutor {
 
   private static final int THREAD_DUMP_DEPTH = 40;
   private static final Logger logger = LogService.getLogger();
+  public static final String LOCK_OWNER_THREAD_STACK = "Lock owner thread 
stack";
   private long threadID;
   private String groupName;
   private short numIterationsStuck;
@@ -47,10 +48,10 @@ public abstract class AbstractExecutor {
 
   public void handleExpiry(long stuckTime) {
     this.incNumIterationsStuck();
-    logger.warn(handleLogMessage(stuckTime));
+    logger.warn(createThreadReport(stuckTime));
   }
 
-  private String handleLogMessage(long stuckTime) {
+  String createThreadReport(long stuckTime) {
 
     DateFormat dateFormat = new SimpleDateFormat("dd MMM yyyy HH:mm:ss zzz");
 
@@ -58,42 +59,57 @@ public abstract class AbstractExecutor {
         ManagementFactory.getThreadMXBean().getThreadInfo(this.threadID, 
THREAD_DUMP_DEPTH);
     boolean logThreadDetails = (thread != null);
 
-    StringBuilder strb = new StringBuilder();
+    StringBuilder stringBuilder = new StringBuilder();
+    final String lineSeparator = System.lineSeparator();
 
-    strb.append("Thread <").append(this.threadID).append("> (0x")
+    stringBuilder.append("Thread <").append(this.threadID).append("> (0x")
         .append(Long.toHexString(this.threadID)).append(") that was executed 
at <")
         .append(dateFormat.format(this.getStartTime())).append("> has been 
stuck for <")
         .append((float) stuckTime / 1000)
         .append(" seconds> and number of thread monitor iteration <")
-        .append(this.numIterationsStuck).append("> 
").append(System.lineSeparator());
+        .append(this.numIterationsStuck).append("> ").append(lineSeparator);
     if (logThreadDetails) {
-      strb.append("Thread Name <").append(thread.getThreadName()).append(">")
+      stringBuilder.append("Thread Name 
<").append(thread.getThreadName()).append(">")
           .append(" state <").append(thread.getThreadState())
-          .append(">").append(System.lineSeparator());
+          .append(">").append(lineSeparator);
 
       if (thread.getLockName() != null)
-        strb.append("Waiting on <").append(thread.getLockName()).append(">")
-            .append(System.lineSeparator());
+        stringBuilder.append("Waiting on 
<").append(thread.getLockName()).append(">")
+            .append(lineSeparator);
 
       if (thread.getLockOwnerName() != null)
-        strb.append("Owned By <").append(thread.getLockOwnerName()).append("> 
with ID <")
-            
.append(thread.getLockOwnerId()).append(">").append(System.lineSeparator());
+        stringBuilder.append("Owned By 
<").append(thread.getLockOwnerName()).append("> with ID <")
+            .append(thread.getLockOwnerId()).append(">").append(lineSeparator);
     }
 
 
-    strb.append("Executor Group 
<").append(groupName).append(">").append(System.lineSeparator())
+    stringBuilder.append("Executor Group 
<").append(groupName).append(">").append(
+        lineSeparator)
         .append("Monitored metric <ResourceManagerStats.numThreadsStuck>")
-        .append(System.lineSeparator());
+        .append(lineSeparator);
 
     if (logThreadDetails) {
-      strb.append("Thread Stack:").append(System.lineSeparator());
-      for (int i = 0; i < thread.getStackTrace().length; i++) {
-        String row = thread.getStackTrace()[i].toString();
-        strb.append(row).append(System.lineSeparator());
+      writeThreadStack(thread, "Thread stack:", stringBuilder);
+    }
+
+    if (thread.getLockOwnerName() != null) {
+      ThreadInfo lockOwnerThread = ManagementFactory.getThreadMXBean()
+          .getThreadInfo(thread.getLockOwnerId(), THREAD_DUMP_DEPTH);
+      if (lockOwnerThread != null) {
+        writeThreadStack(lockOwnerThread, LOCK_OWNER_THREAD_STACK, 
stringBuilder);
       }
     }
 
-    return strb.toString();
+    return stringBuilder.toString();
+  }
+
+  private void writeThreadStack(ThreadInfo thread, String header, 
StringBuilder strb) {
+    final String lineSeparator = System.lineSeparator();
+    strb.append(header).append(lineSeparator);
+    for (int i = 0; i < thread.getStackTrace().length; i++) {
+      String row = thread.getStackTrace()[i].toString();
+      strb.append(row).append(lineSeparator);
+    }
   }
 
   public long getStartTime() {
diff --git 
a/geode-core/src/test/java/org/apache/geode/internal/monitoring/executor/AbstractExecutorGroupJUnitTest.java
 
b/geode-core/src/test/java/org/apache/geode/internal/monitoring/executor/AbstractExecutorGroupJUnitTest.java
index de57c32..ece6c95 100644
--- 
a/geode-core/src/test/java/org/apache/geode/internal/monitoring/executor/AbstractExecutorGroupJUnitTest.java
+++ 
b/geode-core/src/test/java/org/apache/geode/internal/monitoring/executor/AbstractExecutorGroupJUnitTest.java
@@ -18,8 +18,10 @@ import static org.junit.Assert.assertTrue;
 
 import org.junit.Test;
 
+import org.apache.geode.test.awaitility.GeodeAwaitility;
+
 /**
- * Contains simple tests for the {@link ThreadMonitoringUtils}.
+ * Contains simple tests for the {@link AbstractExecutor}.
  *
  *
  * @since Geode 1.5
@@ -29,6 +31,8 @@ public class AbstractExecutorGroupJUnitTest {
   private final AbstractExecutor abstractExecutorGroup =
       new FunctionExecutionPooledExecutorGroup(null);
 
+  private static final long timeoutInMilliseconds = 
GeodeAwaitility.getTimeout().getValueInMS();
+
   @Test
   public void testInitializationValues() {
     assertTrue(abstractExecutorGroup.getStartTime() <= 
System.currentTimeMillis());
@@ -41,4 +45,63 @@ public class AbstractExecutorGroupJUnitTest {
     abstractExecutorGroup.handleExpiry(12);
     assertTrue(abstractExecutorGroup.getNumIterationsStuck() == 1);
   }
+
+  /**
+   * If a thread is blocked by another thread we want to see the other thread's
+   * stack in a "stuck thread" report. This test creates such a thread and
+   * generates a "stuck thread" report to make sure the report on the other 
thread
+   * is included.
+   */
+  @Test
+  public void lockOwnerThreadStackIsReported() throws InterruptedException {
+    final Object syncObject = new Object();
+    final Object releaseObject = new Object();
+    final boolean[] blockingThreadWaiting = new boolean[1];
+    final boolean[] blockedThreadWaiting = new boolean[1];
+    Thread blockingThread = new Thread("blocking thread") {
+      public void run() {
+        synchronized (syncObject) {
+          synchronized (releaseObject) {
+            try {
+              blockingThreadWaiting[0] = true;
+              releaseObject.wait(timeoutInMilliseconds);
+            } catch (InterruptedException e) {
+              return;
+            }
+          }
+        }
+      }
+    };
+    Thread blockedThread = new Thread("blocked thread") {
+      public void run() {
+        blockedThreadWaiting[0] = true;
+        synchronized (syncObject) {
+          try {
+            syncObject.wait(timeoutInMilliseconds);
+          } catch (InterruptedException e) {
+            return;
+          }
+        }
+      }
+    };
+    blockingThread.start();
+    GeodeAwaitility.await().until(() -> blockingThreadWaiting[0]);
+    blockedThread.start();
+    GeodeAwaitility.await().until(() -> blockedThreadWaiting[0]);
+    try {
+      AbstractExecutor executor = new AbstractExecutor(null, 
blockedThread.getId()) {
+        @Override
+        public void handleExpiry(long stuckTime) {
+          // no-op
+        }
+      };
+      String threadReport = executor.createThreadReport(60000);
+      
assertTrue(threadReport.contains(AbstractExecutor.LOCK_OWNER_THREAD_STACK));
+    } finally {
+      blockingThread.interrupt();
+      blockedThread.interrupt();
+      blockingThread.join(timeoutInMilliseconds);
+      blockedThread.join(timeoutInMilliseconds);
+    }
+  }
 }

Reply via email to