lhotari commented on code in PR #23634:
URL: https://github.com/apache/pulsar/pull/23634#discussion_r2360128756


##########
pulsar-broker-common/src/main/java/org/apache/pulsar/common/configuration/VipStatus.java:
##########
@@ -38,25 +46,84 @@ public class VipStatus {
     public static final String ATTRIBUTE_STATUS_FILE_PATH = "statusFilePath";
     public static final String ATTRIBUTE_IS_READY_PROBE = "isReadyProbe";
 
+    // log a full thread dump when a deadlock is detected in status check once 
every 10 minutes
+    // to prevent excessive logging
+    private static final long LOG_THREADDUMP_INTERVAL_WHEN_DEADLOCK_DETECTED = 
600000L;
+    // Rate limit status checks to every 500ms to prevent DoS
+    private static final long CHECK_STATUS_INTERVAL = 500L;
+
+    private static volatile long lastCheckStatusTimestamp;
+    private static volatile long lastPrintThreadDumpTimestamp;
+    private static volatile boolean lastCheckStatusResult;
+
+    private long printThreadDumpIntervalMs;
+    private Clock clock;
+
     @Context
     protected ServletContext servletContext;
 
+    public VipStatus() {
+        this.clock = Clock.systemUTC();
+        this.printThreadDumpIntervalMs = 
LOG_THREADDUMP_INTERVAL_WHEN_DEADLOCK_DETECTED;
+    }
+
+    @VisibleForTesting
+    public VipStatus(ServletContext servletContext, long 
printThreadDumpIntervalMs) {
+        this.servletContext = servletContext;
+        this.printThreadDumpIntervalMs = printThreadDumpIntervalMs;
+        this.clock = Clock.systemUTC();
+    }
+
     @GET
     public String checkStatus() {
-        String statusFilePath = (String) 
servletContext.getAttribute(ATTRIBUTE_STATUS_FILE_PATH);
-        @SuppressWarnings("unchecked")
-        Supplier<Boolean> isReadyProbe = (Supplier<Boolean>) 
servletContext.getAttribute(ATTRIBUTE_IS_READY_PROBE);
+        // Locking classes to avoid deadlock detection in multi-thread 
concurrent requests.
+        synchronized (VipStatus.class) {
+            if (clock.millis() - lastCheckStatusTimestamp < 
CHECK_STATUS_INTERVAL) {
+                if (lastCheckStatusResult) {
+                    return "OK";
+                } else {
+                    throw new 
WebApplicationException(Status.SERVICE_UNAVAILABLE);
+                }
+            }
+            lastCheckStatusTimestamp = clock.millis();
 
-        boolean isReady = isReadyProbe != null ? isReadyProbe.get() : true;
+            String statusFilePath = (String) 
servletContext.getAttribute(ATTRIBUTE_STATUS_FILE_PATH);
+            @SuppressWarnings("unchecked")
+            Supplier<Boolean> isReadyProbe = (Supplier<Boolean>) 
servletContext.getAttribute(ATTRIBUTE_IS_READY_PROBE);
+            boolean isReady = isReadyProbe != null ? isReadyProbe.get() : true;
 
-        if (statusFilePath != null) {
-            File statusFile = new File(statusFilePath);
-            if (isReady && statusFile.exists() && statusFile.isFile()) {
-                return "OK";
+            if (statusFilePath != null) {
+                File statusFile = new File(statusFilePath);
+                if (isReady && statusFile.exists() && statusFile.isFile()) {
+                    // check deadlock
+                    ThreadMXBean threadBean = 
ManagementFactory.getThreadMXBean();
+                    long[] threadIds = threadBean.findDeadlockedThreads();
+                    if (threadIds != null && threadIds.length > 0) {
+                        ThreadInfo[] threadInfos = 
threadBean.getThreadInfo(threadIds, false,
+                                false);
+                        String threadNames = Arrays.stream(threadInfos)
+                                .map(threadInfo -> threadInfo.getThreadName()
+                                        + "(tid=" + threadInfo.getThreadId() + 
")")
+                                .collect(Collectors.joining(", "));
+                        if (clock.millis() - lastPrintThreadDumpTimestamp > 
printThreadDumpIntervalMs) {
+                            String diagnosticResult = 
ThreadDumpUtil.buildThreadDiagnosticString();
+                            log.error("Deadlock detected, service may be 
unavailable, "
+                                    + "thread stack details are as follows: 
{}.", diagnosticResult);

Review Comment:
   It's better that the message is similar in both cases, and so that it 
includes the thread names. That will make it easier to find the information in 
logs. Some logging systems might cut off long log entries.
   ```suggestion
                               log.error("Deadlocked threads detected. {}. 
Service may be unavailable, "
                                       + "thread stack details are as follows: 
{}.", threadNames, diagnosticResult);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@pulsar.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to