lhotari commented on code in PR #23634: URL: https://github.com/apache/pulsar/pull/23634#discussion_r2360128756
########## pulsar-broker-common/src/main/java/org/apache/pulsar/common/configuration/VipStatus.java: ########## @@ -38,25 +46,84 @@ public class VipStatus { public static final String ATTRIBUTE_STATUS_FILE_PATH = "statusFilePath"; public static final String ATTRIBUTE_IS_READY_PROBE = "isReadyProbe"; + // log a full thread dump when a deadlock is detected in status check once every 10 minutes + // to prevent excessive logging + private static final long LOG_THREADDUMP_INTERVAL_WHEN_DEADLOCK_DETECTED = 600000L; + // Rate limit status checks to every 500ms to prevent DoS + private static final long CHECK_STATUS_INTERVAL = 500L; + + private static volatile long lastCheckStatusTimestamp; + private static volatile long lastPrintThreadDumpTimestamp; + private static volatile boolean lastCheckStatusResult; + + private long printThreadDumpIntervalMs; + private Clock clock; + @Context protected ServletContext servletContext; + public VipStatus() { + this.clock = Clock.systemUTC(); + this.printThreadDumpIntervalMs = LOG_THREADDUMP_INTERVAL_WHEN_DEADLOCK_DETECTED; + } + + @VisibleForTesting + public VipStatus(ServletContext servletContext, long printThreadDumpIntervalMs) { + this.servletContext = servletContext; + this.printThreadDumpIntervalMs = printThreadDumpIntervalMs; + this.clock = Clock.systemUTC(); + } + @GET public String checkStatus() { - String statusFilePath = (String) servletContext.getAttribute(ATTRIBUTE_STATUS_FILE_PATH); - @SuppressWarnings("unchecked") - Supplier<Boolean> isReadyProbe = (Supplier<Boolean>) servletContext.getAttribute(ATTRIBUTE_IS_READY_PROBE); + // Locking classes to avoid deadlock detection in multi-thread concurrent requests. + synchronized (VipStatus.class) { + if (clock.millis() - lastCheckStatusTimestamp < CHECK_STATUS_INTERVAL) { + if (lastCheckStatusResult) { + return "OK"; + } else { + throw new WebApplicationException(Status.SERVICE_UNAVAILABLE); + } + } + lastCheckStatusTimestamp = clock.millis(); - boolean isReady = isReadyProbe != null ? isReadyProbe.get() : true; + String statusFilePath = (String) servletContext.getAttribute(ATTRIBUTE_STATUS_FILE_PATH); + @SuppressWarnings("unchecked") + Supplier<Boolean> isReadyProbe = (Supplier<Boolean>) servletContext.getAttribute(ATTRIBUTE_IS_READY_PROBE); + boolean isReady = isReadyProbe != null ? isReadyProbe.get() : true; - if (statusFilePath != null) { - File statusFile = new File(statusFilePath); - if (isReady && statusFile.exists() && statusFile.isFile()) { - return "OK"; + if (statusFilePath != null) { + File statusFile = new File(statusFilePath); + if (isReady && statusFile.exists() && statusFile.isFile()) { + // check deadlock + ThreadMXBean threadBean = ManagementFactory.getThreadMXBean(); + long[] threadIds = threadBean.findDeadlockedThreads(); + if (threadIds != null && threadIds.length > 0) { + ThreadInfo[] threadInfos = threadBean.getThreadInfo(threadIds, false, + false); + String threadNames = Arrays.stream(threadInfos) + .map(threadInfo -> threadInfo.getThreadName() + + "(tid=" + threadInfo.getThreadId() + ")") + .collect(Collectors.joining(", ")); + if (clock.millis() - lastPrintThreadDumpTimestamp > printThreadDumpIntervalMs) { + String diagnosticResult = ThreadDumpUtil.buildThreadDiagnosticString(); + log.error("Deadlock detected, service may be unavailable, " + + "thread stack details are as follows: {}.", diagnosticResult); Review Comment: It's better that the message is similar in both cases, and so that it includes the thread names. That will make it easier to find the information in logs. Some logging systems might cut off long log entries. ```suggestion log.error("Deadlocked threads detected. {}. Service may be unavailable, " + "thread stack details are as follows: {}.", threadNames, diagnosticResult); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@pulsar.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org