Author: slaurent Date: Mon May 19 20:26:23 2014 New Revision: 1596047 URL: http://svn.apache.org/r1596047 Log: Fix https://issues.apache.org/bugzilla/show_bug.cgi?id=56526 Improved StuckThreadDetectionValve to allow to interrupt stuck threads. merged r1595331 from trunk
Added: tomcat/tc7.0.x/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java - copied unchanged from r1595331, tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java Modified: tomcat/tc7.0.x/trunk/ (props changed) tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml Propchange: tomcat/tc7.0.x/trunk/ ------------------------------------------------------------------------------ Merged /tomcat/trunk:r1595331 Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties?rev=1596047&r1=1596046&r2=1596047&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties Mon May 19 20:26:23 2014 @@ -57,6 +57,7 @@ sslValve.invalidProvider=The SSL provide #Stuck thread detection Valve stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" (id={6}) has been active for {1} milliseconds (since {2}) to serve the same request for {4} and may be stuck (configured threshold for this StuckThreadDetectionValve is {5} seconds). There is/are {3} thread(s) in total that are monitored by this Valve and may be stuck. stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" (id={3}) was previously reported to be stuck but has completed. It was active for approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s) that are monitored by this Valve and may be stuck.} +stuckThreadDetectionValve.notifyStuckThreadInterrupted=Thread "{0}" (id={5}) has been interrupted because it was active for {1} milliseconds (since {2}) to serve the same request for {3} and was probably stuck (configured interruption threshold for this StuckThreadDetectionValve is {4} seconds). # HTTP status reports # All status codes registered with IANA can be found at Modified: tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java?rev=1596047&r1=1596046&r2=1596047&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java (original) +++ tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java Mon May 19 20:26:23 2014 @@ -23,7 +23,9 @@ import java.util.List; import java.util.Queue; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.Semaphore; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; import javax.servlet.ServletException; @@ -62,11 +64,21 @@ public class StuckThreadDetectionValve e private final AtomicInteger stuckCount = new AtomicInteger(0); /** + * Keeps count of the number of stuck threads that have been interruoted + */ + private AtomicLong interruptedThreadsCount = new AtomicLong(); + + /** * In seconds. Default 600 (10 minutes). */ private int threshold = 600; /** + * In seconds. Default is -1 to disable interruption. + */ + private int interruptThreadThreshold; + + /** * The only references we keep to actual running Thread objects are in * this Map (which is automatically cleaned in invoke()s finally clause). * That way, Threads can be GC'ed, eventhough the Valve still thinks they @@ -81,7 +93,7 @@ public class StuckThreadDetectionValve e new ConcurrentLinkedQueue<CompletedStuckThread>(); /** - * Specify the threshold (in seconds) used when checking for stuck threads. + * Specifies the threshold (in seconds) used when checking for stuck threads. * If <=0, the detection is disabled. The default is 600 seconds. * * @param threshold @@ -100,6 +112,22 @@ public class StuckThreadDetectionValve e } + public int getInterruptThreadThreshold() { + return interruptThreadThreshold; + } + + /** + * Specifies the threshold (in seconds) before stuck threads are interrupted. + * If <=0, the interruption is disabled. The default is -1. + * If >=0, the value must actually be >= threshold. + * + * @param interruptThreadThreshold + * The new thread interruption threshold in seconds + */ + public void setInterruptThreadThreshold(int interruptThreadThreshold) { + this.interruptThreadThreshold = interruptThreadThreshold; + } + /** * Required to enable async support. */ @@ -186,7 +214,7 @@ public class StuckThreadDetectionValve e requestUrl.append(request.getQueryString()); } MonitoredThread monitoredThread = new MonitoredThread(Thread.currentThread(), - requestUrl.toString()); + requestUrl.toString(), interruptThreadThreshold > 0); activeThreads.put(key, monitoredThread); try { @@ -194,6 +222,9 @@ public class StuckThreadDetectionValve e } finally { activeThreads.remove(key); if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) { + if(monitoredThread.wasInterrupted()) { + interruptedThreadsCount.incrementAndGet(); + } completedStuckThreadsQueue.add( new CompletedStuckThread(monitoredThread.getThread(), monitoredThread.getActiveTimeInMillis())); @@ -205,7 +236,7 @@ public class StuckThreadDetectionValve e public void backgroundProcess() { super.backgroundProcess(); - long thresholdInMillis = threshold * 1000; + long thresholdInMillis = threshold * 1000L; // Check monitored threads, being careful that the request might have // completed by the time we examine it @@ -216,6 +247,9 @@ public class StuckThreadDetectionValve e int numStuckThreads = stuckCount.incrementAndGet(); notifyStuckThreadDetected(monitoredThread, activeTime, numStuckThreads); } + if(interruptThreadThreshold > 0 && activeTime >= interruptThreadThreshold*1000L) { + monitoredThread.interruptIfStuck(interruptThreadThreshold); + } } // Check if any threads previously reported as stuck, have finished. for (CompletedStuckThread completedStuckThread = completedStuckThreadsQueue.poll(); @@ -251,6 +285,11 @@ public class StuckThreadDetectionValve e return nameList.toArray(new String[nameList.size()]); } + public long getInterruptedThreadsCount() { + return interruptedThreadsCount.get(); + } + + private static class MonitoredThread { /** @@ -261,11 +300,27 @@ public class StuckThreadDetectionValve e private final long start; private final AtomicInteger state = new AtomicInteger( MonitoredThreadState.RUNNING.ordinal()); + /** + * Semaphore to synchronize the stuck thread with the background-process + * thread. It's not used if the interruption feature is not active. + */ + private final Semaphore interruptionSemaphore; + /** + * Set to true after the thread is interrupted. No need to make it + * volatile since it is accessed right after acquiring the semaphore. + */ + private boolean interrupted; - public MonitoredThread(Thread thread, String requestUri) { + public MonitoredThread(Thread thread, String requestUri, + boolean interruptible) { this.thread = thread; this.requestUri = requestUri; this.start = System.currentTimeMillis(); + if (interruptible) { + interruptionSemaphore = new Semaphore(1); + } else { + interruptionSemaphore = null; + } } public Thread getThread() { @@ -291,12 +346,63 @@ public class StuckThreadDetectionValve e public MonitoredThreadState markAsDone() { int val = this.state.getAndSet(MonitoredThreadState.DONE.ordinal()); - return MonitoredThreadState.values()[val]; + MonitoredThreadState threadState = MonitoredThreadState.values()[val]; + + if (threadState == MonitoredThreadState.STUCK + && interruptionSemaphore != null) { + try { + // use the semaphore to synchronize with the background thread + // which might try to interrupt this current thread. + // Otherwise, the current thread might be interrupted after + // going out from here, maybe already serving a new request + this.interruptionSemaphore.acquire(); + } catch (InterruptedException e) { + log.debug( + "thread interrupted after the request is finished, ignoring", + e); + } + // no need to release the semaphore, it will be GCed + } + //else the request went through before being marked as stuck, no need + //to sync agains the semaphore + return threadState; } boolean isMarkedAsStuck() { return this.state.get() == MonitoredThreadState.STUCK.ordinal(); } + + public boolean interruptIfStuck(long interruptThreadThreshold) { + if (!isMarkedAsStuck() || interruptionSemaphore == null + || !this.interruptionSemaphore.tryAcquire()) { + // if the semaphore is already acquired, it means that the + // request thread got unstuck before we interrupted it + return false; + } + try { + if (log.isWarnEnabled()) { + String msg = sm.getString( + "stuckThreadDetectionValve.notifyStuckThreadInterrupted", + this.getThread().getName(), + Long.valueOf(getActiveTimeInMillis()), + this.getStartTime(), this.getRequestUri(), + Long.valueOf(interruptThreadThreshold), + String.valueOf(this.getThread().getId())); + Throwable th = new Throwable(); + th.setStackTrace(this.getThread().getStackTrace()); + log.warn(msg, th); + } + this.thread.interrupt(); + } finally { + this.interrupted = true; + this.interruptionSemaphore.release(); + } + return true; + } + + public boolean wasInterrupted() { + return interrupted; + } } private static class CompletedStuckThread { Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1596047&r1=1596046&r2=1596047&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original) +++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Mon May 19 20:26:23 2014 @@ -55,6 +55,17 @@ They eventually become mixed with the numbered issues. (I.e., numbered issues to not "pop up" wrt. others). --> +<section name="Tomcat 7.0.55"> + <subsection name="Catalina"> + <changelog> + <add> + <bug>56526</bug>: Improved the <code>StuckThreadDetectionValve</code> to + optionally interrupt stuck threads to attempt to unblock them. + (slaurent) + </add> + </changelog> + </subsection> +</section> <section name="Tomcat 7.0.54 (violetagg)"> <subsection name="Catalina"> <changelog> Modified: tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml URL: http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml?rev=1596047&r1=1596046&r2=1596047&view=diff ============================================================================== --- tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml (original) +++ tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml Mon May 19 20:26:23 2014 @@ -1541,10 +1541,12 @@ <subsection name="Introduction"> - <p>This valve allows to detect requests that take a long time to process, which might - indicate that the thread that is processing it is stuck.</p> - <p>When such a request is detected, the current stack trace of its thread is written - to Tomcat log with a WARN level.</p> + <p>This valve allows to detect requests that take a long time to process, + which might indicate that the thread that is processing it is stuck. + Additionally it can optionally interrupt such threads to try and unblock + them.</p> + <p>When such a request is detected, the current stack trace of its thread is + written to Tomcat log with a WARN level.</p> <p>The IDs and names of the stuck threads are available through JMX in the <code>stuckThreadIds</code> and <code>stuckThreadNames</code> attributes. The IDs can be used with the standard Threading JVM MBean @@ -1569,9 +1571,20 @@ <attribute name="threshold" required="false"> <p>Minimum duration in seconds after which a thread is considered stuck. Default is 600 seconds. If set to 0, the detection is disabled.</p> - <p>Note: since the detection is done in the background thread of the Container - (Engine, Host or Context) declaring this Valve, the threshold should be higher - than the <code>backgroundProcessorDelay</code> of this Container.</p> + <p>Note: since the detection (and optional interruption) is done in the + background thread of the Container (Engine, Host or Context) declaring + this Valve, the threshold should be higher than the + <code>backgroundProcessorDelay</code> of this Container.</p> + </attribute> + + <attribute name="interruptThreadThreshold" required="false"> + <p>Minimum duration in seconds after which a stuck thread should be + interrupted to attempt to "free" it.</p> + <p>Note that there's no guarantee that the thread will get unstuck. + This usually works well for threads stuck on I/O or locks, but is + probably useless in case of infinite loops.</p> + <p>Default is -1 which disables the feature. To enable it, the value + must be greater or equal to <code>threshold</code>.</p> </attribute> </attributes> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org