Author: slaurent
Date: Mon May 19 20:26:23 2014
New Revision: 1596047
URL: http://svn.apache.org/r1596047
Log:
Fix https://issues.apache.org/bugzilla/show_bug.cgi?id=56526
Improved StuckThreadDetectionValve to allow to interrupt stuck threads.
merged r1595331 from trunk
Added:
tomcat/tc7.0.x/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
- copied unchanged from r1595331,
tomcat/trunk/test/org/apache/catalina/valves/TestStuckThreadDetectionValve.java
Modified:
tomcat/tc7.0.x/trunk/ (props changed)
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml
Propchange: tomcat/tc7.0.x/trunk/
------------------------------------------------------------------------------
Merged /tomcat/trunk:r1595331
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties?rev=1596047&r1=1596046&r2=1596047&view=diff
==============================================================================
---
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties
(original)
+++
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/LocalStrings.properties
Mon May 19 20:26:23 2014
@@ -57,6 +57,7 @@ sslValve.invalidProvider=The SSL provide
#Stuck thread detection Valve
stuckThreadDetectionValve.notifyStuckThreadDetected=Thread "{0}" (id={6}) has
been active for {1} milliseconds (since {2}) to serve the same request for {4}
and may be stuck (configured threshold for this StuckThreadDetectionValve is
{5} seconds). There is/are {3} thread(s) in total that are monitored by this
Valve and may be stuck.
stuckThreadDetectionValve.notifyStuckThreadCompleted=Thread "{0}" (id={3}) was
previously reported to be stuck but has completed. It was active for
approximately {1} milliseconds.{2,choice,0#|0< There is/are still {2} thread(s)
that are monitored by this Valve and may be stuck.}
+stuckThreadDetectionValve.notifyStuckThreadInterrupted=Thread "{0}" (id={5})
has been interrupted because it was active for {1} milliseconds (since {2}) to
serve the same request for {3} and was probably stuck (configured interruption
threshold for this StuckThreadDetectionValve is {4} seconds).
# HTTP status reports
# All status codes registered with IANA can be found at
Modified:
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java?rev=1596047&r1=1596046&r2=1596047&view=diff
==============================================================================
---
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
(original)
+++
tomcat/tc7.0.x/trunk/java/org/apache/catalina/valves/StuckThreadDetectionValve.java
Mon May 19 20:26:23 2014
@@ -23,7 +23,9 @@ import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
import javax.servlet.ServletException;
@@ -62,11 +64,21 @@ public class StuckThreadDetectionValve e
private final AtomicInteger stuckCount = new AtomicInteger(0);
/**
+ * Keeps count of the number of stuck threads that have been interruoted
+ */
+ private AtomicLong interruptedThreadsCount = new AtomicLong();
+
+ /**
* In seconds. Default 600 (10 minutes).
*/
private int threshold = 600;
/**
+ * In seconds. Default is -1 to disable interruption.
+ */
+ private int interruptThreadThreshold;
+
+ /**
* The only references we keep to actual running Thread objects are in
* this Map (which is automatically cleaned in invoke()s finally clause).
* That way, Threads can be GC'ed, eventhough the Valve still thinks they
@@ -81,7 +93,7 @@ public class StuckThreadDetectionValve e
new ConcurrentLinkedQueue<CompletedStuckThread>();
/**
- * Specify the threshold (in seconds) used when checking for stuck threads.
+ * Specifies the threshold (in seconds) used when checking for stuck
threads.
* If <=0, the detection is disabled. The default is 600 seconds.
*
* @param threshold
@@ -100,6 +112,22 @@ public class StuckThreadDetectionValve e
}
+ public int getInterruptThreadThreshold() {
+ return interruptThreadThreshold;
+ }
+
+ /**
+ * Specifies the threshold (in seconds) before stuck threads are
interrupted.
+ * If <=0, the interruption is disabled. The default is -1.
+ * If >=0, the value must actually be >= threshold.
+ *
+ * @param interruptThreadThreshold
+ * The new thread interruption threshold in seconds
+ */
+ public void setInterruptThreadThreshold(int interruptThreadThreshold) {
+ this.interruptThreadThreshold = interruptThreadThreshold;
+ }
+
/**
* Required to enable async support.
*/
@@ -186,7 +214,7 @@ public class StuckThreadDetectionValve e
requestUrl.append(request.getQueryString());
}
MonitoredThread monitoredThread = new
MonitoredThread(Thread.currentThread(),
- requestUrl.toString());
+ requestUrl.toString(), interruptThreadThreshold > 0);
activeThreads.put(key, monitoredThread);
try {
@@ -194,6 +222,9 @@ public class StuckThreadDetectionValve e
} finally {
activeThreads.remove(key);
if (monitoredThread.markAsDone() == MonitoredThreadState.STUCK) {
+ if(monitoredThread.wasInterrupted()) {
+ interruptedThreadsCount.incrementAndGet();
+ }
completedStuckThreadsQueue.add(
new CompletedStuckThread(monitoredThread.getThread(),
monitoredThread.getActiveTimeInMillis()));
@@ -205,7 +236,7 @@ public class StuckThreadDetectionValve e
public void backgroundProcess() {
super.backgroundProcess();
- long thresholdInMillis = threshold * 1000;
+ long thresholdInMillis = threshold * 1000L;
// Check monitored threads, being careful that the request might have
// completed by the time we examine it
@@ -216,6 +247,9 @@ public class StuckThreadDetectionValve e
int numStuckThreads = stuckCount.incrementAndGet();
notifyStuckThreadDetected(monitoredThread, activeTime,
numStuckThreads);
}
+ if(interruptThreadThreshold > 0 && activeTime >=
interruptThreadThreshold*1000L) {
+ monitoredThread.interruptIfStuck(interruptThreadThreshold);
+ }
}
// Check if any threads previously reported as stuck, have finished.
for (CompletedStuckThread completedStuckThread =
completedStuckThreadsQueue.poll();
@@ -251,6 +285,11 @@ public class StuckThreadDetectionValve e
return nameList.toArray(new String[nameList.size()]);
}
+ public long getInterruptedThreadsCount() {
+ return interruptedThreadsCount.get();
+ }
+
+
private static class MonitoredThread {
/**
@@ -261,11 +300,27 @@ public class StuckThreadDetectionValve e
private final long start;
private final AtomicInteger state = new AtomicInteger(
MonitoredThreadState.RUNNING.ordinal());
+ /**
+ * Semaphore to synchronize the stuck thread with the
background-process
+ * thread. It's not used if the interruption feature is not active.
+ */
+ private final Semaphore interruptionSemaphore;
+ /**
+ * Set to true after the thread is interrupted. No need to make it
+ * volatile since it is accessed right after acquiring the semaphore.
+ */
+ private boolean interrupted;
- public MonitoredThread(Thread thread, String requestUri) {
+ public MonitoredThread(Thread thread, String requestUri,
+ boolean interruptible) {
this.thread = thread;
this.requestUri = requestUri;
this.start = System.currentTimeMillis();
+ if (interruptible) {
+ interruptionSemaphore = new Semaphore(1);
+ } else {
+ interruptionSemaphore = null;
+ }
}
public Thread getThread() {
@@ -291,12 +346,63 @@ public class StuckThreadDetectionValve e
public MonitoredThreadState markAsDone() {
int val =
this.state.getAndSet(MonitoredThreadState.DONE.ordinal());
- return MonitoredThreadState.values()[val];
+ MonitoredThreadState threadState =
MonitoredThreadState.values()[val];
+
+ if (threadState == MonitoredThreadState.STUCK
+ && interruptionSemaphore != null) {
+ try {
+ // use the semaphore to synchronize with the background
thread
+ // which might try to interrupt this current thread.
+ // Otherwise, the current thread might be interrupted after
+ // going out from here, maybe already serving a new request
+ this.interruptionSemaphore.acquire();
+ } catch (InterruptedException e) {
+ log.debug(
+ "thread interrupted after the request is finished,
ignoring",
+ e);
+ }
+ // no need to release the semaphore, it will be GCed
+ }
+ //else the request went through before being marked as stuck, no
need
+ //to sync agains the semaphore
+ return threadState;
}
boolean isMarkedAsStuck() {
return this.state.get() == MonitoredThreadState.STUCK.ordinal();
}
+
+ public boolean interruptIfStuck(long interruptThreadThreshold) {
+ if (!isMarkedAsStuck() || interruptionSemaphore == null
+ || !this.interruptionSemaphore.tryAcquire()) {
+ // if the semaphore is already acquired, it means that the
+ // request thread got unstuck before we interrupted it
+ return false;
+ }
+ try {
+ if (log.isWarnEnabled()) {
+ String msg = sm.getString(
+
"stuckThreadDetectionValve.notifyStuckThreadInterrupted",
+ this.getThread().getName(),
+ Long.valueOf(getActiveTimeInMillis()),
+ this.getStartTime(), this.getRequestUri(),
+ Long.valueOf(interruptThreadThreshold),
+ String.valueOf(this.getThread().getId()));
+ Throwable th = new Throwable();
+ th.setStackTrace(this.getThread().getStackTrace());
+ log.warn(msg, th);
+ }
+ this.thread.interrupt();
+ } finally {
+ this.interrupted = true;
+ this.interruptionSemaphore.release();
+ }
+ return true;
+ }
+
+ public boolean wasInterrupted() {
+ return interrupted;
+ }
}
private static class CompletedStuckThread {
Modified: tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml?rev=1596047&r1=1596046&r2=1596047&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml (original)
+++ tomcat/tc7.0.x/trunk/webapps/docs/changelog.xml Mon May 19 20:26:23 2014
@@ -55,6 +55,17 @@
They eventually become mixed with the numbered issues. (I.e., numbered
issues to not "pop up" wrt. others).
-->
+<section name="Tomcat 7.0.55">
+ <subsection name="Catalina">
+ <changelog>
+ <add>
+ <bug>56526</bug>: Improved the <code>StuckThreadDetectionValve</code>
to
+ optionally interrupt stuck threads to attempt to unblock them.
+ (slaurent)
+ </add>
+ </changelog>
+ </subsection>
+</section>
<section name="Tomcat 7.0.54 (violetagg)">
<subsection name="Catalina">
<changelog>
Modified: tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml
URL:
http://svn.apache.org/viewvc/tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml?rev=1596047&r1=1596046&r2=1596047&view=diff
==============================================================================
--- tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml (original)
+++ tomcat/tc7.0.x/trunk/webapps/docs/config/valve.xml Mon May 19 20:26:23 2014
@@ -1541,10 +1541,12 @@
<subsection name="Introduction">
- <p>This valve allows to detect requests that take a long time to process,
which might
- indicate that the thread that is processing it is stuck.</p>
- <p>When such a request is detected, the current stack trace of its thread
is written
- to Tomcat log with a WARN level.</p>
+ <p>This valve allows to detect requests that take a long time to process,
+ which might indicate that the thread that is processing it is stuck.
+ Additionally it can optionally interrupt such threads to try and unblock
+ them.</p>
+ <p>When such a request is detected, the current stack trace of its thread
is
+ written to Tomcat log with a WARN level.</p>
<p>The IDs and names of the stuck threads are available through JMX in the
<code>stuckThreadIds</code> and <code>stuckThreadNames</code> attributes.
The IDs can be used with the standard Threading JVM MBean
@@ -1569,9 +1571,20 @@
<attribute name="threshold" required="false">
<p>Minimum duration in seconds after which a thread is considered
stuck.
Default is 600 seconds. If set to 0, the detection is disabled.</p>
- <p>Note: since the detection is done in the background thread of the
Container
- (Engine, Host or Context) declaring this Valve, the threshold should
be higher
- than the <code>backgroundProcessorDelay</code> of this Container.</p>
+ <p>Note: since the detection (and optional interruption) is done in the
+ background thread of the Container (Engine, Host or Context) declaring
+ this Valve, the threshold should be higher than the
+ <code>backgroundProcessorDelay</code> of this Container.</p>
+ </attribute>
+
+ <attribute name="interruptThreadThreshold" required="false">
+ <p>Minimum duration in seconds after which a stuck thread should be
+ interrupted to attempt to "free" it.</p>
+ <p>Note that there's no guarantee that the thread will get unstuck.
+ This usually works well for threads stuck on I/O or locks, but is
+ probably useless in case of infinite loops.</p>
+ <p>Default is -1 which disables the feature. To enable it, the value
+ must be greater or equal to <code>threshold</code>.</p>
</attribute>
</attributes>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]