Repository: cassandra
Updated Branches:
  refs/heads/trunk 3cb00db3f -> 4047dd121


Failure detector detects and ignores local pauses

Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4047dd12
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4047dd12
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4047dd12

Branch: refs/heads/trunk
Commit: 4047dd1213ed99b3d7bec253b551b4cae911990a
Parents: 3cb00db
Author: Brandon Williams <brandonwilli...@apache.org>
Authored: Thu May 7 14:15:51 2015 -0500
Committer: Brandon Williams <brandonwilli...@apache.org>
Committed: Thu May 7 14:16:47 2015 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 29 ++++++++++++++++++++
 2 files changed, 30 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 5a8ee93..9cfc772 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
  * Remove Thrift dependencies in bundled tools (CASSANDRA-8358)
  * Disable memory mapping of hsperfdata file for JVM statistics 
(CASSANDRA-9242)
  * Add pre-startup checks to detect potential incompatibilities 
(CASSANDRA-8049)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/src/java/org/apache/cassandra/gms/FailureDetector.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java 
b/src/java/org/apache/cassandra/gms/FailureDetector.java
index fe825b6..b8c20d7 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
     public static final String MBEAN_NAME = 
"org.apache.cassandra.net:type=FailureDetector";
     private static final int SAMPLE_SIZE = 1000;
     protected static final long INITIAL_VALUE_NANOS = 
TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS);
+    private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 
seconds
+    private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+    private long lastInterpret = System.nanoTime();
+    private boolean wasPaused = false;
+
+    private static long getMaxLocalPause()
+    {
+        if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+        {
+            long pause = 
Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+            logger.warn("Overriding max local pause time to {}ms", pause);
+            return pause * 1000000L;
+        }
+        else
+            return DEFAULT_MAX_PAUSE;
+    }
 
     public static final IFailureDetector instance = new FailureDetector();
 
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
             return;
         }
         long now = System.nanoTime();
+        long diff = now - lastInterpret;
+        lastInterpret = now;
+        if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+        {
+            logger.warn("Not marking nodes down due to local pause of {} > 
{}", diff, MAX_LOCAL_PAUSE_IN_NANOS);
+            wasPaused = true;
+            return;
+        }
+        if (wasPaused)
+        {
+            wasPaused = false;
+            return;
+        }
         double phi = hbWnd.phi(now);
         if (logger.isTraceEnabled())
             logger.trace("PHI for {} : {}", ep, phi);

Reply via email to