[1/4] cassandra git commit: Failure detector detects and ignores local pauses

2015-05-12 Thread brandonwilliams
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.1 75e5b3b55 -> 4012134f6
  refs/heads/trunk 68b2d2858 -> 54956e984


Failure detector detects and ignores local pauses

Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f

Branch: refs/heads/cassandra-2.1
Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67
Parents: 75e5b3b
Author: Brandon Williams brandonwilli...@apache.org
Authored: Tue May 12 18:38:48 2015 -0500
Committer: Brandon Williams brandonwilli...@apache.org
Committed: Tue May 12 18:38:48 2015 -0500

--
 CHANGES.txt |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 29 
 2 files changed, 30 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 1643f9c..7cb0dfd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.6
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
  * Add utility class to support for rate limiting a given log statement 
(CASSANDRA-9029)
  * Add missing consistency levels to cassandra-stess (CASSANDRA-9361)
  * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java
--
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java 
b/src/java/org/apache/cassandra/gms/FailureDetector.java
index 0c40ae3..322aae2 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 public static final String MBEAN_NAME = 
"org.apache.cassandra.net:type=FailureDetector";
 private static final int SAMPLE_SIZE = 1000;
 protected static final long INITIAL_VALUE_NANOS = 
TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS);
+private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 
seconds
+private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+private long lastInterpret = System.nanoTime();
+private boolean wasPaused = false;
+
+private static long getMaxLocalPause()
+{
+if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+{
+long pause = 
Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+logger.warn("Overriding max local pause time to {}ms", pause);
+return pause * 1000000L;
+}
+else
+return DEFAULT_MAX_PAUSE;
+}
 
 public static final IFailureDetector instance = new FailureDetector();
 
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 return;
 }
 long now = System.nanoTime();
+long diff = now - lastInterpret;
+lastInterpret = now;
+if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+{
+logger.warn("Not marking nodes down due to local pause of {} > 
{}", diff, MAX_LOCAL_PAUSE_IN_NANOS);
+wasPaused = true;
+return;
+}
+if (wasPaused)
+{
+wasPaused = false;
+return;
+}
 double phi = hbWnd.phi(now);
 if (logger.isTraceEnabled())
 logger.trace("PHI for " + ep + " : " + phi);



[2/4] cassandra git commit: Failure detector detects and ignores local pauses

2015-05-12 Thread brandonwilliams
Failure detector detects and ignores local pauses

Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f

Branch: refs/heads/trunk
Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67
Parents: 75e5b3b
Author: Brandon Williams brandonwilli...@apache.org
Authored: Tue May 12 18:38:48 2015 -0500
Committer: Brandon Williams brandonwilli...@apache.org
Committed: Tue May 12 18:38:48 2015 -0500

--
 CHANGES.txt |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 29 
 2 files changed, 30 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 1643f9c..7cb0dfd 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.1.6
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
  * Add utility class to support for rate limiting a given log statement 
(CASSANDRA-9029)
  * Add missing consistency levels to cassandra-stess (CASSANDRA-9361)
  * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java
--
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java 
b/src/java/org/apache/cassandra/gms/FailureDetector.java
index 0c40ae3..322aae2 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 public static final String MBEAN_NAME = 
"org.apache.cassandra.net:type=FailureDetector";
 private static final int SAMPLE_SIZE = 1000;
 protected static final long INITIAL_VALUE_NANOS = 
TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS);
+private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 
seconds
+private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+private long lastInterpret = System.nanoTime();
+private boolean wasPaused = false;
+
+private static long getMaxLocalPause()
+{
+if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+{
+long pause = 
Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+logger.warn("Overriding max local pause time to {}ms", pause);
+return pause * 1000000L;
+}
+else
+return DEFAULT_MAX_PAUSE;
+}
 
 public static final IFailureDetector instance = new FailureDetector();
 
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 return;
 }
 long now = System.nanoTime();
+long diff = now - lastInterpret;
+lastInterpret = now;
+if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+{
+logger.warn("Not marking nodes down due to local pause of {} > 
{}", diff, MAX_LOCAL_PAUSE_IN_NANOS);
+wasPaused = true;
+return;
+}
+if (wasPaused)
+{
+wasPaused = false;
+return;
+}
 double phi = hbWnd.phi(now);
 if (logger.isTraceEnabled())
 logger.trace("PHI for " + ep + " : " + phi);



cassandra git commit: Failure detector detects and ignores local pauses

2015-05-07 Thread brandonwilliams
Repository: cassandra
Updated Branches:
  refs/heads/trunk 3cb00db3f -> 4047dd121


Failure detector detects and ignores local pauses

Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4047dd12
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4047dd12
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4047dd12

Branch: refs/heads/trunk
Commit: 4047dd1213ed99b3d7bec253b551b4cae911990a
Parents: 3cb00db
Author: Brandon Williams brandonwilli...@apache.org
Authored: Thu May 7 14:15:51 2015 -0500
Committer: Brandon Williams brandonwilli...@apache.org
Committed: Thu May 7 14:16:47 2015 -0500

--
 CHANGES.txt |  1 +
 .../apache/cassandra/gms/FailureDetector.java   | 29 
 2 files changed, 30 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 5a8ee93..9cfc772 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 3.0
+ * Failure detector detects and ignores local pauses (CASSANDRA-9183)
  * Remove Thrift dependencies in bundled tools (CASSANDRA-8358)
  * Disable memory mapping of hsperfdata file for JVM statistics 
(CASSANDRA-9242)
  * Add pre-startup checks to detect potential incompatibilities 
(CASSANDRA-8049)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/src/java/org/apache/cassandra/gms/FailureDetector.java
--
diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java 
b/src/java/org/apache/cassandra/gms/FailureDetector.java
index fe825b6..b8c20d7 100644
--- a/src/java/org/apache/cassandra/gms/FailureDetector.java
+++ b/src/java/org/apache/cassandra/gms/FailureDetector.java
@@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 public static final String MBEAN_NAME = 
"org.apache.cassandra.net:type=FailureDetector";
 private static final int SAMPLE_SIZE = 1000;
 protected static final long INITIAL_VALUE_NANOS = 
TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS);
+private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 
seconds
+private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause();
+private long lastInterpret = System.nanoTime();
+private boolean wasPaused = false;
+
+private static long getMaxLocalPause()
+{
+if (System.getProperty("cassandra.max_local_pause_in_ms") != null)
+{
+long pause = 
Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
+logger.warn("Overriding max local pause time to {}ms", pause);
+return pause * 1000000L;
+}
+else
+return DEFAULT_MAX_PAUSE;
+}
 
 public static final IFailureDetector instance = new FailureDetector();
 
@@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, 
FailureDetectorMBean
 return;
 }
 long now = System.nanoTime();
+long diff = now - lastInterpret;
+lastInterpret = now;
+if (diff > MAX_LOCAL_PAUSE_IN_NANOS)
+{
+logger.warn("Not marking nodes down due to local pause of {} > 
{}", diff, MAX_LOCAL_PAUSE_IN_NANOS);
+wasPaused = true;
+return;
+}
+if (wasPaused)
+{
+wasPaused = false;
+return;
+}
 double phi = hbWnd.phi(now);
 if (logger.isTraceEnabled())
 logger.trace("PHI for {} : {}", ep, phi);