[1/4] cassandra git commit: Failure detector detects and ignores local pauses
Repository: cassandra Updated Branches: refs/heads/cassandra-2.1 75e5b3b55 - 4012134f6 refs/heads/trunk 68b2d2858 - 54956e984 Failure detector detects and ignores local pauses Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f Branch: refs/heads/cassandra-2.1 Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67 Parents: 75e5b3b Author: Brandon Williams brandonwilli...@apache.org Authored: Tue May 12 18:38:48 2015 -0500 Committer: Brandon Williams brandonwilli...@apache.org Committed: Tue May 12 18:38:48 2015 -0500 -- CHANGES.txt | 1 + .../apache/cassandra/gms/FailureDetector.java | 29 2 files changed, 30 insertions(+) -- http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 1643f9c..7cb0dfd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.1.6 + * Failure detector detects and ignores local pauses (CASSANDRA-9183) * Add utility class to support for rate limiting a given log statement (CASSANDRA-9029) * Add missing consistency levels to cassandra-stess (CASSANDRA-9361) * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339) http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java -- diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java index 0c40ae3..322aae2 100644 --- a/src/java/org/apache/cassandra/gms/FailureDetector.java +++ b/src/java/org/apache/cassandra/gms/FailureDetector.java @@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean public static final String MBEAN_NAME = 
"org.apache.cassandra.net:type=FailureDetector"; private static final int SAMPLE_SIZE = 1000; protected static final long INITIAL_VALUE_NANOS = TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS); +private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 seconds +private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause(); +private long lastInterpret = System.nanoTime(); +private boolean wasPaused = false; + +private static long getMaxLocalPause() +{ +if (System.getProperty("cassandra.max_local_pause_in_ms") != null) +{ +long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms")); +logger.warn("Overriding max local pause time to {}ms", pause); +return pause * 1000000L; +} +else +return DEFAULT_MAX_PAUSE; +} public static final IFailureDetector instance = new FailureDetector(); @@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean return; } long now = System.nanoTime(); +long diff = now - lastInterpret; +lastInterpret = now; +if (diff > MAX_LOCAL_PAUSE_IN_NANOS) +{ +logger.warn("Not marking nodes down due to local pause of {} > {}", diff, MAX_LOCAL_PAUSE_IN_NANOS); +wasPaused = true; +return; +} +if (wasPaused) +{ +wasPaused = false; +return; +} double phi = hbWnd.phi(now); if (logger.isTraceEnabled()) logger.trace("PHI for " + ep + " : " + phi);
[2/4] cassandra git commit: Failure detector detects and ignores local pauses
Failure detector detects and ignores local pauses Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4012134f Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4012134f Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4012134f Branch: refs/heads/trunk Commit: 4012134f6e03b61bbf5ce64cc456cf7592675d67 Parents: 75e5b3b Author: Brandon Williams brandonwilli...@apache.org Authored: Tue May 12 18:38:48 2015 -0500 Committer: Brandon Williams brandonwilli...@apache.org Committed: Tue May 12 18:38:48 2015 -0500 -- CHANGES.txt | 1 + .../apache/cassandra/gms/FailureDetector.java | 29 2 files changed, 30 insertions(+) -- http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 1643f9c..7cb0dfd 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 2.1.6 + * Failure detector detects and ignores local pauses (CASSANDRA-9183) * Add utility class to support for rate limiting a given log statement (CASSANDRA-9029) * Add missing consistency levels to cassandra-stess (CASSANDRA-9361) * Fix commitlog getCompletedTasks to not increment (CASSANDRA-9339) http://git-wip-us.apache.org/repos/asf/cassandra/blob/4012134f/src/java/org/apache/cassandra/gms/FailureDetector.java -- diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java index 0c40ae3..322aae2 100644 --- a/src/java/org/apache/cassandra/gms/FailureDetector.java +++ b/src/java/org/apache/cassandra/gms/FailureDetector.java @@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean public static final String MBEAN_NAME = org.apache.cassandra.net:type=FailureDetector; private static final int SAMPLE_SIZE = 1000; protected static final long INITIAL_VALUE_NANOS = 
TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS); +private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 seconds +private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause(); +private long lastInterpret = System.nanoTime(); +private boolean wasPaused = false; + +private static long getMaxLocalPause() +{ +if (System.getProperty("cassandra.max_local_pause_in_ms") != null) +{ +long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms")); +logger.warn("Overriding max local pause time to {}ms", pause); +return pause * 1000000L; +} +else +return DEFAULT_MAX_PAUSE; +} public static final IFailureDetector instance = new FailureDetector(); @@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean return; } long now = System.nanoTime(); +long diff = now - lastInterpret; +lastInterpret = now; +if (diff > MAX_LOCAL_PAUSE_IN_NANOS) +{ +logger.warn("Not marking nodes down due to local pause of {} > {}", diff, MAX_LOCAL_PAUSE_IN_NANOS); +wasPaused = true; +return; +} +if (wasPaused) +{ +wasPaused = false; +return; +} double phi = hbWnd.phi(now); if (logger.isTraceEnabled()) logger.trace("PHI for " + ep + " : " + phi);
cassandra git commit: Failure detector detects and ignores local pauses
Repository: cassandra Updated Branches: refs/heads/trunk 3cb00db3f - 4047dd121 Failure detector detects and ignores local pauses Patch by brandonwilliams, reviewed by Richard Low for CASSANDRA-9183 Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/4047dd12 Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/4047dd12 Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/4047dd12 Branch: refs/heads/trunk Commit: 4047dd1213ed99b3d7bec253b551b4cae911990a Parents: 3cb00db Author: Brandon Williams brandonwilli...@apache.org Authored: Thu May 7 14:15:51 2015 -0500 Committer: Brandon Williams brandonwilli...@apache.org Committed: Thu May 7 14:16:47 2015 -0500 -- CHANGES.txt | 1 + .../apache/cassandra/gms/FailureDetector.java | 29 2 files changed, 30 insertions(+) -- http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 5a8ee93..9cfc772 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0 + * Failure detector detects and ignores local pauses (CASSANDRA-9183) * Remove Thrift dependencies in bundled tools (CASSANDRA-8358) * Disable memory mapping of hsperfdata file for JVM statistics (CASSANDRA-9242) * Add pre-startup checks to detect potential incompatibilities (CASSANDRA-8049) http://git-wip-us.apache.org/repos/asf/cassandra/blob/4047dd12/src/java/org/apache/cassandra/gms/FailureDetector.java -- diff --git a/src/java/org/apache/cassandra/gms/FailureDetector.java b/src/java/org/apache/cassandra/gms/FailureDetector.java index fe825b6..b8c20d7 100644 --- a/src/java/org/apache/cassandra/gms/FailureDetector.java +++ b/src/java/org/apache/cassandra/gms/FailureDetector.java @@ -48,6 +48,22 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean public static final String MBEAN_NAME = org.apache.cassandra.net:type=FailureDetector; private static final int SAMPLE_SIZE = 1000; protected 
static final long INITIAL_VALUE_NANOS = TimeUnit.NANOSECONDS.convert(getInitialValue(), TimeUnit.MILLISECONDS); +private static final long DEFAULT_MAX_PAUSE = 5000L * 1000000L; // 5 seconds +private static final long MAX_LOCAL_PAUSE_IN_NANOS = getMaxLocalPause(); +private long lastInterpret = System.nanoTime(); +private boolean wasPaused = false; + +private static long getMaxLocalPause() +{ +if (System.getProperty("cassandra.max_local_pause_in_ms") != null) +{ +long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms")); +logger.warn("Overriding max local pause time to {}ms", pause); +return pause * 1000000L; +} +else +return DEFAULT_MAX_PAUSE; +} public static final IFailureDetector instance = new FailureDetector(); @@ -228,6 +244,19 @@ public class FailureDetector implements IFailureDetector, FailureDetectorMBean return; } long now = System.nanoTime(); +long diff = now - lastInterpret; +lastInterpret = now; +if (diff > MAX_LOCAL_PAUSE_IN_NANOS) +{ +logger.warn("Not marking nodes down due to local pause of {} > {}", diff, MAX_LOCAL_PAUSE_IN_NANOS); +wasPaused = true; +return; +} +if (wasPaused) +{ +wasPaused = false; +return; +} double phi = hbWnd.phi(now); if (logger.isTraceEnabled()) logger.trace("PHI for {} : {}", ep, phi);