Author: brandonwilliams
Date: Mon Oct  3 20:36:35 2011
New Revision: 1178563

URL: http://svn.apache.org/viewvc?rev=1178563&view=rev
Log:
Fix bug where the FailureDetector can take a very long time to mark a
host down.
Patch by brandonwilliams, reviewed by Paul Cannon for CASSANDRA-3273

Modified:
    cassandra/branches/cassandra-0.8/CHANGES.txt
    
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/FailureDetector.java
    
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/Gossiper.java
    
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/IFailureDetector.java
    cassandra/branches/cassandra-1.0.0/CHANGES.txt
    
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/FailureDetector.java
    
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/Gossiper.java
    
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/IFailureDetector.java

Modified: cassandra/branches/cassandra-0.8/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.8/CHANGES.txt?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- cassandra/branches/cassandra-0.8/CHANGES.txt (original)
+++ cassandra/branches/cassandra-0.8/CHANGES.txt Mon Oct  3 20:36:35 2011
@@ -18,6 +18,8 @@
  * (Hadoop) allow wrapping ranges in queries (CASSANDRA-3137)
  * (Hadoop) check all interfaces for a match with split location
    before falling back to random replica (CASSANDRA-3211)
+ * Fix bug where the failure detector can take too long to mark a host 
+   down (CASSANDRA-3273)
 
 
 0.8.6

Modified: 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/FailureDetector.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/FailureDetector.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/FailureDetector.java
 (original)
+++ 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/FailureDetector.java
 Mon Oct  3 20:36:35 2011
@@ -121,7 +121,14 @@ public class FailureDetector implements 
             logger_.error("unknown endpoint " + ep);
         return epState != null && epState.isAlive();
     }
-    
+
+    public void clear(InetAddress ep)
+    {
+        ArrivalWindow heartbeatWindow = arrivalSamples_.get(ep);
+        if (heartbeatWindow != null)
+            heartbeatWindow.clear();
+    }
+
     public void report(InetAddress ep)
     {
         if (logger_.isTraceEnabled())
@@ -149,7 +156,9 @@ public class FailureDetector implements 
             logger_.trace("PHI for " + ep + " : " + phi);
         
         if ( phi > phiConvictThreshold_ )
-        {     
+        {
+            logger_.trace("notifying listeners that {} is down", ep);
+            logger_.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
             for ( IFailureDetectionEventListener listener : fdEvntListeners_ )
             {
                 listener.convict(ep, phi);
@@ -206,6 +215,11 @@ class ArrivalWindow
     // change.
     private final double PHI_FACTOR = 1.0 / Math.log(10.0);
 
+    // in the event of a long partition, never record an interval longer than 
the rpc timeout,
+    // since if a host is regularly experiencing connectivity problems lasting 
this long we'd
+    // rather mark it down quickly instead of adapting
+    private final double MAX_INTERVAL_IN_MS = 
DatabaseDescriptor.getRpcTimeout();
+
     ArrivalWindow(int size)
     {
         arrivalIntervals_ = new BoundedStatsDeque(size);
@@ -216,14 +230,17 @@ class ArrivalWindow
         double interArrivalTime;
         if ( tLast_ > 0L )
         {                        
-            interArrivalTime = (value - tLast_);            
+            interArrivalTime = (value - tLast_);
         }
         else
         {
             interArrivalTime = Gossiper.intervalInMillis / 2;
         }
-        tLast_ = value;            
-        arrivalIntervals_.add(interArrivalTime);        
+        if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+            arrivalIntervals_.add(interArrivalTime);
+        else
+            logger_.debug("Ignoring interval time of {}", interArrivalTime);
+        tLast_ = value;
     }
     
     synchronized double sum()

Modified: 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/Gossiper.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/Gossiper.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/Gossiper.java
 (original)
+++ 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/Gossiper.java
 Mon Oct  3 20:36:35 2011
@@ -665,6 +665,13 @@ public class Gossiper implements IFailur
             if ( remoteGeneration > localGeneration )
             {
                 localEndpointState.updateTimestamp();
+                // this node was dead and the generation changed, this 
indicates a reboot, or possibly a takeover
+                // we will clean the fd intervals for it and relearn them
+                if (!localEndpointState.isAlive())
+                {
+                    logger.debug("Clearing interval times for {} due to 
generation change", endpoint);
+                    fd.clear(endpoint);
+                }
                 fd.report(endpoint);
                 return;
             }
@@ -676,6 +683,7 @@ public class Gossiper implements IFailur
                 if ( remoteVersion > localVersion )
                 {
                     localEndpointState.updateTimestamp();
+                    // just a version change, report to the fd
                     fd.report(endpoint);
                 }
             }

Modified: 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/IFailureDetector.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/IFailureDetector.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/IFailureDetector.java
 (original)
+++ 
cassandra/branches/cassandra-0.8/src/java/org/apache/cassandra/gms/IFailureDetector.java
 Mon Oct  3 20:36:35 2011
@@ -37,7 +37,13 @@ public interface IFailureDetector
      * @return true if UP and false if DOWN.
      */
     public boolean isAlive(InetAddress ep);
-    
+
+    /**
+     * Clear any existing interval timings for this endpoint
+     * @param ep
+     */
+    public void clear(InetAddress ep);
+
     /**
      * This method is invoked by any entity wanting to interrogate the status 
of an endpoint. 
      * In our case it would be the Gossiper. The Failure Detector will then 
calculate Phi and

Modified: cassandra/branches/cassandra-1.0.0/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/CHANGES.txt?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- cassandra/branches/cassandra-1.0.0/CHANGES.txt (original)
+++ cassandra/branches/cassandra-1.0.0/CHANGES.txt Mon Oct  3 20:36:35 2011
@@ -5,6 +5,8 @@
    (CASSANDRA-3269)
  * Evict gossip state immediately when a token is taken over by a new IP 
(CASSANDRA-3259)
  * tolerate whitespace in seed CDL (CASSANDRA-3263)
+ * Fix bug where the failure detector can take too long to mark a host
+   down (CASSANDRA-3273)
 
 
 1.0.0-rc2

Modified: 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/FailureDetector.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/FailureDetector.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/FailureDetector.java
 (original)
+++ 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/FailureDetector.java
 Mon Oct  3 20:36:35 2011
@@ -149,7 +149,14 @@ public class FailureDetector implements 
             logger_.error("unknown endpoint " + ep);
         return epState != null && epState.isAlive();
     }
-    
+
+    public void clear(InetAddress ep)
+    {
+        ArrivalWindow heartbeatWindow = arrivalSamples_.get(ep);
+        if (heartbeatWindow != null)
+            heartbeatWindow.clear();
+    }
+
     public void report(InetAddress ep)
     {
         if (logger_.isTraceEnabled())
@@ -177,7 +184,9 @@ public class FailureDetector implements 
             logger_.trace("PHI for " + ep + " : " + phi);
         
         if ( phi > phiConvictThreshold_ )
-        {     
+        {
+            logger_.trace("notifying listeners that {} is down", ep);
+            logger_.trace("intervals: {} mean: {}", hbWnd, hbWnd.mean());
             for ( IFailureDetectionEventListener listener : fdEvntListeners_ )
             {
                 listener.convict(ep, phi);
@@ -234,6 +243,11 @@ class ArrivalWindow
     // change.
     private final double PHI_FACTOR = 1.0 / Math.log(10.0);
 
+    // in the event of a long partition, never record an interval longer than 
the rpc timeout,
+    // since if a host is regularly experiencing connectivity problems lasting 
this long we'd
+    // rather mark it down quickly instead of adapting
+    private final double MAX_INTERVAL_IN_MS = 
DatabaseDescriptor.getRpcTimeout();
+
     ArrivalWindow(int size)
     {
         arrivalIntervals_ = new BoundedStatsDeque(size);
@@ -244,14 +258,17 @@ class ArrivalWindow
         double interArrivalTime;
         if ( tLast_ > 0L )
         {                        
-            interArrivalTime = (value - tLast_);            
+            interArrivalTime = (value - tLast_);
         }
         else
         {
             interArrivalTime = Gossiper.intervalInMillis / 2;
         }
-        tLast_ = value;            
-        arrivalIntervals_.add(interArrivalTime);        
+        if (interArrivalTime <= MAX_INTERVAL_IN_MS)
+            arrivalIntervals_.add(interArrivalTime);
+        else
+            logger_.debug("Ignoring interval time of {}", interArrivalTime);
+        tLast_ = value;
     }
     
     synchronized double sum()

Modified: 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/Gossiper.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/Gossiper.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/Gossiper.java
 (original)
+++ 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/Gossiper.java
 Mon Oct  3 20:36:35 2011
@@ -685,6 +685,13 @@ public class Gossiper implements IFailur
             if ( remoteGeneration > localGeneration )
             {
                 localEndpointState.updateTimestamp();
+                // this node was dead and the generation changed, this 
indicates a reboot, or possibly a takeover
+                // we will clean the fd intervals for it and relearn them
+                if (!localEndpointState.isAlive())
+                {
+                    logger.debug("Clearing interval times for {} due to 
generation change", endpoint);
+                    fd.clear(endpoint);
+                }
                 fd.report(endpoint);
                 return;
             }
@@ -696,6 +703,7 @@ public class Gossiper implements IFailur
                 if ( remoteVersion > localVersion )
                 {
                     localEndpointState.updateTimestamp();
+                    // just a version change, report to the fd
                     fd.report(endpoint);
                 }
             }

Modified: 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/IFailureDetector.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/IFailureDetector.java?rev=1178563&r1=1178562&r2=1178563&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/IFailureDetector.java
 (original)
+++ 
cassandra/branches/cassandra-1.0.0/src/java/org/apache/cassandra/gms/IFailureDetector.java
 Mon Oct  3 20:36:35 2011
@@ -37,7 +37,13 @@ public interface IFailureDetector
      * @return true if UP and false if DOWN.
      */
     public boolean isAlive(InetAddress ep);
-    
+
+    /**
+     * Clear any existing interval timings for this endpoint
+     * @param ep
+     */
+    public void clear(InetAddress ep);
+
     /**
      * This method is invoked by any entity wanting to interrogate the status 
of an endpoint. 
      * In our case it would be the Gossiper. The Failure Detector will then 
calculate Phi and


Reply via email to