Make HostDBRoundRobin::select_best_http take last_failure time into 
consideration for all RR types

In the current setup it only checks the status of the reals if you use 
"default RR" (which is actually consistent hashing... but we'll let that 
slide). This patch consolidates the alive() check into the HostDBInfo struct, 
and then calls it from all 3 LB mechanisms. Since you can control if/when a 
host is marked as down in ATS there is no reason to not check.

Issue: TS-3724


Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo
Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/be68bd8f
Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/be68bd8f
Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/be68bd8f

Branch: refs/heads/master
Commit: be68bd8f47f7ecde5403d9a63dbf81604d9bdf56
Parents: 6a56fd2
Author: Thomas Jackson <[email protected]>
Authored: Thu Jun 25 18:50:28 2015 -0700
Committer: Thomas Jackson <[email protected]>
Committed: Mon Jun 29 18:59:01 2015 -0700

----------------------------------------------------------------------
 iocore/hostdb/I_HostDBProcessor.h | 35 +++++++++++++++++++++++++++++
 iocore/hostdb/P_HostDBProcessor.h | 41 ++++++++++++++--------------------
 2 files changed, 52 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/trafficserver/blob/be68bd8f/iocore/hostdb/I_HostDBProcessor.h
----------------------------------------------------------------------
diff --git a/iocore/hostdb/I_HostDBProcessor.h 
b/iocore/hostdb/I_HostDBProcessor.h
index bcdc3a0..e4ef5f0 100644
--- a/iocore/hostdb/I_HostDBProcessor.h
+++ b/iocore/hostdb/I_HostDBProcessor.h
@@ -254,6 +254,41 @@ struct HostDBInfo {
 
   uint64_t md5_high;
 
+  /*
+   * Given the current time `now` and the fail_window, determine if this real 
is alive
+   */
+  bool
+  alive(ink_time_t now, int32_t fail_window)
+  {
+    unsigned int last_failure = app.http_data.last_failure;
+
+    if (last_failure == 0 || (unsigned int)(now - fail_window) > last_failure) 
{
+      return true;
+    } else {
+      // Entry is marked down.  Make sure some nasty clock skew
+      //  did not occur.  Use the retry time to set an upper bound
+      //  as to how far in the future we should tolerate bogus last
+      //  failure times.  This sets the upper bound that we would ever
+      //  consider a server down to 2*down_server_timeout
+      if (now + fail_window < last_failure) {
+#ifdef DEBUG
+        // because this region is mmaped, I cann't get anything
+        //   useful from the structure in core files,  therefore
+        //   copy the revelvant info to the stack so it will
+        //   be readble in the core
+        HostDBInfo current_info;
+        HostDBRoundRobin current_rr;
+        memcpy(&current_info, &info[i], sizeof(HostDBInfo));
+        memcpy(&current_rr, this, sizeof(HostDBRoundRobin));
+#endif
+        ink_assert(!"extreme clock skew");
+        app.http_data.last_failure = 0;
+        return false;
+      }
+      return false;
+    }
+
+  }
   bool
   failed()
   {

http://git-wip-us.apache.org/repos/asf/trafficserver/blob/be68bd8f/iocore/hostdb/P_HostDBProcessor.h
----------------------------------------------------------------------
diff --git a/iocore/hostdb/P_HostDBProcessor.h 
b/iocore/hostdb/P_HostDBProcessor.h
index d80bc6e..d276112 100644
--- a/iocore/hostdb/P_HostDBProcessor.h
+++ b/iocore/hostdb/P_HostDBProcessor.h
@@ -284,9 +284,17 @@ HostDBRoundRobin::select_best_http(sockaddr const 
*client_ip, ink_time_t now, in
   int best_any = 0;
   int best_up = -1;
 
+  // Basic round robin, increment current and mod with how many we have
   if (HostDBProcessor::hostdb_strict_round_robin) {
     Debug("hostdb", "Using strict round robin");
-    best_up = current++ % good;
+    // Check that the host we selected is alive
+    for (int i=0; i < good; i++){
+      best_any = current++ % good;
+      if (info[best_any].alive(now, fail_window)){
+        best_up = best_any;
+        break;
+      }
+    }
   } else if (HostDBProcessor::hostdb_timed_round_robin > 0) {
     Debug("hostdb", "Using timed round-robin for HTTP");
     if ((now - timed_rr_ctime) > HostDBProcessor::hostdb_timed_round_robin) {
@@ -294,7 +302,13 @@ HostDBRoundRobin::select_best_http(sockaddr const 
*client_ip, ink_time_t now, in
       ++current;
       timed_rr_ctime = now;
     }
-    best_up = current % good;
+    for (int i=0; i < good; i++){
+      best_any = current++ % good;
+      if (info[best_any].alive(now, fail_window)){
+        best_up = best_any;
+        break;
+      }
+    }
     Debug("hostdb", "Using %d for best_up", best_up);
   } else {
     Debug("hostdb", "Using default round robin");
@@ -308,32 +322,11 @@ HostDBRoundRobin::select_best_http(sockaddr const 
*client_ip, ink_time_t now, in
         best_any = i;
         best_hash_any = h;
       }
-      if (info[i].app.http_data.last_failure == 0 || (unsigned int)(now - 
fail_window) > info[i].app.http_data.last_failure) {
-        // Entry is marked up
+      if (info[i].alive(now, fail_window)){
         if (best_hash_up <= h) {
           best_up = i;
           best_hash_up = h;
         }
-      } else {
-        // Entry is marked down.  Make sure some nasty clock skew
-        //  did not occur.  Use the retry time to set an upper bound
-        //  as to how far in the future we should tolerate bogus last
-        //  failure times.  This sets the upper bound that we would ever
-        //  consider a server down to 2*down_server_timeout
-        if (now + fail_window < (int32_t)(info[i].app.http_data.last_failure)) 
{
-#ifdef DEBUG
-          // because this region is mmaped, I cann't get anything
-          //   useful from the structure in core files,  therefore
-          //   copy the revelvant info to the stack so it will
-          //   be readble in the core
-          HostDBInfo current_info;
-          HostDBRoundRobin current_rr;
-          memcpy(&current_info, &info[i], sizeof(HostDBInfo));
-          memcpy(&current_rr, this, sizeof(HostDBRoundRobin));
-#endif
-          ink_assert(!"extreme clock skew");
-          info[i].app.http_data.last_failure = 0;
-        }
       }
     }
   }

Reply via email to