Author: rjung
Date: Tue Dec 23 18:19:04 2014
New Revision: 1647636

URL: http://svn.apache.org/r1647636
Log:
PR 52334: LB: Calculate worker recovery time based
on last recovery attempt time instead of original
error time after the first recovery attempt.

Modified:
    tomcat/jk/trunk/native/common/jk_lb_worker.c
    tomcat/jk/trunk/native/common/jk_shm.h
    tomcat/jk/trunk/native/common/jk_status.c
    tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml

Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_lb_worker.c (original)
+++ tomcat/jk/trunk/native/common/jk_lb_worker.c Tue Dec 23 18:19:04 2014
@@ -601,7 +601,7 @@ static int recover_workers(lb_worker_t *
         w = &p->lb_workers[i];
         aw = (ajp_worker_t *)w->worker->worker_private;
         if (w->s->state == JK_LB_STATE_ERROR) {
-            elapsed = (int)difftime(now, w->s->error_time);
+            elapsed = (int)difftime(now, w->s->last_error_time);
             if (elapsed <= p->recover_wait_time) {
                 if (JK_IS_DEBUG_LEVEL(l))
                     jk_log(l, JK_LOG_DEBUG,
@@ -620,8 +620,8 @@ static int recover_workers(lb_worker_t *
                 non_error++;
             }
         }
-        else if (w->s->error_time > 0 &&
-                 (int)difftime(now, w->s->error_time) >= p->error_escalation_time &&
+        else if (w->s->first_error_time > 0 &&
+                 (int)difftime(now, w->s->first_error_time) >= p->error_escalation_time &&
                  w->s->state != JK_LB_STATE_RECOVER) {
             if (JK_IS_DEBUG_LEVEL(l))
                 jk_log(l, JK_LOG_DEBUG,
@@ -1428,7 +1428,8 @@ static int JK_METHOD service(jk_endpoint
                      */
                     rec->s->state  = JK_LB_STATE_OK;
                     p->states[rec->i] = JK_LB_STATE_OK;
-                    rec->s->error_time = 0;
+                    rec->s->first_error_time = 0;
+                    rec->s->last_error_time = 0;
                     rc = JK_TRUE;
                     recoverable = JK_UNSET;
                 }
@@ -1439,7 +1440,8 @@ static int JK_METHOD service(jk_endpoint
                      */
                     rec->s->state  = JK_LB_STATE_OK;
                     p->states[rec->i] = JK_LB_STATE_ERROR;
-                    rec->s->error_time = 0;
+                    rec->s->first_error_time = 0;
+                    rec->s->last_error_time = 0;
                     rc = JK_CLIENT_ERROR;
                     recoverable = JK_FALSE;
                 }
@@ -1472,7 +1474,8 @@ static int JK_METHOD service(jk_endpoint
                      */
                     rec->s->state  = JK_LB_STATE_OK;
                     p->states[rec->i] = JK_LB_STATE_ERROR;
-                    rec->s->error_time = 0;
+                    rec->s->first_error_time = 0;
+                    rec->s->last_error_time = 0;
                     rc = JK_FALSE;
                 }
                 else if (service_stat == JK_STATUS_FATAL_ERROR) {
@@ -1485,7 +1488,8 @@ static int JK_METHOD service(jk_endpoint
                     rec->s->errors++;
                     rec->s->state = JK_LB_STATE_ERROR;
                     p->states[rec->i] = JK_LB_STATE_ERROR;
-                    rec->s->error_time = time(NULL);
+                    rec->s->first_error_time = time(NULL);
+                    rec->s->last_error_time = rec->s->first_error_time;
                     rc = JK_FALSE;
                 }
                 else if (service_stat == JK_REPLY_TIMEOUT) {
@@ -1499,7 +1503,8 @@ static int JK_METHOD service(jk_endpoint
                         rec->s->errors++;
                         rec->s->state = JK_LB_STATE_ERROR;
                         p->states[rec->i] = JK_LB_STATE_ERROR;
-                        rec->s->error_time = time(NULL);
+                        rec->s->first_error_time = time(NULL);
+                        rec->s->last_error_time = rec->s->first_error_time;
                     }
                     else {
                         /*
@@ -1523,8 +1528,8 @@ static int JK_METHOD service(jk_endpoint
                     rec->s->errors++;
                     if (rec->s->busy == 0 ||
                         p->worker->error_escalation_time == 0 ||
-                        (rec->s->error_time > 0 &&
-                         (int)difftime(now, rec->s->error_time) >= p->worker->error_escalation_time)) {
+                        (rec->s->first_error_time > 0 &&
+                         (int)difftime(now, rec->s->first_error_time) >= p->worker->error_escalation_time)) {
                         if (JK_IS_DEBUG_LEVEL(l))
                             jk_log(l, JK_LOG_DEBUG,
                                   "worker %s escalating local error to global error",
@@ -1532,9 +1537,10 @@ static int JK_METHOD service(jk_endpoint
                         rec->s->state = JK_LB_STATE_ERROR;
                     }
                     p->states[rec->i] = JK_LB_STATE_ERROR;
-                    if (rec->s->error_time == 0) {
-                        rec->s->error_time = now;
+                    if (rec->s->first_error_time == 0) {
+                        rec->s->first_error_time = now;
                     }
+                    rec->s->last_error_time = now;
                     rc = JK_FALSE;
                 }
                 if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC)
@@ -1743,7 +1749,8 @@ static int JK_METHOD validate(jk_worker_
 
                 p->lb_workers[i].s->lb_value = 0;
                 p->lb_workers[i].s->state = JK_LB_STATE_IDLE;
-                p->lb_workers[i].s->error_time = 0;
+                p->lb_workers[i].s->first_error_time = 0;
+                p->lb_workers[i].s->last_error_time = 0;
                 p->lb_workers[i].s->elected_snapshot = 0;
                 p->lb_workers[i].s->sessions = 0;
                 p->lb_workers[i].activation =

Modified: tomcat/jk/trunk/native/common/jk_shm.h
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_shm.h?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_shm.h (original)
+++ tomcat/jk/trunk/native/common/jk_shm.h Tue Dec 23 18:19:04 2014
@@ -150,8 +150,10 @@ struct jk_shm_lb_sub_worker
     volatile jk_uint64_t lb_mult;
     /* Current lb value  */
     volatile jk_uint64_t lb_value;
-    /* Statistical data */
-    volatile time_t error_time;
+    /* First consecutive error time */
+    volatile time_t first_error_time;
+    /* Last consecutive error time */
+    volatile time_t last_error_time;
     /* Number of times the worker was elected - snapshot during maintenance */
     volatile jk_uint64_t  elected_snapshot;
     /* Number of non-sticky requests handled, that were not marked as stateless */

Modified: tomcat/jk/trunk/native/common/jk_status.c
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_status.c?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/native/common/jk_status.c (original)
+++ tomcat/jk/trunk/native/common/jk_status.c Tue Dec 23 18:19:04 2014
@@ -1802,9 +1802,9 @@ static void display_worker_ajp_details(j
         name = lb->name;
         sub_name = wr->name;
         ajp_name = wr->name;
-        error_time = wr->s->error_time;
+        error_time = wr->s->first_error_time;
         if (wr->s->state == JK_LB_STATE_ERROR) {
-            rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->error_time);
+            rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->last_error_time);
             if (rs_min < 0) {
                 rs_min = 0;
             }
@@ -4300,7 +4300,8 @@ static int reset_worker(jk_ws_service_t
                 wr->s->state            = JK_LB_STATE_IDLE;
                 wr->s->elected_snapshot = 0;
                 wr->s->sessions         = 0;
-                wr->s->error_time       = 0;
+                wr->s->first_error_time = 0;
+                wr->s->last_error_time  = 0;
                 wr->s->errors           = 0;
                 wr->s->lb_value         = 0;
                 aw->s->used             = 0;
@@ -4328,7 +4329,8 @@ static int reset_worker(jk_ws_service_t
             wr->s->state            = JK_LB_STATE_IDLE;
             wr->s->elected_snapshot = 0;
             wr->s->sessions         = 0;
-            wr->s->error_time       = 0;
+            wr->s->first_error_time = 0;
+            wr->s->last_error_time  = 0;
             wr->s->errors           = 0;
             wr->s->lb_value         = 0;
             aw->s->used             = 0;

Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml
URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1647636&r1=1647635&r2=1647636&view=diff
==============================================================================
--- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original)
+++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Tue Dec 23 18:19:04 2014
@@ -102,6 +102,11 @@
         "connection_pool_timeout" and "keepAliveTimeout" or "connectionTimeout"
         in the Tomcat AJP connector configuration. (rjung)
       </fix>
+      <fix>
+        <bug>52334</bug>: LB: Calculate worker recovery time based on last
+        recovery attempt time instead of original error time after the first
+        recovery attempt. (rjung)
+      </fix>
     </changelog>
   </subsection>
 </section>



---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org
For additional commands, e-mail: dev-h...@tomcat.apache.org

Reply via email to