Author: rjung
Date: Tue Dec 23 18:19:04 2014
New Revision: 1647636

URL: http://svn.apache.org/r1647636
Log:
PR 52334: LB: Calculate worker recovery time based on last recovery attempt time instead of original error time after the first recovery attempt.

Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c tomcat/jk/trunk/native/common/jk_shm.h tomcat/jk/trunk/native/common/jk_status.c tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Modified: tomcat/jk/trunk/native/common/jk_lb_worker.c URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_lb_worker.c?rev=1647636&r1=1647635&r2=1647636&view=diff ============================================================================== --- tomcat/jk/trunk/native/common/jk_lb_worker.c (original) +++ tomcat/jk/trunk/native/common/jk_lb_worker.c Tue Dec 23 18:19:04 2014 @@ -601,7 +601,7 @@ static int recover_workers(lb_worker_t * w = &p->lb_workers[i]; aw = (ajp_worker_t *)w->worker->worker_private; if (w->s->state == JK_LB_STATE_ERROR) { - elapsed = (int)difftime(now, w->s->error_time); + elapsed = (int)difftime(now, w->s->last_error_time); if (elapsed <= p->recover_wait_time) { if (JK_IS_DEBUG_LEVEL(l)) jk_log(l, JK_LOG_DEBUG, @@ -620,8 +620,8 @@ static int recover_workers(lb_worker_t * non_error++; } } - else if (w->s->error_time > 0 && - (int)difftime(now, w->s->error_time) >= p->error_escalation_time && + else if (w->s->first_error_time > 0 && + (int)difftime(now, w->s->first_error_time) >= p->error_escalation_time && w->s->state != JK_LB_STATE_RECOVER) { if (JK_IS_DEBUG_LEVEL(l)) jk_log(l, JK_LOG_DEBUG, @@ -1428,7 +1428,8 @@ static int JK_METHOD service(jk_endpoint */ rec->s->state = JK_LB_STATE_OK; p->states[rec->i] = JK_LB_STATE_OK; - rec->s->error_time = 0; + rec->s->first_error_time = 0; + rec->s->last_error_time = 0; rc = JK_TRUE; recoverable = JK_UNSET; } @@ -1439,7 +1440,8 @@ static int JK_METHOD service(jk_endpoint */ rec->s->state = JK_LB_STATE_OK; p->states[rec->i] = JK_LB_STATE_ERROR; - rec->s->error_time = 0; + rec->s->first_error_time = 0; + rec->s->last_error_time = 0; rc = JK_CLIENT_ERROR; recoverable = JK_FALSE; } @@ -1472,7 +1474,8 @@ static int JK_METHOD service(jk_endpoint */ rec->s->state = JK_LB_STATE_OK; p->states[rec->i] = 
JK_LB_STATE_ERROR; - rec->s->error_time = 0; + rec->s->first_error_time = 0; + rec->s->last_error_time = 0; rc = JK_FALSE; } else if (service_stat == JK_STATUS_FATAL_ERROR) { @@ -1485,7 +1488,8 @@ static int JK_METHOD service(jk_endpoint rec->s->errors++; rec->s->state = JK_LB_STATE_ERROR; p->states[rec->i] = JK_LB_STATE_ERROR; - rec->s->error_time = time(NULL); + rec->s->first_error_time = time(NULL); + rec->s->last_error_time = rec->s->first_error_time; rc = JK_FALSE; } else if (service_stat == JK_REPLY_TIMEOUT) { @@ -1499,7 +1503,8 @@ static int JK_METHOD service(jk_endpoint rec->s->errors++; rec->s->state = JK_LB_STATE_ERROR; p->states[rec->i] = JK_LB_STATE_ERROR; - rec->s->error_time = time(NULL); + rec->s->first_error_time = time(NULL); + rec->s->last_error_time = rec->s->first_error_time; } else { /* @@ -1523,8 +1528,8 @@ static int JK_METHOD service(jk_endpoint rec->s->errors++; if (rec->s->busy == 0 || p->worker->error_escalation_time == 0 || - (rec->s->error_time > 0 && - (int)difftime(now, rec->s->error_time) >= p->worker->error_escalation_time)) { + (rec->s->first_error_time > 0 && + (int)difftime(now, rec->s->first_error_time) >= p->worker->error_escalation_time)) { if (JK_IS_DEBUG_LEVEL(l)) jk_log(l, JK_LOG_DEBUG, "worker %s escalating local error to global error", @@ -1532,9 +1537,10 @@ static int JK_METHOD service(jk_endpoint rec->s->state = JK_LB_STATE_ERROR; } p->states[rec->i] = JK_LB_STATE_ERROR; - if (rec->s->error_time == 0) { - rec->s->error_time = now; + if (rec->s->first_error_time == 0) { + rec->s->first_error_time = now; } + rec->s->last_error_time = now; rc = JK_FALSE; } if (p->worker->lblock == JK_LB_LOCK_PESSIMISTIC) @@ -1743,7 +1749,8 @@ static int JK_METHOD validate(jk_worker_ p->lb_workers[i].s->lb_value = 0; p->lb_workers[i].s->state = JK_LB_STATE_IDLE; - p->lb_workers[i].s->error_time = 0; + p->lb_workers[i].s->first_error_time = 0; + p->lb_workers[i].s->last_error_time = 0; p->lb_workers[i].s->elected_snapshot = 0; 
p->lb_workers[i].s->sessions = 0; p->lb_workers[i].activation = Modified: tomcat/jk/trunk/native/common/jk_shm.h URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_shm.h?rev=1647636&r1=1647635&r2=1647636&view=diff ============================================================================== --- tomcat/jk/trunk/native/common/jk_shm.h (original) +++ tomcat/jk/trunk/native/common/jk_shm.h Tue Dec 23 18:19:04 2014 @@ -150,8 +150,10 @@ struct jk_shm_lb_sub_worker volatile jk_uint64_t lb_mult; /* Current lb value */ volatile jk_uint64_t lb_value; - /* Statistical data */ - volatile time_t error_time; + /* First consecutive error time */ + volatile time_t first_error_time; + /* Last consecutive error time */ + volatile time_t last_error_time; /* Number of times the worker was elected - snapshot during maintenance */ volatile jk_uint64_t elected_snapshot; /* Number of non-sticky requests handled, that were not marked as stateless */ Modified: tomcat/jk/trunk/native/common/jk_status.c URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/native/common/jk_status.c?rev=1647636&r1=1647635&r2=1647636&view=diff ============================================================================== --- tomcat/jk/trunk/native/common/jk_status.c (original) +++ tomcat/jk/trunk/native/common/jk_status.c Tue Dec 23 18:19:04 2014 @@ -1802,9 +1802,9 @@ static void display_worker_ajp_details(j name = lb->name; sub_name = wr->name; ajp_name = wr->name; - error_time = wr->s->error_time; + error_time = wr->s->first_error_time; if (wr->s->state == JK_LB_STATE_ERROR) { - rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->error_time); + rs_min = lb->recover_wait_time - (int)difftime(now, wr->s->last_error_time); if (rs_min < 0) { rs_min = 0; } @@ -4300,7 +4300,8 @@ static int reset_worker(jk_ws_service_t wr->s->state = JK_LB_STATE_IDLE; wr->s->elected_snapshot = 0; wr->s->sessions = 0; - wr->s->error_time = 0; + wr->s->first_error_time = 0; + wr->s->last_error_time = 0; 
wr->s->errors = 0; wr->s->lb_value = 0; aw->s->used = 0; @@ -4328,7 +4329,8 @@ static int reset_worker(jk_ws_service_t wr->s->state = JK_LB_STATE_IDLE; wr->s->elected_snapshot = 0; wr->s->sessions = 0; - wr->s->error_time = 0; + wr->s->first_error_time = 0; + wr->s->last_error_time = 0; wr->s->errors = 0; wr->s->lb_value = 0; aw->s->used = 0; Modified: tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml?rev=1647636&r1=1647635&r2=1647636&view=diff ============================================================================== --- tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml (original) +++ tomcat/jk/trunk/xdocs/miscellaneous/changelog.xml Tue Dec 23 18:19:04 2014 @@ -102,6 +102,11 @@ "connection_pool_timeout" and "keepAliveTimeout" or "connectionTimeout" in the Tomcat AJP connector configuration. (rjung) </fix> + <fix> + <bug>52334</bug>: LB: Calculate worker recovery time based on last + recovery attempt time instead of original error time after the first + recovery attempt. (rjung) + </fix> </changelog> </subsection> </section> --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@tomcat.apache.org For additional commands, e-mail: dev-h...@tomcat.apache.org