mturk 2005/02/19 00:12:30 Modified: jk/native/common jk_lb_worker.c jk_shm.h jk_status.c Log: Add worker is_busy runtime status flag. When we cannot obtain a free endpoint, mark the worker as busy rather than in error, so that recovery is not needed. Revision Changes Path 1.63 +44 -25 jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c Index: jk_lb_worker.c =================================================================== RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c,v retrieving revision 1.62 retrieving revision 1.63 diff -u -r1.62 -r1.63 --- jk_lb_worker.c 17 Feb 2005 13:48:47 -0000 1.62 +++ jk_lb_worker.c 19 Feb 2005 08:12:29 -0000 1.63 @@ -42,6 +42,9 @@ #define WAIT_BEFORE_RECOVER (60*1) #define WORKER_RECOVER_TIME ("recover_time") +#define JK_WORKER_IN_ERROR(w) ((w)->in_error_state && !(w)->is_disabled && !(w)->is_busy) +#define JK_WORKER_USABLE(w) (!(w)->in_error_state && !(w)->is_disabled && !(w)->is_busy) + struct lb_endpoint { jk_endpoint_t *e; @@ -183,6 +186,7 @@ w->s->name); w->s->in_recovering = JK_TRUE; w->s->in_error_state = JK_FALSE; + w->s->is_busy = JK_FALSE; } JK_TRACE_EXIT(l); @@ -225,8 +229,7 @@ /* Take into calculation only the workers that are * not in error state or not disabled. */ - if (!p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_USABLE(p->lb_workers[i].s)) { if (p->lbmethod == JK_LB_BYREQUESTS) { p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor; total_factor += p->lb_workers[i].s->lb_factor; @@ -269,15 +272,13 @@ * The worker might still be unusable, but we try * anyway. */ - if (p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) { retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l); } /* Take into calculation only the workers that are * not in error state or not disabled. 
*/ - if (!p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_USABLE(p->lb_workers[i].s)) { p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor; total_factor += p->lb_workers[i].s->lb_factor; if (!candidate || p->lb_workers[i].s->lb_value > candidate->s->lb_value) @@ -309,15 +310,13 @@ * The worker might still be unusable, but we try * anyway. */ - if (p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) { retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l); } /* Take into calculation only the workers that are * not in error state or not disabled. */ - if (!p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_USABLE(p->lb_workers[i].s)) { mytraffic = (p->lb_workers[i].s->transferred/p->lb_workers[i].s->lb_factor) + (p->lb_workers[i].s->readed/p->lb_workers[i].s->lb_factor); if (!candidate || mytraffic < curmin) { @@ -346,9 +345,9 @@ return rc; } -static worker_record_t *find_session_route(lb_worker_t *p, - const char *name, - jk_logger_t *l) +static worker_record_t *find_bysession_route(lb_worker_t *p, + const char *name, + jk_logger_t *l) { unsigned int i; int total_factor = 0; @@ -361,13 +360,12 @@ candidate = find_best_bydomain(p, name, l); } if (candidate) { - if (candidate->s->in_error_state && !candidate->s->is_disabled) { + if (JK_WORKER_IN_ERROR(candidate->s)) { retry_worker(candidate, p->s->recover_wait_time, l); } if (candidate->s->in_error_state) { - /* We have a worker that is unusable. - * It can be in error or disabled, but in case - * it has a redirection set use that redirection worker. + /* We have a worker that is error state. + * If it has a redirection set use that redirection worker. * This enables to safely remove the member from the * balancer. Of course you will need a some kind of * session replication between those two remote. 
@@ -384,8 +382,7 @@ } if (candidate && !uses_domain) { for (i = 0; i < p->num_of_workers; i++) { - if (!p->lb_workers[i].s->in_error_state && - !p->lb_workers[i].s->is_disabled) { + if (JK_WORKER_USABLE(p->lb_workers[i].s)) { /* Skip all workers that are not member of candidate domain */ if (*candidate->s->domain && strcmp(p->lb_workers[i].s->domain, candidate->s->domain)) @@ -413,10 +410,10 @@ /* No need to find the best worker * if there is a single one */ - if (p->lb_workers[0].s->in_error_state && - !p->lb_workers[0].s->is_disabled) { + if (JK_WORKER_IN_ERROR(p->lb_workers[0].s)) { retry_worker(&p->lb_workers[0], p->s->recover_wait_time, l); } + /* Check if worker is marked for retry */ if (!p->lb_workers[0].s->in_error_state) { p->lb_workers[0].r = &(p->lb_workers[0].s->name[0]); JK_TRACE_EXIT(l); @@ -428,6 +425,9 @@ } } else if (p->s->sticky_session) { + /* Use sessionid only if sticky_session is + * defined for this load balancer + */ sessionid = get_sessionid(s); } JK_ENTER_CS(&(p->cs), r); @@ -464,17 +464,18 @@ session_route); /* We have a session route. Whow! */ - rc = find_session_route(p, session_route, l); + rc = find_bysession_route(p, session_route, l); if (rc) { JK_LEAVE_CS(&(p->cs), r); if (JK_IS_DEBUG_LEVEL(l)) jk_log(l, JK_LOG_DEBUG, - "found worker %s for partial sessionid %s", - rc->s->name, sessionid); + "found worker %s for route %s and partial sessionid %s", + rc->s->name, session_route, sessionid); JK_TRACE_EXIT(l); return rc; } } + /* Try next partial sessionid if present */ sessionid = next; } if (!rc && p->s->sticky_session_force) { @@ -556,6 +557,10 @@ rec->s->readed += end->rd; rec->s->transferred += end->wr; end->done(&end, l); + /* When returning the endpoint mark the worker as not busy. 
+ * We have at least one endpoint free + */ + rec->s->is_busy = JK_FALSE; /* Decrement the busy worker count */ rec->s->busy--; p->worker->s->busy--; @@ -567,6 +572,19 @@ return JK_TRUE; } } + else { + /* If we can not get the endpoint + * mark the worker as busy rather then + * as in error + */ + rec->s->is_busy = JK_TRUE; + jk_log(l, JK_LOG_INFO, + "could not get free endpoint for worker %s", + rec->s->name); + /* Decrement the worker count and try another worker */ + --num_of_workers; + continue; + } if (!service_ok) { /* * Service failed !!! @@ -712,6 +730,7 @@ p->lb_workers[i].s->lb_value = p->lb_workers[i].s->lb_factor; p->lb_workers[i].s->in_error_state = JK_FALSE; p->lb_workers[i].s->in_recovering = JK_FALSE; + p->lb_workers[i].s->is_busy = JK_FALSE; p->lb_workers[i].s->error_time = 0; /* Worker can be initaly disabled as hot standby */ p->lb_workers[i].s->is_disabled = jk_get_is_worker_disabled(props, worker_names[i]); 1.13 +2 -5 jakarta-tomcat-connectors/jk/native/common/jk_shm.h Index: jk_shm.h =================================================================== RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_shm.h,v retrieving revision 1.12 retrieving revision 1.13 diff -u -r1.12 -r1.13 --- jk_shm.h 17 Feb 2005 15:03:15 -0000 1.12 +++ jk_shm.h 19 Feb 2005 08:12:29 -0000 1.13 @@ -56,10 +56,6 @@ int id; /* Number of currently busy channels */ int busy; - /* Number of currently idle channels */ - int idle; - /* Maximum number of channels */ - int max_conn; /* worker name */ char name[JK_SHM_STR_SIZ+1]; /* worker domain */ @@ -68,6 +64,7 @@ char redirect[JK_SHM_STR_SIZ+1]; /* current status of the worker */ int is_disabled; + int is_busy; /* Current lb factor */ int lb_factor; /* Current lb value */ 1.18 +6 -3 jakarta-tomcat-connectors/jk/native/common/jk_status.c Index: jk_status.c =================================================================== RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_status.c,v retrieving 
revision 1.17 retrieving revision 1.18 diff -u -r1.17 -r1.18 --- jk_status.c 17 Feb 2005 13:41:04 -0000 1.17 +++ jk_status.c 19 Feb 2005 08:12:29 -0000 1.18 @@ -199,7 +199,7 @@ return "True"; } -static const char *status_val_status(int d, int e, int r) +static const char *status_val_status(int d, int e, int r, int b) { if (d) return "Disabled"; @@ -207,6 +207,8 @@ return "Recovering"; else if (e) return "Error"; + else if (b) + return "Busy"; else return "OK"; } @@ -374,7 +376,8 @@ jk_putv(s, "<td>", status_val_status(wr->s->is_disabled, wr->s->in_error_state, - wr->s->in_recovering), + wr->s->in_recovering, + wr->s->is_busy), "</td>", NULL); jk_printf(s, "<td>%d</td>", wr->s->lb_factor); jk_printf(s, "<td>%d</td>", wr->s->lb_value);
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]