mturk 2005/02/19 00:12:30
Modified: jk/native/common jk_lb_worker.c jk_shm.h jk_status.c
Log:
Add a worker is_busy runtime status flag. When we cannot obtain a free
endpoint, mark the worker as busy rather than in error, so that recovery
is not needed.
Revision Changes Path
1.63 +44 -25 jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c
Index: jk_lb_worker.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c,v
retrieving revision 1.62
retrieving revision 1.63
diff -u -r1.62 -r1.63
--- jk_lb_worker.c 17 Feb 2005 13:48:47 -0000 1.62
+++ jk_lb_worker.c 19 Feb 2005 08:12:29 -0000 1.63
@@ -42,6 +42,9 @@
#define WAIT_BEFORE_RECOVER (60*1)
#define WORKER_RECOVER_TIME ("recover_time")
+#define JK_WORKER_IN_ERROR(w) ((w)->in_error_state && !(w)->is_disabled && !(w)->is_busy)
+#define JK_WORKER_USABLE(w) (!(w)->in_error_state && !(w)->is_disabled && !(w)->is_busy)
+
struct lb_endpoint
{
jk_endpoint_t *e;
@@ -183,6 +186,7 @@
w->s->name);
w->s->in_recovering = JK_TRUE;
w->s->in_error_state = JK_FALSE;
+ w->s->is_busy = JK_FALSE;
}
JK_TRACE_EXIT(l);
@@ -225,8 +229,7 @@
/* Take into calculation only the workers that are
* not in error state or not disabled.
*/
- if (!p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
if (p->lbmethod == JK_LB_BYREQUESTS) {
p->lb_workers[i].s->lb_value +=
p->lb_workers[i].s->lb_factor;
total_factor += p->lb_workers[i].s->lb_factor;
@@ -269,15 +272,13 @@
* The worker might still be unusable, but we try
* anyway.
*/
- if (p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
}
/* Take into calculation only the workers that are
* not in error state or not disabled.
*/
- if (!p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
p->lb_workers[i].s->lb_value += p->lb_workers[i].s->lb_factor;
total_factor += p->lb_workers[i].s->lb_factor;
if (!candidate || p->lb_workers[i].s->lb_value >
candidate->s->lb_value)
@@ -309,15 +310,13 @@
* The worker might still be unusable, but we try
* anyway.
*/
- if (p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_IN_ERROR(p->lb_workers[i].s)) {
retry_worker(&p->lb_workers[i], p->s->recover_wait_time, l);
}
/* Take into calculation only the workers that are
* not in error state or not disabled.
*/
- if (!p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
mytraffic =
(p->lb_workers[i].s->transferred/p->lb_workers[i].s->lb_factor) +
(p->lb_workers[i].s->readed/p->lb_workers[i].s->lb_factor);
if (!candidate || mytraffic < curmin) {
@@ -346,9 +345,9 @@
return rc;
}
-static worker_record_t *find_session_route(lb_worker_t *p,
- const char *name,
- jk_logger_t *l)
+static worker_record_t *find_bysession_route(lb_worker_t *p,
+ const char *name,
+ jk_logger_t *l)
{
unsigned int i;
int total_factor = 0;
@@ -361,13 +360,12 @@
candidate = find_best_bydomain(p, name, l);
}
if (candidate) {
- if (candidate->s->in_error_state && !candidate->s->is_disabled) {
+ if (JK_WORKER_IN_ERROR(candidate->s)) {
retry_worker(candidate, p->s->recover_wait_time, l);
}
if (candidate->s->in_error_state) {
- /* We have a worker that is unusable.
- * It can be in error or disabled, but in case
- * it has a redirection set use that redirection worker.
+ /* We have a worker that is error state.
+ * If it has a redirection set use that redirection worker.
* This enables to safely remove the member from the
* balancer. Of course you will need a some kind of
* session replication between those two remote.
@@ -384,8 +382,7 @@
}
if (candidate && !uses_domain) {
for (i = 0; i < p->num_of_workers; i++) {
- if (!p->lb_workers[i].s->in_error_state &&
- !p->lb_workers[i].s->is_disabled) {
+ if (JK_WORKER_USABLE(p->lb_workers[i].s)) {
/* Skip all workers that are not member of candidate domain
*/
if (*candidate->s->domain &&
strcmp(p->lb_workers[i].s->domain, candidate->s->domain))
@@ -413,10 +410,10 @@
/* No need to find the best worker
* if there is a single one
*/
- if (p->lb_workers[0].s->in_error_state &&
- !p->lb_workers[0].s->is_disabled) {
+ if (JK_WORKER_IN_ERROR(p->lb_workers[0].s)) {
retry_worker(&p->lb_workers[0], p->s->recover_wait_time, l);
}
+ /* Check if worker is marked for retry */
if (!p->lb_workers[0].s->in_error_state) {
p->lb_workers[0].r = &(p->lb_workers[0].s->name[0]);
JK_TRACE_EXIT(l);
@@ -428,6 +425,9 @@
}
}
else if (p->s->sticky_session) {
+ /* Use sessionid only if sticky_session is
+ * defined for this load balancer
+ */
sessionid = get_sessionid(s);
}
JK_ENTER_CS(&(p->cs), r);
@@ -464,17 +464,18 @@
session_route);
/* We have a session route. Whow! */
- rc = find_session_route(p, session_route, l);
+ rc = find_bysession_route(p, session_route, l);
if (rc) {
JK_LEAVE_CS(&(p->cs), r);
if (JK_IS_DEBUG_LEVEL(l))
jk_log(l, JK_LOG_DEBUG,
- "found worker %s for partial sessionid %s",
- rc->s->name, sessionid);
+ "found worker %s for route %s and partial sessionid %s",
+ rc->s->name, session_route, sessionid);
JK_TRACE_EXIT(l);
return rc;
}
}
+ /* Try next partial sessionid if present */
sessionid = next;
}
if (!rc && p->s->sticky_session_force) {
@@ -556,6 +557,10 @@
rec->s->readed += end->rd;
rec->s->transferred += end->wr;
end->done(&end, l);
+ /* When returning the endpoint mark the worker as not busy.
+ * We have at least one endpoint free
+ */
+ rec->s->is_busy = JK_FALSE;
/* Decrement the busy worker count */
rec->s->busy--;
p->worker->s->busy--;
@@ -567,6 +572,19 @@
return JK_TRUE;
}
}
+ else {
+ /* If we can not get the endpoint
+ * mark the worker as busy rather then
+ * as in error
+ */
+ rec->s->is_busy = JK_TRUE;
+ jk_log(l, JK_LOG_INFO,
+ "could not get free endpoint for worker %s",
+ rec->s->name);
+ /* Decrement the worker count and try another worker */
+ --num_of_workers;
+ continue;
+ }
if (!service_ok) {
/*
* Service failed !!!
@@ -712,6 +730,7 @@
p->lb_workers[i].s->lb_value = p->lb_workers[i].s->lb_factor;
p->lb_workers[i].s->in_error_state = JK_FALSE;
p->lb_workers[i].s->in_recovering = JK_FALSE;
+ p->lb_workers[i].s->is_busy = JK_FALSE;
p->lb_workers[i].s->error_time = 0;
/* Worker can be initaly disabled as hot standby */
p->lb_workers[i].s->is_disabled =
jk_get_is_worker_disabled(props, worker_names[i]);
1.13 +2 -5 jakarta-tomcat-connectors/jk/native/common/jk_shm.h
Index: jk_shm.h
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_shm.h,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -r1.12 -r1.13
--- jk_shm.h 17 Feb 2005 15:03:15 -0000 1.12
+++ jk_shm.h 19 Feb 2005 08:12:29 -0000 1.13
@@ -56,10 +56,6 @@
int id;
/* Number of currently busy channels */
int busy;
- /* Number of currently idle channels */
- int idle;
- /* Maximum number of channels */
- int max_conn;
/* worker name */
char name[JK_SHM_STR_SIZ+1];
/* worker domain */
@@ -68,6 +64,7 @@
char redirect[JK_SHM_STR_SIZ+1];
/* current status of the worker */
int is_disabled;
+ int is_busy;
/* Current lb factor */
int lb_factor;
/* Current lb value */
1.18 +6 -3 jakarta-tomcat-connectors/jk/native/common/jk_status.c
Index: jk_status.c
===================================================================
RCS file: /home/cvs/jakarta-tomcat-connectors/jk/native/common/jk_status.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- jk_status.c 17 Feb 2005 13:41:04 -0000 1.17
+++ jk_status.c 19 Feb 2005 08:12:29 -0000 1.18
@@ -199,7 +199,7 @@
return "True";
}
-static const char *status_val_status(int d, int e, int r)
+static const char *status_val_status(int d, int e, int r, int b)
{
if (d)
return "Disabled";
@@ -207,6 +207,8 @@
return "Recovering";
else if (e)
return "Error";
+ else if (b)
+ return "Busy";
else
return "OK";
}
@@ -374,7 +376,8 @@
jk_putv(s, "<td>",
status_val_status(wr->s->is_disabled,
wr->s->in_error_state,
- wr->s->in_recovering),
+ wr->s->in_recovering,
+ wr->s->is_busy),
"</td>", NULL);
jk_printf(s, "<td>%d</td>", wr->s->lb_factor);
jk_printf(s, "<td>%d</td>", wr->s->lb_value);
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]