Author: rjung Date: Sat Jan 27 06:58:59 2007 New Revision: 500540 URL: http://svn.apache.org/viewvc?view=rev&rev=500540 Log: Add a status worker feature that allows an errored lb sub worker to be marked administratively for recovery.
Modified: tomcat/connectors/trunk/jk/native/common/jk_status.c tomcat/connectors/trunk/jk/xdocs/miscellaneous/changelog.xml tomcat/connectors/trunk/jk/xdocs/reference/status.xml Modified: tomcat/connectors/trunk/jk/native/common/jk_status.c URL: http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/native/common/jk_status.c?view=diff&rev=500540&r1=500539&r2=500540 ============================================================================== --- tomcat/connectors/trunk/jk/native/common/jk_status.c (original) +++ tomcat/connectors/trunk/jk/native/common/jk_status.c Sat Jan 27 06:58:59 2007 @@ -105,8 +105,9 @@ #define JK_STATUS_CMD_UPDATE (4) #define JK_STATUS_CMD_RESET (5) #define JK_STATUS_CMD_VERSION (6) +#define JK_STATUS_CMD_RECOVER (7) #define JK_STATUS_CMD_DEF (JK_STATUS_CMD_LIST) -#define JK_STATUS_CMD_MAX (JK_STATUS_CMD_VERSION) +#define JK_STATUS_CMD_MAX (JK_STATUS_CMD_RECOVER) #define JK_STATUS_CMD_TEXT_UNKNOWN ("unknown") #define JK_STATUS_CMD_TEXT_LIST ("list") #define JK_STATUS_CMD_TEXT_SHOW ("show") @@ -114,6 +115,7 @@ #define JK_STATUS_CMD_TEXT_UPDATE ("update") #define JK_STATUS_CMD_TEXT_RESET ("reset") #define JK_STATUS_CMD_TEXT_VERSION ("version") +#define JK_STATUS_CMD_TEXT_RECOVER ("recover") #define JK_STATUS_CMD_TEXT_DEF (JK_STATUS_CMD_TEXT_LIST) #define JK_STATUS_MIME_UNKNOWN (0) @@ -289,6 +291,7 @@ JK_STATUS_CMD_TEXT_UPDATE, JK_STATUS_CMD_TEXT_RESET, JK_STATUS_CMD_TEXT_VERSION, + JK_STATUS_CMD_TEXT_RECOVER, NULL }; @@ -768,6 +771,8 @@ return JK_STATUS_CMD_RESET; else if (!strcmp(cmd, JK_STATUS_CMD_TEXT_VERSION)) return JK_STATUS_CMD_VERSION; + else if (!strcmp(cmd, JK_STATUS_CMD_TEXT_RECOVER)) + return JK_STATUS_CMD_RECOVER; return JK_STATUS_CMD_UNKNOWN; } @@ -848,7 +853,8 @@ prev = status_cmd_int(arg); if (cmd == JK_STATUS_CMD_UNKNOWN) { if (prev == JK_STATUS_CMD_UPDATE || - prev == JK_STATUS_CMD_RESET) { + prev == JK_STATUS_CMD_RESET || + prev == JK_STATUS_CMD_RECOVER) { cmd = from; } } @@ -856,7 +862,8 @@ jk_printf(s, "%s%s=%s", started 
? "&" : "?", JK_STATUS_ARG_CMD, status_cmd_text(cmd)); if (cmd == JK_STATUS_CMD_EDIT || - cmd == JK_STATUS_CMD_RESET) { + cmd == JK_STATUS_CMD_RESET || + cmd == JK_STATUS_CMD_RECOVER) { jk_printf(s, "%s%s=%s", "&", JK_STATUS_ARG_FROM, status_cmd_text(prev)); } @@ -1323,6 +1330,11 @@ jk_puts(s, "|"); status_write_uri(s, p, "R", JK_STATUS_CMD_RESET, JK_STATUS_MIME_UNKNOWN, name, wr->s->name, 0, 0, "", l); + if (wr->s->state == JK_LB_STATE_ERROR) { + jk_puts(s, "|"); + status_write_uri(s, p, "T", JK_STATUS_CMD_RECOVER, JK_STATUS_MIME_UNKNOWN, + name, wr->s->name, 0, 0, "", l); + } jk_puts(s, "] </td>"); jk_printf(s, JK_STATUS_SHOW_MEMBER_ROW, wr->s->name, @@ -2827,6 +2839,79 @@ return JK_FALSE; } +static int recover_worker(jk_ws_service_t *s, + status_endpoint_t *p, + jk_logger_t *l) +{ + unsigned int i; + const char *worker; + const char *sub_worker; + lb_worker_t *lb; + jk_worker_t *jw = NULL; + worker_record_t *wr = NULL; + + JK_TRACE_ENTER(l); + status_get_string(p, JK_STATUS_ARG_WORKER, "", &worker, l); + status_get_string(p, JK_STATUS_ARG_SUB_WORKER, "", &sub_worker, l); + jk_log(l, JK_LOG_INFO, + "recovering worker '%s' sub worker '%s'", + worker ? worker : "(null)", sub_worker ? 
sub_worker : "(null)"); + if (!worker || !worker[0]) { + jk_log(l, JK_LOG_WARNING, + "NULL or EMPTY worker param"); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + jw = wc_get_worker_for_name(worker, l); + if (!jw) { + jk_log(l, JK_LOG_WARNING, + "could not find worker '%s'", + worker); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + if (jw->type != JK_LB_WORKER_TYPE) { + jk_log(l, JK_LOG_WARNING, + "worker type not implemented"); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + lb = (lb_worker_t *)jw->worker_private; + if (!lb) { + jk_log(l, JK_LOG_WARNING, + "lb structure is (null)"); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + + if (!sub_worker || !sub_worker[0]) { + jk_log(l, JK_LOG_WARNING, + "only lb sub workers can be recovered"); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + + for (i = 0; i < (int)lb->num_of_workers; i++) { + wr = &(lb->lb_workers[i]); + if (strcmp(sub_worker, wr->s->name) == 0) + break; + } + if (!wr || i == (int)lb->num_of_workers) { + jk_log(l, JK_LOG_WARNING, + "could not find worker '%s'", + sub_worker); + JK_TRACE_EXIT(l); + return JK_FALSE; + } + if (wr->s->state == JK_LB_STATE_ERROR) { + wr->s->state = JK_LB_STATE_RECOVER; + JK_TRACE_EXIT(l); + return JK_TRUE; + } + JK_TRACE_EXIT(l); + return JK_FALSE; +} + static int JK_METHOD service(jk_endpoint_t *e, jk_ws_service_t *s, jk_logger_t *l, int *is_error) @@ -2939,7 +3024,8 @@ if (w->read_only && (cmd == JK_STATUS_CMD_EDIT || cmd == JK_STATUS_CMD_UPDATE || - cmd == JK_STATUS_CMD_RESET)) { + cmd == JK_STATUS_CMD_RESET || + cmd == JK_STATUS_CMD_RECOVER)) { err = "This command is not allowed in read only mode."; } } @@ -2999,6 +3085,26 @@ } } } + else if (cmd == JK_STATUS_CMD_RECOVER) { + /* lock shared memory */ + jk_shm_lock(); + if (recover_worker(s, p, l) == JK_FALSE) { + err = "Marking worker for recovery failed"; + } + /* unlock the shared memory */ + jk_shm_unlock(); + if (mime == JK_STATUS_MIME_HTML) { + jk_puts(s, "\n<meta http-equiv=\"Refresh\" content=\"" + JK_STATUS_WAIT_AFTER_UPDATE 
";url="); + status_write_uri(s, p, NULL, JK_STATUS_CMD_UNKNOWN, JK_STATUS_MIME_UNKNOWN, + NULL, NULL, 0, 0, NULL, l); + jk_puts(s, "\">"); + if (!err) { + jk_putv(s, "<p><b>Result: OK - You will be redirected in " + JK_STATUS_WAIT_AFTER_UPDATE " seconds.</b><p/>", NULL); + } + } + } else { if (mime == JK_STATUS_MIME_XML) { jk_print_xml_start_elt(s, w, 0, 0, "server"); @@ -3189,7 +3295,7 @@ if (!w->read_only && cmd == JK_STATUS_CMD_LIST) jk_puts(s, ", "); if (!w->read_only) - jk_puts(s, "<b>E</b>=Edit worker, <b>R</b>=Reset worker state"); + jk_puts(s, "<b>E</b>=Edit worker, <b>R</b>=Reset worker state, <b>T</b>=Try worker recovery"); jk_puts(s, "]\n"); } if (cmd == JK_STATUS_CMD_LIST) { Modified: tomcat/connectors/trunk/jk/xdocs/miscellaneous/changelog.xml URL: http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/xdocs/miscellaneous/changelog.xml?view=diff&rev=500540&r1=500539&r2=500540 ============================================================================== --- tomcat/connectors/trunk/jk/xdocs/miscellaneous/changelog.xml (original) +++ tomcat/connectors/trunk/jk/xdocs/miscellaneous/changelog.xml Sat Jan 27 06:58:59 2007 @@ -27,6 +27,10 @@ <subsection name="Native"> <changelog> <add> + JkStatus: Allow lb sub workers in error state to be marked for recovery + administratively from the status worker. (rjung) + </add> + <add> Load Balancer: Do not try to recover multiple times in parallel. Use additional runtime states "PROBE" and "FORCED". (rjung) </add> Modified: tomcat/connectors/trunk/jk/xdocs/reference/status.xml URL: http://svn.apache.org/viewvc/tomcat/connectors/trunk/jk/xdocs/reference/status.xml?view=diff&rev=500540&r1=500539&r2=500540 ============================================================================== --- tomcat/connectors/trunk/jk/xdocs/reference/status.xml (original) +++ tomcat/connectors/trunk/jk/xdocs/reference/status.xml Sat Jan 27 06:58:59 2007 @@ -73,6 +73,9 @@ or one of its members. 
</li> <li> +<b>recover</b>: Mark a member of a load balancer, that is in error state, for recovery. +</li> +<li> <b>version</b>: only show version information of the web server and the JK software </li> </ul> @@ -238,8 +241,8 @@ access to the status worker URLs you have chosen. Nevertheless two configuration attributes of status workers are helpful. The attribute "read_only" disables all features of the status worker, that can be used to change configurations or runtime status of the other workers. -A read_only status worker will not allow access to the edit, update and reset acions. The default value -is "False", ie. read-write. To enable read_only you need to set it to "True". +A read_only status worker will not allow access to the edit, update, reset or recover actions. +The default value is "False", ie. read-write. To enable read_only you need to set it to "True". </p> <p> You could configure two status workers, one has read_only and will be made available to a larger @@ -316,8 +319,9 @@ <br/> <p> The action is determined by the parameter <b>cmd</b>. It can have the values "list", "show", -"edit", "update", "reset" and "version". If you omit the "cmd" parameter, the default "list" -will be used. All actions except for "list" and "refresh" need additional parameters. +"edit", "update", "reset", "recover" and "version". If you omit the "cmd" parameter, +the default "list" will be used. +All actions except for "list" and "refresh" need additional parameters. </p> </subsection> <subsection name="Output Format"> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]