I have noticed that proxy logging about a dead server is overtly verbose
in some circumstances.
If you use proxy_balancer to load balance over a number of servers and
one is dead you'll get 3 lines of error logging every time it retries
the server (to see if it's alive yet). This can get rather obnoxious
when you're balancing over a number of ports which may or may not have a
server listening at the time -- and when you're allowing retries of dead
servers with any frequency.
The attached patch changes the level of such logging to debug for
retries of a worker known to be in an error state, leaving the level at
error for other cases. The result is that you get error logging when a
server is first determined to be unavailable -- and then are simply not
bothered about this any longer.
Note that this patch does have a little bit of unrelated WIN32-specific
material regarding APR_SO_NONBLOCK -- as this is necessary on Windows to
get connection timeouts to work -- at least until someone fixes the
apr_socket_connect() implementation on Windows.
--
Jess Holle
P.S. I've developed a similar patch for the tomcat JK connectors as well.
--- modules/proxy/mod_proxy.h.orig 2008-11-11 14:04:34.000000000 -0600
+++ modules/proxy/mod_proxy.h 2009-01-19 14:53:57.745762300 -0600
@@ -272,6 +272,7 @@
#define PROXY_WORKER_STOPPED 0x0040
#define PROXY_WORKER_IN_ERROR 0x0080
#define PROXY_WORKER_HOT_STANDBY 0x0100
+#define PROXY_WORKER_RETRYING 0x0200 /* Additional status flag to
indicate worker is being retried [PTC] */
#define PROXY_WORKER_NOT_USABLE_BITMAP ( PROXY_WORKER_IN_SHUTDOWN | \
PROXY_WORKER_DISABLED | PROXY_WORKER_STOPPED | PROXY_WORKER_IN_ERROR )
--- modules/proxy/proxy_util.c.orig 2008-11-11 14:04:34.000000000 -0600
+++ modules/proxy/proxy_util.c 2009-01-19 14:57:43.552431300 -0600
@@ -1932,6 +1932,7 @@
if (apr_time_now() > worker->s->error_time + worker->retry) {
++worker->s->retries;
worker->s->status &= ~PROXY_WORKER_IN_ERROR;
+ worker->s->status |= PROXY_WORKER_RETRYING; /* Flag that we're
retrying a worker that was in error [PTC] */
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, s,
"proxy: %s: worker for (%s) has been marked for
retry",
proxy_function, worker->hostname);
@@ -2223,6 +2224,7 @@
{
apr_status_t rv;
int connected = 0;
+ int was_retry; /* Added [PTC] */
int loglevel;
apr_sockaddr_t *backend_addr = conn->addr;
apr_socket_t *newsock;
@@ -2301,13 +2303,25 @@
"proxy: %s: fam %d socket created to connect to %s",
proxy_function, backend_addr->family, worker->hostname);
+/* Workaround needed on Windows to get connection timeout to actually work.
+ Should be fixed in WIN32 implementation of apr_socket_connect(). [PTC] */
+#ifdef WIN32
+ apr_socket_opt_set(newsock, APR_SO_NONBLOCK, 1);
+#endif
+
/* make the connection out of the socket */
rv = apr_socket_connect(newsock, backend_addr);
+/* Second part of workaround noted just above [PTC] */
+#ifdef WIN32
+ apr_socket_opt_set(newsock, APR_SO_NONBLOCK, 0);
+#endif
+
/* if an error occurred, loop round and try again */
if (rv != APR_SUCCESS) {
apr_socket_close(newsock);
- loglevel = backend_addr->next ? APLOG_DEBUG : APLOG_ERR;
+ /* Only log failure at debug level when we know we're retrying a
previously failed worker [PTC] */
+ loglevel = backend_addr->next ? APLOG_DEBUG : ( (worker->s->status
& PROXY_WORKER_RETRYING) ? APLOG_DEBUG : APLOG_ERR );
ap_log_error(APLOG_MARK, loglevel, rv, s,
"proxy: %s: attempt to connect to %pI (%s) failed",
proxy_function,
@@ -2337,19 +2351,27 @@
* Altrough some connections may be alive
* no further connections to the worker could be made
*/
+ /* If we are retrying a worker that had previously failed, then only log
+ at debug level verbosity. Clear retry bitmask from status in all cases.
+ Finally, when a retry fails return -DECLINED rather than DECLINED. [PTC]
+ */
if (!connected && PROXY_WORKER_IS_USABLE(worker) &&
!(worker->s->status & PROXY_WORKER_IGNORE_ERRORS)) {
+ was_retry = worker->s->status & PROXY_WORKER_RETRYING;
+ loglevel = was_retry ? APLOG_DEBUG : APLOG_ERR;
worker->s->status |= PROXY_WORKER_IN_ERROR;
worker->s->error_time = apr_time_now();
- ap_log_error(APLOG_MARK, APLOG_ERR, 0, s,
+ ap_log_error(APLOG_MARK, loglevel, 0, s,
"ap_proxy_connect_backend disabling worker for (%s)",
worker->hostname);
}
else {
worker->s->error_time = 0;
worker->s->retries = 0;
+ was_retry = 0;
}
- return connected ? OK : DECLINED;
+ worker->s->status &= ~PROXY_WORKER_RETRYING;
+ return connected ? OK : ( was_retry ? -DECLINED : DECLINED );
}
PROXY_DECLARE(int) ap_proxy_connection_create(const char *proxy_function,
--- modules/proxy/mod_proxy_ajp.c.orig 2008-11-15 08:25:54.000000000 -0600
+++ modules/proxy/mod_proxy_ajp.c 2009-01-19 14:56:49.478531700 -0600
@@ -655,13 +655,21 @@
break;
/* Step Two: Make the Connection */
- if (ap_proxy_connect_backend(scheme, backend, worker, r->server)) {
- ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
+ {
+ /* If ap_proxy_connect_backend() returns something other than OK or
DECLINED,
+ we assume that means it was a retry of an existing backend, in
which case
+ we change the log level to debug, as repeated errors that a
backend server
+ that was not reachable is still not reachable are noisy and
unhelpful. [PTC]
+ */
+ int conn_result = ap_proxy_connect_backend(scheme, backend, worker,
r->server);
+ if ( conn_result != OK ) {
+ ap_log_error(APLOG_MARK, (( conn_result != DECLINED ) ?
APLOG_DEBUG : APLOG_ERR), 0, r->server,
"proxy: AJP: failed to make connection to backend:
%s",
backend->hostname);
status = HTTP_SERVICE_UNAVAILABLE;
break;
}
+ }
/* Handle CPING/CPONG */
if (worker->ping_timeout_set) {