On Mon, 20 May 2013 21:11:03 +0400 Maxim Dounin <mdou...@mdounin.ru> wrote:
> Patch: > > diff --git a/src/http/modules/ngx_http_upstream_least_conn_module.c > b/src/http/modules/ngx_http_upstream_least_conn_module.c > --- a/src/http/modules/ngx_http_upstream_least_conn_module.c > +++ b/src/http/modules/ngx_http_upstream_least_conn_module.c > @@ -282,7 +282,10 @@ ngx_http_upstream_get_least_conn_peer(ng > } > > best->current_weight -= total; > - best->checked = now; > + > + if (now - best->checked > best->fail_timeout) { > + best->checked = now; > + } > > pc->sockaddr = best->sockaddr; > pc->socklen = best->socklen; > diff --git a/src/http/ngx_http_upstream_round_robin.c > b/src/http/ngx_http_upstream_round_robin.c > --- a/src/http/ngx_http_upstream_round_robin.c > +++ b/src/http/ngx_http_upstream_round_robin.c > @@ -523,7 +523,10 @@ ngx_http_upstream_get_peer(ngx_http_upst > rrp->tried[n] |= m; > > best->current_weight -= total; > - best->checked = now; > + > + if (now - best->checked > best->fail_timeout) { > + best->checked = now; > + } > > return best; > } Still a bug exists because of this code: if (state & NGX_PEER_FAILED) { /* ... */ peer->fails++; peer->accessed = now; peer->checked = now; /* .... */ } else { if (peer->accessed < peer->checked) { peer->fails = 0; } } Consider upstream which fails at least once each second (actually it's enough to fail each fail_timeout seconds). This upstream will be disabled as soon as it will fail max_fails times no matter what fail_timeout is. I propose the following solution: let's have two bits per peer: 1) avail - set if this peer is available for usage (fails < max_fails) 2) avail_check - set if this peer was disabled, but its fail_timeout is expired and we sent one request to check if it's alive and two timestamps: 3) first_fail - we'll count number of fails only inside [first_fail, first_fail + fail_timeout) interval 4) unavail_from - peer is unavailable inside [unavail_from, unavail_from + fail_timeout) interval first_fail will only be used if avail == 1, unavail_from only if avail == 0, so we may use union here. Simplified code: get: if (!avail && (now < unavail_from + fail_timeout || avail_check)) { continue; } if (!avail) avail_check =1; free: if (avail && now >= first_fail + fail_timeout) fails = 0; if (request_failed) { if (fails == 0) first_fail = now; fails++; if (fails >= max_fails) { avail = 0; unavail_from = now; } avail_check = 0; } else { if (avail_check) { fails = 0; avail = 1; avail_check = 0; } } Also I think it's a good idea to split fail_timeout to fail_timeout and disable_time: we'll count fails during fail_timeout and disable upstream for disable_time. patch with bugfix: Upstreams bugfix: fail_timeout could be ignored Upstream could be disabled no matter what fail_timeout is (if requests fail each fail_timeout seconds). Signed-off-by: Dmitry Popov <d...@highloadlab.com> diff -ur old/src/http/ngx_http_upstream_round_robin.h new/src/http/ngx_http_upstream_round_robin.h --- old/src/http/ngx_http_upstream_round_robin.h 2013-05-21 13:22:09.478903537 +0400 +++ new/src/http/ngx_http_upstream_round_robin.h 2013-05-21 15:38:05.488236353 +0400 @@ -24,13 +24,17 @@ ngx_int_t weight; ngx_uint_t fails; - time_t accessed; - time_t checked; + union { + time_t first_fail; + time_t unavail_from; + }; ngx_uint_t max_PCRE: retain input pattern for all regular expressions. Previously, input pattern was kept only for regular expressions with named captures, which resulted in error log entries without input pattern for PCRE errors that occured while processing regular expressions without them. Signed-off-by: Piotr Sikora <piotr at cloudflare.com> fails; time_t fail_timeout; - ngx_uint_t down; /* unsigned down:1; */ + unsigned avail:1; + unsigned avail_check:1; + unsigned down:1; #if (NGX_HTTP_SSL) ngx_ssl_session_t *ssl_session; /* local to a process */ diff -ur old/src/http/modules/ngx_http_upstream_ip_hash_module.c new/src/http/modules/ngx_http_upstream_ip_hash_module.c --- old/src/http/modules/ngx_http_upstream_ip_hash_module.c 2013-05-21 13:22:09.475570203 +0400 +++ new/src/http/modules/ngx_http_upstream_ip_hash_module.c 2013-05-21 15:44:22.671564821 +0400 @@ -208,12 +208,14 @@ if (!peer->down) { - if (peer->max_fails == 0 || peer->fails < peer->max_fails) { + if (peer->avail) { break; } - if (now - peer->checked > peer->fail_timeout) { - peer->checked = now; + if (now - peer->unavail_from >= peer->fail_timeout + && !peer->avail_check) + { + peer->avail_check = 1; break; } } diff -ur old/src/http/modules/ngx_http_upstream_least_conn_module.c new/src/http/modules/ngx_http_upstream_least_conn_module.c --- old/src/http/modules/ngx_http_upstream_least_conn_module.c 2013-05-21 13:22:09.472236869 +0400 +++ new/src/http/modules/ngx_http_upstream_least_conn_module.c 2013-05-21 15:45:35.254897219 +0400 @@ -203,9 +203,9 @@ continue; } - if (peer->max_fails - && peer->fails >= peer->max_fails - && now - peer->checked <= peer->fail_timeout) + if (!peer->avail + && (now - peer->unavail_from < peer->fail_timeout + || peer->avail_check)) { continue; } @@ -260,9 +260,9 @@ continue; } - if (peer->max_fails - && peer->fails >= peer->max_fails - && now - peer->checked <= peer->fail_timeout) + if (!peer->avail + && (now - peer->unavail_from < peer->fail_timeout + || peer->avail_check)) { continue; } @@ -282,7 +282,9 @@ } best->current_weight -= total; - best->checked = now; + if (!best->avail) { + best->avail_check = 1; + } pc->sockaddr = best->sockaddr; pc->socklen = best->socklen; @@ -331,6 +333,9 @@ for (i = 0; i < peers->number; i++) { peers->peer[i].fails = 0; + peers->peer[i].avail = 1; + peers->peer[i].avail_check = 0; + /* peers->peer[i].first_fail = 0; */ } pc->name = peers->name; diff -ur old/src/http/ngx_http_upstream_round_robin.c new/src/http/ngx_http_upstream_round_robin.c --- old/src/http/ngx_http_upstream_round_robin.c 2013-05-21 13:22:09.472236869 +0400 +++ new/src/http/ngx_http_upstream_round_robin.c 2013-05-21 15:46:07.171563474 +0400 @@ -85,6 +85,8 @@ peers->peer[n].weight = server[i].weight; peers->peer[n].effective_weight = server[i].weight; peers->peer[n].current_weight = 0; + peers->peer[n].avail = 1; + peers->peer[n].avail_check = 0; n++; } } @@ -139,6 +141,8 @@ backup->peer[n].max_fails = server[i].max_fails; backup->peer[n].fail_timeout = server[i].fail_timeout; backup->peer[n].down = server[i].down; + backup->peer[n].avail = 1; + backup->peer[n].avail_check = 0; n++; } } @@ -196,6 +200,8 @@ peers->peer[i].current_weight = 0; peers->peer[i].max_fails = 1; peers->peer[i].fail_timeout = 10; + peers->peer[i].avail = 1; + peers->peer[i].avail_check = 0; } us->peer.data = peers; @@ -301,6 +307,8 @@ peers->peer[0].current_weight = 0; peers->peer[0].max_fails = 1; peers->peer[0].fail_timeout = 10; + peers->peer[0].avail = 1; + peers->peer[0].avail_check = 0; } else { @@ -334,6 +342,8 @@ peers->peer[i].current_weight = 0; peers->peer[i].max_fails = 1; peers->peer[i].fail_timeout = 10; + peers->peer[i].avail = 1; + peers->peer[i].avail_check = 0; } } @@ -451,6 +461,9 @@ for (i = 0; i < peers->number; i++) { peers->peer[i].fails = 0; + peers->peer[i].avail = 1; + peers->peer[i].avail_check = 0; + /* peers->peer[i].first_fail = 0; */ } /* ngx_unlock_mutex(peers->mutex); */ @@ -490,9 +503,9 @@ continue; } - if (peer->max_fails - && peer->fails >= peer->max_fails - && now - peer->checked <= peer->fail_timeout) + if (!peer->avail + && (now - peer->unavail_from < peer->fail_timeout + || peer->avail_check)) { continue; } @@ -523,7 +536,9 @@ rrp->tried[n] |= m; best->current_weight -= total; - best->checked = now; + if (!best->avail) { + best->avail_check = 1; + } return best; } @@ -550,35 +565,49 @@ peer = &rrp->peers->peer[rrp->current]; - if (state & NGX_PEER_FAILED) { + if (peer->max_fails) { now = ngx_time(); - /* ngx_lock_mutex(rrp->peers->mutex); */ + if (peer->avail && now - peer->first_fail >= peer->fail_timeout) { + peer->fails = 0; + } + + if (state & NGX_PEER_FAILED) { + /* ngx_lock_mutex(rrp->peers->mutex); */ + + if (peer->fails++ == 0) { + peer->first_fail = now; + } + + if (peer->fails >= peer->max_fails) { + peer->avail = 0; + peer->unavail_from = now; + } - peer->fails++; - peer->accessed = now; - peer->checked = now; + peer->avail_check = 0; - if (peer->max_fails) { peer->effective_weight -= peer->weight / peer->max_fails; - } - ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, - "free rr peer failed: %ui %i", - rrp->current, peer->effective_weight); + ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0, + "free rr peer failed: %ui %i", + rrp->current, peer->effective_weight); - if (peer->effective_weight < 0) { - peer->effective_weight = 0; - } + if (peer->effective_weight < 0) { + peer->effective_weight = 0; + } - /* ngx_unlock_mutex(rrp->peers->mutex); */ + /* ngx_unlock_mutex(rrp->peers->mutex); */ - } else { + } else { - /* mark peer live if check passed */ + /* mark peer live if check passed */ - if (peer->accessed < peer->checked) { - peer->fails = 0; + if (peer->avail_check) { + peer->fails = 0; + peer->avail = 1; + peer->avail_check = 0; + /* peers->peer[i].first_fail = 0; */ + } } } -- Dmitry Popov Highloadlab _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel