Hello,
The attached patch against trunk restores Squid's ability to retry
connections that fail due to a pconn race condition. Currently, we serve
an ERR_ZERO_SIZE_OBJECT "Bad Gateway" error in those cases.
This patch is meant to reflect previous discussions on this topic but is
simpler/tighter than my previous attempts that were posted as PREVIEW
patches.
Thank you,
Alex.
Retry requests that failed due to a persistent connection race
instead of replying with ERR_ZERO_SIZE_OBJECT "Bad Gateway".
The ERR_ZERO_SIZE_OBJECT errors were visible to the client when the
destination had only one address because serverDestinations.shift()
made the list of destinations empty and startConnectionOrFail() failed.
When FwdState starts to use a pinned connection, the connection is treated as
an idle persistent connection as far as race detection is concerned.
Currently, pinned connections cannot be reopened, repinned, and retried after
a pconn race. This will change when server-side bumped connections become
pinned.
It feels wrong that a failed serverConn may remain set while we are opening a
new connection to replace it. I added an XXX to mark that preexisting problem.
=== modified file 'src/forward.cc'
--- src/forward.cc 2012-02-10 03:30:02 +0000
+++ src/forward.cc 2012-02-17 00:54:18 +0000
@@ -80,40 +80,41 @@
FwdState::abort(void* d)
{
FwdState* fwd = (FwdState*)d;
Pointer tmp = fwd; // Grab a temporary pointer to keep the object alive during our scope.
if (Comm::IsConnOpen(fwd->serverConnection())) {
comm_remove_close_handler(fwd->serverConnection()->fd, fwdServerClosedWrapper, fwd);
}
fwd->serverDestinations.clean();
fwd->self = NULL;
}
/**** PUBLIC INTERFACE ********************************************************/
FwdState::FwdState(const Comm::ConnectionPointer &client, StoreEntry * e, HttpRequest * r)
{
debugs(17, 2, HERE << "Forwarding client request " << client << ", url=" << e->url() );
entry = e;
clientConn = client;
request = HTTPMSGLOCK(r);
+ pconnRace = raceImpossible;
start_t = squid_curtime;
serverDestinations.reserve(Config.forward_max_tries);
e->lock();
EBIT_SET(e->flags, ENTRY_FWD_HDR_WAIT);
}
// Called once, right after object creation, when it is safe to set self
void FwdState::start(Pointer aSelf)
{
// Protect ourselves from being destroyed when the only Server pointing
// to us is gone (while we expect to talk to more Servers later).
// Once we set self, we are responsible for clearing it when we do not
// expect to talk to any servers.
self = aSelf; // refcounted
// We hope that either the store entry aborts or peer is selected.
// Otherwise we are going to leak our object.
entry->registerAbort(FwdState::abort, this);
@@ -308,40 +309,43 @@
if (!err) {
ErrorState *anErr = new ErrorState(ERR_CANNOT_FORWARD, HTTP_INTERNAL_SERVER_ERROR, request);
anErr->xerrno = errno;
fail(anErr);
} // else use actual error from last connection attempt
self = NULL; // refcounted
}
}
void
FwdState::fail(ErrorState * errorState)
{
debugs(17, 3, HERE << err_type_str[errorState->type] << " \"" << httpStatusString(errorState->httpStatus) << "\"\n\t" << entry->url() );
delete err;
err = errorState;
if (!errorState->request)
errorState->request = HTTPMSGLOCK(request);
+ if (pconnRace == racePossible && errorState->type == ERR_ZERO_SIZE_OBJECT)
+ pconnRace = raceHappened;
+
#if USE_SSL
if (errorState->type == ERR_SECURE_CONNECT_FAIL && errorState->detail)
request->detailError(errorState->type, errorState->detail->errorNo());
else
#endif
request->detailError(errorState->type, errorState->xerrno);
}
/**
* Frees fwdState without closing FD or generating an abort
*/
void
FwdState::unregister(Comm::ConnectionPointer &conn)
{
debugs(17, 3, HERE << entry->url() );
assert(serverConnection() == conn);
assert(Comm::IsConnOpen(conn));
comm_remove_close_handler(conn->fd, fwdServerClosedWrapper, this);
serverConn = NULL;
}
@@ -518,40 +522,44 @@
default:
return false;
}
return true;
}
void
FwdState::serverClosed(int fd)
{
debugs(17, 2, HERE << "FD " << fd << " " << entry->url());
retryOrBail();
}
void
FwdState::retryOrBail()
{
if (checkRetry()) {
debugs(17, 3, HERE << "re-forwarding (" << n_tries << " tries, " << (squid_curtime - start_t) << " secs)");
+ // we should retry the same destination if it failed due to pconn race
+ if (pconnRace == raceHappened)
+ debugs(17, 4, HERE << "retrying the same destination");
+ else
serverDestinations.shift(); // last one failed. try another.
startConnectionOrFail();
return;
}
// TODO: should we call completed() here and move doneWithRetries there?
doneWithRetries();
if (self != NULL && !err && shutting_down) {
ErrorState *anErr = new ErrorState(ERR_SHUTTING_DOWN, HTTP_SERVICE_UNAVAILABLE, request);
errorAppendEntry(entry, anErr);
}
self = NULL; // refcounted
}
// If the Server quits before nibbling at the request body, the body sender
// will not know (so that we can retry). Call this if we will not retry. We
// will notify the sender so that it does not get stuck waiting for space.
void
@@ -810,97 +818,113 @@
int ftimeout = Config.Timeout.forward - (squid_curtime - start_t);
if (ftimeout < 0)
ftimeout = 5;
if (ftimeout < ctimeout)
ctimeout = ftimeout;
if (serverDestinations[0]->getPeer() && request->flags.sslBumped == true) {
debugs(50, 4, "fwdConnectStart: Ssl bumped connections through parrent proxy are not allowed");
ErrorState *anErr = new ErrorState(ERR_CANNOT_FORWARD, HTTP_SERVICE_UNAVAILABLE, request);
fail(anErr);
self = NULL; // refcounted
return;
}
request->flags.pinned = 0; // XXX: what if the ConnStateData set this to flag existing credentials?
// XXX: answer: the peer selection *should* catch it and give us only the pinned peer. so we reverse the =0 step below.
// XXX: also, logs will now lie if pinning is broken and leads to an error message.
if (serverDestinations[0]->peerType == PINNED) {
ConnStateData *pinned_connection = request->pinnedConnection();
- assert(pinned_connection);
+ // pinned_connection may become nil after a pconn race
+ if (pinned_connection)
serverConn = pinned_connection->validatePinnedConnection(request, serverDestinations[0]->getPeer());
+ else
+ serverConn = NULL;
if (Comm::IsConnOpen(serverConn)) {
#if 0
if (!serverConn->getPeer())
serverConn->peerType = HIER_DIRECT;
#endif
n_tries++;
request->flags.pinned = 1;
if (pinned_connection->pinnedAuth())
request->flags.auth = 1;
+ // the server may close the pinned connection before this request
+ pconnRace = racePossible;
dispatch();
return;
}
/* Failure. Fall back on next path */
debugs(17,2,HERE << " Pinned connection " << pinned_connection << " not valid.");
serverDestinations.shift();
startConnectionOrFail();
return;
}
// Use pconn to avoid opening a new connection.
const char *host;
if (serverDestinations[0]->getPeer()) {
host = serverDestinations[0]->getPeer()->host;
} else {
host = request->GetHost();
}
- Comm::ConnectionPointer temp = fwdPconnPool->pop(serverDestinations[0], host, checkRetriable());
+
+ Comm::ConnectionPointer temp;
+ // Avoid pconns after races so that the same client does not suffer twice.
+ // This does not increase the total number of connections because we just
+ // closed the connection that failed the race. And re-pinning assumes this.
+ if (pconnRace != raceHappened)
+ temp = fwdPconnPool->pop(serverDestinations[0], host, checkRetriable());
+
+ const bool openedPconn = Comm::IsConnOpen(temp);
+ pconnRace = openedPconn ? racePossible : raceImpossible;
// if we found an open persistent connection to use. use it.
- if (Comm::IsConnOpen(temp)) {
+ if (openedPconn) {
serverConn = temp;
debugs(17, 3, HERE << "reusing pconn " << serverConnection());
n_tries++;
if (!serverConnection()->getPeer())
origin_tries++;
comm_add_close_handler(serverConnection()->fd, fwdServerClosedWrapper, this);
/* Update server side TOS and Netfilter mark on the connection. */
if (Ip::Qos::TheConfig.isAclTosActive()) {
temp->tos = GetTosToServer(request);
Ip::Qos::setSockTos(temp, temp->tos);
}
#if SO_MARK
if (Ip::Qos::TheConfig.isAclNfmarkActive()) {
temp->nfmark = GetNfmarkToServer(request);
Ip::Qos::setSockNfmark(temp, temp->nfmark);
}
#endif
dispatch();
return;
}
+ // XXX: stale serverConn may remain set even though we are opening new conn
+
#if URL_CHECKSUM_DEBUG
entry->mem_obj->checkUrlChecksum();
#endif
/* Get the server side TOS and Netfilter mark to be set on the connection. */
if (Ip::Qos::TheConfig.isAclTosActive()) {
serverDestinations[0]->tos = GetTosToServer(request);
}
#if SO_MARK && USE_LIBCAP
serverDestinations[0]->nfmark = GetNfmarkToServer(request);
debugs(17, 3, "fwdConnectStart: got outgoing addr " << serverDestinations[0]->local << ", tos " << int(serverDestinations[0]->tos)
<< ", netfilter mark " << serverDestinations[0]->nfmark);
#else
serverDestinations[0]->nfmark = 0;
debugs(17, 3, "fwdConnectStart: got outgoing addr " << serverDestinations[0]->local << ", tos " << int(serverDestinations[0]->tos));
#endif
calls.connector = commCbCall(17,3, "fwdConnectDoneWrapper", CommConnectCbPtrFun(fwdConnectDoneWrapper, this));
Comm::ConnOpener *cs = new Comm::ConnOpener(serverDestinations[0], calls.connector, ctimeout);
cs->setHost(host);
=== modified file 'src/forward.h'
--- src/forward.h 2011-08-20 15:57:06 +0000
+++ src/forward.h 2012-02-17 00:42:24 +0000
@@ -88,25 +88,29 @@
time_t start_t;
int n_tries;
int origin_tries;
// AsyncCalls which we set and may need cancelling.
struct {
AsyncCall::Pointer connector; ///< a call linking us to the ConnOpener producing serverConn.
} calls;
struct {
unsigned int connected_okay:1; ///< TCP link ever opened properly. This affects retry of POST,PUT,CONNECT,etc
unsigned int dont_retry:1;
unsigned int forward_completed:1;
} flags;
/** connections to open, in order, until successful */
Comm::ConnectionList serverDestinations;
Comm::ConnectionPointer serverConn; ///< a successfully opened connection to a server.
+ /// possible pconn race states
+ typedef enum { raceImpossible, racePossible, raceHappened } PconnRace;
+ PconnRace pconnRace; ///< current pconn race state
+
// NP: keep this last. It plays with private/public
CBDATA_CLASS2(FwdState);
};
#endif /* SQUID_FORWARD_H */