On Sat, May 30, 2015 at 10:27:49AM -0400, sven falempin wrote: > it is possible to remove the inflight var by opening the socket before > accepting. > Thus there is no need for guessing if the peer socket can be open,
How should that work when HTTP headers of the request determine to which system a connection should be made? > On Sat, May 30, 2015 at 7:40 AM, Claudio Jeker <[email protected]> wrote: > > > On Fri, May 22, 2015 at 09:18:29PM +0200, Alexander Bluhm wrote: > > > This breaks relayd with TLS inspection. Moving down the "called > > > once" check after the F_TLSINSPECT block fixes the plain SSL case. > > > But HTTPS still hangs. I have just commited a test. Try > > > > > > cd /usr/src/regress/usr.sbin/relayd && make > > run-regress-args-https-inspect.pl > > > > > > bluhm > > > > > > On Fri, May 22, 2015 at 03:55:16PM +0200, Claudio Jeker wrote: > > > > On our production systems we did hit the "relay_connect: no connection > > in > > > > flight" on a so regular bases that I had to make it non-fatal with the > > > > result of leaking sockets. > > > > > > > > After more investigation I found the problem to be a race against > > > > connecting to the backend servers. In short: > > > > - relay_read_http() will open a connection if following conditions are > > met > > > > cre->dir == RELAY_DIR_REQUEST && cre->toread <= 0 && cre->dst->bev > > == NULL > > > > > > > > - relay_connect() does not initialize con->se_out.bev (which is also > > > > cre->dst->bev). Instead this is deferred to relay_connected() > > > > > > > > - if a event happens that calls relay_read_http() while connecting to > > the > > > > backend then relay_connect() will be called again. Result is the > > panic > > > > since the count gets out of sync. > > > > > > > > The following diff solves this issue by adding an extra flag to > > > > ctl_relay_event to know if a relay is already connected (or the > > connect is > > > > pending). relay_close() will then clean the flag when closing the > > session. > > > > I decided to use a flag since the EMFILE || ENFILE case is hard to > > detect > > > > otherwise. > > > > > > > > Running with this on production with no visible issues at the moment. > > > > I think it would make sense to restructure the http proxy code more and > > > > introduce a proper state machine but that is a much bigger and complex > > > > issue, so lets fix the bug first. > > > > > > > > New version that now passes all regress tests. It got a fair bit more > > complex because the F_TLSINSPECT case results in multiple entries of > > relay_connect and relay_connected. So I switched to use a state variable > > instead of a flag. It seems to work (and also works in the EMFILE/ENFILE > > case). At least it "worked" for me by forcing that code path all the time. > > > > -- > > :wq Claudio > > > > Index: relay.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/relayd/relay.c,v > > retrieving revision 1.194 > > diff -u -p -r1.194 relay.c > > --- relay.c 18 May 2015 16:57:20 -0000 1.194 > > +++ relay.c 30 May 2015 11:12:27 -0000 > > @@ -1408,8 +1408,10 @@ relay_connect_retry(int fd, short sig, v > > struct relay *rlay = con->se_relay; > > int bnds = -1; > > > > - if (relay_inflight < 1) > > - fatalx("relay_connect_retry: no connection in flight"); > > + if (relay_inflight < 1) { > > + log_warnx("relay_connect_retry: no connection in flight"); > > + relay_inflight = 1; > > + } > > > > DPRINTF("%s: retry %d of %d, inflight: %d",__func__, > > con->se_retrycount, con->se_retry, relay_inflight); > > @@ -1466,6 +1468,10 @@ relay_connect_retry(int fd, short sig, v > > return; > > } > > > > + if (rlay->rl_conf.flags & F_TLSINSPECT) > > + con->se_out.state = PRECONNECT; > > + else > > + con->se_out.state = CONNECTED; > > relay_inflight--; > > DPRINTF("%s: inflight decremented, now %d",__func__, > > relay_inflight); > > > > @@ -1484,9 +1490,14 @@ relay_connect_retry(int fd, short sig, v > > int > > relay_preconnect(struct rsession *con) > > { > > + int rv; > > + > > log_debug("%s: session %d: process %d", __func__, > > con->se_id, privsep_process); > > - return (relay_connect(con)); > > + rv = relay_connect(con); > > + if (con->se_out.state == CONNECTED) > > + con->se_out.state = PRECONNECT; > > + return (rv); > > } > > > > int > > @@ -1496,18 +1507,28 @@ relay_connect(struct rsession *con) > > struct timeval evtpause = { 1, 0 }; > > int bnds = -1, ret; > > > > + /* relay_connect should only be called once per relay */ > > + if (con->se_out.state == CONNECTED) { > > + log_debug("%s: connect already called once", __func__); > > + return (0); > > + } > > + > > /* Connection is already established but session not active */ > > - if ((rlay->rl_conf.flags & F_TLSINSPECT) && con->se_out.s != -1) { > > + if ((rlay->rl_conf.flags & F_TLSINSPECT) && > > + con->se_out.state == PRECONNECT) { > > if (con->se_out.ssl == NULL) { > > log_debug("%s: tls connect failed", __func__); > > return (-1); > > } > > relay_connected(con->se_out.s, EV_WRITE, con); > > + con->se_out.state = CONNECTED; > > return (0); > > } > > > > - if (relay_inflight < 1) > > - fatalx("relay_connect: no connection in flight"); > > + if (relay_inflight < 1) { > > + log_warnx("relay_connect: no connection in flight"); > > + relay_inflight = 1; > > + } > > > > getmonotime(&con->se_tv_start); > > > > @@ -1555,6 +1576,8 @@ relay_connect(struct rsession *con) > > event_del(&rlay->rl_ev); > > evtimer_add(&con->se_inflightevt, &evtpause); > > evtimer_add(&rlay->rl_evt, &evtpause); > > + /* this connect is pending */ > > + con->se_out.state = PENDING; > > return (0); > > } else { > > if (con->se_retry) { > > @@ -1572,6 +1595,7 @@ relay_connect(struct rsession *con) > > } > > } > > > > + con->se_out.state = CONNECTED; > > relay_inflight--; > > DPRINTF("%s: inflight decremented, now %d",__func__, > > relay_inflight); > > @@ -1673,6 +1697,7 @@ relay_close(struct rsession *con, const > > event_add(&rlay->rl_ev, NULL); > > } > > } > > + con->se_out.state = INIT; > > > > if (con->se_out.buf != NULL) > > free(con->se_out.buf); > > Index: relay_http.c > > =================================================================== > > RCS file: /cvs/src/usr.sbin/relayd/relay_http.c,v > > retrieving revision 1.47 > > diff -u -p -r1.47 relay_http.c > > --- relay_http.c 22 May 2015 01:34:13 -0000 1.47 > > +++ relay_http.c 30 May 2015 11:12:53 -0000 > > @@ -275,8 +275,10 @@ relay_read_http(struct bufferevent *bev, > > goto lookup; > > } else if (cre->line == 1 && cre->dir == > > RELAY_DIR_REQUEST) { > > if ((desc->http_method = > > relay_httpmethod_byname(key)) > > - == HTTP_METHOD_NONE) > > + == HTTP_METHOD_NONE) { > > + free(line); > > goto fail; > > + } > > /* > > * Decode request path and query > > */ > > @@ -419,7 +421,7 @@ relay_read_http(struct bufferevent *bev, > > relay_reset_http(cre); > > done: > > if (cre->dir == RELAY_DIR_REQUEST && cre->toread <= 0 && > > - cre->dst->bev == NULL) { > > + cre->dst->state != CONNECTED) { > > if (rlay->rl_conf.fwdmode == FWD_TRANS) { > > relay_bindanyreq(con, 0, IPPROTO_TCP); > > return; > > Index: relayd.h > > =================================================================== > > RCS file: /cvs/src/usr.sbin/relayd/relayd.h,v > > retrieving revision 1.209 > > diff -u -p -r1.209 relayd.h > > --- relayd.h 2 May 2015 13:15:24 -0000 1.209 > > +++ relayd.h 30 May 2015 10:36:28 -0000 > > @@ -200,6 +200,7 @@ struct ctl_relay_event { > > int line; > > int done; > > int timedout; > > + enum { INIT, PENDING, PRECONNECT, CONNECTED } state; > > enum direction dir; > > > > u_int8_t *buf; > > > > > > > -- > --------------------------------------------------------------------------------------------------------------------- > () ascii ribbon campaign - against html e-mail > /\ -- :wq Claudio
