phk had the great idea to add a "quick close" for SC_RX_BAD and SC_RX_JUNK.

Another updated patch attached.

On 02/03/15 21:05, Nils Goroll wrote:
> On 01/03/15 10:05, Poul-Henning Kamp wrote:
>> The problem with !SO_LINGER (we tried it some years back) is that
>> all queued data is discarded the moment you call close(2), and we
>> have no way to know if our last response is still lingering in the
>> kernel's tcp-buffers and dribbling out to a slow client with a bad
>> mobile phone connection.
> 
> I have finally understood the issue with http keepalive here, and that
> kernels behave differently than I thought they would. I have also
> understood that phk had already invested significant effort into this,
> and I basically ended up "out of ideas" as well.
> 
> Except for this: As long as we know that we cannot possibly have written
> anything, we should be OK to close with RST.
> 
> The attached updated patch takes a simplistic approach to this.
> 
> Nils
> 
From 2bdb6963a09f67a7574d321d0a083df45f697e06 Mon Sep 17 00:00:00 2001
From: Nils Goroll <[email protected]>
Date: Mon, 2 Mar 2015 20:33:15 +0100
Subject: [PATCH] Instruct the kernel to reset the connection for SC_RX_TIMEOUT
 and others

(others being: SC_RX_BODY, SC_RX_OVERFLOW)

Perform a quick close (SO_LINGER with a short, 3 second timeout) for
abnormal conditions (cases where we send a hardcoded 400 response).

Add a timeout argument to VTCP_linger.
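
For example, as used in the patch below:

	(void)VTCP_linger(sp->fd, 1, 0);	/* close with RST */
	(void)VTCP_linger(sp->fd, 1, 3);	/* quick close, 3s linger */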

Background:

Closing TCP connections with SO_LINGER unset instructs the kernel to
attempt an orderly shutdown, completing the TCP FIN teardown sequence.
This is not optimal for cases where we have no data to send to the
client: A broken or malicious client may not cooperate in shutting
down the connection, and keeping a connection around with the
varnish->client direction closed makes no sense: If the client were
to send any data on its still-open direction, our kernel would
respond with RST anyway.

So we might as well instruct our kernel to close with RST in the first
place, with the potential additional benefit of freeing resources used
by our kernel and other stateful network equipment.

Punchline: where applicable, closing with SO_LINGER set and a zero
timeout (now an explicit argument to VTCP_linger()) should improve
efficiency and eliminate yet another potential DoS vector.
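
To illustrate, here is a minimal standalone sketch (illustration only,
not part of the patch) of what this asks the kernel to do: with
SO_LINGER enabled and a zero linger time, close(2) discards any unsent
data and sends an RST instead of going through the FIN sequence:

	/* Sketch, illustration only: force an RST on close(2) */
	#include <sys/socket.h>
	#include <unistd.h>

	static void
	close_with_rst(int fd)
	{
		struct linger lin;

		lin.l_onoff = 1;	/* enable SO_LINGER */
		lin.l_linger = 0;	/* 0s timeout: discard queue, send RST */
		(void)setsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, sizeof lin);
		(void)close(fd);	/* returns immediately; peer sees RST */
	}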

With HTTP keepalive connections, we need to consider that read
timeouts can occur while there is still data to be sent, or in
transit, to the peer (from a previous response). Ideally, we would
like to instruct the kernel to inform us when there is nothing left
in the send queue or, as phk put it, >>a socketopt that says "close X
seconds after send-queue empty, provided nothing has been
received."<<

We know of no way to achieve this (on current Linux and FreeBSD
kernels), so we fall back to a simplistic approach: Close with
SO_LINGER set and a zero timeout (via VTCP_linger()) as long as we
know we cannot possibly have written anything.
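
For illustration only: the closest approximation we know of is the
Linux SIOCOUTQ ioctl, which reports how many bytes are still queued
in the socket's send buffer. It has to be polled -- there is no
notification when the queue drains -- so it does not actually solve
the problem; hence the simplistic fallback above. A sketch:

	/* Sketch, Linux-only, illustration -- not part of the patch */
	#include <sys/ioctl.h>
	#include <linux/sockios.h>	/* SIOCOUTQ */

	static int
	sendq_empty(int fd)
	{
		int pending = 0;

		/* for TCP: bytes queued (sent-but-unacked and unsent) */
		if (ioctl(fd, SIOCOUTQ, &pending) != 0)
			return (-1);	/* cannot tell */
		return (pending == 0);
	}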

This patch introduces a simple requests-per-session counter which
may also have other uses in the future.
---
 bin/varnishd/cache/cache.h         |  9 ++++++++-
 bin/varnishd/cache/cache_panic.c   |  2 +-
 bin/varnishd/cache/cache_req_fsm.c |  1 +
 bin/varnishd/cache/cache_session.c | 17 ++++++++++++++++
 include/tbl/sess_close.h           | 41 +++++++++++++++++++++++++-------------
 include/vtcp.h                     |  2 +-
 lib/libvarnish/vtcp.c              |  6 +++---
 7 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index fcb8c69..f0e61f4 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -82,9 +82,15 @@ enum req_body_state_e {
 
 /*--------------------------------------------------------------------*/
 
+enum sess_close_action_e {
+	SC_ACT_FIN = 0,
+	SC_ACT_RST,
+	SC_ACT_QCK
+};
+
 enum sess_close {
 	SC_NULL = 0,
-#define SESS_CLOSE(nm, desc)	SC_##nm,
+#define SESS_CLOSE(nm, desc, act)	SC_##nm,
 #include "tbl/sess_close.h"
 #undef SESS_CLOSE
 };
@@ -693,6 +699,7 @@ struct sess {
 
 	struct vrt_privs	privs[1];
 
+	int			reqs;	/* # reqs seen on this session */
 };
 
 /* Prototypes etc ----------------------------------------------------*/
diff --git a/bin/varnishd/cache/cache_panic.c b/bin/varnishd/cache/cache_panic.c
index bc88148..9ac889f 100644
--- a/bin/varnishd/cache/cache_panic.c
+++ b/bin/varnishd/cache/cache_panic.c
@@ -102,7 +102,7 @@ sess_close_2str(enum sess_close sc, int want_desc)
 {
 	switch (sc) {
 	case SC_NULL:		return(want_desc ? "(null)": "NULL");
-#define SESS_CLOSE(nm, desc)	case SC_##nm: return(want_desc ? desc : #nm);
+#define SESS_CLOSE(nm, desc, act)	case SC_##nm: return(want_desc ? desc : #nm);
 #include "tbl/sess_close.h"
 #undef SESS_CLOSE
 
diff --git a/bin/varnishd/cache/cache_req_fsm.c b/bin/varnishd/cache/cache_req_fsm.c
index d78e24b..fd3138d 100644
--- a/bin/varnishd/cache/cache_req_fsm.c
+++ b/bin/varnishd/cache/cache_req_fsm.c
@@ -576,6 +576,7 @@ cnt_recv(struct worker *wrk, struct req *req)
 
 	VSLb(req->vsl, SLT_ReqStart, "%s %s",
 	    req->sp->client_addr_str, req->sp->client_port_str);
+	req->sp->reqs++;
 
 	http_VSL_log(req->http);
 
diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c
index 7992834..663d088 100644
--- a/bin/varnishd/cache/cache_session.c
+++ b/bin/varnishd/cache/cache_session.c
@@ -211,6 +211,7 @@ SES_pool_accept_task(struct worker *wrk, void *arg)
 	sp->t_open = VTIM_real();
 	sp->t_idle = sp->t_open;
 	sp->vxid = VXID_Get(wrk, VSL_CLIENTMARKER);
+	sp->reqs = 0;
 
 	lsockname = VCA_SetupSess(wrk, sp);
 	ses_vsl_socket(sp, lsockname);
@@ -309,6 +310,8 @@ SES_Wait(struct sess *sp)
  * Close a sessions connection.
  * XXX: Technically speaking we should catch a t_end timestamp here
  * XXX: for SES_Delete() to use.
+ *
+ * XXX: Make the SC_ACT_QCK timeout (3s) configurable?
  */
 
 void
@@ -318,6 +321,20 @@ SES_Close(struct sess *sp, enum sess_close reason)
 
 	assert(sp->fd >= 0);
 	sp->reason = reason;
+	switch (reason) {
+#define SESS_CLOSE(nm, desc, act)					\
+		case SC_ ## nm:					\
+			if (act == SC_ACT_RST && sp->reqs == 0)	\
+				(void)VTCP_linger(sp->fd, 1, 0);	\
+			else if (act == SC_ACT_QCK)			\
+				(void)VTCP_linger(sp->fd, 1, 3);	\
+			break;
+#include "tbl/sess_close.h"
+#undef SESS_CLOSE
+	default:
+		WRONG("Wrong reason in SES_Close");
+	}
+
 	i = close(sp->fd);
 	assert(i == 0 || errno != EBADF); /* XXX EINVAL seen */
 	sp->fd = -1;
diff --git a/include/tbl/sess_close.h b/include/tbl/sess_close.h
index 8930b74..6876e38 100644
--- a/include/tbl/sess_close.h
+++ b/include/tbl/sess_close.h
@@ -25,23 +25,36 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
+ *
+ * Note on the close action:
+ *
+ * SC_ACT_FIN: We intend to terminate the connection gracefully, preferably
+ *	       flushing our response with an orderly TCP FIN handshake
+ *
+ * SC_ACT_RST: We intend to terminate as efficiently as possible, signalling
+ *	       to the peer that we will not continue reading any data,
+ *	       preferably with a TCP RST
+ *
+ * SC_ACT_QCK: We cannot RST because we have a response to send, but we
+ *	       intend to close as quickly and efficiently as possible,
+ *	       because we are handling an abnormal case
  */
 
 /*lint -save -e525 -e539 */
 
-SESS_CLOSE(REM_CLOSE,		"Client Closed")
-SESS_CLOSE(REQ_CLOSE,		"Client requested close")
-SESS_CLOSE(REQ_HTTP10,		"Proto < HTTP/1.1")
-SESS_CLOSE(RX_BAD,		"Received bad req/resp")
-SESS_CLOSE(RX_BODY,		"Failure receiving req.body")
-SESS_CLOSE(RX_JUNK,		"Received junk data")
-SESS_CLOSE(RX_OVERFLOW,		"Received buffer overflow")
-SESS_CLOSE(RX_TIMEOUT,		"Receive timeout")
-SESS_CLOSE(TX_PIPE,		"Piped transaction")
-SESS_CLOSE(TX_ERROR,		"Error transaction")
-SESS_CLOSE(TX_EOF,		"EOF transmission")
-SESS_CLOSE(RESP_CLOSE,		"Backend/VCL requested close")
-SESS_CLOSE(OVERLOAD,		"Out of some resource")
-SESS_CLOSE(SESS_PIPE_OVERFLOW,	"Session pipe overflow")
+SESS_CLOSE(REM_CLOSE,		"Client Closed",		SC_ACT_FIN)
+SESS_CLOSE(REQ_CLOSE,		"Client requested close",	SC_ACT_FIN)
+SESS_CLOSE(REQ_HTTP10,		"Proto < HTTP/1.1",		SC_ACT_FIN)
+SESS_CLOSE(RX_BAD,		"Received bad req/resp",	SC_ACT_QCK)
+SESS_CLOSE(RX_BODY,		"Failure receiving req.body",	SC_ACT_RST)
+SESS_CLOSE(RX_JUNK,		"Received junk data",		SC_ACT_QCK)
+SESS_CLOSE(RX_OVERFLOW,		"Received buffer overflow",	SC_ACT_RST)
+SESS_CLOSE(RX_TIMEOUT,		"Receive timeout",		SC_ACT_RST)
+SESS_CLOSE(TX_PIPE,		"Piped transaction",		SC_ACT_FIN)
+SESS_CLOSE(TX_ERROR,		"Error transaction",		SC_ACT_FIN)
+SESS_CLOSE(TX_EOF,		"EOF transmission",		SC_ACT_FIN)
+SESS_CLOSE(RESP_CLOSE,		"Backend/VCL requested close",	SC_ACT_FIN)
+SESS_CLOSE(OVERLOAD,		"Out of some resource",		SC_ACT_FIN)
+SESS_CLOSE(SESS_PIPE_OVERFLOW,	"Session pipe overflow",	SC_ACT_FIN)
 
 /*lint -restore */
diff --git a/include/vtcp.h b/include/vtcp.h
index f4a9690..98d1fdc 100644
--- a/include/vtcp.h
+++ b/include/vtcp.h
@@ -63,7 +63,7 @@ void VTCP_hisname(int sock, char *abuf, unsigned alen,
 int VTCP_filter_http(int sock);
 int VTCP_blocking(int sock);
 int VTCP_nonblocking(int sock);
-int VTCP_linger(int sock, int linger);
+int VTCP_linger(int sock, int onoff, int timeout);
 int VTCP_check_hup(int sock);
 
 #ifdef SOL_SOCKET
diff --git a/lib/libvarnish/vtcp.c b/lib/libvarnish/vtcp.c
index 1295cc9..b7f968a 100644
--- a/lib/libvarnish/vtcp.c
+++ b/lib/libvarnish/vtcp.c
@@ -314,13 +314,13 @@ VTCP_set_read_timeout(int s, double seconds)
  */
 
 int
-VTCP_linger(int sock, int linger)
+VTCP_linger(int sock, int onoff, int timeout)
 {
 	struct linger lin;
 	int i;
 
-	memset(&lin, 0, sizeof lin);
-	lin.l_onoff = linger;
+	lin.l_onoff = onoff;
+	lin.l_linger = timeout;
 	i = setsockopt(sock, SOL_SOCKET, SO_LINGER, &lin, sizeof lin);
 	VTCP_Assert(i);
 	return (i);
-- 
2.1.4

_______________________________________________
varnish-dev mailing list
[email protected]
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev
