On 26/02/15 11:27, Nils Goroll wrote:
> This tcpdump output illustrates an issue we seem to have with default Linux 
> tcp
> timeouts and the default timeout_req of 2 seconds:

After further pondering of this case and many similar cases from production
systems' tcpdumps, I suggest the attached improvement. I hope the commit message
will provide sufficient background.

Nils
>From 9c0749634cc22d2f25d505a525a587004038ed9a Mon Sep 17 00:00:00 2001
From: Nils Goroll <[email protected]>
Date: Fri, 27 Feb 2015 18:30:35 +0100
Subject: [PATCH] Instruct the kernel to reset the connection for SC_RX_TIMEOUT
 and others

(others being: SC_RX_BODY, SC_RX_JUNK and SC_RX_OVERFLOW)

Closing TCP connections with SO_LINGER unset instructs the kernel to
attempt an orderly shutdown intending to finish a 3-way TCP FIN
sequence. This is not the optimal solution for cases where we have no
data to send to the client: A broken or malicious client may not
cooperate shutting down the connection and keeping a connection with
the varnish->client direction closed does not make any sense: If the
client was to send any data on its still-open direction, our kernel
would respond with RST anyway.

So we might as well instruct our kernel to close with RST in the first
place, with the potential additional benefit of freeing resources used
by our kernel and other stateful network equipment.

Punchline: closing with SO_LINGER set and a zero timeout (implicit in
VTCP_linger()), where applicable, should improve efficiency and
eliminate yet another potential DoS vector.
---
 bin/varnishd/cache/cache.h         |  7 ++++++-
 bin/varnishd/cache/cache_panic.c   |  2 +-
 bin/varnishd/cache/cache_session.c | 14 ++++++++++++--
 include/tbl/sess_close.h           | 37 +++++++++++++++++++++++--------------
 4 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/bin/varnishd/cache/cache.h b/bin/varnishd/cache/cache.h
index fcb8c69..9aa2ab3 100644
--- a/bin/varnishd/cache/cache.h
+++ b/bin/varnishd/cache/cache.h
@@ -82,9 +82,14 @@ enum req_body_state_e {
 
 /*--------------------------------------------------------------------*/
 
+enum sess_close_action_e {
+	SC_ACT_FIN = 0,
+	SC_ACT_RST
+};
+
 enum sess_close {
 	SC_NULL = 0,
-#define SESS_CLOSE(nm, desc)	SC_##nm,
+#define SESS_CLOSE(nm, desc, act)	SC_##nm,
 #include "tbl/sess_close.h"
 #undef SESS_CLOSE
 };
diff --git a/bin/varnishd/cache/cache_panic.c b/bin/varnishd/cache/cache_panic.c
index bc88148..9ac889f 100644
--- a/bin/varnishd/cache/cache_panic.c
+++ b/bin/varnishd/cache/cache_panic.c
@@ -102,7 +102,7 @@ sess_close_2str(enum sess_close sc, int want_desc)
 {
 	switch (sc) {
 	case SC_NULL:		return(want_desc ? "(null)": "NULL");
-#define SESS_CLOSE(nm, desc)	case SC_##nm: return(want_desc ? desc : #nm);
+#define SESS_CLOSE(nm, desc, act)	case SC_##nm: return(want_desc ? desc : #nm);
 #include "tbl/sess_close.h"
 #undef SESS_CLOSE
 
diff --git a/bin/varnishd/cache/cache_session.c b/bin/varnishd/cache/cache_session.c
index 7992834..25c1865 100644
--- a/bin/varnishd/cache/cache_session.c
+++ b/bin/varnishd/cache/cache_session.c
@@ -316,8 +316,18 @@ SES_Close(struct sess *sp, enum sess_close reason)
 {
 	int i;
 
-	assert(sp->fd >= 0);
-	sp->reason = reason;
+	switch (reason) {
+#define SESS_CLOSE(nm, desc, act)				\
+		case SC_ ## nm:				\
+			if (act == SC_ACT_RST)			\
+				(void)VTCP_linger(sp->fd, 1);	\
+			break;
+#include "tbl/sess_close.h"
+#undef SESS_CLOSE
+	default:
+		WRONG("Wrong resason in SES_Close");
+	}
+
 	i = close(sp->fd);
 	assert(i == 0 || errno != EBADF); /* XXX EINVAL seen */
 	sp->fd = -1;
diff --git a/include/tbl/sess_close.h b/include/tbl/sess_close.h
index 8930b74..ea6d147 100644
--- a/include/tbl/sess_close.h
+++ b/include/tbl/sess_close.h
@@ -25,23 +25,32 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
+ *
+ * Note on the close action:
+ *
+ * SC_ACT_FIN: We intend to terminate the connection gracefully, preferrably
+ *	       flushing our response with a TCP 3way FIN handshake
+ *
+ * SC_ACT_RST: We intend to terminate as efficiently as possible, signalling
+ *	       to the peer that we will not continue reading any data,
+ *	       preferrably with a TCP RST
  */
 
 /*lint -save -e525 -e539 */
 
-SESS_CLOSE(REM_CLOSE,		"Client Closed")
-SESS_CLOSE(REQ_CLOSE,		"Client requested close")
-SESS_CLOSE(REQ_HTTP10,		"Proto < HTTP/1.1")
-SESS_CLOSE(RX_BAD,		"Received bad req/resp")
-SESS_CLOSE(RX_BODY,		"Failure receiving req.body")
-SESS_CLOSE(RX_JUNK,		"Received junk data")
-SESS_CLOSE(RX_OVERFLOW,		"Received buffer overflow")
-SESS_CLOSE(RX_TIMEOUT,		"Receive timeout")
-SESS_CLOSE(TX_PIPE,		"Piped transaction")
-SESS_CLOSE(TX_ERROR,		"Error transaction")
-SESS_CLOSE(TX_EOF,		"EOF transmission")
-SESS_CLOSE(RESP_CLOSE,		"Backend/VCL requested close")
-SESS_CLOSE(OVERLOAD,		"Out of some resource")
-SESS_CLOSE(SESS_PIPE_OVERFLOW,	"Session pipe overflow")
+SESS_CLOSE(REM_CLOSE,		"Client Closed",		SC_ACT_FIN)
+SESS_CLOSE(REQ_CLOSE,		"Client requested close",	SC_ACT_FIN)
+SESS_CLOSE(REQ_HTTP10,		"Proto < HTTP/1.1",		SC_ACT_FIN)
+SESS_CLOSE(RX_BAD,		"Received bad req/resp",	SC_ACT_FIN)
+SESS_CLOSE(RX_BODY,		"Failure receiving req.body",	SC_ACT_RST)
+SESS_CLOSE(RX_JUNK,		"Received junk data",		SC_ACT_RST)
+SESS_CLOSE(RX_OVERFLOW,	"Received buffer overflow",	SC_ACT_RST)
+SESS_CLOSE(RX_TIMEOUT,		"Receive timeout",		SC_ACT_RST)
+SESS_CLOSE(TX_PIPE,		"Piped transaction",		SC_ACT_FIN)
+SESS_CLOSE(TX_ERROR,		"Error transaction",		SC_ACT_FIN)
+SESS_CLOSE(TX_EOF,		"EOF transmission",		SC_ACT_FIN)
+SESS_CLOSE(RESP_CLOSE,		"Backend/VCL requested close",	SC_ACT_FIN)
+SESS_CLOSE(OVERLOAD,		"Out of some resource",	SC_ACT_FIN)
+SESS_CLOSE(SESS_PIPE_OVERFLOW,	"Session pipe overflow",	SC_ACT_FIN)
 
 /*lint -restore */
-- 
2.1.4

_______________________________________________
varnish-dev mailing list
[email protected]
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev

Reply via email to