Make HAProxy set the `Vary: Accept-Encoding` response header if the server
response would normally be compressed based off the current configuration.

Specifically make sure to:
1. Disregard the *request* headers ...
2. Disregard the current compression rate and other temporary conditions ...
... when determining whether the `Vary` header must be set.

The *response* headers generated by the upstream are allowed to be taken
into account and this patch does take them into account to not set `Vary`
on e.g. Content-Types that are never going to be compressed.

A small issue remains: The User-Agent is not added to the `Vary` header,
despite being relevant to the response. Adding the User-Agent header would
make responses effectively uncacheable and it's unlikely to see a Mozilla/4
in the wild in 2019.

Add a reg-test to ensure this behaviour.
---
 reg-tests/compression/vary.vtc | 187 +++++++++++++++++++++++++++++++++
 src/flt_http_comp.c            |  81 ++++++++++----
 2 files changed, 248 insertions(+), 20 deletions(-)
 create mode 100644 reg-tests/compression/vary.vtc

diff --git a/reg-tests/compression/vary.vtc b/reg-tests/compression/vary.vtc
new file mode 100644
index 000000000..58514700f
--- /dev/null
+++ b/reg-tests/compression/vary.vtc
@@ -0,0 +1,187 @@
+varnishtest "Compression sets Vary header"
+
+#REQUIRE_VERSION=1.9
+#REQUIRE_OPTION=ZLIB|SLZ
+
+feature ignore_unknown_macro
+
+server s1 {
+        rxreq
+        expect req.url == "/plain/accept-encoding-gzip"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -bodylen 100
+
+        rxreq
+        expect req.url == "/plain/accept-encoding-invalid"
+        expect req.http.accept-encoding == "invalid"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -bodylen 100
+
+        rxreq
+        expect req.url == "/plain/accept-encoding-null"
+        expect req.http.accept-encoding == "<undef>"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -bodylen 100
+
+        rxreq
+        expect req.url == "/html/accept-encoding-gzip"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/html" \
+          -bodylen 100
+
+        rxreq
+        expect req.url == "/html/accept-encoding-invalid"
+        expect req.http.accept-encoding == "invalid"
+        txresp \
+          -hdr "Content-Type: text/html" \
+          -bodylen 100
+
+
+        rxreq
+        expect req.url == "/html/accept-encoding-null"
+        expect req.http.accept-encoding == "<undef>"
+        txresp \
+          -hdr "Content-Type: text/html" \
+          -bodylen 100
+
+        rxreq
+        expect req.url == "/dup-etag/accept-encoding-gzip"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"123\"" \
+          -hdr "ETag: \"123\"" \
+          -bodylen 100
+} -repeat 2 -start
+
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        ${no-htx} option http-use-htx
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe-gzip
+        bind "fd@${fe_gzip}"
+        default_backend be-gzip
+
+    backend be-gzip
+        compression algo gzip
+        compression type text/plain
+        server www ${s1_addr}:${s1_port}
+
+    frontend fe-nothing
+        bind "fd@${fe_nothing}"
+        default_backend be-nothing
+
+    backend be-nothing
+        server www ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_gzip_sock} {
+        txreq -url "/plain/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.vary == "Accept-Encoding"
+        gunzip
+        expect resp.bodylen == 100
+
+        txreq -url "/plain/accept-encoding-invalid" \
+          -hdr "Accept-Encoding: invalid"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "Accept-Encoding"
+        expect resp.bodylen == 100
+
+        txreq -url "/plain/accept-encoding-null"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "Accept-Encoding"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-invalid" \
+          -hdr "Accept-Encoding: invalid"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-null"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/dup-etag/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+} -run
+
+# This Client duplicates c1, against the "nothing" frontend, ensuring no Vary 
header is ever set.
+client c2 -connect ${h1_fe_nothing_sock} {
+        txreq -url "/plain/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/plain/accept-encoding-invalid" \
+          -hdr "Accept-Encoding: invalid"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/plain/accept-encoding-null"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-invalid" \
+          -hdr "Accept-Encoding: invalid"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/html/accept-encoding-null"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+
+        txreq -url "/dup-etag/accept-encoding-gzip" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.vary == "<undef>"
+        expect resp.bodylen == 100
+} -run
diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c
index b04dcd145..17e2d4533 100644
--- a/src/flt_http_comp.c
+++ b/src/flt_http_comp.c
@@ -38,6 +38,8 @@ struct comp_state {
        struct comp_ctx  *comp_ctx;   /* compression context */
        struct comp_algo *comp_algo;  /* compression algorithm if not NULL */
 
+       int would_compress;
+
        /* Following fields are used by the legacy code only: */
        int hdrs_len;
        int tlrs_len;
@@ -114,14 +116,15 @@ comp_start_analyze(struct stream *s, struct filter 
*filter, struct channel *chn)
                if (st == NULL)
                        return -1;
 
-               st->comp_algo   = NULL;
-               st->comp_ctx    = NULL;
-               st->hdrs_len    = 0;
-               st->tlrs_len    = 0;
-               st->consumed    = 0;
-               st->initialized = 0;
-               st->finished    = 0;
-               filter->ctx     = st;
+               st->comp_algo      = NULL;
+               st->comp_ctx       = NULL;
+               st->would_compress = 0;
+               st->hdrs_len       = 0;
+               st->tlrs_len       = 0;
+               st->consumed       = 0;
+               st->initialized    = 0;
+               st->finished       = 0;
+               filter->ctx        = st;
 
                /* Register post-analyzer on AN_RES_WAIT_HTTP because we need to
                 * analyze response headers before http-response rules execution
@@ -160,11 +163,11 @@ comp_http_headers(struct stream *s, struct filter 
*filter, struct http_msg *msg)
        if (!(msg->chn->flags & CF_ISRESP))
                select_compression_request_header(st, s, msg);
        else {
+               if (!set_compression_response_header(st, s, msg))
+                       goto end;
                /* Response headers have already been checked in
                 * comp_http_post_analyze callback. */
                if (st->comp_algo) {
-                       if (!set_compression_response_header(st, s, msg))
-                               goto end;
                        register_data_filter(s, msg->chn, filter);
                        if (!IS_HTX_STRM(s))
                                st->hdrs_len = s->txn->rsp.sov;
@@ -476,6 +479,18 @@ http_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *m
        struct http_txn *txn = s->txn;
        struct hdr_ctx ctx;
 
+       if (st->would_compress) {
+               if (http_header_add_tail2(msg, &txn->hdr_idx, "Vary: 
Accept-Encoding", 21) < 0)
+                       goto error;
+       }
+
+       /*
+        * Break if no compression is actually happening.
+        */
+       if (!st->comp_algo) {
+               return 1;
+       }
+
        /*
         * Add Content-Encoding header when it's not identity encoding.
          * RFC 2616 : Identity encoding: This content-coding is used only in 
the
@@ -526,7 +541,8 @@ http_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *m
        return 1;
 
   error:
-       st->comp_algo->end(&st->comp_ctx);
+       if (st->comp_algo)
+               st->comp_algo->end(&st->comp_ctx);
        st->comp_algo = NULL;
        return 0;
 }
@@ -537,6 +553,18 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *ms
        struct htx *htx = htxbuf(&msg->chn->buf);
        struct http_hdr_ctx ctx;
 
+       if (st->would_compress) {
+               if (!http_add_header(htx, ist("Vary"), ist("Accept-Encoding")))
+                       goto error;
+       }
+
+       /*
+        * Break if no compression is actually happening.
+        */
+       if (!st->comp_algo) {
+               return 1;
+       }
+
        /*
         * Add Content-Encoding header when it's not identity encoding.
         * RFC 2616 : Identity encoding: This content-coding is used only in the
@@ -580,7 +608,8 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *ms
        return 1;
 
   error:
-       st->comp_algo->end(&st->comp_ctx);
+       if (st->comp_algo)
+               st->comp_algo->end(&st->comp_ctx);
        st->comp_algo = NULL;
        return 0;
 }
@@ -840,10 +869,6 @@ http_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
        struct hdr_ctx ctx;
        struct comp_type *comp_type;
 
-       /* no common compression algorithm was found in request header */
-       if (st->comp_algo == NULL)
-               goto fail;
-
        /* compression already in progress */
        if (msg->flags & HTTP_MSGF_COMPRESSING)
                goto fail;
@@ -925,6 +950,16 @@ http_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
                        goto fail; /* a content-type was required */
        }
 
+       /*
+        * Indicate that this response would normally be compressed.
+        * This flag is required for the `Vary` response header.
+        */
+       st->would_compress = 1;
+
+       /* no common compression algorithm was found in request header */
+       if (st->comp_algo == NULL)
+               goto fail;
+
        /* limit compression rate */
        if (global.comp_rate_lim > 0)
                if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
@@ -953,10 +988,6 @@ htx_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
        struct http_hdr_ctx ctx;
        struct comp_type *comp_type;
 
-       /* no common compression algorithm was found in request header */
-       if (st->comp_algo == NULL)
-               goto fail;
-
        /* compression already in progress */
        if (msg->flags & HTTP_MSGF_COMPRESSING)
                goto fail;
@@ -1036,6 +1067,16 @@ htx_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
                        goto fail; /* a content-type was required */
        }
 
+       /*
+        * Indicate that this response would normally be compressed.
+        * This flag is required for the `Vary` response header.
+        */
+       st->would_compress = 1;
+
+       /* no common compression algorithm was found in request header */
+       if (st->comp_algo == NULL)
+               goto fail;
+
        /* limit compression rate */
        if (global.comp_rate_lim > 0)
                if (read_freq_ctr(&global.comp_bps_in) > global.comp_rate_lim)
-- 
2.21.0


Reply via email to