Willy,

new patch with the requested changes and updated tests.

Note: I added an `assert` in there to make sure that ht*_select_comp_reshdr
actually verified the ETag header before I am touching it. There *is* precedence
for `assert` in `checks.c`. Please remove the `assert` if you are not happy
with it.

Best regards
Tim Duesterhus

Apply with `git am --scissors` to automatically cut the commit message.
-- >8 --
RFC 7232 section 2.3.3 states:

> Note: Content codings are a property of the representation data,
> so a strong entity-tag for a content-encoded representation has to
> be distinct from the entity tag of an unencoded representation to
> prevent potential conflicts during cache updates and range
> requests.  In contrast, transfer codings (Section 4 of [RFC7230])
> apply only during message transfer and do not result in distinct
> entity-tags.

Thus a strong ETag must be changed when compressing. Usually this is done
by converting it into a weak ETag, which represents a semantically, but not
byte-by-byte identical response. A conversion to a weak ETag still allows
If-None-Match to work.

This should be backported to 1.9 and might be backported to every supported
branch with compression.
---
 doc/configuration.txt            |   3 +-
 reg-tests/compression/h00001.vtc | 226 +++++++++++++++++++++++++++++++
 src/flt_http_comp.c              |  91 ++++++++++++-
 3 files changed, 314 insertions(+), 6 deletions(-)
 create mode 100644 reg-tests/compression/h00001.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 55e56be5..00532a89 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -3114,8 +3114,7 @@ compression offload
     * The response contains a "Content-Encoding" header, indicating that the
       response is already compressed (see compression offload)
 
-  Note: The compression does not rewrite Etag headers, and does not emit the
-        Warning header.
+  Note: The compression does not emit the Warning header.
 
   Examples :
         compression algo gzip
diff --git a/reg-tests/compression/h00001.vtc b/reg-tests/compression/h00001.vtc
new file mode 100644
index 00000000..e21f8b44
--- /dev/null
+++ b/reg-tests/compression/h00001.vtc
@@ -0,0 +1,226 @@
+varnishtest "Compression converts strong ETags to weak ETags"
+
+#REQUIRE_VERSION=1.9
+#REQUIRE_OPTION=ZLIB|SLZ
+
+feature ignore_unknown_macro
+
+server s1 {
+        rxreq
+        expect req.url == "/strong"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"123\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/weak"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: W/\"456\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/weak-incorrect-quoting"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"W/789\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/empty-strong"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/empty-weak"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: W/\"\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid1"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"invalid" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid2"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: invalid\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid3"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: invalid" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid4"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: W/\"invalid" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid5"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: W/invalid\"" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/invalid6"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: W/invalid" \
+          -bodylen 100
+        
+        rxreq
+        expect req.url == "/multiple"
+        expect req.http.accept-encoding == "gzip"
+        txresp \
+          -hdr "Content-Type: text/plain" \
+          -hdr "ETag: \"one\"" \
+          -hdr "ETag: \"two\"" \
+          -bodylen 100
+} -start
+
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        ${no-htx} option http-use-htx
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe-gzip
+        bind "fd@${fe_gzip}"
+        default_backend be-gzip
+
+    backend be-gzip
+        compression algo gzip
+        compression type text/html text/plain
+        server www ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_gzip_sock} {
+        txreq -url "/strong" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.etag == "W/\"123\""
+        gunzip
+        expect resp.bodylen == 100
+        
+        txreq -url "/weak" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.etag == "W/\"456\""
+        gunzip
+        expect resp.bodylen == 100
+        
+        txreq -url "/weak-incorrect-quoting" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.etag == "W/\"W/789\""
+        gunzip
+        expect resp.bodylen == 100
+        
+        txreq -url "/empty-strong" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.etag == "W/\"\""
+        gunzip
+        expect resp.bodylen == 100
+        
+        txreq -url "/empty-weak" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "gzip"
+        expect resp.http.etag == "W/\"\""
+        gunzip
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid1" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "\"invalid"
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid2" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "invalid\""
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid3" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "invalid"
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid4" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "W/\"invalid"
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid5" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "W/invalid\""
+        expect resp.bodylen == 100
+        
+        txreq -url "/invalid6" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.http.etag == "W/invalid"
+        expect resp.bodylen == 100
+        
+        txreq -url "/multiple" \
+          -hdr "Accept-Encoding: gzip"
+        rxresp
+        expect resp.status == 200
+        expect resp.http.content-encoding == "<undef>"
+        expect resp.bodylen == 100
+} -run
diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c
index f8fbf4c9..f11b6ef1 100644
--- a/src/flt_http_comp.c
+++ b/src/flt_http_comp.c
@@ -10,6 +10,8 @@
  *
  */
 
+#include <assert.h>
+
 #include <common/buffer.h>
 #include <common/cfgparse.h>
 #include <common/htx.h>
@@ -466,6 +468,7 @@ static int
 http_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg 
*msg)
 {
        struct http_txn *txn = s->txn;
+       struct hdr_ctx ctx;
 
        /*
         * Add Content-Encoding header when it's not identity encoding.
@@ -486,8 +489,6 @@ http_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *m
 
        /* remove Content-Length header */
        if (msg->flags & HTTP_MSGF_CNT_LEN) {
-               struct hdr_ctx ctx;
-
                ctx.idx = 0;
                while (http_find_header2("Content-Length", 14, 
ci_head(&s->res), &txn->hdr_idx, &ctx))
                        http_remove_header2(msg, &txn->hdr_idx, &ctx);
@@ -499,6 +500,29 @@ http_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *m
                        goto error;
        }
 
+       ctx.idx = 0;
+       if (http_find_full_header2("ETag", 4, ci_head(&s->res), &txn->hdr_idx, 
&ctx)) {
+               assert((
+                               /* Either a weak ETag */
+                               (ctx.vlen >= 4 && memcmp(ctx.line + ctx.val, 
"W/\"", 3) == 0) || 
+                               /* or strong ETag */
+                               (ctx.vlen >= 2 && ctx.line[ctx.val] == '"')
+                       ) && ctx.line[ctx.val + ctx.vlen - 1] == '"');
+
+               if (ctx.line[ctx.val] == '"') {
+                       /* This a strong ETag. Convert it to a weak one. */
+                       trash.data = 8;
+                       if (trash.data + ctx.vlen > trash.size)
+                               goto error;
+                       memcpy(trash.area, "ETag: W/", trash.data);
+                       memcpy(trash.area + trash.data, ctx.line + ctx.val, 
ctx.vlen);
+                       trash.data += ctx.vlen;
+                       trash.area[trash.data] = '\0';
+                       http_remove_header2(msg, &txn->hdr_idx, &ctx);
+                       if (http_header_add_tail2(msg, &txn->hdr_idx, 
trash.area, trash.data) < 0)
+                               goto error;
+               }
+       }
 
        return 1;
 
@@ -512,6 +536,7 @@ static int
 htx_set_comp_reshdr(struct comp_state *st, struct stream *s, struct http_msg 
*msg)
 {
        struct htx *htx = htxbuf(&msg->chn->buf);
+       struct http_hdr_ctx ctx;
 
        /*
         * Add Content-Encoding header when it's not identity encoding.
@@ -528,8 +553,6 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *ms
 
        /* remove Content-Length header */
        if (msg->flags & HTTP_MSGF_CNT_LEN) {
-               struct http_hdr_ctx ctx;
-
                ctx.blk = NULL;
                while (http_find_header(htx, ist("Content-Length"), &ctx, 1))
                        http_remove_header(htx, &ctx);
@@ -541,6 +564,26 @@ htx_set_comp_reshdr(struct comp_state *st, struct stream 
*s, struct http_msg *ms
                        goto error;
        }
 
+       /* convert "ETag" header to a weak ETag */
+       ctx.blk = NULL;
+       if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+               assert((
+                               /* Either a weak ETag */
+                               (ctx.value.len >= 4 && memcmp(ctx.value.ptr, 
"W/\"", 3) == 0) || 
+                               /* or strong ETag */
+                               (ctx.value.len >= 2 && ctx.value.ptr[0] == '"')
+                       ) && ctx.value.ptr[ctx.value.len - 1] == '"');
+               if (ctx.value.ptr[0] == '"') {
+                       /* This a strong ETag. Convert it to a weak one. */
+                       struct ist v = ist2(trash.area, 0);
+                       if (istcat(&v, ist("W/"), trash.size) == -1 || 
istcat(&v, ctx.value, trash.size) == -1)
+                               goto error;
+
+                       if (!http_replace_header_value(htx, &ctx, v))
+                               goto error;
+               }
+       }
+
        return 1;
 
   error:
@@ -843,6 +886,26 @@ http_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
                        goto fail;
        }
 
+       /* no compression when ETag is malformed */
+       ctx.idx = 0;
+       if (http_find_full_header2("ETag", 4, ci_head(c), &txn->hdr_idx, &ctx)) 
{
+               if (!(
+                       (
+                               /* Either a weak ETag */
+                               (ctx.vlen >= 4 && memcmp(ctx.line + ctx.val, 
"W/\"", 3) == 0) || 
+                               /* or strong ETag */
+                               (ctx.vlen >= 2 && ctx.line[ctx.val] == '"')
+                       ) && ctx.line[ctx.val + ctx.vlen - 1] == '"'
+               )) {
+                       goto fail;
+               }
+       }
+       /* no compression when multiple ETags are present
+        * Note: Do not reset ctx.idx!
+        */
+       if (http_find_full_header2("ETag", 4, ci_head(c), &txn->hdr_idx, &ctx))
+               goto fail;
+
        comp_type = NULL;
 
        /* we don't want to compress multipart content-types, nor content-types 
that are
@@ -939,6 +1002,26 @@ htx_select_comp_reshdr(struct comp_state *st, struct 
stream *s, struct http_msg
                        goto fail;
        }
 
+       /* no compression when ETag is malformed */
+       ctx.blk = NULL;
+       if (http_find_header(htx, ist("ETag"), &ctx, 1)) {
+               if (!(
+                       (
+                               /* Either a weak ETag */
+                               (ctx.value.len >= 4 && memcmp(ctx.value.ptr, 
"W/\"", 3) == 0) || 
+                               /* or strong ETag */
+                               (ctx.value.len >= 2 && ctx.value.ptr[0] == '"')
+                       ) && ctx.value.ptr[ctx.value.len - 1] == '"'
+               )) {
+                       goto fail;
+               }
+       }
+       /* no compression when multiple ETags are present
+        * Note: Do not reset ctx.blk!
+        */
+       if (http_find_header(htx, ist("ETag"), &ctx, 1))
+               goto fail;
+
        comp_type = NULL;
 
        /* we don't want to compress multipart content-types, nor content-types 
that are
-- 
2.20.1


Reply via email to