Willy,
Aleks,
List,

this (absolutely non-ready-to-merge) patch adds support for brotli
compression as suggested in issue #21: 
https://github.com/haproxy/haproxy/issues/21

It is tested on Ubuntu Xenial with libbrotli 1.0.3:

        [timwolla@~]apt-cache policy libbrotli-dev
        libbrotli-dev:
        Installed: 1.0.3-1ubuntu1~16.04.1
        Candidate: 1.0.3-1ubuntu1~16.04.1
        Version table:
        *** 1.0.3-1ubuntu1~16.04.1 500
                500 http://de.archive.ubuntu.com/ubuntu xenial-updates/main 
amd64 Packages
                100 /var/lib/dpkg/status
        [timwolla@~]apt-cache policy libbrotli1
        libbrotli1:
        Installed: 1.0.3-1ubuntu1~16.04.1
        Candidate: 1.0.3-1ubuntu1~16.04.1
        Version table:
        *** 1.0.3-1ubuntu1~16.04.1 500
                500 http://de.archive.ubuntu.com/ubuntu xenial-updates/main 
amd64 Packages
                100 /var/lib/dpkg/status

I am successfully able access brotli compressed URLs with Google Chrome,
this requires me to disable `gzip` though (because haproxy prefers to
select gzip, I suspect because `br` is last in Chrome's `Accept-Encoding`
header).

I also am able to sucessfully download and decompress URLs with `curl`
and the `brotli` CLI utility. The server I use as the backend for these
tests has about 45ms RTT to my machine. The HTML page I use is some random
HTML page on the server, the noise file is 1 MiB of finest /dev/urandom.

You'll notice that brotli compressed requests are both faster as well as
smaller compared to gzip with the hardcoded brotli compression quality
of 3. The default is 11, which is *way* slower than gzip.

        + curl localhost:8080/*snip*.html -H 'Accept-Encoding: gzip'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100 49280    0 49280    0     0   279k      0 --:--:-- --:--:-- 
--:--:--  279k
        + curl localhost:8080/*snip*.html -H 'Accept-Encoding: br'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100 43401    0 43401    0     0   332k      0 --:--:-- --:--:-- 
--:--:--  333k
        + curl localhost:8080/*snip*.html -H 'Accept-Encoding: identity'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100  127k  100  127k    0     0   441k      0 --:--:-- --:--:-- 
--:--:--  441k
        + curl localhost:8080/noise -H 'Accept-Encoding: gzip'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100 1025k    0 1025k    0     0  3330k      0 --:--:-- --:--:-- 
--:--:-- 3338k
        + curl localhost:8080/noise -H 'Accept-Encoding: br'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100 1024k    0 1024k    0     0  3029k      0 --:--:-- --:--:-- 
--:--:-- 3030k
        + curl localhost:8080/noise -H 'Accept-Encoding: identity'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  
Current
                                        Dload  Upload   Total   Spent    Left  
Speed
        100 1024k  100 1024k    0     0  3003k      0 --:--:-- --:--:-- 
--:--:-- 3002k
        + ls -al
        total 3384
        drwxrwxr-x  2 timwolla timwolla    4096 Feb 13 17:30 .
        drwxrwxrwt 28 root     root       69632 Feb 13 17:25 ..
        -rw-rw-r--  1 timwolla timwolla     598 Feb 13 17:30 download
        -rw-rw-r--  1 timwolla timwolla   43401 Feb 13 17:30 html-br
        -rw-rw-r--  1 timwolla timwolla   49280 Feb 13 17:30 html-gz
        -rw-rw-r--  1 timwolla timwolla  130334 Feb 13 17:30 html-id
        -rw-rw-r--  1 timwolla timwolla 1048949 Feb 13 17:30 noise-br
        -rw-rw-r--  1 timwolla timwolla 1049666 Feb 13 17:30 noise-gz
        -rw-rw-r--  1 timwolla timwolla 1048576 Feb 13 17:30 noise-id
        ++ zcat html-gz
        + sha256sum html-id /dev/fd/63 /dev/fd/62
        ++ brotli --decompress --stdout html-br
        56f1664241b3dbb750f93b69570be76c6baccb8de4f3a62fb4fec0ce1bf440b5  
html-id
        56f1664241b3dbb750f93b69570be76c6baccb8de4f3a62fb4fec0ce1bf440b5  
/dev/fd/63
        56f1664241b3dbb750f93b69570be76c6baccb8de4f3a62fb4fec0ce1bf440b5  
/dev/fd/62
        ++ zcat noise-gz
        + sha256sum noise-id /dev/fd/63 /dev/fd/62
        ++ brotli --decompress --stdout noise-br
        ab23236d9d4acecec239c3f0f9b59e59dd043267eeed9ed723da8b15f46bbf33  
noise-id
        ab23236d9d4acecec239c3f0f9b59e59dd043267eeed9ed723da8b15f46bbf33  
/dev/fd/63
        ab23236d9d4acecec239c3f0f9b59e59dd043267eeed9ed723da8b15f46bbf33  
/dev/fd/62

This patch still spits out a bunch of debug output to `stdout`, because I'm
not sure about the do-while loops in `_flush` and `_finish`. I suppose they
could lead to infinite loops if the encoded data exceeds the out buffer. It
might be necessary to add a new return code that indicates that the `_flush`
/ `_finish` must be retried after making space in the output buffer, because
the libbrotli API states:

> Under some circumstances (e.g. lack of output stream capacity) this
> operation would require several calls to BrotliEncoderCompressStream.
> The method must be called again until both input stream is depleted
> and encoder has no more output (see BrotliEncoderHasMoreOutput) after
> the method is called.

and

> Warning:
> When flushing and finishing, op should not change until operation is
> complete; input stream should not be swapped, reduced or extended as well.

Thus `_add_data` *must not* be called until the flush succeeded completely.

One more thing: brotli theoretically supports passing a custom allocator.
I attempted to use a pool for that, but `BrotliEncoderState` is an opaque
struct.

Best regards
Tim Duesterhus

Apply with `git am --scissors` to automatically cut the commit message.
-- >8 --
see issue #21
---
 Makefile                    |   9 +++
 include/types/compression.h |  37 ++++++++----
 src/compression.c           | 113 +++++++++++++++++++++++++++++++++++-
 src/flt_http_comp.c         |   2 +-
 4 files changed, 149 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile
index e2c4d17a..f1818d01 100644
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,7 @@
 #   USE_MY_ACCEPT4       : use own implemention of accept4() if glibc < 2.10.
 #   USE_ZLIB             : enable zlib library support.
 #   USE_SLZ              : enable slz library instead of zlib (pick at most 
one).
+#   USE_BROTLI           : enable brotli library support.
 #   USE_CPU_AFFINITY     : enable pinning processes to CPU on Linux. Automatic.
 #   USE_TFO              : enable TCP fast open. Supported on Linux >= 3.7.
 #   USE_NS               : enable network namespace support. Supported on 
Linux >= 2.6.24.
@@ -555,6 +556,14 @@ BUILD_OPTIONS   += $(call ignore_implicit,USE_ZLIB)
 OPTIONS_LDFLAGS += $(if $(ZLIB_LIB),-L$(ZLIB_LIB)) -lz
 endif
 
+ifneq ($(USE_BROTLI),)
+BROTLI_INC =
+BROTLI_LIB =
+OPTIONS_CFLAGS  += -DUSE_BROTLI $(if $(BROTLI_INC),-I$(BROTLI_INC))
+BUILD_OPTIONS   += $(call ignore_implicit,USE_BROTLI)
+OPTIONS_LDFLAGS += $(if $(BROTLI_LIB),-L$(BROTLI_LIB)) -lbrotlienc
+endif
+
 ifneq ($(USE_POLL),)
 OPTIONS_CFLAGS += -DENABLE_POLL
 OPTIONS_OBJS   += src/ev_poll.o
diff --git a/include/types/compression.h b/include/types/compression.h
index e515aadf..3c953e66 100644
--- a/include/types/compression.h
+++ b/include/types/compression.h
@@ -32,6 +32,10 @@
 #include <zlib.h>
 #endif
 
+#if defined(USE_BROTLI)
+#include <brotli/encode.h>
+#endif
+
 #include <common/buffer.h>
 
 struct comp {
@@ -40,22 +44,35 @@ struct comp {
        unsigned int offload;
 };
 
+#if defined(USE_SLZ) || defined(USE_ZLIB) || defined(USE_BROTLI)
 struct comp_ctx {
+       union {
+#if defined(USE_SLZ) || defined(USE_ZLIB)
+               struct {
 #if defined(USE_SLZ)
-       struct slz_stream strm;
-       const void *direct_ptr; /* NULL or pointer to beginning of data */
-       int direct_len;         /* length of direct_ptr if not NULL */
-       struct buffer queued;   /* if not NULL, data already queued */
+                       struct slz_stream strm;
+                       const void *direct_ptr; /* NULL or pointer to beginning 
of data */
+                       int direct_len;         /* length of direct_ptr if not 
NULL */
+                       struct buffer queued;   /* if not NULL, data already 
queued */
 #elif defined(USE_ZLIB)
-       z_stream strm; /* zlib stream */
-       void *zlib_deflate_state;
-       void *zlib_window;
-       void *zlib_prev;
-       void *zlib_pending_buf;
-       void *zlib_head;
+                       z_stream strm; /* zlib stream */
+                       void *zlib_deflate_state;
+                       void *zlib_window;
+                       void *zlib_prev;
+                       void *zlib_pending_buf;
+                       void *zlib_head;
+#endif
+               };
 #endif
+#if defined(USE_BROTLI)
+               struct {
+                       BrotliEncoderState *brotli_state;
+               };
+#endif
+       };
        int cur_lvl;
 };
+#endif
 
 /* Thanks to MSIE/IIS, the "deflate" name is ambigous, as according to the RFC
  * it's a zlib-wrapped deflate stream, but MSIE only understands a raw deflate
diff --git a/src/compression.c b/src/compression.c
index b0307b23..01d46945 100644
--- a/src/compression.c
+++ b/src/compression.c
@@ -26,6 +26,10 @@
 #undef free_func
 #endif /* USE_ZLIB */
 
+#if defined(USE_BROTLI)
+#include <brotli/encode.h>
+#endif
+
 #include <common/cfgparse.h>
 #include <common/compat.h>
 #include <common/hathreads.h>
@@ -94,6 +98,14 @@ static int deflate_end(struct comp_ctx **comp_ctx);
 
 #endif /* USE_ZLIB */
 
+#if defined(USE_BROTLI)
+static int brotli_init(struct comp_ctx **comp_ctx, int level);
+static int brotli_add_data(struct comp_ctx *comp_ctx, const char *in_data, int 
in_len, struct buffer *out);
+static int brotli_flush(struct comp_ctx *comp_ctx, struct buffer *out);
+static int brotli_finish(struct comp_ctx *comp_ctx, struct buffer *out);
+static int brotli_end(struct comp_ctx **comp_ctx);
+#endif
+
 
 const struct comp_algo comp_algos[] =
 {
@@ -107,6 +119,9 @@ const struct comp_algo comp_algos[] =
        { "raw-deflate", 11, "deflate",  7, raw_def_init,  deflate_add_data,  
deflate_flush,  deflate_finish,  deflate_end },
        { "gzip",         4, "gzip",     4, gzip_init,     deflate_add_data,  
deflate_flush,  deflate_finish,  deflate_end },
 #endif /* USE_ZLIB */
+#if defined(USE_BROTLI)
+       { "brotli",       6, "br",       2, brotli_init,   brotli_add_data,   
brotli_flush,   brotli_finish,   brotli_end  },
+#endif
        { NULL,       0, NULL,          0, NULL ,         NULL,              
NULL,           NULL,           NULL }
 };
 
@@ -246,7 +261,6 @@ static int identity_end(struct comp_ctx **comp_ctx)
        return 0;
 }
 
-
 #ifdef USE_SLZ
 
 /* SLZ's gzip format (RFC1952). Returns < 0 on error. */
@@ -691,6 +705,103 @@ static int zlib_parse_global_windowsize(char **args, int 
section_type, struct pr
 
 #endif /* USE_ZLIB */
 
+#ifdef USE_BROTLI
+
+static int brotli_init(struct comp_ctx **comp_ctx, int level)
+{
+       printf("brotli_init\n");
+       BrotliEncoderState *brotli_state;
+       *comp_ctx = pool_alloc(pool_comp_ctx);
+       if (*comp_ctx == NULL)
+               return -1;
+       brotli_state = BrotliEncoderCreateInstance(NULL, NULL, NULL);
+       BrotliEncoderSetParameter(brotli_state, BROTLI_PARAM_QUALITY, 3);
+       if (brotli_state == NULL) {
+               pool_free(pool_comp_ctx, *comp_ctx);
+               *comp_ctx = NULL;
+               return -1;
+       }
+       (*comp_ctx)->brotli_state = brotli_state;
+       
+       return 0;
+}
+
+static int brotli_add_data(struct comp_ctx *comp_ctx, const char *in_data, int 
in_len, struct buffer *out)
+{
+       printf("brotli_add_data\n");
+       uint8_t *out_data = (uint8_t*)b_tail(out);
+       size_t out_len = b_room(out);
+       size_t out_len2 = out_len;
+       const uint8_t *in_data2 = (const uint8_t*)in_data;
+       size_t in_len2 = in_len;
+       
+       printf("BrotliEncoderCompressStream\n");
+       printf("+in_len: %zu\n", in_len2);
+       printf("+out_len: %zu\n", out_len2);
+       if (BrotliEncoderCompressStream(comp_ctx->brotli_state, 
BROTLI_OPERATION_PROCESS, &in_len2, &in_data2, &out_len2, &out_data, NULL) == 
BROTLI_FALSE) {
+               return -1;
+       }
+       printf("-in_len: %zu\n", in_len2);
+       printf("-out_len: %zu = %zu\n", out_len2, out_len - out_len2);
+       b_add(out, out_len - out_len2);
+       
+       return in_len - in_len2;
+}
+
+static int brotli_flush(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+       printf("brotli_flush\n");
+       uint8_t *out_data = (uint8_t*)b_tail(out);
+       size_t out_len = b_room(out);
+       size_t out_len2 = out_len;
+       size_t in_len2 = 0;
+       
+       do
+       {
+               printf("BrotliEncoderCompressStream\n");
+               printf("+out_len: %zu\n", out_len2);
+               if (BrotliEncoderCompressStream(comp_ctx->brotli_state, 
BROTLI_OPERATION_FLUSH, &in_len2, NULL, &out_len2, &out_data, NULL) == 
BROTLI_FALSE) {
+                       return -1;
+               }
+               printf("-out_len: %zu = %zu\n", out_len2, out_len - out_len2);
+       }
+       while (BrotliEncoderHasMoreOutput(comp_ctx->brotli_state) == 
BROTLI_TRUE);
+       b_add(out, out_len - out_len2);
+       printf("Done\n");
+       return out_len - out_len2;
+}
+static int brotli_finish(struct comp_ctx *comp_ctx, struct buffer *out)
+{
+       printf("brotli_finish\n");
+       uint8_t *out_data = (uint8_t*)b_tail(out);
+       size_t out_len = b_room(out);
+       size_t out_len2 = out_len;
+       size_t in_len = 0;
+       
+       do
+       {
+               printf("BrotliEncoderCompressStream\n");
+               printf("+out_len: %zu\n", out_len2);
+               if (BrotliEncoderCompressStream(comp_ctx->brotli_state, 
BROTLI_OPERATION_FINISH, &in_len, NULL, &out_len2, &out_data, NULL) == 
BROTLI_FALSE) {
+                       return -1;
+               }
+               printf("-out_len: %zu = %zu\n", out_len2, out_len - out_len2);
+       }
+       while (BrotliEncoderHasMoreOutput(comp_ctx->brotli_state) == 
BROTLI_TRUE);
+       b_add(out, out_len - out_len2);
+       printf("Done\n");
+       return out_len - out_len2;
+}
+static int brotli_end(struct comp_ctx **comp_ctx)
+{
+       printf("brotli_end\n");
+       BrotliEncoderDestroyInstance((*comp_ctx)->brotli_state);
+       pool_free(pool_comp_ctx, *comp_ctx);
+       *comp_ctx = NULL;
+       return 0;
+}
+
+#endif
 
 /* config keyword parsers */
 static struct cfg_kw_list cfg_kws = {ILH, {
diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c
index 3c2ce365..bedc9e6f 100644
--- a/src/flt_http_comp.c
+++ b/src/flt_http_comp.c
@@ -1178,7 +1178,7 @@ http_compression_buffer_end(struct comp_state *st, struct 
stream *s,
        char *tail;
        int   to_forward, left;
 
-#if defined(USE_SLZ) || defined(USE_ZLIB)
+#if defined(USE_SLZ) || defined(USE_ZLIB) || defined(USE_BROTLI)
        int ret;
 
        /* flush data here */
-- 
2.20.1


Reply via email to