Re: [PATCH v3] MINOR: converter: adding support for url_enc

2021-01-06 Thread Willy Tarreau
On Wed, Jan 06, 2021 at 11:39:50PM +0100, William Dauchy wrote:
> add base support for url encode following RFC3986, supporting `query`
> type only.
(...)

looks perfect now, thus merged :-)

Thank you!
Willy



[PATCH v3] MINOR: converter: adding support for url_enc

2021-01-06 Thread William Dauchy
add base support for url encode following RFC3986, supporting `query`
type only.

- add test checking url_enc/url_dec/url_enc
- update documentation
- leave the door open for future changes

this should resolve github issue #941

Signed-off-by: William Dauchy 
---
 doc/configuration.txt   |  6 +++
 reg-tests/converter/url_enc.vtc | 43 ++
 src/http_conv.c | 77 +
 3 files changed, 126 insertions(+)
 create mode 100644 reg-tests/converter/url_enc.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 6f8ffab3b..31ab5906b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -16240,6 +16240,12 @@ url_dec([<in_form>])
   space (' '). Otherwise this will only happen after a question mark indicating
   a query string ('?').
 
+url_enc([<enc_type>])
+  Takes a string provided as input and returns the encoded version as output.
+  The input and the output are of type string. By default the type of encoding
+  is meant for `query` type. There is no other type supported for now but the
+  optional argument is here for future changes.
+
 ungrpc(<field_number>,[<field_type>])
   This extracts the protocol buffers message field in raw mode of an input binary
   sample representation of a gRPC message with <field_number> as field number
diff --git a/reg-tests/converter/url_enc.vtc b/reg-tests/converter/url_enc.vtc
new file mode 100644
index 000000000..a3f70ade9
--- /dev/null
+++ b/reg-tests/converter/url_enc.vtc
@@ -0,0 +1,43 @@
+varnishtest "url_enc converter test"
+
+#REQUIRE_VERSION=2.4
+
+feature ignore_unknown_macro
+
+server s1 {
+   rxreq
+   txresp
+} -repeat 2 -start
+
+haproxy h1 -conf {
+defaults
+   mode http
+   timeout connect 1s
+   timeout client  1s
+   timeout server  1s
+
+frontend fe
+   bind "fd@${fe}"
+
+   http-request set-var(txn.url0) "str(foo=bar+42 42 )"
+   http-request set-var(txn.url1) "var(txn.url0),url_enc"
+   http-request set-var(txn.url2) "var(txn.url1),url_dec"
+   http-request set-var(txn.url3) "var(txn.url2),url_enc(query)"
+   http-response set-header url_enc0 "%[var(txn.url1)]"
+   http-response set-header url_dec "%[var(txn.url2)]"
+   http-response set-header url_enc1 "%[var(txn.url3)]"
+
+   default_backend be
+
+backend be
+   server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+   txreq -url "/"
+   rxresp
+   expect resp.http.url_enc0 == "foo%3Dbar%2B42%2042%20"
+   expect resp.http.url_dec == "foo=bar+42 42 "
+   expect resp.http.url_enc1 == "foo%3Dbar%2B42%2042%20"
+   expect resp.status == 200
+} -run
diff --git a/src/http_conv.c b/src/http_conv.c
index 4afa6a2fd..c3aa8113b 100644
--- a/src/http_conv.c
+++ b/src/http_conv.c
@@ -268,6 +268,82 @@ static int sample_conv_url_dec(const struct arg *args, struct sample *smp, void
return 1;
 }
 
+/* url-encode types and encode maps */
+enum encode_type {
+   ENC_QUERY = 0,
+};
+long query_encode_map[(256 / 8) / sizeof(long)];
+
+/* Check url-encode type */
+static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv *conv,
+const char *file, int line, char **err)
+{
+   enum encode_type enc_type;
+
+   if (strcmp(arg->data.str.area, "") == 0)
+   enc_type = ENC_QUERY;
+   else if (strcmp(arg->data.str.area, "query") == 0)
+   enc_type = ENC_QUERY;
+   else {
+   memprintf(err, "Unexpected encode type. "
+ "Allowed value is 'query'");
+   return 0;
+   }
+
+   chunk_destroy(&arg->data.str);
+   arg->type = ARGT_SINT;
+   arg->data.sint = enc_type;
+   return 1;
+}
+
+/* Initializes some url encode data at boot */
+static void sample_conf_url_enc_init()
+{
+   int i;
+
+   memset(query_encode_map, 0, sizeof(query_encode_map));
+   /* use rfc3986 to determine list of characters to keep unchanged for
+* query string */
+   for (i = 0; i < 256; i++) {
+   if (!((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z')
+   || (i >= '0' && i <= '9') ||
+   i == '-' || i == '.' || i == '_' || i == '~'))
+   ha_bit_set(i, query_encode_map);
+   }
+}
+
+INITCALL0(STG_PREPARE, sample_conf_url_enc_init);
+
+/* This fetch url-encode any input string. Only support query string for now */
+static int sample_conv_url_enc(const struct arg *args, struct sample *smp, void
+   *private)
+{
+   enum encode_type enc_type;
+   struct buffer *trash = get_trash_chunk();
+   long *encode_map;
+   char *ret;
+
+   enc_type = ENC_QUERY;
+   if (args)
+   enc_type = args->data.sint;
+
+   /* Add final \0 required by encode_string() */
+   smp->data.u.str.area[smp->data.u.str.data] = '\0';
+
+   if (enc_type == ENC_QUERY)
+   encode_map = query_encode_map;
+   else
+