Add basic support for URL encoding following RFC3986. Only the `query`
type is supported for now.

- add test checking url_enc/url_dec/url_enc
- update documentation
- leave the door open for future changes

this should resolve github issue #941

Signed-off-by: William Dauchy <wdau...@gmail.com>
---
 doc/configuration.txt           |  6 +++
 reg-tests/converter/url_enc.vtc | 43 ++++++++++++++++++
 src/http_conv.c                 | 77 +++++++++++++++++++++++++++++++++
 3 files changed, 126 insertions(+)
 create mode 100644 reg-tests/converter/url_enc.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 6f8ffab3b..31ab5906b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -16240,6 +16240,12 @@ url_dec([<in_form>])
   space (' '). Otherwise this will only happen after a question mark indicating
   a query string ('?').
 
+url_enc([<enc_type>])
+  Takes a string provided as input and returns its URL-encoded version as
+  output, following RFC3986. The input and the output are of type string. The
+  optional <enc_type> selects the encoding rules and defaults to `query`, the
+  only type supported for now; the argument leaves room for future types.
+
 ungrpc(<field_number>,[<field_type>])
   This extracts the protocol buffers message field in raw mode of an input binary
   sample representation of a gRPC message with <field_number> as field number
diff --git a/reg-tests/converter/url_enc.vtc b/reg-tests/converter/url_enc.vtc
new file mode 100644
index 000000000..a3f70ade9
--- /dev/null
+++ b/reg-tests/converter/url_enc.vtc
@@ -0,0 +1,43 @@
+varnishtest "url_enc converter test"
+
+#REQUIRE_VERSION=2.4
+
+feature ignore_unknown_macro
+
+server s1 {
+       rxreq
+       txresp
+} -repeat 2 -start
+
+haproxy h1 -conf {
+    defaults
+       mode http
+       timeout connect 1s
+       timeout client  1s
+       timeout server  1s
+
+    frontend fe
+       bind "fd@${fe}"
+
+       http-request set-var(txn.url0) "str(foo=bar+42 42 )"
+       http-request set-var(txn.url1) "var(txn.url0),url_enc"
+       http-request set-var(txn.url2) "var(txn.url1),url_dec"
+       http-request set-var(txn.url3) "var(txn.url2),url_enc(query)"
+       http-response set-header url_enc0 "%[var(txn.url1)]"
+       http-response set-header url_dec "%[var(txn.url2)]"
+       http-response set-header url_enc1 "%[var(txn.url3)]"
+
+       default_backend be
+
+    backend be
+       server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+       txreq -url "/"
+       rxresp
+       expect resp.http.url_enc0 == "foo%3Dbar%2B42%2042%20"
+       expect resp.http.url_dec == "foo=bar+42 42 "
+       expect resp.http.url_enc1 == "foo%3Dbar%2B42%2042%20"
+       expect resp.status == 200
+} -run
diff --git a/src/http_conv.c b/src/http_conv.c
index 4afa6a2fd..c3aa8113b 100644
--- a/src/http_conv.c
+++ b/src/http_conv.c
@@ -268,6 +268,82 @@ static int sample_conv_url_dec(const struct arg *args, 
struct sample *smp, void
        return 1;
 }
 
+/* url-encode types and encode maps. Each encode map is a bitmap of 256 bits,
+ * one per possible byte value.
+ */
+enum encode_type {
+       ENC_QUERY = 0, /* query string encoding, the only type handled so far */
+};
+/* Map used for ENC_QUERY. It is filled once at boot; a set bit flags a byte
+ * value that is not part of the RFC3986 unreserved set.
+ */
+long query_encode_map[(256 / 8) / sizeof(long)];
+
+/* Argument checker for the url_enc converter. An empty string or "query"
+ * selects ENC_QUERY; any other value is rejected with a message stored in
+ * <err>. On success the string argument is destroyed with chunk_destroy() and
+ * replaced in place by the encode type stored as an ARGT_SINT.
+ * Returns 1 on success, 0 on error.
+ */
+static int sample_conv_url_enc_check(struct arg *arg, struct sample_conv *conv,
+                                    const char *file, int line, char **err)
+{
+       const char *name = arg->data.str.area;
+
+       /* the empty string and "query" are the only accepted spellings */
+       if (*name != '\0' && strcmp(name, "query") != 0) {
+               memprintf(err, "Unexpected encode type. "
+                         "Allowed value is 'query'");
+               return 0;
+       }
+
+       /* turn the textual argument into its numeric encode type */
+       chunk_destroy(&arg->data.str);
+       arg->type = ARGT_SINT;
+       arg->data.sint = ENC_QUERY;
+       return 1;
+}
+
+/* Initializes query_encode_map at boot: the map is cleared, then a bit is set
+ * for every byte value that is not an RFC3986 unreserved character. Takes no
+ * argument and returns nothing; it is registered right below through
+ * INITCALL0() for the STG_PREPARE stage.
+ */
+static void sample_conf_url_enc_init(void)
+{
+       int i;
+
+       memset(query_encode_map, 0, sizeof(query_encode_map));
+       /* RFC3986 unreserved characters (ALPHA / DIGIT / "-" / "." / "_" /
+        * "~") are kept unchanged in a query string; every other byte value
+        * gets its bit set in the map. */
+       for (i = 0; i < 256; i++) {
+               if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z') ||
+                   (i >= '0' && i <= '9') ||
+                   i == '-' || i == '.' || i == '_' || i == '~')
+                       continue;
+               ha_bit_set(i, query_encode_map);
+       }
+}
+
+INITCALL0(STG_PREPARE, sample_conf_url_enc_init);
+
+/* This converter url-encodes any input string. The encode type is read from
+ * <args> when it is not NULL and defaults to ENC_QUERY otherwise; only the
+ * query string type is supported for now. The encoded result is built in a
+ * trash chunk which then replaces the sample's string.
+ * Returns 1 on success, or 0 if the encode type is unknown or the encoder did
+ * not return a pointer to a terminating '\0'.
+ */
+static int sample_conv_url_enc(const struct arg *args, struct sample *smp, void
+               *private)
+{
+       enum encode_type enc_type;
+       struct buffer *trash = get_trash_chunk();
+       long *encode_map;
+       char *ret;
+
+       enc_type = ENC_QUERY;
+       if (args)
+               enc_type = args->data.sint;
+
+       if (enc_type == ENC_QUERY)
+               encode_map = query_encode_map;
+       else
+               return 0;
+
+       /* encode_chunk() is bounded by the sample's length instead of a
+        * trailing '\0', so nothing is written to the input buffer, which
+        * may be read-only or have no spare byte after its last character.
+        */
+       ret = encode_chunk(trash->area, trash->area + trash->size, '%',
+                          encode_map, &smp->data.u.str);
+       if (ret == NULL || *ret != '\0')
+               return 0;
+       trash->data = ret - trash->area;
+       smp->data.u.str = *trash;
+       return 1;
+}
+
 static int smp_conv_req_capture(const struct arg *args, struct sample *smp, 
void *private)
 {
        struct proxy *fe;
@@ -369,6 +445,7 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, {
        { "capture-req",    smp_conv_req_capture,     ARG1(1,SINT),     NULL,   
SMP_T_STR,  SMP_T_STR},
        { "capture-res",    smp_conv_res_capture,     ARG1(1,SINT),     NULL,   
SMP_T_STR,  SMP_T_STR},
        { "url_dec",        sample_conv_url_dec,      ARG1(0,SINT),     NULL,   
SMP_T_STR,  SMP_T_STR},
+       { "url_enc",        sample_conv_url_enc,      ARG1(1,STR),      
sample_conv_url_enc_check, SMP_T_STR,  SMP_T_STR},
        { NULL, NULL, 0, 0, 0 },
 }};
 
-- 
2.29.2


Reply via email to