Re: [PATCH] MINOR : converter: add param converter

2022-12-13 Thread Willy Tarreau
On Wed, Dec 14, 2022 at 12:19:59AM -0700, Thayne McCombs wrote:
> Add a converter that extracts a parameter from string of delimited
> key/value pairs.

Great, now merged. Thank you!
Willy



[PATCH] MINOR : converter: add param converter

2022-12-13 Thread Thayne McCombs
Add a converter that extracts a parameter from string of delimited
key/value pairs.

Fixes: #1697
---
 doc/configuration.txt | 26 
 reg-tests/converter/param.vtc | 80 +++
 src/sample.c  | 64 
 3 files changed, 170 insertions(+)
 create mode 100644 reg-tests/converter/param.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index c45f0b4b68..0cc2bdee3b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17702,6 +17702,32 @@ or()
   This prefix is followed by a name. The separator is a '.'. The name may only
   contain characters 'a-z', 'A-Z', '0-9', '.' and '_'.
 
+param(,[])
+  This extracts the first occurrence of the parameter  in the input 
string
+  where parameters are delimited by , which defaults to "&", and the 
name
+  and value of the parameter are separated by a "=". If there is no "=" and
+  value before the end of the parameter segment, it is treated as equivalent to
+  a value of an empty string.
+
+  This can be useful for extracting parameters from a query string, or possibly
+  a x-www-form-urlencoded body. In particular, `query,param()` can be 
used
+  as an alternative to `urlp()` which only uses "&" as a delimiter,
+  whereas "urlp" also uses "?" and ";".
+
+  Note that this converter doesn't do anything special with url encoded
+  characters. If you want to decode the value, you can use the url_dec 
converter
+  on the output. If the name of the parameter in the input might contain 
encoded
+  characters, you'll probably want do normalize the input before calling
+  "param". This can be done using "http-request normalize-uri", in particular
+  the percent-decode-unreserved and percent-to-uppercase options.
+
+  Example :
+  str(a=b=d=r),param(a)   # b
+  str(a=c),param(a) # ""
+  str(a==a),param(b)  # ""
+  str(a=1;b=2;c=4),param(b,;) # 2
+  query,param(redirect_uri),urldec()
+
 port_only
   Converts a string which contains a Host header value into an integer by
   returning its port.
diff --git a/reg-tests/converter/param.vtc b/reg-tests/converter/param.vtc
new file mode 100644
index 00..1633603823
--- /dev/null
+++ b/reg-tests/converter/param.vtc
@@ -0,0 +1,80 @@
+varnishtest "param converter Test"
+
+feature ignore_unknown_macro
+
+server s1 {
+   rxreq
+   txresp -hdr "Connection: close"
+} -repeat 10 -start
+
+haproxy h1 -conf {
+   defaults
+   mode http
+   timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+   frontend fe
+   bind "fd@${fe}"
+
+   ### requests
+   http-request set-var(txn.query) query
+   http-response set-header Found %[var(txn.query),param(test)] if { 
var(txn.query),param(test) -m found }
+
+   default_backend be
+
+   backend be
+   server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+   txreq -url "/foo/?test=1=4"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "1"
+
+   txreq -url "/?a=1=4=34"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "34"
+
+   txreq -url "/?test=bar"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "bar"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?a=b=t=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "t"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?test="
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+txreq -url "/?a=b"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123=4"
+rxresp
+expect resp.status == 200
+expect resp.http.found == "4"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 62a372b81c..7a612fc033 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2607,6 +2607,69 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
return 1;
 }
 
+static int sample_conv_param_check(struct arg *arg, struct sample_conv *conv,
+   const char *file, int line, char **err)
+{
+   if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+   memprintf(err, "Delimiter must be exactly 1 character.");
+   return 0;
+   }
+
+   return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample *smp, void 
*private)
+{
+   char *pos, *end, *pend, *equal;
+   char delim = '&';
+   const char *name = 

Re: [PATCH] MINOR : converter: add param converter

2022-12-13 Thread Tim Düsterhus

Thayne,

On 12/9/22 07:22, Thayne McCombs wrote:

Ok. I think this patch addresses all of your feedback. Thanks for
looking at it.


It appears that your mailer mangled the patch. It looks incorrectly 
formatted in my MUA and git fails to apply it. I recommend either using 
'git send-email' or just attaching the patch as a regular attachment. 
Both should be equally simple for Willy to apply.


Best regards
Tim Düsterhus



[PATCH] MINOR : converter: add param converter

2022-12-08 Thread Thayne McCombs
Ok. I think this patch addresses all of your feedback. Thanks for 
looking at it.


-- >8 --

Add a converter that extracts a parameter from string of delimited

key/value pairs.

Fixes: #1697
---
 doc/configuration.txt | 26 
 reg-tests/converter/param.vtc | 80 +++
 src/sample.c  | 64 
 3 files changed, 170 insertions(+)
 create mode 100644 reg-tests/converter/param.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index c45f0b4b68..0cc2bdee3b 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17702,6 +17702,32 @@ or()
   This prefix is followed by a name. The separator is a '.'. The name 
may only

   contain characters 'a-z', 'A-Z', '0-9', '.' and '_'.

+param(,[])
+  This extracts the first occurrence of the parameter  in the 
input string
+  where parameters are delimited by , which defaults to "&", and 
the name

+  and value of the parameter are separated by a "=". If there is no "=" and
+  value before the end of the parameter segment, it is treated as 
equivalent to

+  a value of an empty string.
+
+  This can be useful for extracting parameters from a query string, or 
possibly
+  a x-www-form-urlencoded body. In particular, `query,param()` 
can be used

+  as an alternative to `urlp()` which only uses "&" as a delimiter,
+  whereas "urlp" also uses "?" and ";".
+
+  Note that this converter doesn't do anything special with url encoded
+  characters. If you want to decode the value, you can use the url_dec 
converter
+  on the output. If the name of the parameter in the input might 
contain encoded

+  characters, you'll probably want do normalize the input before calling
+  "param". This can be done using "http-request normalize-uri", in 
particular

+  the percent-decode-unreserved and percent-to-uppercase options.
+
+  Example :
+  str(a=b=d=r),param(a)   # b
+  str(a=c),param(a) # ""
+  str(a==a),param(b)  # ""
+  str(a=1;b=2;c=4),param(b,;) # 2
+  query,param(redirect_uri),urldec()
+
 port_only
   Converts a string which contains a Host header value into an integer by
   returning its port.
diff --git a/reg-tests/converter/param.vtc b/reg-tests/converter/param.vtc
new file mode 100644
index 00..1633603823
--- /dev/null
+++ b/reg-tests/converter/param.vtc
@@ -0,0 +1,80 @@
+varnishtest "param converter Test"
+
+feature ignore_unknown_macro
+
+server s1 {
+    rxreq
+    txresp -hdr "Connection: close"
+} -repeat 10 -start
+
+haproxy h1 -conf {
+    defaults
+    mode http
+    timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+    timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+    timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+    frontend fe
+    bind "fd@${fe}"
+
+    ### requests
+    http-request set-var(txn.query) query
+    http-response set-header Found %[var(txn.query),param(test)] if { 
var(txn.query),param(test) -m found }

+
+    default_backend be
+
+    backend be
+    server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+    txreq -url "/foo/?test=1=4"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == "1"
+
+    txreq -url "/?a=1=4=34"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == "34"
+
+    txreq -url "/?test=bar"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == "bar"
+
+    txreq -url "/?a=b=d"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == ""
+
+    txreq -url "/?a=b=t=d"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == "t"
+
+    txreq -url "/?a=b=d"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == ""
+
+    txreq -url "/?test="
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == ""
+
+    txreq -url "/?a=b"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == ""
+
+    txreq -url "/?testing=123"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == ""
+
+    txreq -url "/?testing=123=4"
+    rxresp
+    expect resp.status == 200
+    expect resp.http.found == "4"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 62a372b81c..7a612fc033 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2607,6 +2607,69 @@ static int sample_conv_word(const struct arg 
*arg_p, struct sample *smp, void *p

 return 1;
 }

+static int sample_conv_param_check(struct arg *arg, struct sample_conv 
*conv,

+   const char *file, int line, char **err)
+{
+    if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+        memprintf(err, "Delimiter must be exactly 1 character.");
+        return 0;
+    }
+
+    return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample 
*smp, void *private)

+{
+    char *pos, *end, *pend, *equal;
+    char delim = '&';
+    const char *name = arg_p[0].data.str.area;
+    size_t name_l = 

Re: [PATCH] MINOR : converter: add param converter

2022-12-07 Thread Willy Tarreau
On Wed, Jun 08, 2022 at 01:56:20AM -0600, astrotha...@gmail.com wrote:
> +param(,[])
> +  This extracts the first occurrence of the parameter  in the input 
> string
> +  where parameters are delimited by , which defaults to "&", and the 
> name
> +  and value of the parameter are separated by a "=". If there is no "=" and 
> value
> +  before the end of the parameter segment, it is treated as equivalent to a 
> value
> +  of an empty string.
> +
> +  This can be useful for extracting parameters from a query string, or 
> possibly a
> +  x-www-form-urlencoded body. In particular, `query,param()` can be 
> used as
> +  an alternative to `urlp()` which only uses "&" as a delimiter, 
> whereas "urlp"
> +  also uses "?" and ";".
> +
> +  Note that this converter doesn't do anything special with url encoded 
> characters. If
> +  you want to decode the value, you can use the url_dec converter on the 
> output. If
> +  the name of the parameter in the input might contain encoded characters, 
> you'll probably

Be careful to trim your long lines in the doc so that they don't overflow 80
characters.

> +static int sample_conv_param(const struct arg *arg_p, struct sample *smp, 
> void *private)
> +{
> + char *pos, *end, *pend, *equal;
> + char delim = '&';
> + const char *name = arg_p[0].data.str.area;
> + size_t name_l = arg_p[0].data.str.data;
> +
> + if (arg_p[1].type == ARGT_STR)
> + delim = *arg_p[1].data.str.area;
> +
> + pos = smp->data.u.str.area;
> + end = pos + smp->data.u.str.data;
> + while (pos < end) {
> + equal = pos + name_l;
> + /* Parameter not found */
> + if (equal > end)
> + break;
> +
> + if (equal == end || *equal == '&') {
  ^^^
here it should be "delim", I guess it's a leftover from a previous version

> + if (memcmp(pos, name, name_l) == 0) {
> + /* input contains parameter, but no value is 
> suplied */
 
^^^
supplied :-)

> + smp->data.u.str.data = 0;
> + return 1;
> + }
> + pos = equal + 1;
> + continue;
> + }
> +
> + if (*equal == '=' && memcmp(pos, name, name_l) == 0) {
> + pos = equal + 1;
> + pend = memchr(pos, delim, end - pos);
> + if (pend == NULL)
> + pend = end;
> + b_set_area_sub(&(smp->data.u.str), pos, pend - pos);

Ah, I didn't notice that b_set_area_sub() comes in a previous patch.
I'm not fond of this function, because it standardizes something that
should never work except in this exceptional case, and will inevitably
result in someone using it for the wrong thing. The point is that the
 part of a buffer is allocated from pools normally and must not
be changed at all, since for regular use, there are start offset and
length that do the job. However here in samples we're still using buffers
to store strings because they're easy to allocate and are convenient
enough to use, but I would say it's just a convenience that's almost an
abuse. Most likely that the string part of samples could be turned to IST
instead. Thus I'd rather see both the ->area and ->data parts here being
manually assigned, it's more readable and doesn't standardize an exception.
I suspect it would simply become:

smp->data.u.str.area = pos;
smp->data.u.str.data = pend - pos;

And that's way more readable for those trying to rule out the possibility of
a bug here.

>  /* Note: must not be declared  as its list will be overwritten */
>  static struct sample_conv_kw_list sample_conv_kws = {ILH, {
> - { "add_item",sample_conv_add_item, ARG3(2,STR,STR,STR),   
> smp_check_add_item,   SMP_T_STR,  SMP_T_STR  },
> - { "debug",   sample_conv_debug,ARG2(0,STR,STR),   
> smp_check_debug,  SMP_T_ANY,  SMP_T_ANY  },
> - { "b64dec",  sample_conv_base642bin,   0, NULL, 
> SMP_T_STR,  SMP_T_BIN  },
> + { "add_item",sample_conv_add_item, ARG3(2,STR,STR,STR),   
> smp_check_add_item,   SMP_T_STR,  SMP_T_STR  }, { "debug",   
> sample_conv_debug,ARG2(0,STR,STR),   smp_check_debug,  
> SMP_T_ANY,  SMP_T_ANY  }, { "b64dec",  sample_conv_base642bin,   0,   
>   NULL, SMP_T_STR,  SMP_T_BIN  },

There's a mistake here, you accidently folded 3 lines. Probably "J" being
pressed there by accident in vim :-)

Otherwise it looks good. Please recheck, but I'm overall fine with merging
this.

Thank you!
Willy



Re: [PATCH] MINOR : converter: add param converter

2022-12-07 Thread Willy Tarreau
On Tue, Dec 06, 2022 at 03:44:00PM -0700, Thayne wrote:
> Any update on this?

Hmm indeed it left forgotten in the dust, we must get back to it to verify
that it's OK then merge it.

Thanks for the reminder, Thayne!
Willy



Re: [PATCH] MINOR : converter: add param converter

2022-12-06 Thread Thayne
Any update on this?


[PATCH] MINOR : converter: add param converter

2022-06-08 Thread astrothayne
From: Thayne McCombs 

Add a converter that extracts a parameter from string of delimited
key/value pairs.

Fixes: #1697
---
 doc/configuration.txt | 26 
 reg-tests/converter/param.vtc | 80 +++
 src/sample.c  | 64 ++--
 3 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 reg-tests/converter/param.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 927c97ce3..bce29ef48 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17411,6 +17411,32 @@ or()
   This prefix is followed by a name. The separator is a '.'. The name may only
   contain characters 'a-z', 'A-Z', '0-9', '.' and '_'.
 
+param(,[])
+  This extracts the first occurrence of the parameter  in the input 
string
+  where parameters are delimited by , which defaults to "&", and the 
name
+  and value of the parameter are separated by a "=". If there is no "=" and 
value
+  before the end of the parameter segment, it is treated as equivalent to a 
value
+  of an empty string.
+
+  This can be useful for extracting parameters from a query string, or 
possibly a
+  x-www-form-urlencoded body. In particular, `query,param()` can be used 
as
+  an alternative to `urlp()` which only uses "&" as a delimiter, whereas 
"urlp"
+  also uses "?" and ";".
+
+  Note that this converter doesn't do anything special with url encoded 
characters. If
+  you want to decode the value, you can use the url_dec converter on the 
output. If
+  the name of the parameter in the input might contain encoded characters, 
you'll probably
+  want do normalize the input before calling "param". This can be done using
+  "http-request normalize-uri", in particular the percent-decode-unreserved and
+  percent-to-uppercase options.
+
+  Example :
+  str(a=b=d=r),param(a)   # b
+  str(a=c),param(a) # ""
+  str(a==a),param(b)  # ""
+  str(a=1;b=2;c=4),param(b,;) # 2
+  query,param(redirect_uri),urldec()
+
 protobuf(,[])
   This extracts the protocol buffers message field in raw mode of an input 
binary
   sample representation of a protocol buffer message with  as 
field
diff --git a/reg-tests/converter/param.vtc b/reg-tests/converter/param.vtc
new file mode 100644
index 0..163360382
--- /dev/null
+++ b/reg-tests/converter/param.vtc
@@ -0,0 +1,80 @@
+varnishtest "param converter Test"
+
+feature ignore_unknown_macro
+
+server s1 {
+   rxreq
+   txresp -hdr "Connection: close"
+} -repeat 10 -start
+
+haproxy h1 -conf {
+   defaults
+   mode http
+   timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+   frontend fe
+   bind "fd@${fe}"
+
+   ### requests
+   http-request set-var(txn.query) query
+   http-response set-header Found %[var(txn.query),param(test)] if { 
var(txn.query),param(test) -m found }
+
+   default_backend be
+
+   backend be
+   server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+   txreq -url "/foo/?test=1=4"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "1"
+
+   txreq -url "/?a=1=4=34"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "34"
+
+   txreq -url "/?test=bar"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "bar"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?a=b=t=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "t"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?test="
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+txreq -url "/?a=b"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123=4"
+rxresp
+expect resp.status == 200
+expect resp.http.found == "4"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 237b88056..b2c80b6c8 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2582,6 +2582,65 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
return 1;
 }
 
+static int sample_conv_param_check(struct arg *arg, struct sample_conv *conv,
+   const char *file, int line, char **err)
+{
+   if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+   memprintf(err, "Delimiter must be exactly 1 character.");
+   return 0;
+   }
+
+   return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample *smp, void 
*private)
+{