[PATCH] MINOR : converter: add param converter

2022-06-08 Thread astrothayne
From: Thayne McCombs 

Add a converter that extracts a parameter from a string of delimited
key/value pairs.

Fixes: #1697
---
 doc/configuration.txt | 26 
 reg-tests/converter/param.vtc | 80 +++
 src/sample.c  | 64 ++--
 3 files changed, 167 insertions(+), 3 deletions(-)
 create mode 100644 reg-tests/converter/param.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 927c97ce3..bce29ef48 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17411,6 +17411,32 @@ or()
   This prefix is followed by a name. The separator is a '.'. The name may only
   contain characters 'a-z', 'A-Z', '0-9', '.' and '_'.
 
+param(<name>,[<delim>])
+  This extracts the first occurrence of the parameter <name> in the input string
+  where parameters are delimited by <delim>, which defaults to "&", and the name
+  and value of the parameter are separated by a "=". If there is no "=" and value
+  before the end of the parameter segment, it is treated as equivalent to a value
+  of an empty string.
+
+  This can be useful for extracting parameters from a query string, or possibly a
+  x-www-form-urlencoded body. In particular, `query,param(<name>)` can be used as
+  an alternative to `urlp(<name>)` which only uses "&" as a delimiter, whereas "urlp"
+  also uses "?" and ";".
+
+  Note that this converter doesn't do anything special with url encoded characters. If
+  you want to decode the value, you can use the url_dec converter on the output. If
+  the name of the parameter in the input might contain encoded characters, you'll probably
+  want to normalize the input before calling "param". This can be done using
+  "http-request normalize-uri", in particular the percent-decode-unreserved and
+  percent-to-uppercase options.
+
+  Example :
+  str(a=b&c=d&a=r),param(a)   # b
+  str(a&b=c),param(a) # ""
+  str(a=&b&c=a),param(b)  # ""
+  str(a=1;b=2;c=4),param(b,;) # 2
+  query,param(redirect_uri),urldec()
+
 protobuf(<field_number>,[<field_type>])
   This extracts the protocol buffers message field in raw mode of an input binary
   sample representation of a protocol buffer message with <field_number> as field
diff --git a/reg-tests/converter/param.vtc b/reg-tests/converter/param.vtc
new file mode 100644
index 0..163360382
--- /dev/null
+++ b/reg-tests/converter/param.vtc
@@ -0,0 +1,80 @@
+varnishtest "param converter Test"
+
+feature ignore_unknown_macro
+
+server s1 {
+   rxreq
+   txresp -hdr "Connection: close"
+} -repeat 10 -start
+
+haproxy h1 -conf {
+   defaults
+   mode http
+   timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+   frontend fe
+   bind "fd@${fe}"
+
+   ### requests
+   http-request set-var(txn.query) query
+   http-response set-header Found %[var(txn.query),param(test)] if { var(txn.query),param(test) -m found }
+
+   default_backend be
+
+   backend be
+   server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+   txreq -url "/foo/?test=1=4"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "1"
+
+   txreq -url "/?a=1=4=34"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "34"
+
+   txreq -url "/?test=bar"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "bar"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?a=b=t=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "t"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?test="
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+txreq -url "/?a=b"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123=4"
+rxresp
+expect resp.status == 200
+expect resp.http.found == "4"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 237b88056..b2c80b6c8 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2582,6 +2582,65 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
return 1;
 }
 
+static int sample_conv_param_check(struct arg *arg, struct sample_conv *conv,
+   const char *file, int line, char **err)
+{
+   if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+   memprintf(err, "Delimiter must be exactly 1 character.");
+   return 0;
+   }
+
+   return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample *smp, void 
*private)
+{

[PATCH] DOC/MINOR: Suggestions for percent encoding in param()

2022-06-03 Thread astrothayne
From: Thayne McCombs 

Add some documentation on how to handle percent encoded characters in
input to the param() converter.
---
 doc/configuration.txt | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index d9f47c2eb..9505fc71d 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17423,12 +17423,19 @@ param(,[])
   an alternative to `urlp(<name>)` which only uses "&" as a delimiter, whereas urlp
   also uses "?" and ";".

+  Note that this converter doesn't do anything special with url encoded characters. If
+  you want to decode the value, you can use the url_dec converter on the output. If
+  the name of the parameter in the input might contain encoded characters, you'll probably
+  want to normalize the input before calling param. This can be done using
+  "http-request normalize-uri", in particular the percent-decode-unreserved and
+  percent-to-uppercase options.
+
   Example :
   str(a=b&c=d&a=r),param(a)   # b
   str(a&b=c),param(a) # ""
   str(a=&b&c=a),param(b)  # ""
   str(a=1;b=2;c=4),param(b,;) # 2
-  query,param(redirect_uri)
+  query,param(redirect_uri),urldec()

 protobuf(<field_number>,[<field_type>])
   This extracts the protocol buffers message field in raw mode of an input binary
--
2.36.1




[PATCH 2/2] MINOR : converter: add param converter

2022-06-03 Thread astrothayne
From: Thayne McCombs 

Add a converter that extracts a parameter from a string of delimited
key/value pairs.

Fixes: #1697
---
 doc/configuration.txt | 19 +
 reg-tests/converter/param.vtc | 80 +++
 src/sample.c  | 64 ++--
 3 files changed, 160 insertions(+), 3 deletions(-)
 create mode 100644 reg-tests/converter/param.vtc

diff --git a/doc/configuration.txt b/doc/configuration.txt
index 927c97ce3..d9f47c2eb 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -17411,6 +17411,25 @@ or()
   This prefix is followed by a name. The separator is a '.'. The name may only
   contain characters 'a-z', 'A-Z', '0-9', '.' and '_'.

+param(<name>,[<delim>])
+  This extracts the first occurrence of the parameter <name> in the input string
+  where parameters are delimited by <delim>, which defaults to "&", and the name
+  and value of the parameter are separated by a "=". If there is no "=" and value
+  before the end of the parameter segment, it is treated as equivalent to a value
+  of an empty string.
+
+  This can be useful for extracting parameters from a query string, or possibly a
+  x-www-form-urlencoded body. In particular, `query,param(<name>)` can be used as
+  an alternative to `urlp(<name>)` which only uses "&" as a delimiter, whereas urlp
+  also uses "?" and ";".
+
+  Example :
+  str(a=b&c=d&a=r),param(a)   # b
+  str(a&b=c),param(a) # ""
+  str(a=&b&c=a),param(b)  # ""
+  str(a=1;b=2;c=4),param(b,;) # 2
+  query,param(redirect_uri)
+
 protobuf(<field_number>,[<field_type>])
   This extracts the protocol buffers message field in raw mode of an input binary
   sample representation of a protocol buffer message with <field_number> as field
diff --git a/reg-tests/converter/param.vtc b/reg-tests/converter/param.vtc
new file mode 100644
index 0..163360382
--- /dev/null
+++ b/reg-tests/converter/param.vtc
@@ -0,0 +1,80 @@
+varnishtest "param converter Test"
+
+feature ignore_unknown_macro
+
+server s1 {
+   rxreq
+   txresp -hdr "Connection: close"
+} -repeat 10 -start
+
+haproxy h1 -conf {
+   defaults
+   mode http
+   timeout connect "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout client  "${HAPROXY_TEST_TIMEOUT-5s}"
+   timeout server  "${HAPROXY_TEST_TIMEOUT-5s}"
+
+   frontend fe
+   bind "fd@${fe}"
+
+   ### requests
+   http-request set-var(txn.query) query
+   http-response set-header Found %[var(txn.query),param(test)] if { var(txn.query),param(test) -m found }
+
+   default_backend be
+
+   backend be
+   server s1 ${s1_addr}:${s1_port}
+} -start
+
+client c1 -connect ${h1_fe_sock} {
+   txreq -url "/foo/?test=1=4"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "1"
+
+   txreq -url "/?a=1=4=34"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "34"
+
+   txreq -url "/?test=bar"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "bar"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?a=b=t=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == "t"
+
+   txreq -url "/?a=b=d"
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+   txreq -url "/?test="
+   rxresp
+   expect resp.status == 200
+   expect resp.http.found == ""
+
+txreq -url "/?a=b"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123"
+rxresp
+expect resp.status == 200
+expect resp.http.found == ""
+
+txreq -url "/?testing=123=4"
+rxresp
+expect resp.status == 200
+expect resp.http.found == "4"
+} -run
diff --git a/src/sample.c b/src/sample.c
index 237b88056..b2c80b6c8 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2582,6 +2582,65 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
return 1;
 }

+static int sample_conv_param_check(struct arg *arg, struct sample_conv *conv,
+   const char *file, int line, char **err)
+{
+   if (arg[1].type == ARGT_STR && arg[1].data.str.data != 1) {
+   memprintf(err, "Delimiter must be exactly 1 character.");
+   return 0;
+   }
+
+   return 1;
+}
+
+static int sample_conv_param(const struct arg *arg_p, struct sample *smp, void 
*private)
+{
+   char *pos, *end, *pend, *equal;
+   char delim = '&';
+   const char *name = arg_p[0].data.str.area;
+   size_t name_l = arg_p[0].data.str.data;
+
+   if (arg_p[1].type == ARGT_STR)
+   delim = *arg_p[1].data.str.area;
+
+   pos = smp->data.u.str.area;
+   end = pos + smp->data.u.str.data;
+   while (pos < end) {
+   equal = pos + name_l;
+   /* Parameter not found */
+   if (equal > end)
+

[PATCH 1/2] CLEANUP/MINOR: sample: factor out common code for setting buffer

2022-06-03 Thread astrothayne
From: Thayne McCombs 

A couple of functions in sample.c have almost identical code for
updating the samples string buffer. This adds a new helper function to
do this and uses it in both places.
---
 include/haproxy/buf.h | 22 ++
 src/sample.c  | 29 ++---
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/include/haproxy/buf.h b/include/haproxy/buf.h
index 4ea4b73f1..303635c39 100644
--- a/include/haproxy/buf.h
+++ b/include/haproxy/buf.h
@@ -940,6 +940,28 @@ static inline int b_peek_varint(struct buffer *b, size_t 
ofs, uint64_t *vptr)
return size;
 }
 
+/*
+ * b_set_area_sub(): Replace the current buffer with a sub-slice of the
+ * current buffer. Sets <area> to <new_area>, <data> to <new_data>, and
+ * <size> to the new size accounting for the change to area. The range from
+ * <new_area> to <new_area> + <new_data> must be within the range of the current buffer.
+ */
+static inline void b_set_area_sub(struct buffer *b, char *new_area, size_t 
new_data)
+{
+   BUG_ON_HOT(new_area < b->area);
+   BUG_ON_HOT(new_area + new_data > b->area + b->data);
+
+   b->data = new_data;
+   /* If buffer is len 0, no need to
+   change pointers or to update size */
+   if (!new_data)
+   return;
+   /* Compute remaining size if needed */
+   if (b->size)
+   b->size -= new_area - b->area;
+   b-> area = new_area;
+}
+
 
 /*
  * Buffer ring management.
diff --git a/src/sample.c b/src/sample.c
index 50ae76b6e..237b88056 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2490,19 +2490,7 @@ static int sample_conv_field(const struct arg *arg_p, 
struct sample *smp, void *
return 0;
}
 found:
-   smp->data.u.str.data = end - start;
-   /* If ret string is len 0, no need to
-   change pointers or to update size */
-   if (!smp->data.u.str.data)
-   return 1;
-
-   /* Compute remaining size if needed
-   Note: smp->data.u.str.size cannot be set to 0 */
-   if (smp->data.u.str.size)
-   smp->data.u.str.size -= start - smp->data.u.str.area;
-
-   smp->data.u.str.area = start;
-
+   b_set_area_sub(&(smp->data.u.str), start, end - start);
return 1;
 }
 
@@ -2590,20 +2578,7 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
return 1;
}
 found:
-   smp->data.u.str.data = end - start;
-   /* If ret string is len 0, no need to
-   change pointers or to update size */
-   if (!smp->data.u.str.data)
-   return 1;
-
-
-   /* Compute remaining size if needed
-   Note: smp->data.u.str.size cannot be set to 0 */
-   if (smp->data.u.str.size)
-   smp->data.u.str.size -= start - smp->data.u.str.area;
-
-   smp->data.u.str.area = start;
-
+   b_set_area_sub(&(smp->data.u.str), start, end - start);
return 1;
 }
 
-- 
2.36.1




[PATCH] BUG/MEDIUM: sample: Fix adjusting size in word converter

2022-05-25 Thread astrothayne
From: Thayne McCombs 

Adjust the size of the sample buffer before we change the "area"
pointer. Otherwise, we end up not changing the size, because the area
pointer is already the same as "start" before we compute the difference
between the two.

This is similar to the change in b28430591d18f7fda5bac2e0ea590b3a34f04601
but for the word converter instead of field.
---
 src/sample.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/sample.c b/src/sample.c
index 8a0a66b8c..50ae76b6e 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -2596,13 +2596,14 @@ static int sample_conv_word(const struct arg *arg_p, 
struct sample *smp, void *p
if (!smp->data.u.str.data)
return 1;
 
-   smp->data.u.str.area = start;
 
/* Compute remaining size if needed
Note: smp->data.u.str.size cannot be set to 0 */
if (smp->data.u.str.size)
smp->data.u.str.size -= start - smp->data.u.str.area;
 
+   smp->data.u.str.area = start;
+
return 1;
 }
 
-- 
2.36.1




[PATCH] DOC: configuration: add clarification on escaping in keyword arguments

2021-10-04 Thread astrothayne
From: Thayne McCombs 

Add a more precise description on how backslash escaping is different
than the top-level parser, and give examples of how to handle single
quotes inside arguments.
---
 doc/configuration.txt | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/doc/configuration.txt b/doc/configuration.txt
index a7e8cdd80..249730998 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -613,7 +613,7 @@ if a closing parenthesis is needed inside, this one will 
require to have its
 own quotes.
 
 The keyword argument parser is exactly the same as the top-level one regarding
-quotes, except that is will not make special cases of backslashes. But what is
+quotes, except that the \#, \$, and \xNN escapes are not processed. But what is
 not always obvious is that the delimiters used inside must first be escaped or
 quoted so that they are not resolved at the top level.
 
@@ -692,14 +692,22 @@ thus single quotes are preferred (or double escaping). 
Example:
   arg3 __/
 
 Remember that backslashes are not escape characters within single quotes and
-that the whole word3 above is already protected against them using the single
+that the whole word above is already protected against them using the single
 quotes. Conversely, if double quotes had been used around the whole expression,
 single the dollar character and the backslashes would have been resolved at top
 level, breaking the argument contents at the second level.
 
+Unfortunately, since single quotes can't be escaped inside of strong quoting,
+if you need to include single quotes in your argument, you will need to escape
+or quote them twice. There are a few ways to do this:
+
+http-request set-var(txn.foo) str("\\'foo\\'")
+http-request set-var(txn.foo) str(\"\'foo\'\")
+http-request set-var(txn.foo) str(\\\'foo\\\')
+
 When in doubt, simply do not use quotes anywhere, and start to place single or
 double quotes around arguments that require a comma or a closing parenthesis,
-and think about escaping these quotes using a backslash of the string contains
+and think about escaping these quotes using a backslash if the string contains
 a dollar or a backslash. Again, this is pretty similar to what is used under
 a Bourne shell when double-escaping a command passed to "eval". For API writers
 the best is probably to place escaped quotes around each and every argument,
-- 
2.33.0