On Mon, Dec 20, 2010 at 3:24 PM, David Cournapeau <[email protected]> wrote:
> On Mon, Dec 20, 2010 at 2:40 PM, Willy Tarreau <[email protected]> wrote:
>> Hi David,
>>
>> On Sat, Dec 18, 2010 at 02:23:33PM +0900, David Cournapeau wrote:
>>> (My apologies if this email appears twice - it seems it did not get
>>> through the first time)
>>>
>>> Hi,
>>>
>>> This patch against git master branch adds a url_param pattern
>>> extraction method to be used with stick tables, e.g.
>>
>> This is a nice feature, thank you ! I remember someone already asked
>> about it.
>>
>>> The patch is low quality: it keeps reallocating the output buffer,
>>
>> The reallocation is indeed not acceptable. Not only it is extremly
>> expensive, it also introduces a risk of memory leak and of crash
>> due to the lack of return value. In fact you don't need to allocate
>> the output, you can simply make use of the returned chunk to put
>> a pointer to the request buffer there and store its length with it.
>
> I don't think I understand the chunk (as passed in data->str) lifecycle:
> - I noticed that the chunk passed in data->str (inside
> pattern_fetch_url_param function) is always one of two pointers, and I
> don't know how there were allocated (is it ok to change data->str.str
> value or do I create a memory leak) ?
> - At first, I tried to simply point data->str.str to the parameter
> value + updating data->str.len:
>
> /* url_param_value points to the beginning of the url
> parameter value, url_param_value_l is the length of the value */
> data->str.str = url_param_value;
> data->str.len = url_param_value_l;
>
> But that did not work. The case where it broke is as follows:
>
> curl http://localhost:9999/foo?sess=12345 # all subsequent calls with
> sess=12345 are put to the same backend
> curl http://localhost:9999/foo?sess=123456 # all subsequent calls with
> sess=123456 are put to the same backend
> curl http://localhost:9999/foo?sess=12345 # not put to the same
> backend anymore...
>
> I am not sure I understand why allocating a new string works, though.
Hm, after further inspection, this issue seems to be solved by setting
data->str.len to one more byte than the actual size of the value (e.g.
sess=123 would have a length of 4). Reading the chunk code, it seems
that the len parameter is indeed the string length, not the buffer
length, so I don't understand that one-off issue. Could it be by any
chance an issue somewhere else in the haproxy code ?
I have updated my github branch for other issues, including a minimal
doc on the new parameter,
cheers,
David
diff --git a/doc/configuration.txt b/doc/configuration.txt
index a15b035..dbc29de 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -7770,6 +7770,13 @@ The list of currently supported pattern fetch functions is the following :
<lengthoffset> + <lengthsize> else it is absolute.
Ex: see SSL session id example in "stick table" chapter.
+ url_param(name)
+ This extracts the first occurrence of the parameter <name> in
+ the query string of the request and uses the correponding value
+ to match. A typical use is to get sticky session through url (e.g.
+ http://example.com/foo?JESSIONID=some_id with
+ url_param(JSESSIONID)), for cases where cookies cannot be used.
+
The currently available list of transformations include :
lower Convert a string pattern to lower case. This can only be placed
diff --git a/src/proto_http.c b/src/proto_http.c
index db86769..a879f0b 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -8113,6 +8113,170 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
return data->ip.s_addr != 0;
}
+/*
+ * XXX: This code to parse url path to get parameter value is ugly and
+ * inefficient. String matching is brute force, and most likely broken in some
+ * cases
+ */
+
+/*
+ * Given a path string and its length, find the position of beginning of the
+ * query string. Returns -1 if no query string is found in the path.
+ *
+ * Example: if path = "/foo/bar/fubar?yo=mama;ye=daddy", and n = 22:
+ *
+ * &path[find_query_string(path, n)] points to "yo=mama;ye=daddy" string
+ */
+static int
+find_query_string_pos(char *path, size_t path_l)
+{
+ size_t i = 0;
+ size_t last = 0;
+
+ while(i < path_l) {
+ if (path[i] == '/') {
+ last = i;
+ }
+ i += 1;
+ }
+
+ /* URL ends with '/' -> no query part */
+ if (last == (path_l-1)) {
+ return -1;
+ } else {
+ i = last;
+ }
+
+ while(i < path_l) {
+ if (path[i] == '?') {
+ break;
+ }
+ i += 1;
+ }
+ if (i < path_l) {
+ return i+1;
+ }
+ return -1;
+}
+
+static int
+is_param_delimiter(char c)
+{
+ return c == '&' || c == ';' || c == ' ';
+}
+
+/*
+ * Given a url parameter, find the starting position of the first occurence
+ * relatively to the query string, or -1 if the parameter is not found
+ *
+ * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
+ * the function will return 8
+ */
+static int
+find_url_param_pos(char* query_string, size_t query_string_l,
+ char* url_param_name, size_t url_param_name_l)
+{
+ int i, j, anchor;
+
+ j = 0;
+ anchor = 0;
+ for (i = 0; i < query_string_l; ++i) {
+ if (query_string[i] == url_param_name[j]) {
+ j += 1;
+ } else {
+ j = 0;
+ anchor = i + 1;
+ }
+ if (j == url_param_name_l) {
+ break;
+ }
+ }
+
+ if (j == url_param_name_l) {
+ if(j >= query_string_l || query_string[anchor+j] != '=') {
+ return -1;
+ }
+ if (anchor == 0) {
+ return 0;
+ } else if (is_param_delimiter(query_string[anchor-1])) {
+ return anchor;
+ }
+ return -1;
+ }
+
+ return -1;
+}
+
+/*
+ * Given a url parameter name, returns its value and size into *value and
+ * *value_l respectively. If the parameter is not found, *value is set to NULL
+ * and value_l is set to 0.
+ */
+static void
+find_url_param_value(char* path, size_t path_l,
+ char* url_param_name, size_t url_param_name_l,
+ char** value, size_t* value_l)
+{
+ char *query_string;
+ char *value_start, *value_end;
+ size_t query_string_l;
+ int anchor;
+
+ anchor = find_query_string_pos(path, path_l);
+ if (anchor < 0) {
+ goto not_found;
+ }
+ query_string = path + anchor;
+ query_string_l = path_l - anchor;
+
+ anchor = find_url_param_pos(query_string, query_string_l,
+ url_param_name, url_param_name_l);
+
+ if (anchor < 0) {
+ goto not_found;
+ } else {
+ value_start = query_string + anchor + url_param_name_l + 1;
+ value_end = value_start;
+ while ((value_end < (query_string + query_string_l - 1))
+ && !is_param_delimiter(*(value_end+1))) {
+ value_end += 1;
+ }
+ *value = value_start;
+ *value_l = value_end + 1 - value_start;
+ }
+ return;
+
+not_found:
+ *value = NULL;
+ value_l = 0;
+ return;
+}
+
+static int
+pattern_fetch_url_param(struct proxy *px, struct session *l4, void *l7, int dir,
+ const struct pattern_arg *arg_p, int arg_i, union pattern_data *data)
+{
+ struct http_txn *txn = l7;
+ struct http_msg *msg = &txn->req;
+ char *path, *url_param_value;
+ size_t path_l, url_param_value_l;
+
+ path = msg->sol + msg->sl.rq.u;
+ path_l = msg->sl.rq.u_l;
+
+ find_url_param_value(path, path_l, arg_p->data.str.str, arg_p->data.str.len,
+ &url_param_value, &url_param_value_l);
+ if (url_param_value == NULL) {
+ return 0;
+ }
+
+ data->str.str = url_param_value;
+ /* url_param_value_l should be enough, but the +1 is necessary to make
+ * this works - could this be a one-off error somewhere else ? */
+ data->str.len = url_param_value_l + 1;
+ chunk_printf(&data->str, "\n");
+ return 1;
+}
/************************************************************************/
@@ -8121,6 +8285,7 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
/* Note: must not be declared <const> as its list will be overwritten */
static struct pattern_fetch_kw_list pattern_fetch_keywords = {{ },{
{ "hdr", pattern_fetch_hdr_ip, pattern_arg_str, PATTERN_TYPE_IP, PATTERN_FETCH_REQ },
+ { "url_param", pattern_fetch_url_param, pattern_arg_str, PATTERN_TYPE_STRING, PATTERN_FETCH_REQ },
{ NULL, NULL, NULL, 0, 0 },
}};