On Mon, Dec 20, 2010 at 3:24 PM, David Cournapeau <[email protected]> wrote:
> On Mon, Dec 20, 2010 at 2:40 PM, Willy Tarreau <[email protected]> wrote:
>> Hi David,
>>
>> On Sat, Dec 18, 2010 at 02:23:33PM +0900, David Cournapeau wrote:
>>> (My apologies if this email appears twice - it seems it did not get
>>> through the first time)
>>>
>>> Hi,
>>>
>>> This patch against git master branch adds a url_param pattern
>>> extraction method to be used with stick tables, e.g.
>>
>> This is a nice feature, thank you ! I remember someone already asked
>> about it.
>>
>>> The patch is low quality: it keeps reallocating the output buffer,
>>
>> The reallocation is indeed not acceptable. Not only it is extremly
>> expensive, it also introduces a risk of memory leak and of crash
>> due to the lack of return value. In fact you don't need to allocate
>> the output, you can simply make use of the returned chunk to put
>> a pointer to the request buffer there and store its length with it.
>
> I don't think I understand the chunk (as passed in data->str) lifecycle:
>  - I noticed that the chunk passed in data->str (inside
> pattern_fetch_url_param function) is always one of two pointers, and I
> don't know how there were allocated (is it ok to change data->str.str
> value or do I create a memory leak) ?
>  - At first, I tried to simply point data->str.str to the parameter
> value + updating data->str.len:
>
>        /* url_param_value points to the beginning of the url
> parameter value, url_param_value_l is the length of the value */
>        data->str.str = url_param_value;
>        data->str.len = url_param_value_l;
>
> But that did not work. The case where it broke is as follows:
>
> curl http://localhost:9999/foo?sess=12345 # all subsequent calls with
> sess=12345 are put to the same backend
> curl http://localhost:9999/foo?sess=123456 # all subsequent calls with
> sess=123456 are put to the same backend
> curl http://localhost:9999/foo?sess=12345 # not put to the same
> backend anymore...
>
> I am not sure I understand why allocating a new string works, though.

Hm, after further inspection, this issue seems to be solved by setting
data->str.len to one more byte than the actual size of the value (e.g.
sess=123 would have a length of 4). Reading the chunk code, it seems
that the len parameter is indeed the string length, not the buffer
length, so I don't understand that one-off issue. Could it be by any
chance an issue somewhere else in the haproxy code ?

I have updated my github branch for other issues, including a minimal
doc on the new parameter,

cheers,

David
diff --git a/doc/configuration.txt b/doc/configuration.txt
index a15b035..dbc29de 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -7770,6 +7770,13 @@ The list of currently supported pattern fetch functions is the following :
                <lengthoffset> + <lengthsize> else it is absolute.
                Ex: see SSL session id  example in "stick table" chapter.
                 
+  url_param(name)
+	       This extracts the first occurrence of the parameter <name> in
+	       the query string of the request and uses the correponding value
+               to match. A typical use is to get sticky session through url (e.g.
+	       http://example.com/foo?JESSIONID=some_id with
+	       url_param(JSESSIONID)), for cases where cookies cannot be used.
+
 The currently available list of transformations include :
 
   lower        Convert a string pattern to lower case. This can only be placed
diff --git a/src/proto_http.c b/src/proto_http.c
index db86769..a879f0b 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -8113,6 +8113,170 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
 	return data->ip.s_addr != 0;
 }
 
+/*
+ * XXX: This code to parse url path to get parameter value is ugly and
+ * inefficient. String matching is brute force, and most likely broken in some
+ * cases
+ */
+
+/*
+ * Given a path string and its length, find the position of beginning of the
+ * query string. Returns -1 if no query string is found in the path.
+ *
+ * Example: if path = "/foo/bar/fubar?yo=mama;ye=daddy", and n = 22:
+ *
+ * &path[find_query_string(path, n)] points to "yo=mama;ye=daddy" string
+ */
+static int
+find_query_string_pos(char *path, size_t path_l)
+{
+	size_t i = 0;
+	size_t last = 0;
+
+	while(i < path_l) {
+		if (path[i] == '/') {
+			last = i;
+		}
+		i += 1;
+	}
+
+	/* URL ends with '/' -> no query part */
+	if (last == (path_l-1)) {
+		return -1;
+	} else {
+		i = last;
+	}
+
+	while(i < path_l) {
+		if (path[i] == '?') {
+			break;
+		}
+		i += 1;
+	}
+	if (i < path_l) {
+		return i+1;
+	}
+	return -1;
+}
+
+static int
+is_param_delimiter(char c)
+{
+	return c == '&' || c == ';' || c == ' ';
+}
+
+/*
+ * Given a url parameter, find the starting position of the first occurence
+ * relatively to the query string, or -1 if the parameter is not found
+ *
+ * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
+ * the function will return 8
+ */
+static int
+find_url_param_pos(char* query_string, size_t query_string_l,
+            char* url_param_name, size_t url_param_name_l)
+{
+	int i, j, anchor;
+
+	j = 0;
+	anchor = 0;
+	for (i = 0; i < query_string_l; ++i) {
+		if (query_string[i] == url_param_name[j]) {
+			j += 1;
+		} else {
+			j = 0;
+			anchor = i + 1;
+		}
+		if (j == url_param_name_l) {
+			break;
+		}
+	}
+
+	if (j == url_param_name_l) {
+		if(j >= query_string_l || query_string[anchor+j] != '=') {
+			return -1;
+		}
+		if (anchor == 0) {
+			return 0;
+		} else if (is_param_delimiter(query_string[anchor-1])) {
+			return anchor;
+		}
+		return -1;
+	}
+
+	return -1;
+}
+
+/*
+ * Given a url parameter name, returns its value and size into *value and
+ * *value_l respectively. If the parameter is not found, *value is set to NULL
+ * and value_l is set to 0.
+ */
+static void
+find_url_param_value(char* path, size_t path_l,
+		      char* url_param_name, size_t url_param_name_l,
+		      char** value, size_t* value_l)
+{
+	char *query_string;
+	char *value_start, *value_end;
+	size_t query_string_l;
+	int anchor;
+
+	anchor = find_query_string_pos(path, path_l);
+	if (anchor < 0) {
+		goto not_found;
+	}
+	query_string = path + anchor;
+	query_string_l = path_l - anchor;
+
+	anchor = find_url_param_pos(query_string, query_string_l,
+			url_param_name, url_param_name_l);
+
+	if (anchor < 0) {
+		goto not_found;
+	} else {
+		value_start = query_string + anchor + url_param_name_l + 1;
+		value_end = value_start;
+		while ((value_end < (query_string + query_string_l - 1))
+		       && !is_param_delimiter(*(value_end+1))) {
+			value_end += 1;
+		}
+		*value = value_start;
+		*value_l = value_end + 1 - value_start;
+	}
+	return;
+
+not_found:
+	*value = NULL;
+	value_l = 0;
+	return;
+}
+
+static int
+pattern_fetch_url_param(struct proxy *px, struct session *l4, void *l7, int dir,
+                     const struct pattern_arg *arg_p, int arg_i, union pattern_data *data)
+{
+	struct http_txn *txn = l7;
+	struct http_msg *msg = &txn->req;
+	char *path, *url_param_value;
+	size_t path_l, url_param_value_l;
+
+	path = msg->sol + msg->sl.rq.u;
+	path_l = msg->sl.rq.u_l;
+
+	find_url_param_value(path, path_l, arg_p->data.str.str, arg_p->data.str.len,
+			&url_param_value, &url_param_value_l);
+	if (url_param_value == NULL) {
+		return 0;
+	}
+
+	data->str.str = url_param_value;
+	/* url_param_value_l should be enough, but the +1 is necessary to make
+	 * this works - could this be a one-off error somewhere else ? */
+	data->str.len = url_param_value_l + 1;
+	chunk_printf(&data->str, "\n");
+	return 1;
+}
 
 
 /************************************************************************/
@@ -8121,6 +8285,7 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
 /* Note: must not be declared <const> as its list will be overwritten */
 static struct pattern_fetch_kw_list pattern_fetch_keywords = {{ },{
 	{ "hdr", pattern_fetch_hdr_ip, pattern_arg_str, PATTERN_TYPE_IP, PATTERN_FETCH_REQ },
+	{ "url_param", pattern_fetch_url_param, pattern_arg_str, PATTERN_TYPE_STRING, PATTERN_FETCH_REQ },
 	{ NULL, NULL, NULL, 0, 0 },
 }};
 

Reply via email to