(My apologies if this email appears twice - it seems it did not get
through the first time)
Hi,
This patch against git master branch adds a url_param pattern
extraction method to be used with stick tables, e.g.
backend default_farm
mode http
balance roundrobin
stick-table type string len 50 size 20k
stick on url_param(sess)
will match on the value of sess query parameter.
The patch is low quality: it keeps reallocating the output buffer,
string search is brute force, and it should reuse haproxy existing
code to get parameter value if possible (I did not find it). It also
only considers the first occurrence of the parameter (e.g. with the
config above, and a query string like
"/foo?sess=1234&bar=fubar&sess=4321", it will only consider the value
1234).
I have tested it on a simple configuration with two haproxy instances,
and under valgrind to check for memory leak/out of bounds errors.
Github reference for review/online viewing:
https://github.com/cournape/haproxy/tree/stick_table_url_param_support
https://github.com/cournape/haproxy/compare/master...stick_table_url_param_support
cheers,
David
diff --git a/src/proto_http.c b/src/proto_http.c
index db86769..d97ebeb 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -8113,6 +8113,172 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
return data->ip.s_addr != 0;
}
+/*
+ * XXX: This code to parse url path to get parameter value is ugly and
+ * inefficient. String matching is brute force, and most likely broken in some
+ * cases
+ */
+
+/*
+ * Locate the query string inside a request URI.
+ *
+ * <path> is the URI (it does not need to be NUL-terminated) and <path_l> is
+ * its length in bytes. The query string starts right after the first '?':
+ * any '/' or '?' found later is part of the query itself, so the search must
+ * not be restricted to the last path segment.
+ *
+ * Returns the offset of the first character of the query string, or -1 if
+ * the URI contains no '?'. The returned offset equals <path_l> when the URI
+ * ends with '?' (empty query string).
+ *
+ * Example: if path = "/foo/bar/fubar?yo=mama;ye=daddy" and path_l = 31, the
+ * function returns 15 and &path[15] points to "yo=mama;ye=daddy".
+ */
+static int
+_find_query_string_pos(char *path, size_t path_l)
+{
+    size_t i;
+
+    for (i = 0; i < path_l; i++) {
+        if (path[i] == '?') {
+            return (int)(i + 1);
+        }
+    }
+    return -1;
+}
+
+/*
+ * Tell whether <c> terminates a "name=value" pair in a query string.
+ * Returns 1 for '&', ';' and ' ', and 0 for any other character.
+ */
+static int
+_is_param_delimiter(char c)
+{
+    switch (c) {
+    case '&':
+    case ';':
+    case ' ':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Find the first occurrence of the parameter <url_param_name> (length
+ * <url_param_name_l>) in <query_string> (length <query_string_l>, not
+ * necessarily NUL-terminated).
+ *
+ * A parameter only matches when its name starts at the beginning of the query
+ * string or right after a delimiter (see _is_param_delimiter()), and is
+ * immediately followed by '='. Every parameter is examined in turn, so a
+ * partial match such as "xsess=1" or "a=sess" does not hide a later
+ * "sess=2".
+ *
+ * Returns the offset of the first character of the name relative to
+ * <query_string>, or -1 if the parameter is not found or if the name is empty.
+ *
+ * Example: if query_string is "yo=mama;ye=daddy" and url_param_name is "ye",
+ * the function returns 8.
+ */
+static int
+_find_url_param_pos(char* query_string, size_t query_string_l,
+                    char* url_param_name, size_t url_param_name_l)
+{
+    size_t start = 0;
+
+    if (url_param_name_l == 0) {
+        return -1;
+    }
+
+    while (start < query_string_l) {
+        size_t k = 0;
+
+        /* compare the name against the parameter beginning at <start>,
+         * never reading past the end of the query string */
+        while (k < url_param_name_l && start + k < query_string_l &&
+               query_string[start + k] == url_param_name[k]) {
+            k++;
+        }
+        if (k == url_param_name_l && start + k < query_string_l &&
+            query_string[start + k] == '=') {
+            return (int)start;
+        }
+
+        /* no match here: move to the character following the next
+         * delimiter, which is where the next parameter begins */
+        while (start < query_string_l &&
+               !_is_param_delimiter(query_string[start])) {
+            start++;
+        }
+        start++;
+    }
+
+    return -1;
+}
+
+/*
+ * Look up the first occurrence of the parameter <url_param_name> (length
+ * <url_param_name_l>) in the query string of the URI <path> (length
+ * <path_l>).
+ *
+ * On success, *value points inside <path> at the first character following
+ * the '=' of the parameter, and *value_l is the number of bytes up to (but
+ * not including) the next parameter delimiter or the end of the URI. The
+ * value is not copied and not NUL-terminated; an empty value yields a
+ * non-NULL *value with *value_l == 0.
+ *
+ * If the URI has no query string or the parameter is not found, *value is
+ * set to NULL and *value_l is set to 0.
+ */
+static void
+_find_url_param_value(char* path, size_t path_l,
+                      char* url_param_name, size_t url_param_name_l,
+                      char** value, size_t* value_l)
+{
+    char *query_string, *query_end;
+    char *value_start, *value_end;
+    size_t query_string_l, value_off;
+    int anchor;
+
+    /* report "not found" unless a value is located below */
+    *value = NULL;
+    *value_l = 0;
+
+    anchor = _find_query_string_pos(path, path_l);
+    if (anchor < 0) {
+        return;
+    }
+    query_string = path + anchor;
+    query_string_l = path_l - (size_t)anchor;
+
+    anchor = _find_url_param_pos(query_string, query_string_l,
+                                 url_param_name, url_param_name_l);
+    if (anchor < 0) {
+        return;
+    }
+
+    /* the value starts after the name and the '=' that follows it; refuse
+     * an offset that would land beyond the end of the query string */
+    value_off = (size_t)anchor + url_param_name_l + 1;
+    if (value_off > query_string_l) {
+        return;
+    }
+
+    query_end = query_string + query_string_l;
+    value_start = query_string + value_off;
+    value_end = value_start;
+    while (value_end < query_end && !_is_param_delimiter(*value_end)) {
+        value_end++;
+    }
+
+    *value = value_start;
+    *value_l = (size_t)(value_end - value_start);
+}
+
+/*
+ * Pattern fetch function registered under the "url_param" keyword.
+ *
+ * Looks up the parameter whose name is given by <arg_p>->data.str in the part
+ * of the request message described by msg->sl.rq.u / msg->sl.rq.u_l
+ * (presumably the request URI - confirm against struct http_msg), and stores
+ * a heap-allocated, NUL-terminated copy of its value in <data>->str.
+ *
+ * Returns 1 when the parameter was found and copied, 0 when it is absent or
+ * when the copy could not be allocated.
+ */
+static int
+pattern_fetch_url_param(struct proxy *px, struct session *l4, void *l7, int dir,
+                        const struct pattern_arg *arg_p, int arg_i, union pattern_data *data)
+{
+    struct http_txn *txn = l7;
+    struct http_msg *msg = &txn->req;
+    char *path, *url_param_value;
+    size_t path_l, url_param_value_l;
+    char *buf;
+
+    path = msg->sol + msg->sl.rq.u;
+    path_l = msg->sl.rq.u_l;
+
+    _find_url_param_value(path, path_l, arg_p->data.str.str, arg_p->data.str.len,
+                          &url_param_value, &url_param_value_l);
+    if (url_param_value == NULL) {
+        return 0;
+    }
+
+    /* XXX: not sure I understand chunk buffer lifecycle - here we keep
+     * reallocating the buffer, which sounds quite inefficient. Not sure it
+     * is even correct - not copying the string definitely does not work */
+    /* NOTE(review): this assumes data->str already holds a valid chunk that
+     * chunk_destroy() may release - confirm against the callers */
+    chunk_destroy(&data->str);
+    buf = strndup(url_param_value, url_param_value_l);
+    if (buf == NULL) {
+        /* allocation failed: report "no pattern" rather than publishing a
+         * chunk with a NULL buffer and a non-zero length */
+        return 0;
+    }
+    /* the extra byte in the size argument is the NUL appended by strndup() */
+    chunk_initlen(&data->str, buf, url_param_value_l + 1, url_param_value_l);
+    return 1;
+}
/************************************************************************/
@@ -8121,6 +8287,7 @@ pattern_fetch_hdr_ip(struct proxy *px, struct session *l4, void *l7, int dir,
/* Note: must not be declared <const> as its list will be overwritten */
static struct pattern_fetch_kw_list pattern_fetch_keywords = {{ },{
{ "hdr", pattern_fetch_hdr_ip, pattern_arg_str, PATTERN_TYPE_IP, PATTERN_FETCH_REQ },
+ { "url_param", pattern_fetch_url_param, pattern_arg_str, PATTERN_TYPE_STRING, PATTERN_FETCH_REQ }, /* url_param(<name>): handled by pattern_fetch_url_param() */
{ NULL, NULL, NULL, 0, 0 },
}};