Hi,
Here's a resend of my previous patch, incorporating some changes suggested by
Willy. As my mailer ate my patches last time around, and I'm not completely
sure that it won't do it again, I'm attaching them to this email as well as
sending them inline in the following emails.
Thanks,
Andrew
>From faaef5c05fd829579dc71c2f3d91a165a3d17a3b Mon Sep 17 00:00:00 2001
From: Andrew Rodland <[email protected]>
Date: Tue, 20 Sep 2016 13:40:00 -0400
Subject: [PATCH 1/4] MINOR: proxy: add 'served' field to proxy, equal to total
of all servers'
X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4
This will allow lb_chash to determine the total active sessions for a
proxy without any computation.
Signed-off-by: Andrew Rodland <[email protected]>
---
include/types/proxy.h | 1 +
src/queue.c | 1 +
src/stream.c | 2 ++
3 files changed, 4 insertions(+)
diff --git a/include/types/proxy.h b/include/types/proxy.h
index 2f4f9b9..028b3a7 100644
--- a/include/types/proxy.h
+++ b/include/types/proxy.h
@@ -277,6 +277,7 @@ struct proxy {
} tcp_rep;
struct server *srv, defsrv; /* known servers; default server configuration */
int srv_act, srv_bck; /* # of servers eligible for LB (UP|!checked) AND (enabled+weight!=0) */
+ int served; /* # of active sessions currently being served */
struct lbprm lbprm; /* load-balancing parameters */
char *cookie_domain; /* domain used to insert the cookie */
char *cookie_name; /* name of the cookie to look for */
diff --git a/src/queue.c b/src/queue.c
index 1f27c49..08a6c3d 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -126,6 +126,7 @@ struct stream *pendconn_get_next_strm(struct server *srv, struct proxy *px)
strm->target = &srv->obj_type;
stream_add_srv_conn(strm, srv);
srv->served++;
+ srv->proxy->served++;
if (px->lbprm.server_take_conn)
px->lbprm.server_take_conn(srv);
diff --git a/src/stream.c b/src/stream.c
index 151bcb0..738a23c 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -2515,6 +2515,7 @@ void sess_change_server(struct stream *sess, struct server *newsrv)
if (sess->srv_conn) {
sess->srv_conn->served--;
+ sess->srv_conn->proxy->served--;
if (sess->srv_conn->proxy->lbprm.server_drop_conn)
sess->srv_conn->proxy->lbprm.server_drop_conn(sess->srv_conn);
stream_del_srv_conn(sess);
@@ -2522,6 +2523,7 @@ void sess_change_server(struct stream *sess, struct server *newsrv)
if (newsrv) {
newsrv->served++;
+ newsrv->proxy->served++;
if (newsrv->proxy->lbprm.server_take_conn)
newsrv->proxy->lbprm.server_take_conn(newsrv);
stream_add_srv_conn(sess, newsrv);
--
2.9.3
>From 799186e5bcd94befabb18f84bc4500fb6cca07d5 Mon Sep 17 00:00:00 2001
From: Andrew Rodland <[email protected]>
Date: Tue, 20 Sep 2016 13:41:44 -0400
Subject: [PATCH 2/4] MINOR: backend: add hash-balance-factor option for
hash-type consistent
X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4
0 will mean no balancing occurs; otherwise it represents the ratio
between the highest-loaded server and the average load, times 100 (i.e.
a value of 150 means a 1.5x ratio), assuming equal weights.
Signed-off-by: Andrew Rodland <[email protected]>
---
doc/configuration.txt | 28 +++++++++++++++++++++++++++-
include/types/lb_chash.h | 1 +
src/cfgparse.c | 15 +++++++++++++++
3 files changed, 43 insertions(+), 1 deletion(-)
diff --git a/doc/configuration.txt b/doc/configuration.txt
index d047524..a93c103 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -2205,6 +2205,32 @@ balance url_param <param> [check_post]
See also : "dispatch", "cookie", "transparent", "hash-type" and "http_proxy".
+balance-factor <factor>
+ Specify the balancing factor for bounded-load consistent hashing
+ May be used in sections : defaults | frontend | listen | backend
+ yes | no | no | yes
+ Arguments :
+ <factor> is the control for the maximum number of concurrent requests to
+ send to a server, expressed as a percentage of the average number
+ of concurrent requests across all of the active servers.
+
+ Specifying a "balance-factor" for a server with "hash-type consistent"
+ enables an algorithm that prevents any one server from getting too many
+ requests at once, even if some hash buckets receive many more requests than
+ others. Setting <factor> to 0 (the default) disables the feature. Otherwise,
+ <factor> is a percentage greater than 100. For example, if <factor> is 150,
+ then no server will be allowed to have a load more than 1.5 times the average.
+ If server weights are used, they will be respected.
+
+ If the first-choice server is disqualified, the algorithm will choose another
+ server based on the request hash, until a server with additional capacity is
+ found. A higher <factor> allows more imbalance between the servers, while a
+ lower <factor> means that more servers will be checked on average, affecting
+ performance. Reasonable values are from 125 to 200.
+
+ See also : "balance" and "hash-type".
+
+
bind [<address>]:<port_range> [, ...] [param*]
bind /<path> [, ...] [param*]
Define one or several listening addresses and/or ports in a frontend.
@@ -3358,7 +3384,7 @@ hash-type <method> <function> <modifier>
default function is "sdbm", the selection of a function should be based on
the range of the values being hashed.
- See also : "balance", "server"
+ See also : "balance", "balance-factor", "server"
http-check disable-on-404
diff --git a/include/types/lb_chash.h b/include/types/lb_chash.h
index 5991ce9..b711636 100644
--- a/include/types/lb_chash.h
+++ b/include/types/lb_chash.h
@@ -30,6 +30,7 @@ struct lb_chash {
struct eb_root act; /* weighted chash entries of active servers */
struct eb_root bck; /* weighted chash entries of backup servers */
struct eb32_node *last; /* last node found in case of round robin (or NULL) */
+ int balance_factor; /* load balancing factor * 100, 0 if disabled */
};
#endif /* _TYPES_LB_CHASH_H */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index cc2f507..229b3ce 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -1983,6 +1983,7 @@ void init_default_instance()
defproxy.maxconn = cfg_maxpconn;
defproxy.conn_retries = CONN_RETRIES;
defproxy.redispatch_after = 0;
+ defproxy.lbprm.chash.balance_factor = 0;
defproxy.defsrv.check.inter = DEF_CHKINTR;
defproxy.defsrv.check.fastinter = 0;
@@ -2825,6 +2826,7 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
if (curproxy->cap & PR_CAP_BE) {
curproxy->lbprm.algo = defproxy.lbprm.algo;
+ curproxy->lbprm.chash.balance_factor = defproxy.lbprm.chash.balance_factor;
curproxy->fullconn = defproxy.fullconn;
curproxy->conn_retries = defproxy.conn_retries;
curproxy->redispatch_after = defproxy.redispatch_after;
@@ -5958,6 +5960,19 @@ stats_error_parsing:
}
}
}
+ else if (strcmp(args[0], "hash-balance-factor") == 0) {
+ if (*(args[1]) == 0) {
+ Alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ curproxy->lbprm.chash.balance_factor = atol(args[1]);
+ if (curproxy->lbprm.chash.balance_factor != 0 && curproxy->lbprm.chash.balance_factor <= 100) {
+ Alert("parsing [%s:%d] : '%s' must be 0 or greater than 100.\n", file, linenum, args[0]);
+ err_code |= ERR_ALERT | ERR_FATAL;
+ goto out;
+ }
+ }
else if (strcmp(args[0], "unique-id-format") == 0) {
if (!*(args[1])) {
Alert("parsing [%s:%d] : %s expects an argument.\n", file, linenum, args[0]);
--
2.9.3
>From 44997ad8dc4f6d64944e7c2fb57e41fa8da674c6 Mon Sep 17 00:00:00 2001
From: Andrew Rodland <[email protected]>
Date: Tue, 20 Sep 2016 13:41:56 -0400
Subject: [PATCH 3/4] MINOR: server: compute a "cumulative weight" to allow
chash balancing to hit its target
X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4
For active servers, this is the sum of the eweights of all active
servers before this one in the backend, and
[srv->cumulative_weight .. srv_cumulative_weight + srv_eweight) is a
space occupied by this server in the range [0 .. lbprm.tot_wact), and
likewise for backup servers with tot_wbck. This allows choosing a
server or a range of servers proportional to their weight, by simple
integer comparison.
Signed-off-by: Andrew Rodland <[email protected]>
---
include/types/server.h | 1 +
src/backend.c | 2 ++
2 files changed, 3 insertions(+)
diff --git a/include/types/server.h b/include/types/server.h
index ce13820..5d89212 100644
--- a/include/types/server.h
+++ b/include/types/server.h
@@ -203,6 +203,7 @@ struct server {
unsigned wscore; /* weight score, used during srv map computation */
unsigned prev_eweight; /* eweight before last change */
unsigned rweight; /* remainer of weight in the current LB tree */
+ unsigned cumulative_weight; /* weight of servers prior to this one in the same group, for chash balancing */
unsigned npos, lpos; /* next and last positions in the LB tree */
struct eb32_node lb_node; /* node used for tree-based load balancing */
struct eb_root *lb_tree; /* we want to know in what tree the server is */
diff --git a/src/backend.c b/src/backend.c
index faf872c..573f054 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -115,9 +115,11 @@ void recount_servers(struct proxy *px)
!(px->options & PR_O_USE_ALL_BK))
px->lbprm.fbck = srv;
px->srv_bck++;
+ srv->cumulative_weight = px->lbprm.tot_wbck;
px->lbprm.tot_wbck += srv->eweight;
} else {
px->srv_act++;
+ srv->cumulative_weight = px->lbprm.tot_wact;
px->lbprm.tot_wact += srv->eweight;
}
}
--
2.9.3
>From d3ba5a7e05e2a4e042b87f9b5e63efdbceb9a742 Mon Sep 17 00:00:00 2001
From: Andrew Rodland <[email protected]>
Date: Tue, 20 Sep 2016 13:43:03 -0400
Subject: [PATCH 4/4] MEDIUM: server: Implement bounded-load hash algorithm
X-Bogosity: Ham, tests=bogofilter, spamicity=0.000000, version=1.2.4
The consistent hash lookup is done as normal, then if balancing is
enabled, we progress through the hash ring until we find a server that
doesn't have "too much" load. In the case of equal weights for all
servers, the allowed number of requests for a server is either the
floor or the ceil of (num_requests * hash-balance-factor / num_servers);
with unequal weights things are somewhat more complicated, but the
spirit is the same -- a server should not be able to go too far above
(its relative weight times) the average load. Using the hash ring to
make the second/third/etc. choice maintains as much locality as
possible given the load limit.
Signed-off-by: Andrew Rodland <[email protected]>
---
src/lb_chash.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 46 insertions(+), 1 deletion(-)
diff --git a/src/lb_chash.c b/src/lb_chash.c
index a62dfb5..84a2ef3 100644
--- a/src/lb_chash.c
+++ b/src/lb_chash.c
@@ -242,6 +242,34 @@ static void chash_update_server_weight(struct server *srv)
}
/*
+ * This function implements the "Consistent Hashing with Bounded Loads" algorithm
+ * of Mirrokni, Thorup, and Zadimoghaddam (arxiv:1608.01350), adapted for use with
+ * unequal server weights.
+ */
+int chash_server_is_eligible(struct server *s)
+{
+ /* The total number of slots to allocate is the total number of outstanding requests
+ * (including the one we're about to make) times the load-balance-factor, rounded up.
+ */
+ unsigned tot_slots = ((s->proxy->served + 1) * s->proxy->lbprm.chash.balance_factor + 99) / 100;
+ unsigned slots_per_weight = tot_slots / s->proxy->lbprm.tot_weight;
+ unsigned remainder = tot_slots % s->proxy->lbprm.tot_weight;
+
+ /* Allocate a whole number of slots per weight unit... */
+ unsigned slots = s->eweight * slots_per_weight;
+
+ /* And then distribute the rest among servers proportionally to their weight. */
+ slots += ((s->cumulative_weight + s->eweight) * remainder) / s->proxy->lbprm.tot_weight
+ - (s->cumulative_weight * remainder) / s->proxy->lbprm.tot_weight;
+
+ /* But never leave a server with 0. */
+ if (slots == 0)
+ slots = 1;
+
+ return s->served < slots;
+}
+
+/*
* This function returns the running server from the CHASH tree, which is at
* the closest distance from the value of <hash>. Doing so ensures that even
* with a well imbalanced hash, if some servers are close to each other, they
@@ -254,6 +282,7 @@ struct server *chash_get_server_hash(struct proxy *p, unsigned int hash)
struct server *nsrv, *psrv;
struct eb_root *root;
unsigned int dn, dp;
+ int loop;
if (p->srv_act)
root = &p->lbprm.chash.act;
@@ -287,7 +316,23 @@ struct server *chash_get_server_hash(struct proxy *p, unsigned int hash)
dp = hash - prev->key;
dn = next->key - hash;
- return (dp <= dn) ? psrv : nsrv;
+ if (dp <= dn) {
+ next = prev;
+ nsrv = psrv;
+ }
+
+ loop = 0;
+ while (p->lbprm.chash.balance_factor && !chash_server_is_eligible(nsrv)) {
+ next = eb32_next(next);
+ if (!next) {
+ next = eb32_first(root);
+ if (++loop > 1) // protection against accidental loop
+ break;
+ }
+ nsrv = eb32_entry(next, struct tree_occ, node)->server;
+ }
+
+ return nsrv;
}
/* Return next server from the CHASH tree in backend <p>. If the tree is empty,
--
2.9.3