Having hundreds of HTTP SSL health checks leads to CPU saturation. This patch allows HTTP health checks that have no "http-check expect" directive to keep their connection open for subsequent health checks. It does not affect any TCP check code. ---
Notes: We have a situation where we need to run basic SSL+HTTP health checks on a large number of nodes. Without persistent connections, the CPU usage is much too high. This patch is my attempt at resolving this (tested in our own environment). Since this is my first time mucking around in the HAProxy code, I'm sure some discussion is necessary. I also wasn't sure whether this should be a configuration option, or whether it could be expanded to include HTTP checks with "http-check expect" directives. Since an explicit "Connection: close" header is added for those checks, I excluded them to be on the safe side. Thanks! Steven Davidovitz include/types/checks.h | 1 + src/checks.c | 96 +++++++++++++++++++++++++++++--------------------- 2 files changed, 57 insertions(+), 40 deletions(-) diff --git a/include/types/checks.h b/include/types/checks.h index 283ff3db..0e86a741 100644 --- a/include/types/checks.h +++ b/include/types/checks.h @@ -166,6 +166,7 @@ struct check { short status, code; /* check result, check code */ char desc[HCHK_DESC_LEN]; /* health check description */ int use_ssl; /* use SSL for health checks */ + int use_ssl_persistent; /* use persistent connections for SSL health checks */ int send_proxy; /* send a PROXY protocol header with checks */ struct list *tcpcheck_rules; /* tcp-check send / expect rules */ struct tcpcheck_rule *current_step; /* current step when using tcpcheck */ diff --git a/src/checks.c b/src/checks.c index 49bd886b..c03b7abd 100644 --- a/src/checks.c +++ b/src/checks.c @@ -778,6 +778,7 @@ static void event_srv_chk_w(struct connection *conn) t->expire = tick_add_ifset(now_ms, s->proxy->timeout.check); task_queue(t); } + __conn_sock_want_recv(conn); goto out_nowake; out_wakeup: @@ -1349,14 +1350,16 @@ static void event_srv_chk_r(struct connection *conn) *check->bi->data = '\0'; check->bi->i = 0; - /* Close the connection... 
We absolutely want to perform a hard close - * and reset the connection if some data are pending, otherwise we end - * up with many TIME_WAITs and eat all the source port range quickly. - * To avoid sending RSTs all the time, we first try to drain pending - * data. - */ - __conn_data_stop_both(conn); - conn_data_shutw_hard(conn); + if (conn->flags & CO_FL_ERROR || !check->use_ssl_persistent) { + /* Close the connection... We absolutely want to perform a hard close + * and reset the connection if some data are pending, otherwise we end + * up with many TIME_WAITs and eat all the source port range quickly. + * To avoid sending RSTs all the time, we first try to drain pending + * data. + */ + __conn_data_stop_both(conn); + conn_data_shutw_hard(conn); + } /* OK, let's not stay here forever */ if (check->result == CHK_RES_FAILED) @@ -1398,13 +1401,14 @@ static int wake_srv_chk(struct connection *conn) task_wakeup(check->task, TASK_WOKEN_IO); } - if (check->result != CHK_RES_UNKNOWN) { + if (check->result != CHK_RES_UNKNOWN && ((conn->flags & CO_FL_ERROR) || !check->use_ssl_persistent)) { /* We're here because nobody wants to handle the error, so we * sure want to abort the hard way. 
*/ conn_sock_drain(conn); conn_force_close(conn); } + return 0; } @@ -1465,7 +1469,6 @@ static int connect_conn_chk(struct task *t) struct check *check = t->context; struct server *s = check->server; struct connection *conn = check->conn; - struct protocol *proto; int ret; int quickack; @@ -1505,33 +1508,34 @@ static int connect_conn_chk(struct task *t) bo_putblk(check->bo, check->send_string, check->send_string_len); } - /* prepare a new connection */ - conn_init(conn); + if (!conn_xprt_ready(conn)) { + /* prepare a new connection */ + conn_init(conn); - if (is_addr(&check->addr)) { - /* we'll connect to the check addr specified on the server */ - conn->addr.to = check->addr; - } - else { - /* we'll connect to the addr on the server */ - conn->addr.to = s->addr; - } + if (is_addr(&check->addr)) { + /* we'll connect to the check addr specified on the server */ + conn->addr.to = check->addr; + } + else { + /* we'll connect to the addr on the server */ + conn->addr.to = s->addr; + } - if ((conn->addr.to.ss_family == AF_INET) || (conn->addr.to.ss_family == AF_INET6)) { - int i = 0; + if ((conn->addr.to.ss_family == AF_INET) || (conn->addr.to.ss_family == AF_INET6)) { + int i = 0; - i = srv_check_healthcheck_port(check); - if (i == 0) { - conn->owner = check; - return SF_ERR_CHK_PORT; + i = srv_check_healthcheck_port(check); + if (i == 0) { + conn->owner = check; + return SF_ERR_CHK_PORT; + } + + set_host_port(&conn->addr.to, i); } - set_host_port(&conn->addr.to, i); + conn_prepare(conn, protocol_by_family(conn->addr.to.ss_family), check->xprt); } - proto = protocol_by_family(conn->addr.to.ss_family); - - conn_prepare(conn, proto, check->xprt); conn_attach(conn, check, &check_conn_cb); conn->target = &s->obj_type; @@ -1555,15 +1559,21 @@ static int connect_conn_chk(struct task *t) quickack = 0; } - ret = SF_ERR_INTERNAL; - if (proto->connect) - ret = proto->connect(conn, check->type, quickack ? 
2 : 0); - conn->flags |= CO_FL_WAKE_DATA; - if (s->check.send_proxy) { - conn->send_proxy_ofs = 1; - conn->flags |= CO_FL_SEND_PROXY; + ret = SF_ERR_NONE; + + if (!conn_ctrl_ready(conn) || !conn_xprt_ready(conn)) { + ret = conn->ctrl->connect(conn, check->type, quickack ? 2 : 0); + + /* we need to be notified about connection establishment */ + conn->flags |= CO_FL_WAKE_DATA; + + if (s->check.send_proxy) { + conn->send_proxy_ofs = 1; + conn->flags |= CO_FL_SEND_PROXY; + } } + return ret; } @@ -2100,8 +2110,13 @@ static struct task *process_chk_conn(struct task *t) t->expire = tick_first(t->expire, t_con); } - if (check->type) - conn_data_want_recv(conn); /* prepare for reading a possible reply */ + if (check->type) { + if (conn->flags & CO_FL_CONNECTED) + conn_data_want_send(conn); + else + conn_data_want_recv(conn); /* prepare for reading a possible reply */ + } + goto reschedule; @@ -2161,7 +2176,7 @@ static struct task *process_chk_conn(struct task *t) } /* check complete or aborted */ - if (conn->xprt) { + if (conn->xprt && ((conn->flags & CO_FL_ERROR) || !check->use_ssl_persistent)) { /* The check was aborted and the connection was not yet closed. * This can happen upon timeout, or when an external event such * as a failed response coupled with "observe layer7" caused the @@ -3425,6 +3440,7 @@ int srv_check_healthcheck_port(struct check *chk) */ if (!chk->port && !is_addr(&chk->addr)) { chk->use_ssl |= (srv->use_ssl || (srv->proxy->options & PR_O_TCPCHK_SSL)); + chk->use_ssl_persistent |= (chk->use_ssl && !(srv->proxy->options2 & PR_O2_EXP_TYPE)); chk->send_proxy |= (srv->pp_opts); } -- 2.11.1