>From 1cff8685e1ce210979dc1afc516f504fe8e67b3f Mon Sep 17 00:00:00 2001 From: Krzysztof Piotr Oledzki <o...@ans.pl> Date: Sat, 10 Oct 2009 21:06:49 +0200 Subject: [MINOR] Capture & display more data from health checks, v2
Capture & display more data from health checks, like strerror(errno) for L4 failed checks or a first line from a response for L7 successes/failed checks. Non ascii or control characters are masked with chunk_htmlencode() (html stats) or chunk_asciiencode() (logs). --- include/common/defaults.h | 5 +++ include/types/server.h | 1 + src/checks.c | 86 ++++++++++++++++++++++++++++++++------------- src/dumpstats.c | 15 +++++++- 4 files changed, 80 insertions(+), 27 deletions(-) diff --git a/include/common/defaults.h b/include/common/defaults.h index c09f5a4..b0aee86 100644 --- a/include/common/defaults.h +++ b/include/common/defaults.h @@ -174,4 +174,9 @@ #define MAX_HOSTNAME_LEN 32 #endif +/* Maximum health check description length */ +#ifndef HCHK_DESC_LEN +#define HCHK_DESC_LEN 128 +#endif + #endif /* _COMMON_DEFAULTS_H */ diff --git a/include/types/server.h b/include/types/server.h index dae1a71..9971da2 100644 --- a/include/types/server.h +++ b/include/types/server.h @@ -127,6 +127,7 @@ struct server { struct timeval check_start; /* last health check start time */ unsigned long check_duration; /* time in ms took to finish last health check */ short check_status, check_code; /* check result, check code */ + char check_desc[HCHK_DESC_LEN]; /* healt check descritpion */ struct freq_ctr sess_per_sec; /* sessions per second on this server */ int puid; /* proxy-unique server ID, used for SNMP */ diff --git a/src/checks.c b/src/checks.c index a21386f..0e9aca5 100644 --- a/src/checks.c +++ b/src/checks.c @@ -125,6 +125,17 @@ static void server_status_printf(struct chunk *msg, struct server *s, unsigned o if (s->check_status >= HCHK_STATUS_L57DATA) chunk_printf(msg, ", code: %d", s->check_code); + if (*s->check_desc) { + struct chunk src; + + chunk_printf(msg, ", info: \""); + + chunk_initlen(&src, s->check_desc, 0, strlen(s->check_desc)); + chunk_asciiencode(msg, &src, '"'); + + chunk_printf(msg, "\""); + } + chunk_printf(msg, ", check duration: %lums", s->check_duration); } @@ -151,12 +162,13 @@ static void server_status_printf(struct chunk *msg, struct server *s, unsigned o * Show information in logs about failed health check if server is UP * or succeeded health checks if server is DOWN. */ -static void set_server_check_status(struct server *s, short status) { +static void set_server_check_status(struct server *s, short status, char *desc) { struct chunk msg; if (status == HCHK_STATUS_START) { s->result = SRV_CHK_UNKNOWN; /* no result yet */ + s->check_desc[0] = '\0'; s->check_start = now; return; } @@ -164,6 +176,12 @@ static void set_server_check_status(struct server *s, short status) { if (!s->check_status) return; + if (desc && *desc) { + strncpy(s->check_desc, desc, HCHK_DESC_LEN-1); + s->check_desc[HCHK_DESC_LEN-1] = '\0'; + } else + s->check_desc[0] = '\0'; + s->check_status = status; if (check_statuses[status].result) s->result |= check_statuses[status].result; @@ -503,7 +521,7 @@ static int event_srv_chk_w(int fd) //fprintf(stderr, "event_srv_chk_w, state=%ld\n", unlikely(fdtab[fd].state)); if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))) { - set_server_check_status(s, HCHK_STATUS_L4CON); + set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno)); goto out_error; } @@ -539,11 +557,11 @@ static int event_srv_chk_w(int fd) switch (errno) { case ECONNREFUSED: case ENETUNREACH: - set_server_check_status(s, HCHK_STATUS_L4CON); + set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno)); break; default: - set_server_check_status(s, HCHK_STATUS_SOCKERR); + set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno)); } goto out_error; @@ -572,12 +590,12 @@ static int event_srv_chk_w(int fd) goto out_poll; if (errno && errno != EISCONN) { - set_server_check_status(s, HCHK_STATUS_L4CON); + set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno)); goto out_error; } /* good TCP connection is enough */ - set_server_check_status(s, HCHK_STATUS_L4OK); + set_server_check_status(s, HCHK_STATUS_L4OK, NULL); goto out_wakeup; } } @@ -621,6 +639,7 @@ static int event_srv_chk_r(int fd) struct task *t = fdtab[fd].owner; struct server *s = t->context; int skerr; + char *desc; socklen_t lskerr = sizeof(skerr); len = -1; @@ -633,7 +652,7 @@ static int event_srv_chk_r(int fd) /* in case of TCP only, this tells us if the connection failed */ if (!(s->result & SRV_CHK_ERROR)) - set_server_check_status(s, HCHK_STATUS_SOCKERR); + set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL); goto out_wakeup; } @@ -649,6 +668,11 @@ static int event_srv_chk_r(int fd) return 0; } + if (len < sizeof(trash)) + trash[len] = '\0'; + else + trash[len-1] = '\0'; + /* Note: the response will only be accepted if read at once */ if (s->proxy->options & PR_O_HTTP_CHK) { /* Check if the server speaks HTTP 1.X */ @@ -656,50 +680,62 @@ static int event_srv_chk_r(int fd) (memcmp(trash, "HTTP/1.", 7) != 0 || (trash[12] != ' ' && trash[12] != '\r')) || !isdigit(trash[9]) || !isdigit(trash[10]) || !isdigit(trash[11])) { - set_server_check_status(s, HCHK_STATUS_L7RSP); + + cut_crlf(trash); + set_server_check_status(s, HCHK_STATUS_L7RSP, trash); + goto out_wakeup; } s->check_code = str2uic(&trash[9]); + desc = ltrim(&trash[12], ' '); + cut_crlf(desc); + /* check the reply : HTTP/1.X 2xx and 3xx are OK */ if (trash[9] == '2' || trash[9] == '3') - set_server_check_status(s, HCHK_STATUS_L7OKD); + set_server_check_status(s, HCHK_STATUS_L7OKD, desc); else if ((s->proxy->options & PR_O_DISABLE404) && (s->state & SRV_RUNNING) && (s->check_code == 404)) /* 404 may be accepted as "stopping" only if the server was up */ - set_server_check_status(s, HCHK_STATUS_L7OKCD); + set_server_check_status(s, HCHK_STATUS_L7OKCD, desc); else - set_server_check_status(s, HCHK_STATUS_L7STS); + set_server_check_status(s, HCHK_STATUS_L7STS, desc); } else if (s->proxy->options & PR_O_SSL3_CHK) { /* Check for SSLv3 alert or handshake */ if ((len >= 5) && (trash[0] == 0x15 || trash[0] == 0x16)) - set_server_check_status(s, HCHK_STATUS_L6OK); + set_server_check_status(s, HCHK_STATUS_L6OK, NULL); else - set_server_check_status(s, HCHK_STATUS_L6RSP); + set_server_check_status(s, HCHK_STATUS_L6RSP, NULL); } else if (s->proxy->options & PR_O_SMTP_CHK) { /* Check if the server speaks SMTP */ if ((len < strlen("000\r")) || (trash[3] != ' ' && trash[3] != '\r') || !isdigit(trash[0]) || !isdigit(trash[1]) || !isdigit(trash[2])) { - set_server_check_status(s, HCHK_STATUS_L7RSP); + + cut_crlf(trash); + set_server_check_status(s, HCHK_STATUS_L7RSP, trash); + goto out_wakeup; } s->check_code = str2uic(&trash[0]); + desc = ltrim(&trash[3], ' '); + cut_crlf(desc); + /* Check for SMTP code 2xx (should be 250) */ if (trash[0] == '2') - set_server_check_status(s, HCHK_STATUS_L7OKD); + set_server_check_status(s, HCHK_STATUS_L7OKD, desc); else - set_server_check_status(s, HCHK_STATUS_L7STS); + set_server_check_status(s, HCHK_STATUS_L7STS, desc); } else { /* other checks are valid if the connection succeeded anyway */ - set_server_check_status(s, HCHK_STATUS_L4OK); + set_server_check_status(s, HCHK_STATUS_L4OK, NULL); } out_wakeup: @@ -749,7 +785,7 @@ struct task *process_chk(struct task *t) } /* we'll initiate a new check */ - set_server_check_status(s, HCHK_STATUS_START); + set_server_check_status(s, HCHK_STATUS_START, NULL); if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) != -1) { if ((fd < global.maxsock) && (fcntl(fd, F_SETFL, O_NONBLOCK) != -1) && @@ -824,7 +860,7 @@ struct task *process_chk(struct task *t) } if (ret) { - set_server_check_status(s, HCHK_STATUS_SOCKERR); + set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL); switch (ret) { case 1: Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n", @@ -855,7 +891,7 @@ struct task *process_chk(struct task *t) #endif ret = tcpv4_bind_socket(fd, flags, &s->proxy->source_addr, remote); if (ret) { - set_server_check_status(s, HCHK_STATUS_SOCKERR); + set_server_check_status(s, HCHK_STATUS_SOCKERR, NULL); switch (ret) { case 1: Alert("Cannot bind to source address before connect() for %s '%s'. Aborting.\n", @@ -918,11 +954,11 @@ struct task *process_chk(struct task *t) /* FIXME: is it possible to get ECONNREFUSED/ENETUNREACH with O_NONBLOCK? */ case ECONNREFUSED: case ENETUNREACH: - set_server_check_status(s, HCHK_STATUS_L4CON); + set_server_check_status(s, HCHK_STATUS_L4CON, strerror(errno)); break; default: - set_server_check_status(s, HCHK_STATUS_SOCKERR); + set_server_check_status(s, HCHK_STATUS_SOCKERR, strerror(errno)); } } } @@ -1020,12 +1056,12 @@ struct task *process_chk(struct task *t) else if ((s->result & SRV_CHK_ERROR) || tick_is_expired(t->expire, now_ms)) { if (!(s->result & SRV_CHK_ERROR)) { if (!EV_FD_ISSET(fd, DIR_RD)) { - set_server_check_status(s, HCHK_STATUS_L4TOUT); + set_server_check_status(s, HCHK_STATUS_L4TOUT, NULL); } else { if (s->proxy->options & PR_O_SSL3_CHK) - set_server_check_status(s, HCHK_STATUS_L6TOUT); + set_server_check_status(s, HCHK_STATUS_L6TOUT, NULL); else /* HTTP, SMTP */ - set_server_check_status(s, HCHK_STATUS_L7TOUT); + set_server_check_status(s, HCHK_STATUS_L7TOUT, NULL); } } diff --git a/src/dumpstats.c b/src/dumpstats.c index f59bf90..fd11355 100644 --- a/src/dumpstats.c +++ b/src/dumpstats.c @@ -1333,9 +1333,20 @@ int stats_dump_proxy(struct session *s, struct proxy *px, struct uri_auth *uri) srv_hlt_st[sv_state], (svs->state & SRV_RUNNING) ? (svs->health - svs->rise + 1) : (svs->health), (svs->state & SRV_RUNNING) ? (svs->fall) : (svs->rise)); + + chunk_printf(&msg, "</td><td title=\"%s", + get_check_status_description(sv->check_status)); + + if (*sv->check_desc) { + struct chunk src; + + chunk_printf(&msg, ": "); + + chunk_initlen(&src, sv->check_desc, 0, strlen(sv->check_desc)); + chunk_htmlencode(&msg, &src); + } - chunk_printf(&msg, "</td><td title=\"%s\" nowrap> %s%s", - get_check_status_description(sv->check_status), + chunk_printf(&msg, "\" nowrap> %s%s", tv_iszero(&sv->check_start)?"":"* ", get_check_status_info(sv->check_status)); -- 1.6.4.2