ok yasuoka
On Fri, 21 Feb 2020 15:32:53 +1000
David Gwynne <[email protected]> wrote:
> we (work) use radiusctl as part of a check script with relayd so
> we can try and keep a radius service available with some magical
> routing and redirect configs.
>
> radiusctl is currently pretty simple and sends a radius request,
> and then waits for a reply. however, it does not implement retries
> and timeouts, so if either the reply or request are lost, radiusctl
> ends up waiting in recv forever for a packet that will never turn
> up.
>
> combined with this, we seem to tickle something in relayd where it
> loses track of its children. this results in us "leaking" radiusctl
> processes. historically we've coped with this by running pkill -x
> radiusctl out of cron, and while it made us a sad on the inside,
> we've been busy recently and had to ignore this for now.
>
> unfortunatly one of the radius servers failed this morning. this
> meant that instead of a few radius packets being lost over a long
> period time causing a slow leak of radiusctl processes, we never
> got a reply to any radius packet and started accumulating a ton of
> radius processes. in fact, we hit maxproc, which made recovering very
> annoying.
>
> we run a bunch of different checks out of relayd, but the radiusctl
> one is the only one that doesnt implement timeouts and retries. so
> im fixing it first, and then we'll try and figure out what's wrong
> with relayd.
>
> the specific changes in this diff is the introduction of a transmission
> retry counter, a time interval between the tries, and a maximum
> wait time that the process has before it gives up waiting for a
> reply. each of these is configurable, but i think the defaults are
> reasonable for a test.
>
> it introduces libevent, because that makes it easy to manage the
> timeouts.
>
> ok?
>
> Index: Makefile
> ===================================================================
> RCS file: /cvs/src/usr.sbin/radiusctl/Makefile,v
> retrieving revision 1.2
> diff -u -p -r1.2 Makefile
> --- Makefile 3 Aug 2015 04:10:21 -0000 1.2
> +++ Makefile 21 Feb 2020 05:15:14 -0000
> @@ -3,7 +3,7 @@ PROG= radiusctl
> SRCS= radiusctl.c parser.c chap_ms.c
> MAN= radiusctl.8
> CFLAGS+= -Wall -Wextra -Wno-unused-parameter
> -LDADD+= -lradius -lcrypto
> -DPADD+= ${LIBRADIUS} ${LIBCRYPTO}
> +LDADD+= -lradius -lcrypto -levent
> +DPADD+= ${LIBRADIUS} ${LIBCRYPTO} ${LIBEVENT}
>
> .include <bsd.prog.mk>
> Index: parser.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/radiusctl/parser.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 parser.c
> --- parser.c 21 Jul 2015 04:06:04 -0000 1.1
> +++ parser.c 21 Feb 2020 05:15:14 -0000
> @@ -18,6 +18,8 @@
> * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> */
>
> +#include <sys/time.h>
> +
> #include <stdint.h>
> #include <stdio.h>
> #include <stdlib.h>
> @@ -35,6 +37,9 @@ enum token_type {
> PORT,
> METHOD,
> NAS_PORT,
> + TRIES,
> + INTERVAL,
> + MAXWAIT,
> ENDTOKEN
> };
>
> @@ -45,7 +50,11 @@ struct token {
> const struct token *next;
> };
>
> -static struct parse_result res;
> +static struct parse_result res = {
> + .tries = TEST_TRIES_DEFAULT,
> + .interval = { TEST_INTERVAL_DEFAULT, 0 },
> + .maxwait = { TEST_MAXWAIT_DEFAULT, 0 },
> +};
>
> static const struct token t_test[];
> static const struct token t_secret[];
> @@ -55,6 +64,9 @@ static const struct token t_password[];
> static const struct token t_port[];
> static const struct token t_method[];
> static const struct token t_nas_port[];
> +static const struct token t_tries[];
> +static const struct token t_interval[];
> +static const struct token t_maxwait[];
>
> static const struct token t_main[] = {
> { KEYWORD, "test", TEST, t_test },
> @@ -82,6 +94,9 @@ static const struct token t_test_opts[]
> { KEYWORD, "port", NONE, t_port },
> { KEYWORD, "method", NONE, t_method },
> { KEYWORD, "nas-port", NONE, t_nas_port },
> + { KEYWORD, "interval", NONE, t_interval },
> + { KEYWORD, "tries", NONE, t_tries },
> + { KEYWORD, "maxwait", NONE, t_maxwait },
> { ENDTOKEN, "", NONE, NULL }
> };
>
> @@ -105,6 +120,21 @@ static const struct token t_nas_port[] =
> { ENDTOKEN, "", NONE, NULL }
> };
>
> +static const struct token t_tries[] = {
> + { TRIES, "", NONE, t_test_opts },
> + { ENDTOKEN, "", NONE, NULL }
> +};
> +
> +static const struct token t_interval[] = {
> + { INTERVAL, "", NONE, t_test_opts },
> + { ENDTOKEN, "", NONE, NULL }
> +};
> +
> +static const struct token t_maxwait[] = {
> + { MAXWAIT, "", NONE, t_test_opts },
> + { ENDTOKEN, "", NONE, NULL }
> +};
> +
>
> static const struct token *match_token(char *, const struct token []);
> static void show_valid_args(const struct token []);
> @@ -115,8 +145,6 @@ parse(int argc, char *argv[])
> const struct token *table = t_main;
> const struct token *match;
>
> - bzero(&res, sizeof(res));
> -
> while (argc >= 0) {
> if ((match = match_token(argv[0], table)) == NULL) {
> fprintf(stderr, "valid commands/args:\n");
> @@ -138,6 +166,12 @@ parse(int argc, char *argv[])
> return (NULL);
> }
>
> + if (res.tries * res.interval.tv_sec > res.maxwait.tv_sec) {
> + fprintf(stderr, "tries %u by interval %lld > maxwait %lld",
> + res.tries, res.interval.tv_sec, res.maxwait.tv_sec);
> + return (NULL);
> + }
> +
> return (&res);
> }
>
> @@ -240,6 +274,50 @@ match_token(char *word, const struct tok
> res.nas_port = num;
> t = &table[i];
> break;
> +
> + case TRIES:
> + if (word == NULL)
> + break;
> + num = strtonum(word,
> + TEST_TRIES_MIN, TEST_TRIES_MAX, &errstr);
> + if (errstr != NULL) {
> + printf("invalid argument: %s is %s"
> + " for \"tries\"\n", word, errstr);
> + return (NULL);
> + }
> + match++;
> + res.tries = num;
> + t = &table[i];
> + break;
> + case INTERVAL:
> + if (word == NULL)
> + break;
> + num = strtonum(word,
> + TEST_INTERVAL_MIN, TEST_INTERVAL_MAX, &errstr);
> + if (errstr != NULL) {
> + printf("invalid argument: %s is %s"
> + " for \"interval\"\n", word, errstr);
> + return (NULL);
> + }
> + match++;
> + res.interval.tv_sec = num;
> + t = &table[i];
> + break;
> + case MAXWAIT:
> + if (word == NULL)
> + break;
> + num = strtonum(word,
> + TEST_MAXWAIT_MIN, TEST_MAXWAIT_MAX, &errstr);
> + if (errstr != NULL) {
> + printf("invalid argument: %s is %s"
> + " for \"maxwait\"\n", word, errstr);
> + return (NULL);
> + }
> + match++;
> + res.maxwait.tv_sec = num;
> + t = &table[i];
> + break;
> +
> case ENDTOKEN:
> break;
> }
> @@ -292,6 +370,18 @@ show_valid_args(const struct token table
> break;
> case NAS_PORT:
> fprintf(stderr, " <nas-port (0-65535)>\n");
> + break;
> + case TRIES:
> + fprintf(stderr, " <tries (%u-%u)>\n",
> + TEST_TRIES_MIN, TEST_TRIES_MAX);
> + break;
> + case INTERVAL:
> + fprintf(stderr, " <interval (%u-%u)>\n",
> + TEST_INTERVAL_MIN, TEST_INTERVAL_MAX);
> + break;
> + case MAXWAIT:
> + fprintf(stderr, " <maxwait (%u-%u)>\n",
> + TEST_MAXWAIT_MIN, TEST_MAXWAIT_MAX);
> break;
> case ENDTOKEN:
> break;
> Index: parser.h
> ===================================================================
> RCS file: /cvs/src/usr.sbin/radiusctl/parser.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 parser.h
> --- parser.h 21 Jul 2015 04:06:04 -0000 1.1
> +++ parser.h 21 Feb 2020 05:15:14 -0000
> @@ -31,6 +31,18 @@ enum auth_method {
> MSCHAPV2
> };
>
> +#define TEST_TRIES_MIN 1
> +#define TEST_TRIES_MAX 32
> +#define TEST_TRIES_DEFAULT 3
> +
> +#define TEST_INTERVAL_MIN 1
> +#define TEST_INTERVAL_MAX 10
> +#define TEST_INTERVAL_DEFAULT 2
> +
> +#define TEST_MAXWAIT_MIN 3
> +#define TEST_MAXWAIT_MAX 60
> +#define TEST_MAXWAIT_DEFAULT 8
> +
> struct parse_result {
> enum actions action;
> const char *hostname;
> @@ -40,6 +52,13 @@ struct parse_result {
> u_short port;
> int nas_port;
> enum auth_method auth_method;
> +
> + /* number of packets to try sending */
> + unsigned int tries;
> + /* how long between packet sends */
> + struct timeval interval;
> + /* overall process wait time for a reply */
> + struct timeval maxwait;
> };
>
> struct parse_result *parse(int, char *[]);
> Index: radiusctl.8
> ===================================================================
> RCS file: /cvs/src/usr.sbin/radiusctl/radiusctl.8,v
> retrieving revision 1.3
> diff -u -p -r1.3 radiusctl.8
> --- radiusctl.8 25 Aug 2015 01:21:57 -0000 1.3
> +++ radiusctl.8 21 Feb 2020 05:15:14 -0000
> @@ -77,6 +77,19 @@ when sending a packet to
> .Ar hostname .
> If the port is omitted,
> the default port number 1812 is used.
> +.It Cm tries Ar number
> +Specifies the number of packets to try sending.
> +By default
> +.Nm
> +will send 3 packets before giving up.
> +.It Cm interval Ar seconds
> +Specifies how many seconds to wait before resending a packet.
> +The default interval between packet retries is 2 seconds.
> +.It Cm maxwait Ar seconds
> +Specifies the maximum amount of time to wait for a valid reply packet.
> +By default
> +.Nm
> +will wait 8 seconds for a valid reply.
> .El
> .El
> .Sh SEE ALSO
> Index: radiusctl.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/radiusctl/radiusctl.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 radiusctl.c
> --- radiusctl.c 1 Apr 2019 09:51:56 -0000 1.7
> +++ radiusctl.c 21 Feb 2020 05:15:14 -0000
> @@ -19,6 +19,7 @@
> #include <netinet/in.h>
>
> #include <arpa/inet.h>
> +#include <errno.h>
> #include <err.h>
> #include <md5.h>
> #include <netdb.h>
> @@ -30,11 +31,13 @@
>
> #include <radius.h>
>
> +#include <event.h>
> +
> #include "parser.h"
> #include "chap_ms.h"
>
>
> -static void radius_test (struct parse_result *);
> +static int radius_test (struct parse_result *);
> static void radius_dump (FILE *, RADIUS_PACKET *, bool,
> const char *);
> static const char *radius_code_str (int code);
> @@ -53,6 +56,7 @@ main(int argc, char *argv[])
> {
> int ch;
> struct parse_result *result;
> + int ecode = EXIT_SUCCESS;
>
> while ((ch = getopt(argc, argv, "")) != -1)
> switch (ch) {
> @@ -72,21 +76,38 @@ main(int argc, char *argv[])
> case TEST:
> if (pledge("stdio dns inet", NULL) == -1)
> err(EXIT_FAILURE, "pledge");
> - radius_test(result);
> + ecode = radius_test(result);
> break;
> }
>
> - return (EXIT_SUCCESS);
> + return (ecode);
> }
>
> -static void
> +struct radius_test {
> + const struct parse_result *res;
> + int ecode;
> +
> + RADIUS_PACKET *reqpkt;
> + int sock;
> + unsigned int tries;
> + struct event ev_send;
> + struct event ev_recv;
> + struct event ev_timedout;
> +};
> +
> +static void radius_test_send(int, short, void *);
> +static void radius_test_recv(int, short, void *);
> +static void radius_test_timedout(int, short, void *);
> +
> +static int
> radius_test(struct parse_result *res)
> {
> + struct radius_test test = { .res = res };
> + RADIUS_PACKET *reqpkt;
> struct addrinfo hints, *ai;
> int sock, retval;
> struct sockaddr_storage sockaddr;
> socklen_t sockaddrlen;
> - RADIUS_PACKET *reqpkt, *respkt;
> struct sockaddr_in *sin4;
> struct sockaddr_in6 *sin6;
> uint32_t u32val;
> @@ -110,7 +131,8 @@ radius_test(struct parse_result *res)
> ((struct sockaddr_in *)ai->ai_addr)->sin_port =
> htons(res->port);
>
> - sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
> + sock = socket(ai->ai_family, ai->ai_socktype | SOCK_NONBLOCK,
> + ai->ai_protocol);
> if (sock == -1)
> err(1, "socket");
>
> @@ -211,24 +233,31 @@ radius_test(struct parse_result *res)
>
> radius_put_message_authenticator(reqpkt, res->secret);
>
> + event_init();
> +
> + test.ecode = EXIT_FAILURE;
> + test.res = res;
> + test.sock = sock;
> + test.reqpkt = reqpkt;
> +
> + event_set(&test.ev_recv, sock, EV_READ|EV_PERSIST,
> + radius_test_recv, &test);
> +
> + evtimer_set(&test.ev_send, radius_test_send, &test);
> + evtimer_set(&test.ev_timedout, radius_test_timedout, &test);
> +
> + event_add(&test.ev_recv, NULL);
> + evtimer_add(&test.ev_timedout, &res->maxwait);
> +
> /* Send! */
> fprintf(stderr, "Sending:\n");
> radius_dump(stdout, reqpkt, false, res->secret);
> - if (send(sock, radius_get_data(reqpkt), radius_get_length(reqpkt), 0)
> - == -1)
> - warn("send");
> - if ((respkt = radius_recv(sock, 0)) == NULL)
> - warn("recv");
> - else {
> - radius_set_request_packet(respkt, reqpkt);
> - fprintf(stderr, "\nReceived:\n");
> - radius_dump(stdout, respkt, true, res->secret);
> - }
> + radius_test_send(0, EV_TIMEOUT, &test);
> +
> + event_dispatch();
>
> /* Release the resources */
> radius_delete_packet(reqpkt);
> - if (respkt)
> - radius_delete_packet(respkt);
> close(sock);
> freeaddrinfo(ai);
>
> @@ -236,7 +265,79 @@ radius_test(struct parse_result *res)
> if (res->password)
> explicit_bzero((char *)res->password, strlen(res->password));
>
> - return;
> + return (test.ecode);
> +}
> +
> +static void
> +radius_test_send(int thing, short revents, void *arg)
> +{
> + struct radius_test *test = arg;
> + RADIUS_PACKET *reqpkt = test->reqpkt;
> + ssize_t rv;
> +
> +retry:
> + rv = send(test->sock,
> + radius_get_data(reqpkt), radius_get_length(reqpkt), 0);
> + if (rv == -1) {
> + switch (errno) {
> + case EINTR:
> + case EAGAIN:
> + goto retry;
> + default:
> + break;
> + }
> +
> + warn("send");
> + }
> +
> + if (++test->tries >= test->res->tries)
> + return;
> +
> + evtimer_add(&test->ev_send, &test->res->interval);
> +}
> +
> +static void
> +radius_test_recv(int sock, short revents, void *arg)
> +{
> + struct radius_test *test = arg;
> + RADIUS_PACKET *respkt;
> + RADIUS_PACKET *reqpkt = test->reqpkt;
> +
> +retry:
> + respkt = radius_recv(sock, 0);
> + if (respkt == NULL) {
> + switch (errno) {
> + case EINTR:
> + case EAGAIN:
> + goto retry;
> + default:
> + break;
> + }
> +
> + warn("recv");
> + return;
> + }
> +
> + radius_set_request_packet(respkt, reqpkt);
> + if (radius_get_id(respkt) == radius_get_id(reqpkt)) {
> + fprintf(stderr, "\nReceived:\n");
> + radius_dump(stdout, respkt, true, test->res->secret);
> +
> + event_del(&test->ev_recv);
> + evtimer_del(&test->ev_send);
> + evtimer_del(&test->ev_timedout);
> + test->ecode = EXIT_SUCCESS;
> + }
> +
> + radius_delete_packet(respkt);
> +}
> +
> +static void
> +radius_test_timedout(int thing, short revents, void *arg)
> +{
> + struct radius_test *test = arg;
> +
> + event_del(&test->ev_recv);
> }
>
> static void
>