Re: prevent bgpd from starting when control socket already used
On Mon, Nov 19, 2018 at 09:45:55AM +0100, Claudio Jeker wrote: > On Sun, Nov 18, 2018 at 11:40:40PM +0100, Remi Locherer wrote: > > Hi, > > > > from the discussion I understand nobody rejects the functionality. > > > > To ease the review here again the diff (with incorporated feedback from > > anton@ (redundant parens)). > > > > Any comments or OKs? > > Not sure about this, since control_setup() is called by reconfigure(). > So it is possible that bgpd will terminate while reloading the config. > I would prefer if control_setup() would return an error that is passed > back up to reconfigure() instead of the fatalx() call. In that case bgpd > will quit on startup but return a failure on reload bit keep on running > with the old config. This would also allow to do the same check on the > restricted socket. I also added some minor comments below. Thank you for the explanation! I didn't pay attention to the different return values of reconfigure(). This helps to extend the "safety belt" also to listen addresses. The new diff adds this to the last one: - on startup: stop if a listen address or control socket is already in use - on reload: handle used listen addresses and control sockets similar to configuration errors. - remove control_cleanup() since it can remove control sockets of other processes even if it detected them. See also https://marc.info/?l=openbsd-tech&m=154092081205195&w=2 for a discussion about not removing sockets. cvs diff: Diffing . Index: bgpd.c === RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v retrieving revision 1.204 diff -u -p -r1.204 bgpd.c --- bgpd.c 29 Sep 2018 08:11:11 - 1.204 +++ bgpd.c 19 Nov 2018 14:59:44 - @@ -348,8 +348,6 @@ BROKEN if (pledge("stdio rpath wpath cpa free(p); } - control_cleanup(conf->csock); - control_cleanup(conf->rcsock); carp_demote_shutdown(); kr_shutdown(conf->fib_priority, conf->default_tableid); pftable_clear_all(); @@ -453,10 +451,15 @@ reconfigure(char *conffile, struct bgpd_ reconfpending = 0; return (1); } + + if (prepare_listeners(conf) == -1) { + reconfpending = 0; + return (1); + } + expand_networks(conf); cflags = conf->flags; - prepare_listeners(conf); /* start reconfiguration */ if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1, @@ -473,8 +476,10 @@ reconfigure(char *conffile, struct bgpd_ la->fd = -1; } - if (control_setup(conf) == -1) - return (-1); + if (control_setup(conf) == -1) { + reconfpending = 0; + return (1); + } /* adjust fib syncing on reload */ ktable_preload(); @@ -934,11 +939,12 @@ control_setup(struct bgpd_config *conf) /* control socket is outside chroot */ if (!cname || strcmp(cname, conf->csock)) { if (cname) { - control_cleanup(cname); free(cname); } if ((cname = strdup(conf->csock)) == NULL) fatal("strdup"); + if (control_check(cname) == -1) + return (-1); if ((fd = control_init(0, cname)) == -1) fatalx("control socket setup failed"); if (control_listen(fd) == -1) @@ -950,16 +956,16 @@ control_setup(struct bgpd_config *conf) } if (!conf->rcsock) { /* remove restricted socket */ - control_cleanup(rcname); free(rcname); rcname = NULL; } else if (!rcname || strcmp(rcname, conf->rcsock)) { if (rcname) { - control_cleanup(rcname); free(rcname); } if ((rcname = strdup(conf->rcsock)) == NULL) fatal("strdup"); + if (control_check(rcname) == -1) + return (-1); if ((fd = control_init(1, rcname)) == -1) fatalx("control socket setup failed"); if (control_listen(fd) == -1) Index: bgpd.h === RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v retrieving revision 1.354 diff -u -p -r1.354 bgpd.h --- bgpd.h 14 Nov 2018 14:03:36 - 1.354 +++ bgpd.h 19 Nov 2018 14:33:01 - @@ -1142,7 +1142,6 @@ void set_pollfd(struct pollfd *, struc int handle_pollfd(struct pollfd *, struct imsgbuf *); /* control.c */ -void control_cleanup(const char *); intcontrol_imsg_relay(struct imsg *); /* config.c */ Index: config.c === RCS file: /cvs/src/usr.sbin/bgpd/config.c,v retrievin
Re: prevent bgpd from starting when control socket already used
On Sun, Nov 18, 2018 at 11:40:40PM +0100, Remi Locherer wrote: > Hi, > > from the discussion I understand nobody rejects the functionality. > > To ease the review here again the diff (with incorporated feedback from > anton@ (redundant parens)). > > Any comments or OKs? Not sure about this, since control_setup() is called by reconfigure(). So it is possible that bgpd will terminate while reloading the config. I would prefer if control_setup() would return an error that is passed back up to reconfigure() instead of the fatalx() call. In that case bgpd will quit on startup but return a failure on reload bit keep on running with the old config. This would also allow to do the same check on the restricted socket. I also added some minor comments below. -- :wq Claudio > Remi > > > cvs diff: Diffing . > Index: bgpd.c > === > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v > retrieving revision 1.204 > diff -u -p -r1.204 bgpd.c > --- bgpd.c29 Sep 2018 08:11:11 - 1.204 > +++ bgpd.c18 Nov 2018 22:21:51 - > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) > } > if ((cname = strdup(conf->csock)) == NULL) > fatal("strdup"); > + if (control_check(cname) == -1) > + fatalx("control socket check failed"); > if ((fd = control_init(0, cname)) == -1) > fatalx("control socket setup failed"); > if (control_listen(fd) == -1) > Index: control.c > === > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v > retrieving revision 1.90 > diff -u -p -r1.90 control.c > --- control.c 11 Aug 2017 16:02:53 - 1.90 > +++ control.c 10 Nov 2018 20:52:46 - > @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *, > ssize_t imsg_read_nofd(struct imsgbuf *); > > int > +control_check(char *path) > +{ > + struct sockaddr_un sun; > + int fd; > + > + bzero(&sun, sizeof(sun)); > + sun.sun_family = AF_UNIX; > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); > + > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { > + log_warn("control_check: socket check"); You can use __func__ for the function name here. I also would remove 'check' from the string. > + return (-1); > + } > + > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { > + log_warnx("control_check: socket in use"); Same __func__ change here. > + close(fd); > + return (-1); > + } > + > + close(fd); > + > + return (0); > +} > + > +int > control_init(int restricted, char *path) > { > struct sockaddr_un sun; > Index: session.h > === > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > retrieving revision 1.125 > diff -u -p -r1.125 session.h > --- session.h 24 Oct 2018 08:26:37 - 1.125 > +++ session.h 10 Nov 2018 21:53:04 - > @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf > int get_mpe_label(struct rdomain *); > > /* control.c */ > +int control_check(char *); > int control_init(int, char *); > int control_listen(int); > void control_shutdown(int); >
Re: prevent bgpd from starting when control socket already used
Hi, from the discussion I understand nobody rejects the functionality. To ease the review here again the diff (with incorporated feedback from anton@ (redundant parens)). Any comments or OKs? Remi cvs diff: Diffing . Index: bgpd.c === RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v retrieving revision 1.204 diff -u -p -r1.204 bgpd.c --- bgpd.c 29 Sep 2018 08:11:11 - 1.204 +++ bgpd.c 18 Nov 2018 22:21:51 - @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) } if ((cname = strdup(conf->csock)) == NULL) fatal("strdup"); + if (control_check(cname) == -1) + fatalx("control socket check failed"); if ((fd = control_init(0, cname)) == -1) fatalx("control socket setup failed"); if (control_listen(fd) == -1) Index: control.c === RCS file: /cvs/src/usr.sbin/bgpd/control.c,v retrieving revision 1.90 diff -u -p -r1.90 control.c --- control.c 11 Aug 2017 16:02:53 - 1.90 +++ control.c 10 Nov 2018 20:52:46 - @@ -38,6 +38,32 @@ void control_result(struct ctl_conn *, ssize_t imsg_read_nofd(struct imsgbuf *); int +control_check(char *path) +{ + struct sockaddr_un sun; + int fd; + + bzero(&sun, sizeof(sun)); + sun.sun_family = AF_UNIX; + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + log_warn("control_check: socket check"); + return (-1); + } + + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { + log_warnx("control_check: socket in use"); + close(fd); + return (-1); + } + + close(fd); + + return (0); +} + +int control_init(int restricted, char *path) { struct sockaddr_un sun; Index: session.h === RCS file: /cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.125 diff -u -p -r1.125 session.h --- session.h 24 Oct 2018 08:26:37 - 1.125 +++ session.h 10 Nov 2018 21:53:04 - @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf int get_mpe_label(struct rdomain *); /* control.c */ +intcontrol_check(char *); intcontrol_init(int, char *); intcontrol_listen(int); void control_shutdown(int);
Re: prevent bgpd from starting when control socket already used
Stuart Henderson(s...@spacehopper.org) on 2018.11.11 21:55:19 +: > On 2018/11/11 22:45, Job Snijders wrote: > > Shouldnt we already bomb out at the following? > > > > cannot bind to 0.0.0.0:179: Address already in use > > cannot bind to [::]:179: Address already in use > > > > In any regard, I agree with the functionality proposed. No strong opinion > > on the diff itself. > > Should: probably, yes, but we don't at present. > > Last time I did this by mistake I ended up having to get onto the *peer* > via OOB, I can't remember if killing bgpd was enough, I have a feeling I > had to force a reboot.. I was the other one who happened to fat finger this recently (and suggest to remi to do this). I think i also rebootet the router as fast as i could, because i wanted to rid myself of more dropped packets.
Re: prevent bgpd from starting when control socket already used
On Mon, Nov 12, 2018 at 08:12:37AM +0100, Claudio Jeker wrote: > On Sun, Nov 11, 2018 at 04:40:54PM -0700, Theo de Raadt wrote: > > Makes sense to me, I suppose. > > > > Isn't another approach to swap the opening of the sockets? > > > > Or why does failure to control :179 sockets not stop startup? > > Because you can change them during reload and if they are incorrect for a > reason it would kill running bgpd with all sessions. > Having bgpd die on a config reload is for sure something we should not > introduce. Exactly! It would be easy to change log_warn to fatalx. But think of this scenario: Change "listen on" in bgpd.conf with a typo in the address and reload bgpd. I think it's not desirable that bgpd exits and turns down all session in this situation. > > > > I heard from two devs that started a 2nd bgpd by accident (forgot -n for > > > a config check) which then caused downtime. > > > > > > Below diff adds a check to bgpd similar to the one we have now in ospfd > > > and > > > ospf6d: if another process is listening on the control socket bgpd exits. > > > > > > The situation is a bit different than in the ospfd case: > > > - The socket path is defined in the config and not with an option. > > > - The config is parsed relatively late (after daemon). > > > > > > Because of that rcctl would not detect that bgpd exits. But this only > > > applies > > > when the first bgpd was started without using the rc scripts. > > > > > > bgpd already detects that it can not bind the port if another instance is > > > running. Adding a fatal there can have an effect when reloading after a > > > config change. Loosing all sessions is not nice. > > > > > > Of course if the socket path is changed in the config and then reloaded > > > the > > > same can happen. I think this is less likely than changing a listen > > > address. > > > > > > I think it is sufficient to add this check only to the control socket and > > > not to the restricted socket. > > > > > > The diff in action: > > > > > > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv > > > startup > > > session engine ready > > > rereading config > > > cannot bind to 0.0.0.0:179: Address already in use > > > cannot bind to [::]:179: Address already in use > > > control_check: socket in use > > > fatal in bgpd: control socket check failed > > > rib_new: Adj-RIB-In -> 0 > > > rib_new: Adj-RIB-Out -> 1 > > > route decision engine ready > > > peer closed imsg connection > > > fatal in RDE: Lost connection to parent > > > peer closed imsg connection > > > SE: Lost connection to parent > > > session engine exiting > > > # > > > > > > Comments? OKs? > > > > > > Remi > > > > > > > > > cvs diff: Diffing . > > > Index: bgpd.c > > > === > > > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v > > > retrieving revision 1.204 > > > diff -u -p -r1.204 bgpd.c > > > --- bgpd.c29 Sep 2018 08:11:11 - 1.204 > > > +++ bgpd.c11 Nov 2018 20:33:32 - > > > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) > > > } > > > if ((cname = strdup(conf->csock)) == NULL) > > > fatal("strdup"); > > > + if ((control_check(cname)) == -1) > > > + fatalx("control socket check failed"); > > > if ((fd = control_init(0, cname)) == -1) > > > fatalx("control socket setup failed"); > > > if (control_listen(fd) == -1) > > > Index: control.c > > > === > > > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v > > > retrieving revision 1.90 > > > diff -u -p -r1.90 control.c > > > --- control.c 11 Aug 2017 16:02:53 - 1.90 > > > +++ control.c 10 Nov 2018 20:52:46 - > > > @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *, > > > ssize_t imsg_read_nofd(struct imsgbuf *); > > > > > > int > > > +control_check(char *path) > > > +{ > > > + struct sockaddr_un sun; > > > + int fd; > > > + > > > + bzero(&sun, sizeof(sun)); > > > + sun.sun_family = AF_UNIX; > > > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); > > > + > > > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { > > > + log_warn("control_check: socket check"); > > > + return (-1); > > > + } > > > + > > > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { > > > + log_warnx("control_check: socket in use"); > > > + close(fd); > > > + return (-1); > > > + } > > > + > > > + close(fd); > > > + > > > + return (0); > > > +} > > > + > > > +int > > > control_init(int restricted, char *path) > > > { > > > struct sockaddr_un sun; > > > Index: session.h > > > === > > > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > > > retrieving revision 1.125 > > > diff -u -p -r1.125 session.h > > > --- session.h 24 Oct 2
Re: prevent bgpd from starting when control socket already used
On Sun, Nov 11, 2018 at 04:40:54PM -0700, Theo de Raadt wrote: > Makes sense to me, I suppose. > > Isn't another approach to swap the opening of the sockets? > > Or why does failure to control :179 sockets not stop startup? Because you can change them during reload and if they are incorrect for a reason it would kill running bgpd with all sessions. Having bgpd die on a config reload is for sure something we should not introduce. > > I heard from two devs that started a 2nd bgpd by accident (forgot -n for > > a config check) which then caused downtime. > > > > Below diff adds a check to bgpd similar to the one we have now in ospfd and > > ospf6d: if another process is listening on the control socket bgpd exits. > > > > The situation is a bit different than in the ospfd case: > > - The socket path is defined in the config and not with an option. > > - The config is parsed relatively late (after daemon). > > > > Because of that rcctl would not detect that bgpd exits. But this only > > applies > > when the first bgpd was started without using the rc scripts. > > > > bgpd already detects that it can not bind the port if another instance is > > running. Adding a fatal there can have an effect when reloading after a > > config change. Loosing all sessions is not nice. > > > > Of course if the socket path is changed in the config and then reloaded the > > same can happen. I think this is less likely than changing a listen address. > > > > I think it is sufficient to add this check only to the control socket and > > not to the restricted socket. > > > > The diff in action: > > > > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv > > startup > > session engine ready > > rereading config > > cannot bind to 0.0.0.0:179: Address already in use > > cannot bind to [::]:179: Address already in use > > control_check: socket in use > > fatal in bgpd: control socket check failed > > rib_new: Adj-RIB-In -> 0 > > rib_new: Adj-RIB-Out -> 1 > > route decision engine ready > > peer closed imsg connection > > fatal in RDE: Lost connection to parent > > peer closed imsg connection > > SE: Lost connection to parent > > session engine exiting > > # > > > > Comments? OKs? > > > > Remi > > > > > > cvs diff: Diffing . > > Index: bgpd.c > > === > > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v > > retrieving revision 1.204 > > diff -u -p -r1.204 bgpd.c > > --- bgpd.c 29 Sep 2018 08:11:11 - 1.204 > > +++ bgpd.c 11 Nov 2018 20:33:32 - > > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) > > } > > if ((cname = strdup(conf->csock)) == NULL) > > fatal("strdup"); > > + if ((control_check(cname)) == -1) > > + fatalx("control socket check failed"); > > if ((fd = control_init(0, cname)) == -1) > > fatalx("control socket setup failed"); > > if (control_listen(fd) == -1) > > Index: control.c > > === > > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v > > retrieving revision 1.90 > > diff -u -p -r1.90 control.c > > --- control.c 11 Aug 2017 16:02:53 - 1.90 > > +++ control.c 10 Nov 2018 20:52:46 - > > @@ -38,6 +38,32 @@ void control_result(struct ctl_conn *, > > ssize_t imsg_read_nofd(struct imsgbuf *); > > > > int > > +control_check(char *path) > > +{ > > + struct sockaddr_un sun; > > + int fd; > > + > > + bzero(&sun, sizeof(sun)); > > + sun.sun_family = AF_UNIX; > > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); > > + > > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { > > + log_warn("control_check: socket check"); > > + return (-1); > > + } > > + > > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { > > + log_warnx("control_check: socket in use"); > > + close(fd); > > + return (-1); > > + } > > + > > + close(fd); > > + > > + return (0); > > +} > > + > > +int > > control_init(int restricted, char *path) > > { > > struct sockaddr_un sun; > > Index: session.h > > === > > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > > retrieving revision 1.125 > > diff -u -p -r1.125 session.h > > --- session.h 24 Oct 2018 08:26:37 - 1.125 > > +++ session.h 10 Nov 2018 21:53:04 - > > @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf > > int get_mpe_label(struct rdomain *); > > > > /* control.c */ > > +intcontrol_check(char *); > > intcontrol_init(int, char *); > > intcontrol_listen(int); > > void control_shutdown(int); > > > -- :wq Claudio
Re: prevent bgpd from starting when control socket already used
Makes sense to me, I suppose. Isn't another approach to swap the opening of the sockets? Or why does failure to control :179 sockets not stop startup? > I heard from two devs that started a 2nd bgpd by accident (forgot -n for > a config check) which then caused downtime. > > Below diff adds a check to bgpd similar to the one we have now in ospfd and > ospf6d: if another process is listening on the control socket bgpd exits. > > The situation is a bit different than in the ospfd case: > - The socket path is defined in the config and not with an option. > - The config is parsed relatively late (after daemon). > > Because of that rcctl would not detect that bgpd exits. But this only applies > when the first bgpd was started without using the rc scripts. > > bgpd already detects that it can not bind the port if another instance is > running. Adding a fatal there can have an effect when reloading after a > config change. Loosing all sessions is not nice. > > Of course if the socket path is changed in the config and then reloaded the > same can happen. I think this is less likely than changing a listen address. > > I think it is sufficient to add this check only to the control socket and > not to the restricted socket. > > The diff in action: > > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv > startup > session engine ready > rereading config > cannot bind to 0.0.0.0:179: Address already in use > cannot bind to [::]:179: Address already in use > control_check: socket in use > fatal in bgpd: control socket check failed > rib_new: Adj-RIB-In -> 0 > rib_new: Adj-RIB-Out -> 1 > route decision engine ready > peer closed imsg connection > fatal in RDE: Lost connection to parent > peer closed imsg connection > SE: Lost connection to parent > session engine exiting > # > > Comments? OKs? > > Remi > > > cvs diff: Diffing . > Index: bgpd.c > === > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v > retrieving revision 1.204 > diff -u -p -r1.204 bgpd.c > --- bgpd.c29 Sep 2018 08:11:11 - 1.204 > +++ bgpd.c11 Nov 2018 20:33:32 - > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) > } > if ((cname = strdup(conf->csock)) == NULL) > fatal("strdup"); > + if ((control_check(cname)) == -1) > + fatalx("control socket check failed"); > if ((fd = control_init(0, cname)) == -1) > fatalx("control socket setup failed"); > if (control_listen(fd) == -1) > Index: control.c > === > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v > retrieving revision 1.90 > diff -u -p -r1.90 control.c > --- control.c 11 Aug 2017 16:02:53 - 1.90 > +++ control.c 10 Nov 2018 20:52:46 - > @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *, > ssize_t imsg_read_nofd(struct imsgbuf *); > > int > +control_check(char *path) > +{ > + struct sockaddr_un sun; > + int fd; > + > + bzero(&sun, sizeof(sun)); > + sun.sun_family = AF_UNIX; > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); > + > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { > + log_warn("control_check: socket check"); > + return (-1); > + } > + > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { > + log_warnx("control_check: socket in use"); > + close(fd); > + return (-1); > + } > + > + close(fd); > + > + return (0); > +} > + > +int > control_init(int restricted, char *path) > { > struct sockaddr_un sun; > Index: session.h > === > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > retrieving revision 1.125 > diff -u -p -r1.125 session.h > --- session.h 24 Oct 2018 08:26:37 - 1.125 > +++ session.h 10 Nov 2018 21:53:04 - > @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf > int get_mpe_label(struct rdomain *); > > /* control.c */ > +int control_check(char *); > int control_init(int, char *); > int control_listen(int); > void control_shutdown(int); >
Re: prevent bgpd from starting when control socket already used
On 2018/11/11 22:45, Job Snijders wrote: > Shouldnt we already bomb out at the following? > > cannot bind to 0.0.0.0:179: Address already in use > cannot bind to [::]:179: Address already in use > > In any regard, I agree with the functionality proposed. No strong opinion > on the diff itself. Should: probably, yes, but we don't at present. Last time I did this by mistake I ended up having to get onto the *peer* via OOB, I can't remember if killing bgpd was enough, I have a feeling I had to force a reboot..
Re: prevent bgpd from starting when control socket already used
Shouldnt we already bomb out at the following? cannot bind to 0.0.0.0:179: Address already in use cannot bind to [::]:179: Address already in use In any regard, I agree with the functionality proposed. No strong opinion on the diff itself. Kind regards, Job On Sun, Nov 11, 2018 at 22:35 Remi Locherer wrote: > Hi, > > I heard from two devs that started a 2nd bgpd by accident (forgot -n for > a config check) which then caused downtime. > > Below diff adds a check to bgpd similar to the one we have now in ospfd and > ospf6d: if another process is listening on the control socket bgpd exits. > > The situation is a bit different than in the ospfd case: > - The socket path is defined in the config and not with an option. > - The config is parsed relatively late (after daemon). > > Because of that rcctl would not detect that bgpd exits. But this only > applies > when the first bgpd was started without using the rc scripts. > > bgpd already detects that it can not bind the port if another instance is > running. Adding a fatal there can have an effect when reloading after a > config change. Loosing all sessions is not nice. > > Of course if the socket path is changed in the config and then reloaded the > same can happen. I think this is less likely than changing a listen > address. > > I think it is sufficient to add this check only to the control socket and > not to the restricted socket. > > The diff in action: > > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv > startup > session engine ready > rereading config > cannot bind to 0.0.0.0:179: Address already in use > cannot bind to [::]:179: Address already in use > control_check: socket in use > fatal in bgpd: control socket check failed > rib_new: Adj-RIB-In -> 0 > rib_new: Adj-RIB-Out -> 1 > route decision engine ready > peer closed imsg connection > fatal in RDE: Lost connection to parent > peer closed imsg connection > SE: Lost connection to parent > session engine exiting > # > > Comments? OKs? > > Remi > > > cvs diff: Diffing . > Index: bgpd.c > === > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v > retrieving revision 1.204 > diff -u -p -r1.204 bgpd.c > --- bgpd.c 29 Sep 2018 08:11:11 - 1.204 > +++ bgpd.c 11 Nov 2018 20:33:32 - > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) > } > if ((cname = strdup(conf->csock)) == NULL) > fatal("strdup"); > + if ((control_check(cname)) == -1) > + fatalx("control socket check failed"); > if ((fd = control_init(0, cname)) == -1) > fatalx("control socket setup failed"); > if (control_listen(fd) == -1) > Index: control.c > === > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v > retrieving revision 1.90 > diff -u -p -r1.90 control.c > --- control.c 11 Aug 2017 16:02:53 - 1.90 > +++ control.c 10 Nov 2018 20:52:46 - > @@ -38,6 +38,32 @@ void control_result(struct ctl_conn *, > ssize_t imsg_read_nofd(struct imsgbuf *); > > int > +control_check(char *path) > +{ > + struct sockaddr_un sun; > + int fd; > + > + bzero(&sun, sizeof(sun)); > + sun.sun_family = AF_UNIX; > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); > + > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { > + log_warn("control_check: socket check"); > + return (-1); > + } > + > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { > + log_warnx("control_check: socket in use"); > + close(fd); > + return (-1); > + } > + > + close(fd); > + > + return (0); > +} > + > +int > control_init(int restricted, char *path) > { > struct sockaddr_un sun; > Index: session.h > === > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v > retrieving revision 1.125 > diff -u -p -r1.125 session.h > --- session.h 24 Oct 2018 08:26:37 - 1.125 > +++ session.h 10 Nov 2018 21:53:04 - > @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf > int get_mpe_label(struct rdomain *); > > /* control.c */ > +intcontrol_check(char *); > intcontrol_init(int, char *); > intcontrol_listen(int); > void control_shutdown(int); > >
prevent bgpd from starting when control socket already used
Hi, I heard from two devs that started a 2nd bgpd by accident (forgot -n for a config check) which then caused downtime. Below diff adds a check to bgpd similar to the one we have now in ospfd and ospf6d: if another process is listening on the control socket bgpd exits. The situation is a bit different than in the ospfd case: - The socket path is defined in the config and not with an option. - The config is parsed relatively late (after daemon). Because of that rcctl would not detect that bgpd exits. But this only applies when the first bgpd was started without using the rc scripts. bgpd already detects that it can not bind the port if another instance is running. Adding a fatal there can have an effect when reloading after a config change. Loosing all sessions is not nice. Of course if the socket path is changed in the config and then reloaded the same can happen. I think this is less likely than changing a listen address. I think it is sufficient to add this check only to the control socket and not to the restricted socket. The diff in action: # /usr/src/usr.sbin/bgpd/obj/bgpd -dv startup session engine ready rereading config cannot bind to 0.0.0.0:179: Address already in use cannot bind to [::]:179: Address already in use control_check: socket in use fatal in bgpd: control socket check failed rib_new: Adj-RIB-In -> 0 rib_new: Adj-RIB-Out -> 1 route decision engine ready peer closed imsg connection fatal in RDE: Lost connection to parent peer closed imsg connection SE: Lost connection to parent session engine exiting # Comments? OKs? Remi cvs diff: Diffing . Index: bgpd.c === RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v retrieving revision 1.204 diff -u -p -r1.204 bgpd.c --- bgpd.c 29 Sep 2018 08:11:11 - 1.204 +++ bgpd.c 11 Nov 2018 20:33:32 - @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf) } if ((cname = strdup(conf->csock)) == NULL) fatal("strdup"); + if ((control_check(cname)) == -1) + fatalx("control socket check failed"); if ((fd = control_init(0, cname)) == -1) fatalx("control socket setup failed"); if (control_listen(fd) == -1) Index: control.c === RCS file: /cvs/src/usr.sbin/bgpd/control.c,v retrieving revision 1.90 diff -u -p -r1.90 control.c --- control.c 11 Aug 2017 16:02:53 - 1.90 +++ control.c 10 Nov 2018 20:52:46 - @@ -38,6 +38,32 @@ void control_result(struct ctl_conn *, ssize_t imsg_read_nofd(struct imsgbuf *); int +control_check(char *path) +{ + struct sockaddr_un sun; + int fd; + + bzero(&sun, sizeof(sun)); + sun.sun_family = AF_UNIX; + strlcpy(sun.sun_path, path, sizeof(sun.sun_path)); + + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { + log_warn("control_check: socket check"); + return (-1); + } + + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) { + log_warnx("control_check: socket in use"); + close(fd); + return (-1); + } + + close(fd); + + return (0); +} + +int control_init(int restricted, char *path) { struct sockaddr_un sun; Index: session.h === RCS file: /cvs/src/usr.sbin/bgpd/session.h,v retrieving revision 1.125 diff -u -p -r1.125 session.h --- session.h 24 Oct 2018 08:26:37 - 1.125 +++ session.h 10 Nov 2018 21:53:04 - @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf int get_mpe_label(struct rdomain *); /* control.c */ +intcontrol_check(char *); intcontrol_init(int, char *); intcontrol_listen(int); void control_shutdown(int);