Re: prevent bgpd from starting when control socket already used

2018-11-19 Thread Remi Locherer
On Mon, Nov 19, 2018 at 09:45:55AM +0100, Claudio Jeker wrote:
> On Sun, Nov 18, 2018 at 11:40:40PM +0100, Remi Locherer wrote:
> > Hi,
> > 
> > from the discussion I understand nobody rejects the functionality.
> > 
> > To ease the review here again the diff (with incorporated feedback from
> > anton@ (redundant parens)).
> > 
> > Any comments or OKs?
> 
> Not sure about this, since control_setup() is called by reconfigure().
> So it is possible that bgpd will terminate while reloading the config.
> I would prefer if control_setup() would return an error that is passed
> back up to reconfigure() instead of the fatalx() call. In that case bgpd
> will quit on startup but return a failure on reload bit keep on running
> with the old config.  This would also allow to do the same check on the
> restricted socket. I also added some minor comments below.

Thank you for the explanation! I didn't pay attention to the different
return values of reconfigure(). This helps to extend the "safety belt"
also to listen addresses.

The new diff adds this to the last one:
- on startup:
  stop if a listen address or control socket is already in use
- on reload:
  handle used listen addresses and control sockets similar to configuration
  errors.
- remove control_cleanup() since it can remove control sockets of other
  processes even if it detected them.
  See also https://marc.info/?l=openbsd-tech&m=154092081205195&w=2 for a
  discussion about not removing sockets.



cvs diff: Diffing .
Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.204
diff -u -p -r1.204 bgpd.c
--- bgpd.c  29 Sep 2018 08:11:11 -  1.204
+++ bgpd.c  19 Nov 2018 14:59:44 -
@@ -348,8 +348,6 @@ BROKEN  if (pledge("stdio rpath wpath cpa
free(p);
}
 
-   control_cleanup(conf->csock);
-   control_cleanup(conf->rcsock);
carp_demote_shutdown();
kr_shutdown(conf->fib_priority, conf->default_tableid);
pftable_clear_all();
@@ -453,10 +451,15 @@ reconfigure(char *conffile, struct bgpd_
reconfpending = 0;
return (1);
}
+
+   if (prepare_listeners(conf) == -1) {
+   reconfpending = 0;
+   return (1);
+   }
+
expand_networks(conf);
 
cflags = conf->flags;
-   prepare_listeners(conf);
 
/* start reconfiguration */
if (imsg_compose(ibuf_se, IMSG_RECONF_CONF, 0, 0, -1,
@@ -473,8 +476,10 @@ reconfigure(char *conffile, struct bgpd_
la->fd = -1;
}
 
-   if (control_setup(conf) == -1)
-   return (-1);
+   if (control_setup(conf) == -1) {
+   reconfpending = 0;
+   return (1);
+   }
 
/* adjust fib syncing on reload */
ktable_preload();
@@ -934,11 +939,12 @@ control_setup(struct bgpd_config *conf)
/* control socket is outside chroot */
if (!cname || strcmp(cname, conf->csock)) {
if (cname) {
-   control_cleanup(cname);
free(cname);
}
if ((cname = strdup(conf->csock)) == NULL)
fatal("strdup");
+   if (control_check(cname) == -1)
+   return (-1);
if ((fd = control_init(0, cname)) == -1)
fatalx("control socket setup failed");
if (control_listen(fd) == -1)
@@ -950,16 +956,16 @@ control_setup(struct bgpd_config *conf)
}
if (!conf->rcsock) {
/* remove restricted socket */
-   control_cleanup(rcname);
free(rcname);
rcname = NULL;
} else if (!rcname || strcmp(rcname, conf->rcsock)) {
if (rcname) {
-   control_cleanup(rcname);
free(rcname);
}
if ((rcname = strdup(conf->rcsock)) == NULL)
fatal("strdup");
+   if (control_check(rcname) == -1)
+   return (-1);
if ((fd = control_init(1, rcname)) == -1)
fatalx("control socket setup failed");
if (control_listen(fd) == -1)
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.354
diff -u -p -r1.354 bgpd.h
--- bgpd.h  14 Nov 2018 14:03:36 -  1.354
+++ bgpd.h  19 Nov 2018 14:33:01 -
@@ -1142,7 +1142,6 @@ void   set_pollfd(struct pollfd *, struc
 int handle_pollfd(struct pollfd *, struct imsgbuf *);
 
 /* control.c */
-void   control_cleanup(const char *);
 intcontrol_imsg_relay(struct imsg *);
 
 /* config.c */
Index: config.c
===
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrievin

Re: prevent bgpd from starting when control socket already used

2018-11-19 Thread Claudio Jeker
On Sun, Nov 18, 2018 at 11:40:40PM +0100, Remi Locherer wrote:
> Hi,
> 
> from the discussion I understand nobody rejects the functionality.
> 
> To ease the review here again the diff (with incorporated feedback from
> anton@ (redundant parens)).
> 
> Any comments or OKs?

Not sure about this, since control_setup() is called by reconfigure().
So it is possible that bgpd will terminate while reloading the config.
I would prefer if control_setup() would return an error that is passed
back up to reconfigure() instead of the fatalx() call. In that case bgpd
will quit on startup but return a failure on reload bit keep on running
with the old config.  This would also allow to do the same check on the
restricted socket. I also added some minor comments below.

-- 
:wq Claudio

 
> Remi
> 
> 
> cvs diff: Diffing .
> Index: bgpd.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
> retrieving revision 1.204
> diff -u -p -r1.204 bgpd.c
> --- bgpd.c29 Sep 2018 08:11:11 -  1.204
> +++ bgpd.c18 Nov 2018 22:21:51 -
> @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
>   }
>   if ((cname = strdup(conf->csock)) == NULL)
>   fatal("strdup");
> + if (control_check(cname) == -1)
> + fatalx("control socket check failed");
>   if ((fd = control_init(0, cname)) == -1)
>   fatalx("control socket setup failed");
>   if (control_listen(fd) == -1)
> Index: control.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 control.c
> --- control.c 11 Aug 2017 16:02:53 -  1.90
> +++ control.c 10 Nov 2018 20:52:46 -
> @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *,
>  ssize_t   imsg_read_nofd(struct imsgbuf *);
>  
>  int
> +control_check(char *path)
> +{
> + struct sockaddr_un   sun;
> + int  fd;
> +
> + bzero(&sun, sizeof(sun));
> + sun.sun_family = AF_UNIX;
> + strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
> +
> + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
> + log_warn("control_check: socket check");

You can use __func__ for the function name here. I also would remove
'check' from the string.

> + return (-1);
> + }
> +
> + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
> + log_warnx("control_check: socket in use");

Same __func__ change here.

> + close(fd);
> + return (-1);
> + }
> +
> + close(fd);
> +
> + return (0);
> +}
> +
> +int
>  control_init(int restricted, char *path)
>  {
>   struct sockaddr_un   sun;
> Index: session.h
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 session.h
> --- session.h 24 Oct 2018 08:26:37 -  1.125
> +++ session.h 10 Nov 2018 21:53:04 -
> @@ -250,6 +250,7 @@ void   prepare_listeners(struct bgpd_conf
>  int   get_mpe_label(struct rdomain *);
>  
>  /* control.c */
> +int  control_check(char *);
>  int  control_init(int, char *);
>  int  control_listen(int);
>  void control_shutdown(int);
> 



Re: prevent bgpd from starting when control socket already used

2018-11-18 Thread Remi Locherer
Hi,

from the discussion I understand nobody rejects the functionality.

To ease the review here again the diff (with incorporated feedback from
anton@ (redundant parens)).

Any comments or OKs?

Remi


cvs diff: Diffing .
Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.204
diff -u -p -r1.204 bgpd.c
--- bgpd.c  29 Sep 2018 08:11:11 -  1.204
+++ bgpd.c  18 Nov 2018 22:21:51 -
@@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
}
if ((cname = strdup(conf->csock)) == NULL)
fatal("strdup");
+   if (control_check(cname) == -1)
+   fatalx("control socket check failed");
if ((fd = control_init(0, cname)) == -1)
fatalx("control socket setup failed");
if (control_listen(fd) == -1)
Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.90
diff -u -p -r1.90 control.c
--- control.c   11 Aug 2017 16:02:53 -  1.90
+++ control.c   10 Nov 2018 20:52:46 -
@@ -38,6 +38,32 @@ void  control_result(struct ctl_conn *,
 ssize_t imsg_read_nofd(struct imsgbuf *);
 
 int
+control_check(char *path)
+{
+   struct sockaddr_un   sun;
+   int  fd;
+
+   bzero(&sun, sizeof(sun));
+   sun.sun_family = AF_UNIX;
+   strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
+
+   if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+   log_warn("control_check: socket check");
+   return (-1);
+   }
+
+   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
+   log_warnx("control_check: socket in use");
+   close(fd);
+   return (-1);
+   }
+
+   close(fd);
+
+   return (0);
+}
+
+int
 control_init(int restricted, char *path)
 {
struct sockaddr_un   sun;
Index: session.h
===
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.125
diff -u -p -r1.125 session.h
--- session.h   24 Oct 2018 08:26:37 -  1.125
+++ session.h   10 Nov 2018 21:53:04 -
@@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf
 int get_mpe_label(struct rdomain *);
 
 /* control.c */
+intcontrol_check(char *);
 intcontrol_init(int, char *);
 intcontrol_listen(int);
 void   control_shutdown(int);



Re: prevent bgpd from starting when control socket already used

2018-11-12 Thread Sebastian Benoit
Stuart Henderson(s...@spacehopper.org) on 2018.11.11 21:55:19 +:
> On 2018/11/11 22:45, Job Snijders wrote:
> > Shouldnt we already bomb out at the following?
> > 
> > cannot bind to 0.0.0.0:179: Address already in use
> > cannot bind to [::]:179: Address already in use
> > 
> > In any regard, I agree with the functionality proposed. No strong opinion
> > on the diff itself.
> 
> Should: probably, yes, but we don't at present.
> 
> Last time I did this by mistake I ended up having to get onto the *peer*
> via OOB, I can't remember if killing bgpd was enough, I have a feeling I
> had to force a reboot..

I was the other one who happened to fat finger this recently (and suggest to 
remi to do this).

I think i also rebootet the router as fast as i could, because i wanted to
rid myself of more dropped packets.



Re: prevent bgpd from starting when control socket already used

2018-11-12 Thread Remi Locherer
On Mon, Nov 12, 2018 at 08:12:37AM +0100, Claudio Jeker wrote:
> On Sun, Nov 11, 2018 at 04:40:54PM -0700, Theo de Raadt wrote:
> > Makes sense to me, I suppose.
> > 
> > Isn't another approach to swap the opening of the sockets?
> > 
> > Or why does failure to control :179 sockets not stop startup?
> 
> Because you can change them during reload and if they are incorrect for a
> reason it would kill running bgpd with all sessions.
> Having bgpd die on a config reload is for sure something we should not
> introduce.

Exactly!
It would be easy to change log_warn to fatalx. But think of this scenario:
Change "listen on" in bgpd.conf with a typo in the address and reload bgpd.

I think it's not desirable that bgpd exits and turns down all session in this
situation.

>  
> > > I heard from two devs that started a 2nd bgpd by accident (forgot -n for
> > > a config check) which then caused downtime.
> > > 
> > > Below diff adds a check to bgpd similar to the one we have now in ospfd 
> > > and
> > > ospf6d: if another process is listening on the control socket bgpd exits.
> > > 
> > > The situation is a bit different than in the ospfd case:
> > > - The socket path is defined in the config and not with an option.
> > > - The config is parsed relatively late (after daemon).
> > > 
> > > Because of that rcctl would not detect that bgpd exits. But this only 
> > > applies
> > > when the first bgpd was started without using the rc scripts.
> > > 
> > > bgpd already detects that it can not bind the port if another instance is
> > > running. Adding a fatal there can have an effect when reloading after a
> > > config change. Loosing all sessions is not nice.
> > > 
> > > Of course if the socket path is changed in the config and then reloaded 
> > > the
> > > same can happen. I think this is less likely than changing a listen 
> > > address.
> > > 
> > > I think it is sufficient to add this check only to the control socket and
> > > not to the restricted socket.
> > > 
> > > The diff in action:
> > > 
> > > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv 
> > > startup
> > > session engine ready
> > > rereading config
> > > cannot bind to 0.0.0.0:179: Address already in use
> > > cannot bind to [::]:179: Address already in use
> > > control_check: socket in use
> > > fatal in bgpd: control socket check failed
> > > rib_new: Adj-RIB-In -> 0
> > > rib_new: Adj-RIB-Out -> 1
> > > route decision engine ready
> > > peer closed imsg connection
> > > fatal in RDE: Lost connection to parent
> > > peer closed imsg connection
> > > SE: Lost connection to parent
> > > session engine exiting
> > > #
> > > 
> > > Comments? OKs?
> > > 
> > > Remi
> > > 
> > > 
> > > cvs diff: Diffing .
> > > Index: bgpd.c
> > > ===
> > > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
> > > retrieving revision 1.204
> > > diff -u -p -r1.204 bgpd.c
> > > --- bgpd.c29 Sep 2018 08:11:11 -  1.204
> > > +++ bgpd.c11 Nov 2018 20:33:32 -
> > > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
> > >   }
> > >   if ((cname = strdup(conf->csock)) == NULL)
> > >   fatal("strdup");
> > > + if ((control_check(cname)) == -1)
> > > + fatalx("control socket check failed");
> > >   if ((fd = control_init(0, cname)) == -1)
> > >   fatalx("control socket setup failed");
> > >   if (control_listen(fd) == -1)
> > > Index: control.c
> > > ===
> > > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> > > retrieving revision 1.90
> > > diff -u -p -r1.90 control.c
> > > --- control.c 11 Aug 2017 16:02:53 -  1.90
> > > +++ control.c 10 Nov 2018 20:52:46 -
> > > @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *,
> > >  ssize_t   imsg_read_nofd(struct imsgbuf *);
> > >  
> > >  int
> > > +control_check(char *path)
> > > +{
> > > + struct sockaddr_un   sun;
> > > + int  fd;
> > > +
> > > + bzero(&sun, sizeof(sun));
> > > + sun.sun_family = AF_UNIX;
> > > + strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
> > > +
> > > + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
> > > + log_warn("control_check: socket check");
> > > + return (-1);
> > > + }
> > > +
> > > + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
> > > + log_warnx("control_check: socket in use");
> > > + close(fd);
> > > + return (-1);
> > > + }
> > > +
> > > + close(fd);
> > > +
> > > + return (0);
> > > +}
> > > +
> > > +int
> > >  control_init(int restricted, char *path)
> > >  {
> > >   struct sockaddr_un   sun;
> > > Index: session.h
> > > ===
> > > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> > > retrieving revision 1.125
> > > diff -u -p -r1.125 session.h
> > > --- session.h 24 Oct 2

Re: prevent bgpd from starting when control socket already used

2018-11-11 Thread Claudio Jeker
On Sun, Nov 11, 2018 at 04:40:54PM -0700, Theo de Raadt wrote:
> Makes sense to me, I suppose.
> 
> Isn't another approach to swap the opening of the sockets?
> 
> Or why does failure to control :179 sockets not stop startup?

Because you can change them during reload and if they are incorrect for a
reason it would kill running bgpd with all sessions.
Having bgpd die on a config reload is for sure something we should not
introduce.
 
> > I heard from two devs that started a 2nd bgpd by accident (forgot -n for
> > a config check) which then caused downtime.
> > 
> > Below diff adds a check to bgpd similar to the one we have now in ospfd and
> > ospf6d: if another process is listening on the control socket bgpd exits.
> > 
> > The situation is a bit different than in the ospfd case:
> > - The socket path is defined in the config and not with an option.
> > - The config is parsed relatively late (after daemon).
> > 
> > Because of that rcctl would not detect that bgpd exits. But this only 
> > applies
> > when the first bgpd was started without using the rc scripts.
> > 
> > bgpd already detects that it can not bind the port if another instance is
> > running. Adding a fatal there can have an effect when reloading after a
> > config change. Loosing all sessions is not nice.
> > 
> > Of course if the socket path is changed in the config and then reloaded the
> > same can happen. I think this is less likely than changing a listen address.
> > 
> > I think it is sufficient to add this check only to the control socket and
> > not to the restricted socket.
> > 
> > The diff in action:
> > 
> > # /usr/src/usr.sbin/bgpd/obj/bgpd -dv 
> > startup
> > session engine ready
> > rereading config
> > cannot bind to 0.0.0.0:179: Address already in use
> > cannot bind to [::]:179: Address already in use
> > control_check: socket in use
> > fatal in bgpd: control socket check failed
> > rib_new: Adj-RIB-In -> 0
> > rib_new: Adj-RIB-Out -> 1
> > route decision engine ready
> > peer closed imsg connection
> > fatal in RDE: Lost connection to parent
> > peer closed imsg connection
> > SE: Lost connection to parent
> > session engine exiting
> > #
> > 
> > Comments? OKs?
> > 
> > Remi
> > 
> > 
> > cvs diff: Diffing .
> > Index: bgpd.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
> > retrieving revision 1.204
> > diff -u -p -r1.204 bgpd.c
> > --- bgpd.c  29 Sep 2018 08:11:11 -  1.204
> > +++ bgpd.c  11 Nov 2018 20:33:32 -
> > @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
> > }
> > if ((cname = strdup(conf->csock)) == NULL)
> > fatal("strdup");
> > +   if ((control_check(cname)) == -1)
> > +   fatalx("control socket check failed");
> > if ((fd = control_init(0, cname)) == -1)
> > fatalx("control socket setup failed");
> > if (control_listen(fd) == -1)
> > Index: control.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> > retrieving revision 1.90
> > diff -u -p -r1.90 control.c
> > --- control.c   11 Aug 2017 16:02:53 -  1.90
> > +++ control.c   10 Nov 2018 20:52:46 -
> > @@ -38,6 +38,32 @@ void  control_result(struct ctl_conn *,
> >  ssize_t imsg_read_nofd(struct imsgbuf *);
> >  
> >  int
> > +control_check(char *path)
> > +{
> > +   struct sockaddr_un   sun;
> > +   int  fd;
> > +
> > +   bzero(&sun, sizeof(sun));
> > +   sun.sun_family = AF_UNIX;
> > +   strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
> > +
> > +   if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
> > +   log_warn("control_check: socket check");
> > +   return (-1);
> > +   }
> > +
> > +   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
> > +   log_warnx("control_check: socket in use");
> > +   close(fd);
> > +   return (-1);
> > +   }
> > +
> > +   close(fd);
> > +
> > +   return (0);
> > +}
> > +
> > +int
> >  control_init(int restricted, char *path)
> >  {
> > struct sockaddr_un   sun;
> > Index: session.h
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 session.h
> > --- session.h   24 Oct 2018 08:26:37 -  1.125
> > +++ session.h   10 Nov 2018 21:53:04 -
> > @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf
> >  int get_mpe_label(struct rdomain *);
> >  
> >  /* control.c */
> > +intcontrol_check(char *);
> >  intcontrol_init(int, char *);
> >  intcontrol_listen(int);
> >  void   control_shutdown(int);
> > 
> 

-- 
:wq Claudio



Re: prevent bgpd from starting when control socket already used

2018-11-11 Thread Theo de Raadt
Makes sense to me, I suppose.

Isn't another approach to swap the opening of the sockets?

Or why does failure to control :179 sockets not stop startup?

> I heard from two devs that started a 2nd bgpd by accident (forgot -n for
> a config check) which then caused downtime.
> 
> Below diff adds a check to bgpd similar to the one we have now in ospfd and
> ospf6d: if another process is listening on the control socket bgpd exits.
> 
> The situation is a bit different than in the ospfd case:
> - The socket path is defined in the config and not with an option.
> - The config is parsed relatively late (after daemon).
> 
> Because of that rcctl would not detect that bgpd exits. But this only applies
> when the first bgpd was started without using the rc scripts.
> 
> bgpd already detects that it can not bind the port if another instance is
> running. Adding a fatal there can have an effect when reloading after a
> config change. Loosing all sessions is not nice.
> 
> Of course if the socket path is changed in the config and then reloaded the
> same can happen. I think this is less likely than changing a listen address.
> 
> I think it is sufficient to add this check only to the control socket and
> not to the restricted socket.
> 
> The diff in action:
> 
> # /usr/src/usr.sbin/bgpd/obj/bgpd -dv 
> startup
> session engine ready
> rereading config
> cannot bind to 0.0.0.0:179: Address already in use
> cannot bind to [::]:179: Address already in use
> control_check: socket in use
> fatal in bgpd: control socket check failed
> rib_new: Adj-RIB-In -> 0
> rib_new: Adj-RIB-Out -> 1
> route decision engine ready
> peer closed imsg connection
> fatal in RDE: Lost connection to parent
> peer closed imsg connection
> SE: Lost connection to parent
> session engine exiting
> #
> 
> Comments? OKs?
> 
> Remi
> 
> 
> cvs diff: Diffing .
> Index: bgpd.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
> retrieving revision 1.204
> diff -u -p -r1.204 bgpd.c
> --- bgpd.c29 Sep 2018 08:11:11 -  1.204
> +++ bgpd.c11 Nov 2018 20:33:32 -
> @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
>   }
>   if ((cname = strdup(conf->csock)) == NULL)
>   fatal("strdup");
> + if ((control_check(cname)) == -1)
> + fatalx("control socket check failed");
>   if ((fd = control_init(0, cname)) == -1)
>   fatalx("control socket setup failed");
>   if (control_listen(fd) == -1)
> Index: control.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 control.c
> --- control.c 11 Aug 2017 16:02:53 -  1.90
> +++ control.c 10 Nov 2018 20:52:46 -
> @@ -38,6 +38,32 @@ voidcontrol_result(struct ctl_conn *,
>  ssize_t   imsg_read_nofd(struct imsgbuf *);
>  
>  int
> +control_check(char *path)
> +{
> + struct sockaddr_un   sun;
> + int  fd;
> +
> + bzero(&sun, sizeof(sun));
> + sun.sun_family = AF_UNIX;
> + strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
> +
> + if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
> + log_warn("control_check: socket check");
> + return (-1);
> + }
> +
> + if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
> + log_warnx("control_check: socket in use");
> + close(fd);
> + return (-1);
> + }
> +
> + close(fd);
> +
> + return (0);
> +}
> +
> +int
>  control_init(int restricted, char *path)
>  {
>   struct sockaddr_un   sun;
> Index: session.h
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 session.h
> --- session.h 24 Oct 2018 08:26:37 -  1.125
> +++ session.h 10 Nov 2018 21:53:04 -
> @@ -250,6 +250,7 @@ void   prepare_listeners(struct bgpd_conf
>  int   get_mpe_label(struct rdomain *);
>  
>  /* control.c */
> +int  control_check(char *);
>  int  control_init(int, char *);
>  int  control_listen(int);
>  void control_shutdown(int);
> 



Re: prevent bgpd from starting when control socket already used

2018-11-11 Thread Stuart Henderson
On 2018/11/11 22:45, Job Snijders wrote:
> Shouldnt we already bomb out at the following?
> 
> cannot bind to 0.0.0.0:179: Address already in use
> cannot bind to [::]:179: Address already in use
> 
> In any regard, I agree with the functionality proposed. No strong opinion
> on the diff itself.

Should: probably, yes, but we don't at present.

Last time I did this by mistake I ended up having to get onto the *peer*
via OOB, I can't remember if killing bgpd was enough, I have a feeling I
had to force a reboot..



Re: prevent bgpd from starting when control socket already used

2018-11-11 Thread Job Snijders
Shouldnt we already bomb out at the following?

cannot bind to 0.0.0.0:179: Address already in use
cannot bind to [::]:179: Address already in use

In any regard, I agree with the functionality proposed. No strong opinion
on the diff itself.

Kind regards,

Job

On Sun, Nov 11, 2018 at 22:35 Remi Locherer  wrote:

> Hi,
>
> I heard from two devs that started a 2nd bgpd by accident (forgot -n for
> a config check) which then caused downtime.
>
> Below diff adds a check to bgpd similar to the one we have now in ospfd and
> ospf6d: if another process is listening on the control socket bgpd exits.
>
> The situation is a bit different than in the ospfd case:
> - The socket path is defined in the config and not with an option.
> - The config is parsed relatively late (after daemon).
>
> Because of that rcctl would not detect that bgpd exits. But this only
> applies
> when the first bgpd was started without using the rc scripts.
>
> bgpd already detects that it can not bind the port if another instance is
> running. Adding a fatal there can have an effect when reloading after a
> config change. Loosing all sessions is not nice.
>
> Of course if the socket path is changed in the config and then reloaded the
> same can happen. I think this is less likely than changing a listen
> address.
>
> I think it is sufficient to add this check only to the control socket and
> not to the restricted socket.
>
> The diff in action:
>
> # /usr/src/usr.sbin/bgpd/obj/bgpd -dv
> startup
> session engine ready
> rereading config
> cannot bind to 0.0.0.0:179: Address already in use
> cannot bind to [::]:179: Address already in use
> control_check: socket in use
> fatal in bgpd: control socket check failed
> rib_new: Adj-RIB-In -> 0
> rib_new: Adj-RIB-Out -> 1
> route decision engine ready
> peer closed imsg connection
> fatal in RDE: Lost connection to parent
> peer closed imsg connection
> SE: Lost connection to parent
> session engine exiting
> #
>
> Comments? OKs?
>
> Remi
>
>
> cvs diff: Diffing .
> Index: bgpd.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
> retrieving revision 1.204
> diff -u -p -r1.204 bgpd.c
> --- bgpd.c  29 Sep 2018 08:11:11 -  1.204
> +++ bgpd.c  11 Nov 2018 20:33:32 -
> @@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
> }
> if ((cname = strdup(conf->csock)) == NULL)
> fatal("strdup");
> +   if ((control_check(cname)) == -1)
> +   fatalx("control socket check failed");
> if ((fd = control_init(0, cname)) == -1)
> fatalx("control socket setup failed");
> if (control_listen(fd) == -1)
> Index: control.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> retrieving revision 1.90
> diff -u -p -r1.90 control.c
> --- control.c   11 Aug 2017 16:02:53 -  1.90
> +++ control.c   10 Nov 2018 20:52:46 -
> @@ -38,6 +38,32 @@ void  control_result(struct ctl_conn *,
>  ssize_t imsg_read_nofd(struct imsgbuf *);
>
>  int
> +control_check(char *path)
> +{
> +   struct sockaddr_un   sun;
> +   int  fd;
> +
> +   bzero(&sun, sizeof(sun));
> +   sun.sun_family = AF_UNIX;
> +   strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
> +
> +   if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
> +   log_warn("control_check: socket check");
> +   return (-1);
> +   }
> +
> +   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
> +   log_warnx("control_check: socket in use");
> +   close(fd);
> +   return (-1);
> +   }
> +
> +   close(fd);
> +
> +   return (0);
> +}
> +
> +int
>  control_init(int restricted, char *path)
>  {
> struct sockaddr_un   sun;
> Index: session.h
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 session.h
> --- session.h   24 Oct 2018 08:26:37 -  1.125
> +++ session.h   10 Nov 2018 21:53:04 -
> @@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf
>  int get_mpe_label(struct rdomain *);
>
>  /* control.c */
> +intcontrol_check(char *);
>  intcontrol_init(int, char *);
>  intcontrol_listen(int);
>  void   control_shutdown(int);
>
>


prevent bgpd from starting when control socket already used

2018-11-11 Thread Remi Locherer
Hi,

I heard from two devs that started a 2nd bgpd by accident (forgot -n for
a config check) which then caused downtime.

Below diff adds a check to bgpd similar to the one we have now in ospfd and
ospf6d: if another process is listening on the control socket bgpd exits.

The situation is a bit different than in the ospfd case:
- The socket path is defined in the config and not with an option.
- The config is parsed relatively late (after daemon).

Because of that rcctl would not detect that bgpd exits. But this only applies
when the first bgpd was started without using the rc scripts.

bgpd already detects that it can not bind the port if another instance is
running. Adding a fatal there can have an effect when reloading after a
config change. Loosing all sessions is not nice.

Of course if the socket path is changed in the config and then reloaded the
same can happen. I think this is less likely than changing a listen address.

I think it is sufficient to add this check only to the control socket and
not to the restricted socket.

The diff in action:

# /usr/src/usr.sbin/bgpd/obj/bgpd -dv 
startup
session engine ready
rereading config
cannot bind to 0.0.0.0:179: Address already in use
cannot bind to [::]:179: Address already in use
control_check: socket in use
fatal in bgpd: control socket check failed
rib_new: Adj-RIB-In -> 0
rib_new: Adj-RIB-Out -> 1
route decision engine ready
peer closed imsg connection
fatal in RDE: Lost connection to parent
peer closed imsg connection
SE: Lost connection to parent
session engine exiting
#

Comments? OKs?

Remi


cvs diff: Diffing .
Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.204
diff -u -p -r1.204 bgpd.c
--- bgpd.c  29 Sep 2018 08:11:11 -  1.204
+++ bgpd.c  11 Nov 2018 20:33:32 -
@@ -939,6 +939,8 @@ control_setup(struct bgpd_config *conf)
}
if ((cname = strdup(conf->csock)) == NULL)
fatal("strdup");
+   if ((control_check(cname)) == -1)
+   fatalx("control socket check failed");
if ((fd = control_init(0, cname)) == -1)
fatalx("control socket setup failed");
if (control_listen(fd) == -1)
Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.90
diff -u -p -r1.90 control.c
--- control.c   11 Aug 2017 16:02:53 -  1.90
+++ control.c   10 Nov 2018 20:52:46 -
@@ -38,6 +38,32 @@ void  control_result(struct ctl_conn *,
 ssize_t imsg_read_nofd(struct imsgbuf *);
 
 int
+control_check(char *path)
+{
+   struct sockaddr_un   sun;
+   int  fd;
+
+   bzero(&sun, sizeof(sun));
+   sun.sun_family = AF_UNIX;
+   strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
+
+   if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
+   log_warn("control_check: socket check");
+   return (-1);
+   }
+
+   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
+   log_warnx("control_check: socket in use");
+   close(fd);
+   return (-1);
+   }
+
+   close(fd);
+
+   return (0);
+}
+
+int
 control_init(int restricted, char *path)
 {
struct sockaddr_un   sun;
Index: session.h
===
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.125
diff -u -p -r1.125 session.h
--- session.h   24 Oct 2018 08:26:37 -  1.125
+++ session.h   10 Nov 2018 21:53:04 -
@@ -250,6 +250,7 @@ void prepare_listeners(struct bgpd_conf
 int get_mpe_label(struct rdomain *);
 
 /* control.c */
+intcontrol_check(char *);
 intcontrol_init(int, char *);
 intcontrol_listen(int);
 void   control_shutdown(int);