Re: vmd: fix i8259 race condition, vioblk hang

2023-08-29 Thread Florian Riehm
Hi,

I tested your patch and it is a great improvement for me.
My vms are hanging reproducible without your fix.

Thank you

Florian





Am Di., 29. Aug. 2023 um 18:01 Uhr schrieb Dave Voutila :

>
> Dave Voutila  writes:
>
> > mbuhl@ found an issue where the emulated virtio block device can
> > hang. The tl;dr: the emulated pic never injects an interrupt and the
> > vioblk(4) driver in the guest starves, waiting to be told to check the
> > virtqueue.
> >
> > Flipping the bit in the i8259 using gdb causes the spice to flow once
> > again.
> >
> > This diff fixes two related things (so could be committed in 2 parts):
> >
> > 1. the irq deassert on a virtio pci interrupt status register read
> >should occur synchronously by the vcpu thread in the vm and not
> >async.
> >
> > 2. always raise the irr bit in the pic, regardless of mask. The mask is
> >used when finding an irq to ack and shouldn't be used to determine if
> >the irr bit is raised. AFAICT from the old i8259 data sheets, masking
> >should have no effect on if the irr bit is set.
> >
> > This is holding up another diff I want to share that reduces latency in
> > interrupts, but it's shown to make this i8259 race condition worse, so
> > I'd like to give folks a few days to check if the below diff causes
> > regressions. Once this is committed, I'll feel safe sharing the latency
> > diff with tech@.
> >
> > Any ok's pending a few days for folks to test?
>
> Still looking for ok's or test reports (other than from mbuhl@, of
> course.)
>
> >
> > -dv
> >
> >
> > diff refs/heads/master cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> > commit - ad1cd1152fddbf55189657a2df9f2468409698ab
> > commit + cdba90a89ee6e77b52c2b6955d36a260a23a3b85
> > blob - f7862f5e9d17f96f5260358271fab8f253b26505
> > blob + b6891c52c824d7b8c69e67e5323772152b1ed844
> > --- usr.sbin/vmd/i8259.c
> > +++ usr.sbin/vmd/i8259.c
> > @@ -222,20 +222,16 @@ i8259_assert_irq(uint8_t irq)
> >  {
> >   mutex_lock(_mtx);
> >   if (irq <= 7) {
> > - if (!ISSET(pics[MASTER].imr, 1 << irq)) {
> > - SET(pics[MASTER].irr, 1 << irq);
> > - pics[MASTER].asserted = 1;
> > - }
> > + SET(pics[MASTER].irr, 1 << irq);
> > + pics[MASTER].asserted = 1;
> >   } else {
> >   irq -= 8;
> > - if (!ISSET(pics[SLAVE].imr, 1 << irq)) {
> > - SET(pics[SLAVE].irr, 1 << irq);
> > - pics[SLAVE].asserted = 1;
> > + SET(pics[SLAVE].irr, 1 << irq);
> > + pics[SLAVE].asserted = 1;
> >
> > - /* Assert cascade IRQ on master PIC */
> > - SET(pics[MASTER].irr, 1 << 2);
> > - pics[MASTER].asserted = 1;
> > - }
> > + /* Assert cascade IRQ on master PIC */
> > + SET(pics[MASTER].irr, 1 << 2);
> > + pics[MASTER].asserted = 1;
> >   }
> >   mutex_unlock(_mtx);
> >  }
> > blob - 7bc76c4daed427dae82688452ec21985be679bc4
> > blob + 4d321fd4ff23514ffa7a4bee0eeb7a9324020d80
> > --- usr.sbin/vmd/vioblk.c
> > +++ usr.sbin/vmd/vioblk.c
> > @@ -39,7 +39,8 @@ static uint32_t handle_io_read(struct viodev_msg *, st
> >  extern struct vmd_vm *current_vm;
> >
> >  static const char *disk_type(int);
> > -static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev
> *);
> > +static uint32_t handle_io_read(struct viodev_msg *, struct virtio_dev *,
> > +int8_t *);
> >  static int handle_io_write(struct viodev_msg *, struct virtio_dev *);
> >  void vioblk_notify_rx(struct vioblk_dev *);
> >  int vioblk_notifyq(struct vioblk_dev *);
> > @@ -723,6 +724,7 @@ handle_sync_io(int fd, short event, void *arg)
> >   struct viodev_msg msg;
> >   struct imsg imsg;
> >   ssize_t n;
> > + int8_t intr = INTR_STATE_NOOP;
> >
> >   if (event & EV_READ) {
> >   if ((n = imsg_read(ibuf)) == -1 && errno != EAGAIN)
> > @@ -770,8 +772,9 @@ handle_sync_io(int fd, short event, void *arg)
> >   }
> >   case VIODEV_MSG_IO_READ:
> >   /* Read IO: make sure to send a reply */
> > - msg.data = handle_io_read(, dev);
> > + msg.data = handle_io_read(, dev, );
> >   msg.data_valid = 1;
> > + msg.state = intr;
> >   imsg_compose_event(iev, IMSG_DEVOP_MSG, 0, 0, -1,
> ,
> >   sizeof(msg));
> >   break;
> > @@ -844,7 +847,7 @@ handle_io_read(struct viodev_msg *msg, struct
> virtio_d
> >  }
> >
> >  static uint32_t
> > -handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev)
> > +handle_io_read(struct viodev_msg *msg, struct virtio_dev *dev, int8_t
> *intr)
> >  {
> >   struct vioblk_dev *vioblk = >vioblk;
> >   uint8_t sz = msg->io_sz;
> > @@ -1001,7 +1004,8 @@ handle_io_read(struct viodev_msg *msg, struct
> virtio_d

Re: ospf6d: depend on

2018-07-11 Thread Florian Riehm

Hi,

successfully tested. I like the feature!

Some (mostly cosmetic) comments inline.


Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/ospfe.c,v
retrieving revision 1.51
diff -u -p -r1.51 ospfe.c
--- ospfe.c 12 Aug 2017 16:27:50 -  1.51
+++ ospfe.c 11 Jul 2018 11:29:44 -
@@ -295,9 +295,27 @@ ospfe_dispatch_main(int fd, short event,
fatalx("IFINFO imsg with wrong len");
ifp = imsg.data;
 
+			LIST_FOREACH(area, >area_list, entry) {

+   LIST_FOREACH(i, >iface_list, entry) {
+   if (strcmp(i->dependon,
+   ifp->name) == 0) {
+   log_warnx("interface %s"
+   " changed state, %s"
+   " depends on it",
+   ifp->name, i->name);
+   i->depend_ok =
+   ifstate_is_up(ifp);
+   if (ifstate_is_up(i))
+   orig_rtr_lsa(i);
+   }
+   }
+   }
+
+   if (!(ifp->cflags & F_IFACE_CONFIGURED))
+   break;
iface = if_find(ifp->ifindex);
if (iface == NULL)
-   fatalx("interface lost in ospfe");
+   break;
You added the F_IFACE_CONFIGURED check in your second version of the diff,
because I found a bug. Is it still necessary to remove fatalx("interface lost in
ospfe") ?
 
 			wasvalid = (iface->flags & IFF_UP) &&

LINK_STATE_IS_UP(iface->linkstate);
@@ -834,7 +852,11 @@ orig_rtr_lsa_area(struct area *area)
log_debug("orig_rtr_lsa: point-to-point, "
"interface %s", iface->name);
rtr_link.type = LINK_TYPE_POINTTOPOINT;
-   rtr_link.metric = htons(iface->metric);
+   if (iface->dependon[0] != '\0' &&
+   iface->depend_ok ==0)
Whitespace before 0
+   rtr_link.metric = MAX_METRIC;
+   else
+   rtr_link.metric = htons(iface->metric);
rtr_link.iface_id = htonl(iface->ifindex);
rtr_link.nbr_iface_id = htonl(nbr->iface_id);
rtr_link.nbr_rtr_id = nbr->id.s_addr;
@@ -859,7 +881,12 @@ orig_rtr_lsa_area(struct area *area)
"interface %s", iface->name);
 
 	rtr_link.type = LINK_TYPE_TRANSIT_NET;

-   rtr_link.metric = htons(iface->metric);
+   if (iface->dependon[0] != '\0' &&
+   iface->depend_ok ==0)
Whitespace before 0
+   rtr_link.metric = MAX_METRIC;
+   else
+   rtr_link.metric =
+   htons(iface->metric);
rtr_link.iface_id = 
htonl(iface->ifindex);
rtr_link.nbr_iface_id = 
htonl(iface->dr->iface_id);
rtr_link.nbr_rtr_id = 
iface->dr->id.s_addr;
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/rde.c,v
retrieving revision 1.76
diff -u -p -r1.76 rde.c
--- rde.c   12 Jun 2018 20:12:36 -  1.76
+++ rde.c   10 Jul 2018 09:52:42 -
@@ -706,6 +706,25 @@ rde_dispatch_parent(int fd, short event,
fatalx("IFINFO imsg with wrong len");
 
 			ifp = imsg.data;

+
+   LIST_FOREACH(area, >area_list, entry) {
+   orig_lsa = 0;
+   LIST_FOREACH(i, >iface_list, entry) {
+   if (strcmp(i->dependon,
+   ifp->name) == 0) {
+   i->depend_ok =
+   ifstate_is_up(ifp);
+   if (ifstate_is_up(i)) {
+   orig_lsa = 1;
+   }
unnecessary curly braces



Re: ospfd: change control socket to ospfd.sock.

2018-07-11 Thread Florian Riehm

Please initialize *sockname with NULL, then OK .

On 07/11/18 00:33, Remi Locherer wrote:

Hi,

This changes the name of the ospfd control socket to include the rdomain.
It's similar to what bgpd does.

OK?

Remi


Index: ospfd/ospfd.c
===
RCS file: /cvs/src/usr.sbin/ospfd/ospfd.c,v
retrieving revision 1.98
diff -u -p -r1.98 ospfd.c
--- ospfd/ospfd.c   9 Jul 2018 13:19:46 -   1.98
+++ ospfd/ospfd.c   10 Jul 2018 22:24:02 -
@@ -119,7 +119,6 @@ main(int argc, char *argv[])
  
  	conffile = CONF_FILE;

ospfd_process = PROC_MAIN;
-   sockname = OSPFD_SOCKET;
  
  	log_init(1, LOG_DAEMON);	/* log to stderr until daemonized */

log_procinit(log_procnames[ospfd_process]);
@@ -185,6 +184,13 @@ main(int argc, char *argv[])
kif_clear();
exit(1);
}
+
+   if (sockname == NULL) {
+   if (asprintf(, "%s.%d", OSPFD_SOCKET,
+   ospfd_conf->rdomain) == -1)
+   err(1, "asprintf");
+   }
+
ospfd_conf->csock = sockname;
  
  	if (ospfd_conf->opts & OSPFD_OPT_NOACTION) {

Index: ospfctl/ospfctl.c
===
RCS file: /cvs/src/usr.sbin/ospfctl/ospfctl.c,v
retrieving revision 1.64
diff -u -p -r1.64 ospfctl.c
--- ospfctl/ospfctl.c   5 Dec 2016 22:39:25 -   1.64
+++ ospfctl/ospfctl.c   10 Jul 2018 21:32:27 -
@@ -89,13 +89,16 @@ main(int argc, char *argv[])
struct parse_result *res;
struct imsg  imsg;
unsigned int ifidx = 0;
-   int  ctl_sock;
+   int  ctl_sock, r;
int  done = 0;
int  n, verbose = 0;
int  ch;
char*sockname;
  
-	sockname = OSPFD_SOCKET;

+   r = getrtable();
+   if (asprintf(, "%s.%d", OSPFD_SOCKET, r) == -1)
+   err(1, "asprintf");
+
while ((ch = getopt(argc, argv, "s:")) != -1) {
switch (ch) {
case 's':





pledge ospf6d

2018-07-10 Thread Florian Riehm
Hi,

this adds pledge to the ospf6d route decision engine and the ospf engine.
It is compared to the ospfd quite simple, since ospf6d does not support reload,
rdomains and kif-interfaces.

ok?

friehm

Index: ospfe.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/usr.sbin/ospf6d/ospfe.c,v
retrieving revision 1.51
diff -u -p -r1.51 ospfe.c
--- ospfe.c 12 Aug 2017 16:27:50 -  1.51
+++ ospfe.c 10 Jul 2018 15:14:35 -
@@ -133,6 +133,9 @@ ospfe(struct ospfd_conf *xconf, int pipe
setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
fatal("can't drop privileges");
 
+   if (pledge("stdio inet mcast", NULL) == -1)
+   fatal("pledge");
+
event_init();
nbr_init(NBR_HASHSIZE);
lsa_cache_init(LSA_HASHSIZE);
Index: rde.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/usr.sbin/ospf6d/rde.c,v
retrieving revision 1.76
diff -u -p -r1.76 rde.c
--- rde.c   12 Jun 2018 20:12:36 -  1.76
+++ rde.c   10 Jul 2018 15:14:39 -
@@ -156,6 +156,9 @@ rde(struct ospfd_conf *xconf, int pipe_p
setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
fatal("can't drop privileges");
 
+   if (pledge("stdio", NULL) == -1)
+   fatal("pledge");
+
event_init();
rde_nbr_init(NBR_HASHSIZE);
lsa_init(_tree);



ospf6d: Do not try to change cloning routes into gateway routes

2018-07-10 Thread Florian Riehm
Hi,

If intra area prefixes move from one router to another router, cloning routes
may become gateway routes and contrary. The kernel does not allow to change the
flags RTF_GATEWAY / RTF_CLONING in RTM_CHANGE messages, but ospf6d tries this
anyway. The result is a broken route.
Instead of modifying such routes I remove the old route and insert a new one.

Thanks to Raimund Specht for reporting the problem and testing the fix.

ok?

friehm

Index: kroute.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.54
diff -u -p -r1.54 kroute.c
--- kroute.c8 Feb 2018 21:37:36 -   1.54
+++ kroute.c9 Jul 2018 13:53:44 -
@@ -241,9 +241,28 @@ kr_change(struct kroute *kroute, int krc
kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
 
kr = kroute_find(>prefix, kroute->prefixlen, RTP_OSPF);
-   if (kr != NULL && kr->next == NULL && krcount == 1)
-   /* single path OSPF route */
-   action = RTM_CHANGE;
+   if (kr != NULL && kr->next == NULL && krcount == 1) {
+   /*
+* single path OSPF route.
+* The kernel does not allow to change a gateway route to a
+* cloning route or contrary. In this case remove and add the
+* route, otherwise change the existing one.
+*/
+   if ((IN6_IS_ADDR_UNSPECIFIED(>nexthop) &&
+   !IN6_IS_ADDR_UNSPECIFIED(>r.nexthop)) ||
+   (!IN6_IS_ADDR_UNSPECIFIED(>nexthop) &&
+   IN6_IS_ADDR_UNSPECIFIED(>r.nexthop))) {
+   if (kr_delete_fib(kr) == 0)
+   kr = NULL;
+   else {
+   log_warn("kr_change: failed to remove route: "
+   "%s/%d", log_in6addr(>r.prefix),
+   kr->r.prefixlen);
+   return (-1);
+   }
+   } else
+   action = RTM_CHANGE;
+   }
 
return (kr_change_fib(kr, kroute, krcount, action));
 }



ospfd: track gateway addresses of cloning routes

2018-07-10 Thread Florian Riehm
Hi,

since we use multiple cloning routes (mpath) if more than one ip address
exists in the same network, the routes are distinguished by their gateway
address, which is the associated interface address.
The ospfd has to track the gateway addresses so that kroute_matchgw() is able to
find the correct routes.

Example:
/etc/hostname.em3:
rtlabel EXPORT
[...]

/etc/ospfd.conf:
[...]
redistribute rtlabel EXPORT

$ ifconfig em3 inet 10.0.0.1 prefixlen 24 alias
$ ifconfig em3 inet 10.0.0.2 prefixlen 24 alias
$ ifconfig em3 inet 10.0.0.2 delete

Now 10.0.0.1 still exists and 10.0.0.0/24 should be exported via ospfd, but
since ospfd messed up the routes, it has removed the prefix after 10.0.0.2 was
gone.

ok?

friehm

Index: kroute.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/usr.sbin/ospfd/kroute.c,v
retrieving revision 1.110
diff -u -p -r1.110 kroute.c
--- kroute.c25 Jun 2018 22:16:53 -  1.110
+++ kroute.c10 Jul 2018 10:01:15 -
@@ -1451,7 +1451,6 @@ rtmsg_process(char *buf, size_t len)
case AF_INET:
if (rtm->rtm_flags & RTF_CONNECTED) {
flags |= F_CONNECTED;
-   break;
}
 
nexthop.s_addr = ((struct



Remove DELAY(1000) from ip_carp.c

2018-07-10 Thread Florian Riehm
Hi,

Several people, including myself, asked why we need the DELAY(1000) in
netinet/ip_carp.c. It exists since the initial revision of carp(4).
Nobody can exactly explain why it was added and tests work fine without it.
I would like to remove it, since it blocks unlocking efforts of tb@ und mpi@.
If contrary to expectations problems show up we should think about a better
solution.

Thanks to Raimund Specht for testing.

ok?

friehm

Index: netinet/ip_carp.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.332
diff -u -p -r1.332 ip_carp.c
--- netinet/ip_carp.c   21 May 2018 15:52:22 -  1.332
+++ netinet/ip_carp.c   8 Jul 2018 16:19:57 -
@@ -1277,7 +1277,6 @@ carp_send_arp(struct carp_softc *sc)
 
in = ifatoia(ifa)->ia_addr.sin_addr.s_addr;
arprequest(>sc_if, , , sc->sc_ac.ac_enaddr);
-   DELAY(1000);/* XXX */
}
 }
 
@@ -1298,7 +1297,6 @@ carp_send_na(struct carp_softc *sc)
nd6_na_output(>sc_if, , in6,
ND_NA_FLAG_OVERRIDE |
(ip6_forwarding ? ND_NA_FLAG_ROUTER : 0), 1, NULL);
-   DELAY(1000);/* XXX */
}
 }
 #endif /* INET6 */



Re: mpath cloning routes and cloned routes

2018-02-20 Thread Florian Riehm

On 02/19/18 11:01, Martin Pieuchot wrote:

On 14/02/18(Wed) 21:53, Florian Riehm wrote:

If we delete cloning routes, we also delete their cloned routes.
This doesn't make sense if we delete a multipath cloning route and may result in
broken gateway routes:


That's a bug!


# netstat -rn | grep 192.168.178
default192.168.178.1  UGS5 4939 -12 iwn0
192.168.178/24 192.168.178.52 UCPn   1   51 - 8 iwn0
192.168.178/24 192.168.178.53 UCPn   00 - 8 iwn0
192.168.178.1  34:31:c4:24:83:d4  UHLch  1  118 - 7 iwn0
192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3749 - 1 iwn0
192.168.178.53 a4:4e:31:38:70:7c  UHLl   00 - 1 iwn0
192.168.178.255192.168.178.52 UHPb   00 - 1 iwn0
192.168.178.255192.168.178.53 UHPb   00 - 1 iwn0
As you can see above, iwn0 has 192.168.178.52/24 and 192.168.178.53/24 assigned
and therefore we have 2 mpath cloning routes (P). Their is a cloned route to
192.168.178.1 with RTF_CACHED (h) to reach the default gateway.

# ifconfig iwn0 inet 192.168.178.53 delete
# netstat -rn | grep 192.168.178
default192.168.178.1  UGS5 4955 -12 iwn0
192.168.178/24 192.168.178.52 UCn0   51 - 8 iwn0
192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3754 - 1 iwn0
192.168.178.255192.168.178.52 UHb00 - 1 iwn0
Now 192.168.178.53/24 was deleted, therefore the cloned route to the gateway
(192.168.178.1) is also gone and the default route is 'broken':

# ping 8.8.8.8
# dmesg | tail
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information

I think there is no need to delete cloned routes as long as we don't delete
the last cloning route to a network.


Not flushing any RTF_CLONED is a too big hammer and will break setups
where the cloning routes are on two different interfaces.

A better fix is to not delete RTF_CACHED routes until their own parent
is going away.  Diff below does that and includes a regression test.
Does that work for you?  ok?


Successfully tested.

Thanks & ok

friehm



Index: sys/net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.371
diff -u -p -r1.371 route.c
--- sys/net/route.c 10 Feb 2018 09:17:56 -  1.371
+++ sys/net/route.c 19 Feb 2018 09:55:31 -
@@ -707,26 +707,31 @@ rtequal(struct rtentry *a, struct rtentr
  int
  rtflushclone1(struct rtentry *rt, void *arg, u_int id)
  {
-   struct rtentry *parent = arg;
+   struct rtentry *cloningrt = arg;
struct ifnet *ifp;
int error;
  
-	ifp = if_get(rt->rt_ifidx);

+   if (!ISSET(rt->rt_flags, RTF_CLONED))
+   return 0;
+
+   /* Cached route must stay alive as long as their parent are alive. */
+   if (ISSET(rt->rt_flags, RTF_CACHED) && (rt->rt_parent != cloningrt))
+   return 0;
  
+	if (!rtequal(rt->rt_parent, cloningrt))

+   return 0;
/*
 * This happens when an interface with a RTF_CLONING route is
 * being detached.  In this case it's safe to bail because all
 * the routes are being purged by rt_ifa_purge().
 */
+   ifp = if_get(rt->rt_ifidx);
if (ifp == NULL)
return 0;
  
-	if (ISSET(rt->rt_flags, RTF_CLONED) && rtequal(rt->rt_parent, parent)) {

-   error = rtdeletemsg(rt, ifp, id);
-   if (error == 0)
-   error = EAGAIN;
-   } else
-   error = 0;
+   error = rtdeletemsg(rt, ifp, id);
+   if (error == 0)
+   error = EAGAIN;
  
  	if_put(ifp);

return error;
Index: regress/sbin/route/Makefile
===
RCS file: /cvs/src/regress/sbin/route/Makefile,v
retrieving revision 1.27
diff -u -p -r1.27 Makefile
--- regress/sbin/route/Makefile 14 Feb 2018 08:42:22 -  1.27
+++ regress/sbin/route/Makefile 19 Feb 2018 09:45:43 -
@@ -376,7 +376,24 @@ rttest${n}:
${RCMD} add -net 192.168.67.128/25 130.102.71.67 -priority 3 -ifp vlan99
${RCMD} show -inet 2>&1 | \
sed -e "s,link\#[0-9 ]*U,link#  U," | \
-   cat > ${.CURDIR}/${.TARGET}.ok
+   diff -u ${.CURDIR}/${.TARGET}.ok /dev/stdin
+
+# Check that removing a RTF_CLONING route do not remove children from
+# another cloning route.
+n= 33
+RTTEST_TARGETS+:=rttest${n}
+rttest${n}:
+   # Use vether(4) because we need IFT_ETHER interfaces
+   # for the auto-magic RTF_CLONING routes.
+

mpath cloning routes and cloned routes

2018-02-14 Thread Florian Riehm
Hi,

If we delete cloning routes, we also delete their cloned routes.
This doesn't make sense if we delete a multipath cloning route and may result in
broken gateway routes:

# netstat -rn | grep 192.168.178
default192.168.178.1  UGS5 4939 -12 iwn0 
192.168.178/24 192.168.178.52 UCPn   1   51 - 8 iwn0 
192.168.178/24 192.168.178.53 UCPn   00 - 8 iwn0 
192.168.178.1  34:31:c4:24:83:d4  UHLch  1  118 - 7 iwn0 
192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3749 - 1 iwn0 
192.168.178.53 a4:4e:31:38:70:7c  UHLl   00 - 1 iwn0 
192.168.178.255192.168.178.52 UHPb   00 - 1 iwn0 
192.168.178.255192.168.178.53 UHPb   00 - 1 iwn0 
As you can see above, iwn0 has 192.168.178.52/24 and 192.168.178.53/24 assigned
and therefore we have 2 mpath cloning routes (P). Their is a cloned route to
192.168.178.1 with RTF_CACHED (h) to reach the default gateway.

# ifconfig iwn0 inet 192.168.178.53 delete
# netstat -rn | grep 192.168.178
default192.168.178.1  UGS5 4955 -12 iwn0 
192.168.178/24 192.168.178.52 UCn0   51 - 8 iwn0 
192.168.178.52 a4:4e:31:38:70:7c  UHLl   0 3754 - 1 iwn0 
192.168.178.255192.168.178.52 UHb00 - 1 iwn0 
Now 192.168.178.53/24 was deleted, therefore the cloned route to the gateway
(192.168.178.1) is also gone and the default route is 'broken':

# ping 8.8.8.8
# dmesg | tail
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information
arpresolve: 192.168.178.1: route contains no arp information

I think there is no need to delete cloned routes as long as we don't delete
the last cloning route to a network.

ok?

friehm

Index: sys/net/route.c
===
RCS file: /home/friehm/repos/openbsd-cvs/cvs/src/sys/net/route.c,v
retrieving revision 1.371
diff -u -p -r1.371 route.c
--- sys/net/route.c 10 Feb 2018 09:17:56 -  1.371
+++ sys/net/route.c 14 Feb 2018 09:37:29 -
@@ -781,7 +781,7 @@ rtrequest_delete(struct rt_addrinfo *inf
rt_putgwroute(rt);
 
/* Clean up any cloned children. */
-   if (ISSET(rt->rt_flags, RTF_CLONING))
+   if (ISSET(rt->rt_flags, RTF_CLONING) && !ISSET(rt->rt_flags, RTF_MPATH))
rtflushclone(tableid, rt);
 
rtfree(rt->rt_parent);



Re: carp_ourether() tweak

2018-01-22 Thread Florian Riehm

ok.

friehm

On 01/22/18 11:58, Martin Pieuchot wrote:

Check if `if_carp' is empty inside carp_ourether() instead of outside.

ok?

Index: net/if_bridge.c
===
RCS file: /cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.301
diff -u -p -r1.301 if_bridge.c
--- net/if_bridge.c 10 Jan 2018 23:50:39 -  1.301
+++ net/if_bridge.c 22 Jan 2018 10:54:48 -
@@ -1108,8 +1108,7 @@ bridge_process(struct ifnet *ifp, struct
ac = (struct arpcom *)ifl->ifp;
if (bcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN) == 0
  #if NCARP > 0
-   || (!SRPL_EMPTY_LOCKED(>ifp->if_carp) &&
-   !carp_ourether(ifl->ifp, eh->ether_dhost))
+   || !carp_ourether(ifl->ifp, eh->ether_dhost)
  #endif
) {
if (srcifl->bif_flags & IFBIF_LEARNING)
@@ -1131,8 +1130,7 @@ bridge_process(struct ifnet *ifp, struct
}
if (bcmp(ac->ac_enaddr, eh->ether_shost, ETHER_ADDR_LEN) == 0
  #if NCARP > 0
-   || (!SRPL_EMPTY_LOCKED(>ifp->if_carp) &&
-   !carp_ourether(ifl->ifp, eh->ether_shost))
+   || !carp_ourether(ifl->ifp, eh->ether_shost)
  #endif
) {
m_freem(m);
Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.327
diff -u -p -r1.327 ip_carp.c
--- netinet/ip_carp.c   12 Jan 2018 23:47:24 -  1.327
+++ netinet/ip_carp.c   22 Jan 2018 10:53:35 -
@@ -1339,12 +1339,15 @@ carp_iamatch(struct ifnet *ifp)
  int
  carp_ourether(struct ifnet *ifp, u_int8_t *ena)
  {
-   struct srpl *cif;
+   struct srpl *cif = >if_carp;
struct carp_softc *vh;
  
  	KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */

+
+   if (SRPL_EMPTY_LOCKED(cif))
+   return (0);
+
KASSERT(ifp->if_type == IFT_ETHER);
-   cif = >if_carp;
  
  	SRPL_FOREACH_LOCKED(vh, cif, sc_list) {

struct carp_vhost_entry *vhe;





Re: merge vlan and carp input back into ether_input

2018-01-14 Thread Florian Riehm

On 01/11/18 14:51, Martin Pieuchot wrote:

On 11/01/18(Thu) 21:59, David Gwynne wrote:

[...]
when you say i break carp balancing, are you talking about the removal of the 
PACKET_TAG_CARP_BAL_IP tagging? PACKET_TAG_CARP_BAL_IP is only used in 
carp_lsdrop to clear the M_MCAST flag on the mbuf. M_MCAST wont be set on 
packets destined for the carp interface because we check ac_enaddr before 
checking if the packet is multicast or broadcast.


I might be mistaken.  I just know that this code is fragile and I'd
prefer to see such change tested isolated because carp(4) itself has
multiple configurations.  CARP balancing has been fixed for 6.2 by
friehm@ after being broken for multiple releases.


Hi David,

let me know if you all you carp changes are commited, then
I will test if carp balancing is still working.
I tested this commit already and it seems to be fine.

Thanks & Regards,

friehm



Re: Add reset option to boot command of ddb(4)

2017-12-14 Thread Florian Riehm

On 12/14/17 12:20, Mark Kettenis wrote:

Date: Thu, 14 Dec 2017 11:49:18 +0100
From: Martin Pieuchot <m...@openbsd.org>

On 14/12/17(Thu) 11:30, Mark Kettenis wrote:

X-Originating-IP: 88.153.7.170
Date: Thu, 14 Dec 2017 10:30:21 +0100
From: Martin Pieuchot <m...@openbsd.org>

On 13/12/17(Wed) 19:09, Florian Riehm wrote:

Hi,

This patch follows bluhm's attempt for a ddb command 'boot reset'.
My first attempt was not architecture aware.

Tested on i386 by bluhm@ and on amd64 by me.


I don't understand why we need to add "boot reset"?  To not fix ddb(4)
and keep a broken "boot reboot"?  If we cannot fix our own code...


Funny you say that given the discussion about if_downall() on icb ;).


There's nothing funny.  There's people not reporting bugs with traceback
to bugs@ and coming around with workaround like that.


IIRC "boot reset" is all about avoiding the if_downall() call.  And we
really don't want to skip if_downall() in the "boot reboot".  We added
that call since not stopping the DMA engines of the network cards had
some very interesting effects when the machine rebooted...


If if_downall() is a problem, then please show me a traceback where
that's the case.  I'd be delighted to fix it :)


True.  Given the DMA issue, "boot reset" would be a rather dangerous
command.



If the command could lead to DMA issues, I agree that we should not
introduce it.
I also agree that we should try to make boot reboot as reliable as possible
and it works fine in most situations.
Anyway I have seen it hanging in some cases. This is a problem for me,
since I don't have physical access to my machines all the time. In such
cases I preferred the cpu_reset hammer. I have never noticed any side effects
like DMA issues, so I thought it would be a good idea, to provide that way
as a regular ddb feature. But I am also fine without the command.



Re: Add reset option to boot command of ddb(4)

2017-12-13 Thread Florian Riehm

I will prepare a new diff including the other architecures and
try to find people who can test it.
I have had such a diff already but then I decided to remove
the untested parts because I didn't want to submit untested
code.

friehm

On 12/13/17 21:59, Theo de Raadt wrote:

As it is, this diff will not go in.

Your 2nd attempt is not architecture aware either.  There are more
than 2 architectures.  If you add a MI feature, you must attempt to
add support for it to all the MD versions.  And the process of mailing
it out to the community gives people an opportunity to help test
those.

Seeing as this is only a goto and a label: on each architecture, what
is the purpose of not even trying to write such a diff??  You are
leaving the work, hoping someone eventually does it??

That isn't the way we work.


This patch follows bluhm's attempt for a ddb command 'boot reset'.
My first attempt was not architecture aware.

Tested on i386 by bluhm@ and on amd64 by me.

ok?

friehm

Index: share/man/man4/ddb.4
===
RCS file: /openbsd/src/share/man/man4/ddb.4,v
retrieving revision 1.92
diff -u -p -r1.92 ddb.4
--- share/man/man4/ddb.429 Nov 2017 07:28:21 -  1.92
+++ share/man/man4/ddb.412 Dec 2017 06:35:44 -
@@ -381,6 +381,15 @@ Just halt.
  Just reboot.
  .It Ic boot poweroff
  Power down the machine whenever possible; if it fails, just halt.
+.It Ic boot reset
+Restart the machine by resetting the CPU on i386 and amd64
+architectures.
+Useful in situations were
+.Ic boot reboot
+does not work anymore, i.e. due to locking issues.
+On other platforms it is equivalent to the
+.Ic boot reboot
+command.
  .El
  .\" 
  .It Xo
Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /openbsd/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.236
diff -u -p -r1.236 machdep.c
--- sys/arch/amd64/amd64/machdep.c  11 Dec 2017 05:27:40 -  1.236
+++ sys/arch/amd64/amd64/machdep.c  12 Dec 2017 06:35:44 -
@@ -713,6 +713,9 @@ struct pcb dumppcb;
  __dead void
  boot(int howto)
  {
+   if ((howto & RB_RESET) != 0)
+   goto reset;
+
if ((howto & RB_POWERDOWN) != 0)
lid_action = 0;
  
@@ -770,6 +773,7 @@ haltsys:

printf("rebooting...\n");
if (cpureset_delay > 0)
delay(cpureset_delay * 1000);
+reset:
cpu_reset();
for (;;)
continue;
Index: sys/arch/i386/i386/machdep.c
===
RCS file: /openbsd/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.607
diff -u -p -r1.607 machdep.c
--- sys/arch/i386/i386/machdep.c11 Dec 2017 05:27:40 -  1.607
+++ sys/arch/i386/i386/machdep.c12 Dec 2017 06:35:44 -
@@ -2629,6 +2629,9 @@ struct pcb dumppcb;
  __dead void
  boot(int howto)
  {
+   if ((howto & RB_RESET) != 0)
+   goto reset;
+
if ((howto & RB_POWERDOWN) != 0)
lid_action = 0;
  
@@ -2709,6 +2712,7 @@ haltsys:

}
  
  	printf("rebooting...\n");

+reset:
cpu_reset();
for (;;)
continue;
Index: sys/ddb/db_command.c
===
RCS file: /openbsd/src/sys/ddb/db_command.c,v
retrieving revision 1.81
diff -u -p -r1.81 db_command.c
--- sys/ddb/db_command.c11 Dec 2017 05:27:40 -  1.81
+++ sys/ddb/db_command.c12 Dec 2017 06:35:44 -
@@ -105,6 +105,7 @@ voiddb_boot_dump_cmd(db_expr_t, int, db
  void  db_boot_halt_cmd(db_expr_t, int, db_expr_t, char *);
  void  db_boot_reboot_cmd(db_expr_t, int, db_expr_t, char *);
  void  db_boot_poweroff_cmd(db_expr_t, int, db_expr_t, char *);
+void   db_boot_reset_cmd(db_expr_t, int, db_expr_t, char *);
  void  db_stack_trace_cmd(db_expr_t, int, db_expr_t, char *);
  void  db_dmesg_cmd(db_expr_t, int, db_expr_t, char *);
  void  db_show_panic_cmd(db_expr_t, int, db_expr_t, char *);
@@ -597,6 +598,7 @@ struct db_command db_boot_cmds[] = {
{ "halt", db_boot_halt_cmd,   0,  0 },
{ "reboot",   db_boot_reboot_cmd, 0,  0 },
{ "poweroff", db_boot_poweroff_cmd,   0,  0 },
+   { "reset",db_boot_reset_cmd,  0,  0 },
{ NULL, }
  };
  
@@ -812,6 +814,12 @@ void

  db_boot_poweroff_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
  {
db_reboot(RB_NOSYNC | RB_HALT | RB_POWERDOWN | RB_TIMEBAD | RB_USERREQ);
+}
+
+void
+db_boot_reset_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
+{
+   db_reboot(RB_RESET | RB_AUTOBOOT | RB_NOSYNC | RB_TIMEBAD | RB_USERREQ);
  }
  
  void

Index: sys/sys/reboot.h
===
RCS file: /openbsd/src/sys/sys/reboot.h,v
retrieving revision 1.17
diff -u -p -r1.17 reboot.h
--- sys/sys/reboot.h  

Add reset option to boot command of ddb(4)

2017-12-13 Thread Florian Riehm
Hi,

This patch follows bluhm's attempt for a ddb command 'boot reset'.
My first attempt was not architecture aware.

Tested on i386 by bluhm@ and on amd64 by me.

ok?

friehm

Index: share/man/man4/ddb.4
===
RCS file: /openbsd/src/share/man/man4/ddb.4,v
retrieving revision 1.92
diff -u -p -r1.92 ddb.4
--- share/man/man4/ddb.429 Nov 2017 07:28:21 -  1.92
+++ share/man/man4/ddb.412 Dec 2017 06:35:44 -
@@ -381,6 +381,15 @@ Just halt.
 Just reboot.
 .It Ic boot poweroff
 Power down the machine whenever possible; if it fails, just halt.
+.It Ic boot reset
+Restart the machine by resetting the CPU on i386 and amd64
+architectures.
+Useful in situations were
+.Ic boot reboot
+does not work anymore, i.e. due to locking issues.
+On other platforms it is equivalent to the
+.Ic boot reboot
+command.
 .El
 .\" 
 .It Xo
Index: sys/arch/amd64/amd64/machdep.c
===
RCS file: /openbsd/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.236
diff -u -p -r1.236 machdep.c
--- sys/arch/amd64/amd64/machdep.c  11 Dec 2017 05:27:40 -  1.236
+++ sys/arch/amd64/amd64/machdep.c  12 Dec 2017 06:35:44 -
@@ -713,6 +713,9 @@ struct pcb dumppcb;
 __dead void
 boot(int howto)
 {
+   if ((howto & RB_RESET) != 0)
+   goto reset;
+
if ((howto & RB_POWERDOWN) != 0)
lid_action = 0;
 
@@ -770,6 +773,7 @@ haltsys:
printf("rebooting...\n");
if (cpureset_delay > 0)
delay(cpureset_delay * 1000);
+reset:
cpu_reset();
for (;;)
continue;
Index: sys/arch/i386/i386/machdep.c
===
RCS file: /openbsd/src/sys/arch/i386/i386/machdep.c,v
retrieving revision 1.607
diff -u -p -r1.607 machdep.c
--- sys/arch/i386/i386/machdep.c11 Dec 2017 05:27:40 -  1.607
+++ sys/arch/i386/i386/machdep.c12 Dec 2017 06:35:44 -
@@ -2629,6 +2629,9 @@ struct pcb dumppcb;
 __dead void
 boot(int howto)
 {
+   if ((howto & RB_RESET) != 0)
+   goto reset;
+
if ((howto & RB_POWERDOWN) != 0)
lid_action = 0;
 
@@ -2709,6 +2712,7 @@ haltsys:
}
 
printf("rebooting...\n");
+reset:
cpu_reset();
for (;;)
continue;
Index: sys/ddb/db_command.c
===
RCS file: /openbsd/src/sys/ddb/db_command.c,v
retrieving revision 1.81
diff -u -p -r1.81 db_command.c
--- sys/ddb/db_command.c11 Dec 2017 05:27:40 -  1.81
+++ sys/ddb/db_command.c12 Dec 2017 06:35:44 -
@@ -105,6 +105,7 @@ voiddb_boot_dump_cmd(db_expr_t, int, db
 void   db_boot_halt_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_boot_reboot_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_boot_poweroff_cmd(db_expr_t, int, db_expr_t, char *);
+void   db_boot_reset_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_stack_trace_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_dmesg_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_show_panic_cmd(db_expr_t, int, db_expr_t, char *);
@@ -597,6 +598,7 @@ struct db_command db_boot_cmds[] = {
{ "halt",   db_boot_halt_cmd,   0,  0 },
{ "reboot", db_boot_reboot_cmd, 0,  0 },
{ "poweroff",   db_boot_poweroff_cmd,   0,  0 },
+   { "reset",  db_boot_reset_cmd,  0,  0 },
{ NULL, }
 };
 
@@ -812,6 +814,12 @@ void
 db_boot_poweroff_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
 {
db_reboot(RB_NOSYNC | RB_HALT | RB_POWERDOWN | RB_TIMEBAD | RB_USERREQ);
+}
+
+void
+db_boot_reset_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
+{
+   db_reboot(RB_RESET | RB_AUTOBOOT | RB_NOSYNC | RB_TIMEBAD | RB_USERREQ);
 }
 
 void
Index: sys/sys/reboot.h
===
RCS file: /openbsd/src/sys/sys/reboot.h,v
retrieving revision 1.17
diff -u -p -r1.17 reboot.h
--- sys/sys/reboot.h11 Jul 2014 14:36:44 -  1.17
+++ sys/sys/reboot.h12 Dec 2017 06:35:45 -
@@ -56,6 +56,7 @@
 #defineRB_POWERDOWN0x1000  /* attempt to power down machine */
 #defineRB_SERCONS  0x2000  /* use serial console if available */
 #defineRB_USERREQ  0x4000  /* boot() called at user request (e.g. 
ddb) */
+#defineRB_RESET0x8000  /* do not try to cleanup, only for ddb 
*/
 
 /*
  * Constants for converting boot-style device number to type,



Add reset option to boot command of ddb(4)

2017-10-26 Thread Florian Riehm

Hi,

Sometimes I see systems hanging in ddb(4) after panic(9) and the "boot reboot"
command doesn't work anymore, i.e. of filesystem or locking issues.
Bluhm@ suggested to me to use "call cpu_reset" in such situations.

I would like to introduce a command 'boot reset' to do this.

ok?

friehm


Index: share/man/man4//ddb.4
===
RCS file: /cvs/src/share/man/man4/ddb.4,v
retrieving revision 1.91
diff -u -p -r1.91 ddb.4
--- share/man/man4//ddb.4   29 Sep 2017 09:36:04 -  1.91
+++ share/man/man4//ddb.4   26 Oct 2017 08:18:44 -
@@ -379,6 +379,10 @@ Just halt.
 Just reboot.
 .It Ic boot poweroff
 Power down the machine whenever possible; if it fails, just halt.
+.It Ic boot reset
+Restart the machine by resetting the CPU. Useful in situations were
+.Ic boot reboot
+does not work anymore.
 .El
 .\" 
 .It Xo
Index: sys/ddb/db_command.c
===
RCS file: /cvs/src/sys/ddb/db_command.c,v
retrieving revision 1.79
diff -u -p -r1.79 db_command.c
--- sys/ddb/db_command.c19 Oct 2017 16:58:05 -  1.79
+++ sys/ddb/db_command.c26 Oct 2017 08:18:55 -
@@ -105,6 +105,7 @@ voiddb_boot_dump_cmd(db_expr_t, int, db
 void   db_boot_halt_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_boot_reboot_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_boot_poweroff_cmd(db_expr_t, int, db_expr_t, char *);
+void   db_boot_reset_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_stack_trace_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_dmesg_cmd(db_expr_t, int, db_expr_t, char *);
 void   db_show_panic_cmd(db_expr_t, int, db_expr_t, char *);
@@ -606,6 +607,7 @@ struct db_command db_boot_cmds[] = {
{ "halt", db_boot_halt_cmd,   0,  0 },
{ "reboot",   db_boot_reboot_cmd, 0,  0 },
{ "poweroff", db_boot_poweroff_cmd,   0,  0 },
+   { "reset",db_boot_reset_cmd,  0,  0 },
{ NULL, }
 };
 
@@ -812,6 +814,12 @@ void

 db_boot_poweroff_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
 {
reboot(RB_NOSYNC | RB_HALT | RB_POWERDOWN | RB_TIMEBAD | RB_USERREQ);
+}
+
+void
+db_boot_reset_cmd(db_expr_t addr, int haddr, db_expr_t count, char *modif)
+{
+   cpu_reset();
 }
 
 void




Re: close cron sockets in child processes

2017-10-23 Thread Florian Riehm

On 10/23/17 09:05, Jeremie Courreges-Anglas wrote:

On Fri, Oct 20 2017, "Todd C. Miller" <todd.mil...@courtesan.com> wrote:

On Fri, 20 Oct 2017 16:25:32 +0200, Florian Riehm wrote:


cron(8) opens /var/run/cron.sock for communication with crontab(1).
The forked cronjobs have the socked still open.
This prevents restarting cron while a job is running:
(CRON) DEATH (already running)

I think cron's children should not inherit sockets.


There is a similar issue with at jobs.  Here's a comprehensive diff.


Good catch.  The cron part ought to be committed now, but I think
atrun.c needs more love.

Good catch, indeed.
I have commited the cron part.




  - todd

Index: usr.sbin/cron/atrun.c
===
RCS file: /cvs/src/usr.sbin/cron/atrun.c,v
retrieving revision 1.46
diff -u -p -u -r1.46 atrun.c
--- usr.sbin/cron/atrun.c   8 Jun 2017 16:23:39 -   1.46
+++ usr.sbin/cron/atrun.c   20 Oct 2017 15:09:57 -
@@ -283,6 +283,9 @@ run_job(const atjob *job, int dfd, const
return;
}
  
+	/* close fds opened by the parent (e.g. cronSock) */

+   closefrom(3);
+


That doesn't work.  When here, we hold cronSock and dfd (should be 3 and
4).  We need dfd to get fd (should be 5), thus we can't just use
"closefrom(3)".  The straightest way IMO is to close what we should
close (including dfd), but this means making cronSock a global.

I would also propose sprinkling more O_CLOEXEC magic but that can be in
another diff.


I guess it is ok to make cronSock global. closefrom() is good if
you have obvious situations. In more complex scenarios like this, I
prefer close() since it shows more clearly what happens.



Thoughts?


Index: atrun.c
===
RCS file: /d/cvs/src/usr.sbin/cron/atrun.c,v
retrieving revision 1.46
diff -u -p -r1.46 atrun.c
--- atrun.c 8 Jun 2017 16:23:39 -   1.46
+++ atrun.c 23 Oct 2017 03:21:20 -
@@ -283,6 +283,10 @@ run_job(const atjob *job, int dfd, const
return;
}
  
+	/* Close fds opened by the parent. */

+   close(cronSock);
+   close(dfd);
+
/*
 * We don't want the main cron daemon to wait for our children--
 * we will do it ourselves via waitpid().
Index: cron.c
===
RCS file: /d/cvs/src/usr.sbin/cron/cron.c,v
retrieving revision 1.76
diff -u -p -r1.76 cron.c
--- cron.c  7 Jun 2017 23:36:43 -   1.76
+++ cron.c  23 Oct 2017 02:10:54 -
@@ -60,7 +60,6 @@ staticint open_socket(void);
  
  static	volatile sig_atomic_t	got_sigchld;

  statictime_t  timeRunning, virtualTime, clockTime;
-static int cronSock;
  staticlongGMToff;
  staticcron_db *database;
  staticat_db   *at_database;
@@ -68,6 +67,7 @@ staticdouble  batch_maxload = BATCH_MA
  staticint NoFork;
  statictime_t  StartTime;
gid_t   cron_gid;
+   int cronSock;
  
  static void

  usage(void)
Index: globals.h
===
RCS file: /d/cvs/src/usr.sbin/cron/globals.h,v
retrieving revision 1.14
diff -u -p -r1.14 globals.h
--- globals.h   7 Jun 2017 23:36:43 -   1.14
+++ globals.h   23 Oct 2017 02:03:18 -
@@ -18,5 +18,6 @@
   */
  
  extern gid_t	cron_gid;

+extern int cronSock;
  extern intLineNumber;
  extern char   *__progname;




ok friehm



close cron sockets in child processes

2017-10-20 Thread Florian Riehm

Hi,

cron(8) opens /var/run/cron.sock for communication with crontab(1).
The forked cronjobs have the socked still open.
This prevents restarting cron while a job is running:
(CRON) DEATH (already running)

I think cron's children should not inherit sockets.

ok?

friehm

Index: usr.sbin/cron/do_command.c
===
RCS file: /cvs/src/usr.sbin/cron/do_command.c,v
retrieving revision 1.56
diff -u -p -r1.56 do_command.c
--- usr.sbin/cron/do_command.c  17 Nov 2015 22:31:44 -  1.56
+++ usr.sbin/cron/do_command.c  20 Oct 2017 13:56:27 -
@@ -86,6 +86,9 @@ child_process(entry *e, user *u)
/* mark ourselves as different to PS command watchers */
setproctitle("running job");
 
+	/* close sockets from parent (i.e. cronSock) */

+   closefrom(3);
+
/* discover some useful and important environment settings
 */
usernm = e->pwd->pw_name;



Re: ospfd: add IMSG_IFADDRADD to deal with "sh /etc/netstart if"

2017-09-04 Thread Florian Riehm

On 08/23/17 00:22, Florian Riehm wrote:

On 08/21/17 18:57, Remi Locherer wrote:

On Mon, Jul 24, 2017 at 04:59:46PM +0200, Remi Locherer wrote:

On Fri, Jul 21, 2017 at 06:24:06PM +0200, Remi Locherer wrote:

On Fri, Jul 21, 2017 at 02:45:03PM +0200, Florian Riehm wrote:

On 06/25/17 23:47, Remi Locherer wrote:

Hi,

ospfd does not react nicely when running "sh /etc/netstart if".

This is because adding the same address again do an interface results
in RTM_DELADDR and RTM_NEWADDR. ospfd handles the former but the later.
If this happens ospfd says "interface vether0:192.168.250.1 gone".
Adjacencies on that interface are down and ospfd can not recover.

The below patch adds IMSG_IFADDRADD to deal with that. With it ospfd
logs the following after "ifconfig vether0 192.168.250.1/24" (same address
as active before):



Hi Remi,

thanks for your report and your patch.
I think it is the right approach, but unfortunately it doesn't work in my setup.
If I run 'sh /etc/netstart vio1' vio1 goes down and stays down.
Please have a look to my config / logs. What is the differece between your and
my test?


I tested with an interface connected to stub network. Your output shows an
interface connected to a transit network. In my test setup I did not get the
error message: "if_join_group: error IP_ADD_MEMBERSHIP"

I'll look into it and try to fix this.


I could reproduce the behaviour you see with my patch when testing with
vmm and vio interfaces. It looks like in the IFADDRDEL case the interface
can not leave the multicast group.

I do not see this when testing with vether, pair or vlan (over ix)
interfaces. Could this be a bug with multicast handling in vio?



Today I did a test with an unpatched ospfd and a vio interface in vmm.

I started ospfd and waited till adjacency was up. Then I did
"ifconfig vio0 192.168.250.101/24" (same ip as set before).

The debug output from ospfd:

[...]
if_leave_group: error IP_DROP_MEMBERSHIP, interface vio0 address 224.0.0.5: 
Can't assign requested address
if_leave_group: error IP_DROP_MEMBERSHIP, interface vio0 address 224.0.0.6: 
Can't assign requested address
orig_rtr_lsa: area 0.0.0.0
orig_rtr_lsa: stub net, interface vio0
orig_rtr_lsa: stub net, interface vether0
if_act_elect: interface vio0 old dr 192.168.250.1 new dr 192.168.250.101, old 
bdr 192.168.250.101 new bdr none
orig_rtr_lsa: area 0.0.0.0
[...]

Doing the same with a pair or vether interface does not produce the message
"if_leave_group: error IP_DROP_MEMBERSHIP ".

My conclusion of this is that the error is problem with the vio driver and
not with my patch for ospfd.

Could we proceed with the proposed ospfd patch and attack the vio problem
separately?



Actually I would be fine with that approach after I am sure it is a vio(4)
problem.

I think vether(4) and pair(4) are a bit too exotic to prove your patch works ;)
Tomorrow I will test your change with em(4).

My problem is not the multicast error message. I just can't 'see' your
fix, since my interfaces stay down after netstart.


Hi,

Now I got it. We are seeing a race!
Ospfd calls if_leave_group() after the kernel has deleted the interface address.
Then the syscall enters ip_setmoptions() in sys/netinet/ip_output.c.
Here ifa_ifwithaddr() (see case IP_DROP_MEMBERSHIP) fails, since the address is
gone at the moment.

Putting sleep(1) before setsockopt(iface->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP...)
let the problem disappear. Of course, this is not the fix.

I am thinking about the right way to fix this.



Re: ospfd: add IMSG_IFADDRADD to deal with "sh /etc/netstart if"

2017-08-22 Thread Florian Riehm

On 08/21/17 18:57, Remi Locherer wrote:

On Mon, Jul 24, 2017 at 04:59:46PM +0200, Remi Locherer wrote:

On Fri, Jul 21, 2017 at 06:24:06PM +0200, Remi Locherer wrote:

On Fri, Jul 21, 2017 at 02:45:03PM +0200, Florian Riehm wrote:

On 06/25/17 23:47, Remi Locherer wrote:

Hi,

ospfd does not react nicely when running "sh /etc/netstart if".

This is because adding the same address again do an interface results
in RTM_DELADDR and RTM_NEWADDR. ospfd handles the former but the later.
If this happens ospfd says "interface vether0:192.168.250.1 gone".
Adjacencies on that interface are down and ospfd can not recover.

The below patch adds IMSG_IFADDRADD to deal with that. With it ospfd
logs the following after "ifconfig vether0 192.168.250.1/24" (same address
as active before):



Hi Remi,

thanks for your report and your patch.
I think it is the right approach, but unfortunately it doesn't work in my setup.
If I run 'sh /etc/netstart vio1' vio1 goes down and stays down.
Please have a look to my config / logs. What is the differece between your and
my test?


I tested with an interface connected to stub network. Your output shows an
interface connected to a transit network. In my test setup I did not get the
error message: "if_join_group: error IP_ADD_MEMBERSHIP"

I'll look into it and try to fix this.


I could reproduce the behaviour you see with my patch when testing with
vmm and vio interfaces. It looks like in the IFADDRDEL case the interface
can not leave the multicast group.

I do not see this when testing with vether, pair or vlan (over ix)
interfaces. Could this be a bug with multicast handling in vio?



Today I did a test with an unpatched ospfd and a vio interface in vmm.

I started ospfd and waited till adjacency was up. Then I did
"ifconfig vio0 192.168.250.101/24" (same ip as set before).

The debug output from ospfd:

[...]
if_leave_group: error IP_DROP_MEMBERSHIP, interface vio0 address 224.0.0.5: 
Can't assign requested address
if_leave_group: error IP_DROP_MEMBERSHIP, interface vio0 address 224.0.0.6: 
Can't assign requested address
orig_rtr_lsa: area 0.0.0.0
orig_rtr_lsa: stub net, interface vio0
orig_rtr_lsa: stub net, interface vether0
if_act_elect: interface vio0 old dr 192.168.250.1 new dr 192.168.250.101, old 
bdr 192.168.250.101 new bdr none
orig_rtr_lsa: area 0.0.0.0
[...]

Doing the same with a pair or vether interface does not produce the message
"if_leave_group: error IP_DROP_MEMBERSHIP ".

My conclusion of this is that the error is problem with the vio driver and
not with my patch for ospfd.

Could we proceed with the proposed ospfd patch and attack the vio problem
separately?



Actually I would be fine with that approach after I am sure it is a vio(4)
problem.

I think vether(4) and pair(4) are a bit too exotic to prove your patch works ;)
Tomorrow I will test your change with em(4).

My problem is not the multicast error message. I just can't 'see' your
fix, since my interfaces stay down after netstart.

Does your interface come up again, if the multicast error message occurs?

friehm



Re: [patch] ospfd: exporting default gateway via route label (fix ROUNDUP)

2017-07-21 Thread Florian Riehm
On 03/03/14 00:33, Florian Riehm wrote:
> Hi all,
> 
> ospfd can't export the default gateway via route label because
> get_rtaddrs gets confused by a netmask (RTAX_NETMASK) of 0 because
> sa->sa_len in get_rtaddrs is 0 and ROUNDUP then returns 0 also.
> 
> The bug has been fixed in ospf6d in the same way a couple of years ago.
> Ospf6d uses the ROUNDUP macro from route/show.c now.
> I think ospfd should do the same.
> 
> Regards,
> 
> Florian

Diff is long-term production tested by many customers now.
ok?

Thanks & Regards

friehm


Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/ospfd/kroute.c,v
retrieving revision 1.107
diff -u -p -r1.107 kroute.c
--- kroute.c27 Dec 2016 09:15:16 -  1.107
+++ kroute.c21 Jul 2017 13:37:42 -
@@ -983,8 +983,8 @@ prefixlen2mask(u_int8_t prefixlen)
return (htonl(0x << (32 - prefixlen)));
 }
 
-#defineROUNDUP(a)  \
-(((a) & (sizeof(long) - 1)) ? (1 + ((a) | (sizeof(long) - 1))) : (a))
+#define ROUNDUP(a) \
+   ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
 void
 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info) 



Re: ospfd: add IMSG_IFADDRADD to deal with "sh /etc/netstart if"

2017-07-21 Thread Florian Riehm
On 06/25/17 23:47, Remi Locherer wrote:
> Hi,
> 
> ospfd does not react nicely when running "sh /etc/netstart if".
> 
> This is because adding the same address again do an interface results
> in RTM_DELADDR and RTM_NEWADDR. ospfd handles the former but the later.
> If this happens ospfd says "interface vether0:192.168.250.1 gone".
> Adjacencies on that interface are down and ospfd can not recover.
> 
> The below patch adds IMSG_IFADDRADD to deal with that. With it ospfd
> logs the following after "ifconfig vether0 192.168.250.1/24" (same address
> as active before):
> 

Hi Remi,

thanks for your report and your patch.
I think it is the right approach, but unfortunately it doesn't work in my setup.
If I run 'sh /etc/netstart vio1' vio1 goes down and stays down.
Please have a look to my config / logs. What is the differece between your and
my test?

Beside that:
- I would rename 'struct ifaddrdel' to 'struct ifaddr' and extend it. So we
  can use it in if_newaddr() and if_deladdr() and avoid 'struct ifaddrnew'.

Thanks & Regards

friehm

Before sh /etc/netstart vio1:
ospfctl show int
Interface   AddressState  HelloTimer Linkstate  Uptimenc  ac
vio210.88.12.1/24  OTHER  00:00:08   active 00:00:30   2   2
vio110.88.10.2/24  BCKUP  00:00:01   active 00:00:30   1   1

After (Waiting doesn't fix the problem):
Interface   AddressState  HelloTimer Linkstate  Uptimenc  ac
vio210.88.12.1/24  OTHER  00:00:06   active 00:24:18   2   2
vio110.88.10.2/24  DOWN   -  active 00:00:00   1   0

ospfd -dv:
startup
orig_rtr_lsa: area 51.0.0.0
orig_rtr_lsa: stub net, interface vio2
orig_rtr_lsa: stub net, interface vio1
if_fsm: event UP resulted in action START and changing state for interface vio2 
from DOWN to WAIT
orig_rtr_lsa: area 51.0.0.0
orig_rtr_lsa: stub net, interface vio2
orig_rtr_lsa: stub net, interface vio1
if_fsm: event UP resulted in action START and changing state for interface vio1 
from DOWN to WAIT
spf_calc: area 51.0.0.0 calculated
nbr_fsm: event HELLO_RECEIVED resulted in action START_INACTIVITY_TIMER and 
changing state for neighbor ID 192.168.30.101 from DOWN to INIT
nbr_fsm: event 2_WAY_RECEIVED resulted in action EVAL and changing state for 
neighbor ID 192.168.30.101 from INIT to 2-WAY
if_act_elect: interface vio1 old dr none new dr 10.88.10.1, old bdr none new 
bdr 10.88.10.2
nbr_fsm: event ADJ_OK resulted in action EVAL and changing state for neighbor 
ID 192.168.30.101 from 2-WAY to EXSTA
orig_rtr_lsa: area 51.0.0.0
orig_rtr_lsa: stub net, interface vio2
orig_rtr_lsa: stub net, interface vio1
orig_rtr_lsa: area 51.0.0.0
orig_rtr_lsa: stub net, interface vio2
orig_rtr_lsa: stub net, interface vio1
if_fsm: event BACKUPSEEN resulted in action ELECT and changing state for 
interface vio1 from WAIT to BCKUP
if_act_elect: interface vio1 old dr 10.88.10.1 new dr 10.88.10.1, old bdr 
10.88.10.2 new bdr 10.88.10.2
if_fsm: event NEIGHBORCHANGE resulted in action ELECT and changing state for 
interface vio1 from BCKUP to BCKUP
nbr_fsm: event NEGOTIATION_DONE resulted in action SNAPSHOT and changing state 
for neighbor ID 192.168.30.101 from EXSTA to SNAP
nbr_fsm: event SNAPSHOT_DONE resulted in action SNAPSHOT_DONE and changing 
state for neighbor ID 192.168.30.101 from SNAP to EXCHG
nbr_fsm: event EXCHANGE_DONE resulted in action EXCHANGE_DONE and changing 
state for neighbor ID 192.168.30.101 from EXCHG to LOAD
orig_rtr_lsa: area 51.0.0.0
orig_rtr_lsa: stub net, interface vio2
orig_rtr_lsa: transit net, interface vio1
nbr_fsm: event LOADING_DONE resulted in action NOTHING and changing state for 
neighbor ID 192.168.30.101 from LOAD to FULL
spf_calc: area 51.0.0.0 calculated
nbr_fsm: event HELLO_RECEIVED resulted in action START_INACTIVITY_TIMER and 
changing state for neighbor ID 192.168.30.104 from DOWN to INIT
nbr_fsm: event 2_WAY_RECEIVED resulted in action EVAL and changing state for 
neighbor ID 192.168.30.104 from INIT to 2-WAY
if_fsm: event NEIGHBORCHANGE resulted in action NOTHING and changing state for 
interface vio2 from WAIT to WAIT
nbr_fsm: event HELLO_RECEIVED resulted in action START_INACTIVITY_TIMER and 
changing state for neighbor ID 192.168.30.105 from DOWN to INIT
nbr_fsm: event 2_WAY_RECEIVED resulted in action EVAL and changing state for 
neighbor ID 192.168.30.105 from INIT to 2-WAY
if_fsm: event NEIGHBORCHANGE resulted in action NOTHING and changing state for 
interface vio2 from WAIT to WAIT
recv_db_description: neighbor ID 192.168.30.105: packet ignored in state 2-WAY
recv_db_description: neighbor ID 192.168.30.104: packet ignored in state 2-WAY
recv_db_description: neighbor ID 192.168.30.104: packet ignored in state 2-WAY
recv_db_description: neighbor ID 192.168.30.105: packet ignored in state 2-WAY
if_act_elect: interface vio2 old dr none new dr 10.88.12.3, old bdr none new 
bdr 10.88.12.2
nbr_fsm: event ADJ_OK resulted in action EVAL and changing state for neighbor 
ID 

Re: Route priority support for ospf6d

2017-06-19 Thread Florian Riehm
Thanks,

I commited it to move forward.

On 06/09/17 15:41, Claudio Jeker wrote:
> On Fri, Jun 09, 2017 at 03:28:07PM +0200, Alexander Bluhm wrote:
>> On Wed, May 31, 2017 at 02:29:03PM +0200, Florian Riehm wrote:
>>> @@ -359,6 +333,7 @@ kr_fib_decouple(void)
>>>  void
>>>  kr_dispatch_msg(int fd, short event, void *bula)
>>>  {
>>> +   /* XXX this is stupid */
>>> dispatch_rtmsg();
>>>  }
>>
>> I guess this comment refers to the event_loopexit(NULL) in ospfd code.
>> So I would not add it here.
>>
> 
> This this is from my side, because it is dumb to have a void function that
> only does on thing calling another void function.

I thought it referred to the exit on error approach.
I think it is the right approach (especially for a routing daemon)
but usually our customers tell me "ospf(6)d exited" and there is no 
chance for debugging. I am thinking about a solution for this problem.
My idea is do dump 'helpful' data in case of error, but 'helpful'
has to be precised.

>>> @@ -1377,10 +1376,10 @@ dispatch_rtmsg(void)
>>> if (rtm->rtm_flags & (RTF_LLINFO|RTF_BROADCAST))
>>> continue;
>>>  
>>> -#ifdef RTF_MPATH
>>> if (rtm->rtm_flags & RTF_MPATH)
>>> mpath = 1;
>>> -#endif
>>> +   prio = rtm->rtm_priority;
>>> +
>>
>> In ospfd we have at this location
>> flags = (prio == RTP_OSPF) ?
>> F_OSPFD_INSERTED : F_KERNEL;
>> Should we add that here?
> 
> Hmmm... this seems to be to detect from where routes are originating.

Yes, the next change will unify fetchtable and the kroute dispatch handler.
This will fix it.



Route priority support for ospf6d

2017-05-31 Thread Florian Riehm
Hi,

this diff adds priority support to ospf6d.
Mostly based on the following ospfd commit:
cvs diff -D "2008-12-11" -D "2008-12-13"

Additionally I removed the RTF_UP from hdr.rtm_flags in send_rtmsg().
Ospfd and bgpd also don't set the flag.

The next steps will be to add support for ospf6ctl fib reload and
to invent rtmsg_process() to reduce duplicate code in fetchtable() 
and dispatch_rtmsg().

OK ?

friehm

Index: usr.sbin/ospf6ctl/ospf6ctl.c
===
RCS file: /cvs/src/usr.sbin/ospf6ctl/ospf6ctl.c,v
retrieving revision 1.44
diff -u -p -r1.44 ospf6ctl.c
--- usr.sbin/ospf6ctl/ospf6ctl.c22 Dec 2016 23:03:55 -  1.44
+++ usr.sbin/ospf6ctl/ospf6ctl.c31 May 2017 12:22:20 -
@@ -1256,7 +1256,8 @@ void
 show_fib_head(void)
 {
printf("flags: * = valid, O = OSPF, C = Connected, S = Static\n");
-   printf("%-6s %-20s %-17s\n", "Flags", "Destination", "Nexthop");
+   printf("%-6s %-4s %-20s %-17s\n",
+   "Flags", "Prio", "Destination", "Nexthop");
 }
 
 int
@@ -1286,6 +1287,7 @@ show_fib_msg(struct imsg *imsg)
printf(" ");
 
printf(" ");
+   printf("%4d ", k->priority);
if (asprintf(, "%s/%u", log_in6addr(>prefix),
k->prefixlen) == -1)
err(1, NULL);
Index: usr.sbin/ospf6d/kroute.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.51
diff -u -p -r1.51 kroute.c
--- usr.sbin/ospf6d/kroute.c30 May 2017 12:42:31 -  1.51
+++ usr.sbin/ospf6d/kroute.c31 May 2017 12:22:20 -
@@ -62,7 +62,8 @@ int   kroute_compare(struct kroute_node *,
 intkr_change_fib(struct kroute_node *, struct kroute *, int, int);
 intkr_delete_fib(struct kroute_node *);
 
-struct kroute_node *kroute_find(const struct in6_addr *, u_int8_t);
+struct kroute_node *kroute_find(const struct in6_addr *, u_int8_t,
+   u_int8_t);
 struct kroute_node *kroute_matchgw(struct kroute_node *,
struct in6_addr *, unsigned int);
 int kroute_insert(struct kroute_node *);
@@ -215,6 +216,7 @@ kr_change_fib(struct kroute_node *kr, st
kn->r.nexthop = kroute[i].nexthop;
kn->r.scope = kroute[i].scope;
kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
+   kn->r.priority = RTP_OSPF;
kn->r.ext_tag = kroute[i].ext_tag;
rtlabel_unref(kn->r.rtlabel);   /* for RTM_CHANGE */
kn->r.rtlabel = kroute[i].rtlabel;
@@ -238,31 +240,10 @@ kr_change(struct kroute *kroute, int krc
 
kroute->rtlabel = rtlabel_tag2id(kroute->ext_tag);
 
-   kr = kroute_find(>prefix, kroute->prefixlen);
-   if (kr != NULL) {
-   if (kr->r.flags & F_KERNEL) {
-   /* a non-ospf route already exists. not a problem */
-   if (!(kr->r.flags & F_BGPD_INSERTED)) {
-   do {
-   kr->r.flags |= F_OSPFD_INSERTED;
-   kr = kr->next;
-   } while (kr);
-   return (0);
-   }
-   /*
-* XXX as long as there is no multipath support in
-* bgpd this is safe else we end up in a bad situation.
-*/
-   /*
-* ospf route has higher pref
-* - reset flags to the ospf ones
-* - use RTM_CHANGE
-* - zero out ifindex (this is no longer relevant)
-*/
-   action = RTM_CHANGE;
-   } else if (kr->next == NULL)/* single path OSPF route */
-   action = RTM_CHANGE;
-   }
+   kr = kroute_find(>prefix, kroute->prefixlen, RTP_OSPF);
+   if (kr != NULL && kr->next == NULL && krcount == 1)
+   /* single path OSPF route */
+   action = RTM_CHANGE;
 
return (kr_change_fib(kr, kroute, krcount, action));
 }
@@ -270,14 +251,10 @@ kr_change(struct kroute *kroute, int krc
 int
 kr_delete_fib(struct kroute_node *kr)
 {
-   if (!(kr->r.flags & F_OSPFD_INSERTED))
-   return 0;
-
-   if (kr->r.flags & F_KERNEL) {
-   /* remove F_OSPFD_INSERTED flag, route still exists in kernel */
-   kr->r.flags &= ~F_OSPFD_INSERTED;
-   return (0);
-   }
+   if (kr->r.priority != RTP_OSPF)
+   log_warn("kr_delete_fib: %s/%d has wrong priority %d",
+   log_in6addr(>r.prefix), kr->r.prefixlen,
+   kr->r.priority);
 
if (send_rtmsg(kr_state.fd, RTM_DELETE, >r) == -1)

Re: Fix carp(4) balancing ip: replace mac address hack with mbuf tag

2017-05-28 Thread Florian Riehm
On 05/28/17 16:09, Martin Pieuchot wrote:
> On 28/05/17(Sun) 16:02, Florian Riehm wrote:
>> [...] 
>> Ok, new diff below.
> 
> I overlooked something!  See below:

Good catch!
I also forgot SRPL_LEAVE.
fixed.


Index: share/man/man9/mbuf_tags.9
===
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9  24 Nov 2015 19:58:48 -  1.37
+++ share/man/man9/mbuf_tags.9  28 May 2017 15:16:09 -
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
 IP packet, in case a protocol wants to respond over the same route.
 The tag contains a
 .Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode 
+.Va balancing ip .
+This packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
 .El
 .Pp
 .Fn m_tag_find
Index: sys/netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c   27 May 2017 21:55:52 -  1.310
+++ sys/netinet/ip_carp.c   28 May 2017 15:16:10 -
@@ -1422,8 +1422,25 @@ carp_input(struct ifnet *ifp0, struct mb
(IFF_UP|IFF_RUNNING))
continue;
 
-   if (carp_vhe_match(sc, eh->ether_dhost))
+   if (carp_vhe_match(sc, eh->ether_dhost)) {
+   /*
+* These packets look like layer 2 multicast but they
+* are unicast at layer 3. With help of the tag the
+* mbuf's M_MCAST flag can be removed by carp_lsdrop()
+* after we have passed layer 2.
+*/
+   if (sc->sc_balancing == CARP_BAL_IP) {
+   struct m_tag *mtag;
+   mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+   M_NOWAIT);
+   if (mtag == NULL) {
+   m_freem(m);
+   goto out;
+   }
+   m_tag_prepend(m, mtag);
+   }
break;
+   }
}
 
if (sc == NULL) {
@@ -1456,27 +1473,23 @@ carp_input(struct ifnet *ifp0, struct mb
return (0);
}
 
-   /*
-* Clear mcast if received on a carp IP balanced address.
-*/
-   if (sc->sc_balancing == CARP_BAL_IP &&
-   ETHER_IS_MULTICAST(eh->ether_dhost))
-   *(eh->ether_dhost) &= ~0x01;
-
ml_enqueue(, m);
if_input(>sc_if, );
+out:
SRPL_LEAVE();
 
return (1);
 }
 
 int
-carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst)
+carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst,
+   int drop)
 {
struct ifnet *ifp;
struct carp_softc *sc;
int match = 1;
u_int32_t fold;
+   struct m_tag *mtag;
 
ifp = if_get(m->m_pkthdr.ph_ifidx);
KASSERT(ifp != NULL);
@@ -1484,6 +1497,25 @@ carp_lsdrop(struct mbuf *m, sa_family_t 
sc = ifp->if_softc;
if (sc->sc_balancing == CARP_BAL_NONE)
goto done;
+
+   /*
+* Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact
+* that it is layer 2 multicast does not implicate that it is also layer
+* 3 multicast.
+*/
+   if (m->m_flags & M_MCAST &&
+   (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+   m_tag_delete(m, mtag);
+   m->m_flags &= ~M_MCAST;
+   }
+   
+   /*
+* Return without making a drop decision. This allows to clear the
+* M_MCAST flag and do nothing else.
+*/
+   if (!drop)
+   goto done;
+
/*
 * Never drop carp advertisements.
 * XXX Bad idea to pass all broadcast / multicast traffic?
Index: sys/netinet/ip_carp.h
===
RCS file: /cvs/src/sys/netinet/ip_carp.h,v
retrieving revision 1.42
diff -u -p -r1.42 ip_carp.h
--- sys/netinet/ip_carp.h   14 Apr 2017 20:46:31 -  1.42
+++ sys/netinet/ip_carp.h   28 May 2017 15:16:10 -
@@ -204,6 +204,7 @@ struct ifnet*carp_ourether(void *, u_in
 int carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 struct rtentry *);
 int carp_sysctl(int *, u_int,  void *, size_t *, void *, size_t);
-int carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *, u_int32_t 
*);
+int   

Re: Fix carp(4) balancing ip: replace mac address hack with mbuf tag

2017-05-28 Thread Florian Riehm
On 05/28/17 14:04, Martin Pieuchot wrote:
> On 28/05/17(Sun) 13:58, Florian Riehm wrote:
>> On 05/28/17 11:33, Martin Pieuchot wrote:
>>> On 28/05/17(Sun) 10:34, Florian Riehm wrote:
>>>> Hi,
>>>>
>>>> after the fix for carp balancing ip-stealth is in, here is the fix for
>>>> balancing ip.
>>>
>>> Great!
>>>
>>>>
>>>> Non-stealth balancing traffic needs some special treatment since it 
>>>> contains
>>>> layer 3 unicast inside layer 2 multicast.
>>>>
>>>> Now the idea is to deal at layer 2 (ether_input()) with the multicast 
>>>> frames
>>>> like regular multicast. After layer 2 processing is done, ip(6)_input() 
>>>> resets
>>>> the M_MCAST flag and we are unicast.
>>>>
>>>> To achieve this I mark incoming packets matching to balancing mac 
>>>> addresses with
>>>> a mbuf tag. In ip(6)_input() I remove M_MCAST from mbuf's m_flags if the 
>>>> tag
>>>> exists. Thanks to mpi@ who brought me to this idea.
>>>
>>> Could you remove this flag in carp_lsdrop() instead?  That would keep
>>> carp logic's in netinet/ip_carp.c which makes it more resilient to
>>> future changes.
>>
>> Actually I did this in my first attempt and basically it worked.
>> Then I decided to move it out of carp_lsdrop() because carp_lsdrop()
>> is called twice in ip(6)_input(). ICMP has to be handled later,
>> to make sure we don't drop the wrong ICMP packets.
>>
>> My intention was to remove the flag as early as possible to avoid any
>> potential problems. Before carp_lsdrop() is called for ICMP, ip_input()
>> is already dealing with the M_MCAST flag. As I saw that, I decided to move
>> my fix out of carp_lsdrop(). Even it would work at the moment, it would
>> be more fragile. In example a change in pf_test() in the future could
>> break it.
>>
>> So I think a direkt fix inside ip(6)_input() is a better solution.
>> What do you think?
> 
> If you need a special case for ICMP, then do this check inside
> carp_lsdrop().
> 

Ok, new diff below.

Index: share/man/man9/mbuf_tags.9
===
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9  24 Nov 2015 19:58:48 -  1.37
+++ share/man/man9/mbuf_tags.9  28 May 2017 13:50:23 -
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
 IP packet, in case a protocol wants to respond over the same route.
 The tag contains a
 .Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode 
+.Va balancing ip .
+This packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
 .El
 .Pp
 .Fn m_tag_find
Index: sys/netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c   27 May 2017 21:55:52 -  1.310
+++ sys/netinet/ip_carp.c   28 May 2017 13:50:25 -
@@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
(IFF_UP|IFF_RUNNING))
continue;
 
-   if (carp_vhe_match(sc, eh->ether_dhost))
+   if (carp_vhe_match(sc, eh->ether_dhost)) {
+   /*
+* These packets look like layer 2 multicast but they
+* are unicast at layer 3. With help of the tag the
+* mbuf's M_MCAST flag can be removed by carp_lsdrop()
+* after we have passed layer 2.
+*/
+   if (sc->sc_balancing == CARP_BAL_IP) {
+   struct m_tag *mtag;
+   mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+   M_NOWAIT);
+   if (mtag == NULL)
+   return (0);
+   m_tag_prepend(m, mtag);
+   }
break;
+   }
}
 
if (sc == NULL) {
@@ -1456,13 +1471,6 @@ carp_input(struct ifnet *ifp0, struct mb
return (0);
}
 
-   /*
-* Clear mcast if received on a carp IP balanced address.
-*/
-   if (sc->sc_balancing == CARP_BAL_IP &&
-   ETHER_IS_MULTICAST(eh->ether_dhost))
-   *(eh->ether_dhost) &= ~0x01;
-
ml_enqueue(, m);
if_input(&g

Re: Fix carp(4) balancing ip: replace mac address hack with mbuf tag

2017-05-28 Thread Florian Riehm
On 05/28/17 11:33, Martin Pieuchot wrote:
> On 28/05/17(Sun) 10:34, Florian Riehm wrote:
>> Hi,
>>
>> after the fix for carp balancing ip-stealth is in, here is the fix for
>> balancing ip.
> 
> Great!
> 
>>
>> Non-stealth balancing traffic needs some special treatment since it contains
>> layer 3 unicast inside layer 2 multicast.
>>
>> Now the idea is to deal at layer 2 (ether_input()) with the multicast frames
>> like regular multicast. After layer 2 processing is done, ip(6)_input() 
>> resets
>> the M_MCAST flag and we are unicast.
>>
>> To achieve this I mark incoming packets matching to balancing mac addresses 
>> with
>> a mbuf tag. In ip(6)_input() I remove M_MCAST from mbuf's m_flags if the tag
>> exists. Thanks to mpi@ who brought me to this idea.
> 
> Could you remove this flag in carp_lsdrop() instead?  That would keep
> carp logic's in netinet/ip_carp.c which makes it more resilient to
> future changes.

Actually I did this in my first attempt and basically it worked.
Then I decided to move it out of carp_lsdrop() because carp_lsdrop()
is called twice in ip(6)_input(). ICMP has to be handled later,
to make sure we don't drop the wrong ICMP packets.

My intention was to remove the flag as early as possible to avoid any
potential problems. Before carp_lsdrop() is called for ICMP, ip_input()
is already dealing with the M_MCAST flag. As I saw that, I decided to move
my fix out of carp_lsdrop(). Even it would work at the moment, it would
be more fragile. In example a change in pf_test() in the future could
break it.

So I think a direkt fix inside ip(6)_input() is a better solution.
What do you think?

Regards,

Florian



Fix carp(4) balancing ip: replace mac address hack with mbuf tag

2017-05-28 Thread Florian Riehm
Hi,

after the fix for carp balancing ip-stealth is in, here is the fix for
balancing ip.

Non-stealth balancing traffic needs some special treatment since it contains
layer 3 unicast inside layer 2 multicast.

Now the idea is to deal at layer 2 (ether_input()) with the multicast frames
like regular multicast. After layer 2 processing is done, ip(6)_input() resets
the M_MCAST flag and we are unicast.

To achieve this I mark incoming packets matching to balancing mac addresses with
a mbuf tag. In ip(6)_input() I remove M_MCAST from mbuf's m_flags if the tag
exists. Thanks to mpi@ who brought me to this idea.

The current code tried to solve the problem by removing the MCAST-Bit from the
MAC address to avoid that the kernel treat it as multicast. This is very
fragile and it was broken more than once. At the moment it is broken
due to the mac address checks at the begin of ether_input().

>From my point of view carp balancing is fully working again after
this patch is in. No further issues are known at the moment. Feel
free to test and report.

Regards,

Florian

Index: share/man/man9/mbuf_tags.9
===
RCS file: /cvs/src/share/man/man9/mbuf_tags.9,v
retrieving revision 1.37
diff -u -p -r1.37 mbuf_tags.9
--- share/man/man9/mbuf_tags.9  24 Nov 2015 19:58:48 -  1.37
+++ share/man/man9/mbuf_tags.9  28 May 2017 08:14:31 -
@@ -170,6 +170,13 @@ Used by the IPv4 stack to keep track of 
 IP packet, in case a protocol wants to respond over the same route.
 The tag contains a
 .Va struct ip_srcrt .
+.It PACKET_TAG_CARP_BAL_IP
+Used by
+.Xr carp 4
+to mark packets received in mode 
+.Va balancing ip .
+This packets need some special treatment since they contain layer 3 unicast
+inside layer 2 multicast. The tag contains no data.
 .El
 .Pp
 .Fn m_tag_find
Index: sys/netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.310
diff -u -p -r1.310 ip_carp.c
--- sys/netinet/ip_carp.c   27 May 2017 21:55:52 -  1.310
+++ sys/netinet/ip_carp.c   28 May 2017 08:14:32 -
@@ -1422,8 +1422,23 @@ carp_input(struct ifnet *ifp0, struct mb
(IFF_UP|IFF_RUNNING))
continue;
 
-   if (carp_vhe_match(sc, eh->ether_dhost))
+   if (carp_vhe_match(sc, eh->ether_dhost)) {
+   /*
+* These packets look like layer 2 multicast but they
+* are unicast at layer 3. With help of the tag the
+* mbuf's M_MCAST flag can be removed in ip(6)_input,
+* after we have passed layer 2.
+*/
+   if (sc->sc_balancing == CARP_BAL_IP) {
+   struct m_tag *mtag;
+   mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0,
+   M_NOWAIT);
+   if (mtag == NULL)
+   return (0);
+   m_tag_prepend(m, mtag);
+   }
break;
+   }
}
 
if (sc == NULL) {
@@ -1455,13 +1470,6 @@ carp_input(struct ifnet *ifp0, struct mb
 
return (0);
}
-
-   /*
-* Clear mcast if received on a carp IP balanced address.
-*/
-   if (sc->sc_balancing == CARP_BAL_IP &&
-   ETHER_IS_MULTICAST(eh->ether_dhost))
-   *(eh->ether_dhost) &= ~0x01;
 
ml_enqueue(, m);
if_input(>sc_if, );
Index: sys/netinet/ip_input.c
===
RCS file: /cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.304
diff -u -p -r1.304 ip_input.c
--- sys/netinet/ip_input.c  22 May 2017 22:23:11 -  1.304
+++ sys/netinet/ip_input.c  28 May 2017 08:14:32 -
@@ -319,9 +319,18 @@ ipv4_input(struct mbuf *m)
}
 
 #if NCARP > 0
-   if (ifp->if_type == IFT_CARP && ip->ip_p != IPPROTO_ICMP &&
-   carp_lsdrop(m, AF_INET, >ip_src.s_addr, >ip_dst.s_addr))
-   goto bad;
+   if (ifp->if_type == IFT_CARP) {
+   struct m_tag *mtag;
+   if (m->m_flags & M_MCAST &&
+   (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) {
+   m_tag_delete(m, mtag);
+   m->m_flags &= ~M_MCAST;
+   }
+
+   if (ip->ip_p != IPPROTO_ICMP && carp_lsdrop(m, AF_INET,
+   >ip_src.s_addr, >ip_dst.s_addr))
+   goto bad;
+   }
 #endif
 
 #if NPF > 0
Index: sys/netinet6/ip6_input.c
===
RCS file: /cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.189
diff -u -p -r1.189 ip6_input.c
--- sys/netinet6/ip6_input.c23 May 2017 08:13:10 

Fix carp balancing ip-stealth

2017-05-27 Thread Florian Riehm
Hi,

This patch fixes the carp mode 'balancing ip-stealth'.

Problem:
System A
carp1: flags=8843 mtu 1500
lladdr 00:00:5e:00:01:01
description: Carp-intern
index 7 priority 15 llprio 3
carp: carpdev vio2 advbase 1 balancing ip-stealth
state MASTER vhid 1 advskew 0
state BACKUP vhid 2 advskew 100

System B
carp1: flags=8843 mtu 1500
lladdr 00:00:5e:00:01:01
description: Carp-intern
index 7 priority 15 llprio 3
carp: carpdev vio2 advbase 1 balancing ip-stealth
state BACKUP vhid 1 advskew 100
state MASTER vhid 2 advskew 0

System B was setting the if_link_state to LINK_STATE_DOWN because
vhid 1 was in state BACKUP. The cloning routes were missing the
RTF_UP flag then.

We musst set the link state UP if at least one vhid is in state MASTER.

Please note that carp 'balancing ip' (non-stealth-mode) is still broken.
My next patch will address this problem.

Regards

friehm

Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.309
diff -u -p -r1.309 ip_carp.c
--- netinet/ip_carp.c   4 May 2017 17:58:46 -   1.309
+++ netinet/ip_carp.c   27 May 2017 08:11:22 -
@@ -2362,6 +2362,7 @@ carp_set_state(struct carp_vhost_entry *
struct carp_softc *sc = vhe->parent_sc;
static const char *carp_states[] = { CARP_STATES };
int loglevel;
+   struct carp_vhost_entry *vhe0;
 
KASSERT(vhe->state != state);
 
@@ -2382,20 +2383,20 @@ carp_set_state(struct carp_vhost_entry *
vhe->state = state;
carp_update_lsmask(sc);
 
-   /* only the master vhe creates link state messages */
-   if (!vhe->vhe_leader)
-   return;
-
-   switch (state) {
-   case BACKUP:
-   sc->sc_if.if_link_state = LINK_STATE_DOWN;
-   break;
-   case MASTER:
-   sc->sc_if.if_link_state = LINK_STATE_UP;
-   break;
-   default:
-   sc->sc_if.if_link_state = LINK_STATE_INVALID;
-   break;
+   KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */
+
+   sc->sc_if.if_link_state = LINK_STATE_INVALID;
+   SRPL_FOREACH_LOCKED(vhe0, >carp_vhosts, vhost_entries) {
+   /*
+* Link must be up if at least one vhe is in state MASTER to
+* bring or keep route up.
+*/
+   if (vhe0->state == MASTER) {
+   sc->sc_if.if_link_state = LINK_STATE_UP;
+   break;
+   } else if (vhe0->state == BACKUP) {
+   sc->sc_if.if_link_state = LINK_STATE_DOWN;
+   }
}
if_link_state_change(>sc_if);
 }



Re: multipath / route priority support for ospf6d

2017-05-15 Thread Florian Riehm
On 05/12/17 18:07, Florian Riehm wrote:
> Hi,
> 
> our QA reports issues with the ospf6d since the kernel uses more multipath
> routes.
> It exits after certain topology changes with:
> rde_send_change_kroute: no valid nexthop found
> 
> Since the kernel uses more multipath routes, the lack of multipath support in
> ospf6d became a problem.
> 
> The attached patch ports the multipath support from ospfd to ospf6d.
> It bases on the following ospfd commits:
> cvs diff -D "2007-09-24" -D "2007-09-26"
> cvs diff -r1.65 -r1.67 kroute.c
> 
> A similar patch was suggested by Manues Guesdon a couple of years ago.
> 
> This patch doesn't fix all problems I am seeing, but it improves the situation
> a lot. A second patch porting priority support will follow and fix further
> issues. I decided to split my fixes into two parts to make review easier.
> 
Sorry,

Mailclient messed up whitespaces.

Fixed diff below.

friehm

Index: kroute.c
===
RCS file: /openbsd/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.50
diff -u -p -r1.50 kroute.c
--- kroute.c27 Dec 2016 17:18:56 -  1.50
+++ kroute.c11 May 2017 20:48:49 -
@@ -59,6 +59,8 @@ void  kr_redist_remove(struct kroute_node
 intkr_redist_eval(struct kroute *, struct kroute *);
 void   kr_redistribute(struct kroute_node *);
 intkroute_compare(struct kroute_node *, struct kroute_node *);
+intkr_change_fib(struct kroute_node *, struct kroute *, int, int);
+intkr_delete_fib(struct kroute_node *);
 
 struct kroute_node *kroute_find(const struct in6_addr *, u_int8_t);
 struct kroute_node *kroute_matchgw(struct kroute_node *,
@@ -141,18 +143,105 @@ kr_init(int fs)
 }
 
 int
-kr_change(struct kroute *kroute)
+kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
+int action)
+{
+   int  i;
+   struct kroute_node  *kn, *nkn;
+
+   if (action == RTM_ADD) {
+   /*
+* First remove all stale multipath routes.
+* This step must be skipped when the action is RTM_CHANGE
+* because it is already a single path route that will be
+* changed.
+*/
+   for (kn = kr; kn != NULL; kn = nkn) {
+   for (i = 0; i < krcount; i++) {
+   if (kn->r.scope == kroute[i].scope &&
+   IN6_ARE_ADDR_EQUAL(>r.nexthop,
+   [i].nexthop))
+   break;
+   }
+   nkn = kn->next;
+   if (i == krcount) {
+   /* stale route */
+   if (kr_delete_fib(kn) == -1)
+   log_warnx("kr_delete_fib failed");
+   /*
+* if head element was removed we need to adjust
+* the head
+*/
+   if (kr == kn)
+   kr = nkn;
+   }
+   }
+   }
+
+   /*
+* now add or change the route
+*/
+   for (i = 0; i < krcount; i++) {
+   /* nexthop ::1 -> ignore silently */
+   if (IN6_IS_ADDR_LOOPBACK([i].nexthop))
+   continue;
+
+   if (action == RTM_ADD && kr) {
+   for (kn = kr; kn != NULL; kn = kn->next) {
+   if (kn->r.scope == kroute[i].scope &&
+   IN6_ARE_ADDR_EQUAL(>r.nexthop,
+   [i].nexthop))
+   break;
+   }
+
+   if (kn != NULL)
+   /* nexthop already present, skip it */
+   continue;
+   } else
+   /* modify first entry */
+   kn = kr;
+
+   /* send update */
+   if (send_rtmsg(kr_state.fd, action, [i]) == -1)
+   return (-1);
+
+   /* create new entry unless we are changing the first entry */
+   if (action == RTM_ADD)
+   if ((kn = calloc(1, sizeof(*kn))) == NULL)
+   fatal(NULL);
+
+   kn->r.prefix = kroute[i].prefix;
+   kn->r.prefixlen = kroute[i].prefixlen;
+   kn->r.nexthop = kroute[i].nexthop;
+   kn->r.scope = kroute[i].scope;
+   kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
+   kn->r.ext_tag = kroute[i].ext_ta

multipath / route priority support for ospf6d

2017-05-12 Thread Florian Riehm

Hi,

our QA reports issues with the ospf6d since the kernel uses more multipath
routes.
It exits after certain topology changes with:
rde_send_change_kroute: no valid nexthop found

Since the kernel uses more multipath routes, the lack of multipath support in
ospf6d became a problem.

The attached patch ports the multipath support from ospfd to ospf6d.
It bases on the following ospfd commits:
cvs diff -D "2007-09-24" -D "2007-09-26"
cvs diff -r1.65 -r1.67 kroute.c

A similar patch was suggested by Manues Guesdon a couple of years ago.

This patch doesn't fix all problems I am seeing, but it improves the situation
a lot. A second patch porting priority support will follow and fix further
issues. I decided to split my fixes into two parts to make review easier.

Thanks & Regards

Florian

Index: kroute.c
===
RCS file: /openbsd/src/usr.sbin/ospf6d/kroute.c,v
retrieving revision 1.50
diff -u -p -r1.50 kroute.c
--- kroute.c27 Dec 2016 17:18:56 -  1.50
+++ kroute.c11 May 2017 20:48:49 -
@@ -59,6 +59,8 @@ void  kr_redist_remove(struct kroute_node
 intkr_redist_eval(struct kroute *, struct kroute *);
 void   kr_redistribute(struct kroute_node *);
 intkroute_compare(struct kroute_node *, struct kroute_node *);
+intkr_change_fib(struct kroute_node *, struct kroute *, int, int);
+intkr_delete_fib(struct kroute_node *);
 
 struct kroute_node	*kroute_find(const struct in6_addr *, u_int8_t);

 struct kroute_node *kroute_matchgw(struct kroute_node *,
@@ -141,18 +143,105 @@ kr_init(int fs)
 }
 
 int

-kr_change(struct kroute *kroute)
+kr_change_fib(struct kroute_node *kr, struct kroute *kroute, int krcount,
+int action)
+{
+   int  i;
+   struct kroute_node  *kn, *nkn;
+
+   if (action == RTM_ADD) {
+   /*
+* First remove all stale multipath routes.
+* This step must be skipped when the action is RTM_CHANGE
+* because it is already a single path route that will be
+* changed.
+*/
+   for (kn = kr; kn != NULL; kn = nkn) {
+   for (i = 0; i < krcount; i++) {
+   if (kn->r.scope == kroute[i].scope &&
+   IN6_ARE_ADDR_EQUAL(>r.nexthop,
+   [i].nexthop))
+   break;
+   }
+   nkn = kn->next;
+   if (i == krcount) {
+   /* stale route */
+   if (kr_delete_fib(kn) == -1)
+   log_warnx("kr_delete_fib failed");
+   /*
+* if head element was removed we need to adjust
+* the head
+*/
+   if (kr == kn)
+   kr = nkn;
+   }
+   }
+   }
+
+   /*
+* now add or change the route
+*/
+   for (i = 0; i < krcount; i++) {
+   /* nexthop ::1 -> ignore silently */
+   if (IN6_IS_ADDR_LOOPBACK([i].nexthop))
+   continue;
+
+   if (action == RTM_ADD && kr) {
+   for (kn = kr; kn != NULL; kn = kn->next) {
+   if (kn->r.scope == kroute[i].scope &&
+   IN6_ARE_ADDR_EQUAL(>r.nexthop,
+   [i].nexthop))
+   break;
+   }
+
+   if (kn != NULL)
+   /* nexthop already present, skip it */
+   continue;
+   } else
+   /* modify first entry */
+   kn = kr;
+
+   /* send update */
+   if (send_rtmsg(kr_state.fd, action, [i]) == -1)
+   return (-1);
+
+   /* create new entry unless we are changing the first entry */
+   if (action == RTM_ADD)
+   if ((kn = calloc(1, sizeof(*kn))) == NULL)
+   fatal(NULL);
+
+   kn->r.prefix = kroute[i].prefix;
+   kn->r.prefixlen = kroute[i].prefixlen;
+   kn->r.nexthop = kroute[i].nexthop;
+   kn->r.scope = kroute[i].scope;
+   kn->r.flags = kroute[i].flags | F_OSPFD_INSERTED;
+   kn->r.ext_tag = kroute[i].ext_tag;
+   rtlabel_unref(kn->r.rtlabel);/* for RTM_CHANGE */
+   kn->r.rtlabel = kroute[i].rtlabel;
+
+   if (action == RTM_ADD)
+   if (kroute_insert(kn) == -1) {
+   log_debug("kr_update_fib: cannot insert %s",
+ 

Re: [patch] Fix carp(4) with balancing ip / ip-stealth

2016-04-04 Thread Florian Riehm
On 03/31/16 10:14, Martin Pieuchot wrote:
> On 30/03/16(Wed) 18:04, Florian Riehm wrote:
>> On 03/01/16 23:03, Martin Pieuchot wrote:
>>> On 18/02/16(Thu) 16:46, Florian Riehm wrote:
>>>> On 02/16/16 11:23, Martin Pieuchot wrote:
>>>>> On 12/02/16(Fri) 16:33, Florian Riehm wrote:
>>>>>> Hi Tech,
>>>>>>
>>>>>> I have noticed that CARP IP-Balancing is broken, so I am testing and
>>>>>> fixing it.
>>>>>>
>>>>>> The first problem came in with this commit:
>>>>>> http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/net/if_ethersubr.c.diff?r1=1.176=1.177
>>>>>> It enforces that outgoing packets use the src mac of the carp interface 
>>>>>> instead
>>>>>> the mac of its carpdev. That's a good idea for failover carp but it 
>>>>>> breaks
>>>>>> balancing ip(-stealth), because the same source MAC is seen by multiple
>>>>>> switchports now. That leads to continues MAC flapping at switches.
>>>>>> My attached patch fixes the problem. Thanks Martin's refactoring, we can
>>>>>> enforce the right MAC on a central place in carp_start() now.
>>>>>
>>>>> Can't you completely get rid of the else chunk then?  It looks like a
>>>>> bad refactoring.
>>>>
>>>> I was also thinking about deleting the else chunk. At the end I decided to 
>>>> keep
>>>> it, because I wasn't sure about its purpose.
>>>> My intention was:
>>>> Fix the more or less obviously broken balancing case and don't change the
>>>> behavior of other CARP modes.
>>>> I will remove the else chunk and do some more testing to determine if it's
>>>> really unnecessary.
>>>>
>>>>> I'm also wondering can't we use "sc_realmac" here?
>>>>
>>>> I have not know "sc_realmac" until now. As far as I understand the 
>>>> intention of
>>>> the sc_realmac flag is, to indicate that the user has overwritten the mac
>>>> address of the carp interface with the mac of its parent. I have never had 
>>>> a
>>>> use case to do this. Without overwriting the mac by hand sc_realmac is 0 
>>>> for
>>>> carp with balancing ip/ip-stealth AND carp without balancing.
>>>>
>>>> I don't see how sc_realmac could help us by this problem?!
>>>> Can you give me a hint please?
>>>
>>> It was an open question, if that does not make any sense, then don't do
>>> it ;)
>>>
>>>>>> A second problem was introduced two weeks ago. Since we always check the
>>>>>> destination MAC address of received unicast packets, not only when in
>>>>>> promiscuous mode, carp balancing ip is always broken, not only when in
>>>>>> promiscuous mode. The new check collides with "Clear mcast if received 
>>>>>> on a
>>>>>> carp IP balanced address." in carp_input().
>>>>>
>>>>> Could you describe the problem?  Which Destination MAC the packet
>>>>> contains and why it doesn't match the interface?  I still don't grok why
>>>>> a carp(4) interface in balancing mode cannot have the right MAC
>>>>> configured...  Do you know?  But if what you want is the parent's MAC,
>>>>> then I'd rather go with the symmetric of your other change:  modify
>>>>> carp_input() to overwrite ``ether_dhost''.
>>>>
>>>> If using carp balancing, incoming packets contain the destination mac 
>>>> address
>>>> of the carp interface, let's say: 01:00:5e:00:01:01.
>>>> The MCAST-Bit is set here!
>>>>
>>>> carp_input() removes the MCAST-Bit:
>>>>/*
>>>> * Clear mcast if received on a carp IP balanced address.
>>>> */
>>>>if (sc->sc_balancing == CARP_BAL_IP &&
>>>>ETHER_IS_MULTICAST(eh->ether_dhost))
>>>>*(eh->ether_dhost) &= ~0x01;
>>>>
>>>> Then we run into ether_input() and 
>>>> memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)
>>>> evaluates to false because 
>>>> ac->ac_enaddr (01:00:5e:00:01:01) != eh->ether_dhost 
>>>> (00:00:5e:00:01:01)
>>>
>>> Taking a step back why do you want to clear the MULTICAST bit?
>>>
>>>

Re: [patch] Fix carp(4) with balancing ip / ip-stealth

2016-03-30 Thread Florian Riehm
On 03/01/16 23:03, Martin Pieuchot wrote:
> On 18/02/16(Thu) 16:46, Florian Riehm wrote:
>> On 02/16/16 11:23, Martin Pieuchot wrote:
>>> On 12/02/16(Fri) 16:33, Florian Riehm wrote:
>>>> Hi Tech,
>>>>
>>>> I have noticed that CARP IP-Balancing is broken, so I am testing and
>>>> fixing it.
>>>>
>>>> The first problem came in with this commit:
>>>> http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/net/if_ethersubr.c.diff?r1=1.176=1.177
>>>> It enforces that outgoing packets use the src mac of the carp interface 
>>>> instead
>>>> the mac of its carpdev. That's a good idea for failover carp but it breaks
>>>> balancing ip(-stealth), because the same source MAC is seen by multiple
>>>> switchports now. That leads to continues MAC flapping at switches.
>>>> My attached patch fixes the problem. Thanks Martin's refactoring, we can
>>>> enforce the right MAC on a central place in carp_start() now.
>>>
>>> Can't you completely get rid of the else chunk then?  It looks like a
>>> bad refactoring.
>>
>> I was also thinking about deleting the else chunk. At the end I decided to 
>> keep
>> it, because I wasn't sure about its purpose.
>> My intention was:
>> Fix the more or less obviously broken balancing case and don't change the
>> behavior of other CARP modes.
>> I will remove the else chunk and do some more testing to determine if it's
>> really unnecessary.
>>
>>> I'm also wondering can't we use "sc_realmac" here?
>>
>> I have not know "sc_realmac" until now. As far as I understand the intention 
>> of
>> the sc_realmac flag is, to indicate that the user has overwritten the mac
>> address of the carp interface with the mac of its parent. I have never had a
>> use case to do this. Without overwriting the mac by hand sc_realmac is 0 for
>> carp with balancing ip/ip-stealth AND carp without balancing.
>>
>> I don't see how sc_realmac could help us by this problem?!
>> Can you give me a hint please?
> 
> It was an open question, if that does not make any sense, then don't do
> it ;)
> 
>>>> A second problem was introduced two weeks ago. Since we always check the
>>>> destination MAC address of received unicast packets, not only when in
>>>> promiscuous mode, carp balancing ip is always broken, not only when in
>>>> promiscuous mode. The new check collides with "Clear mcast if received on a
>>>> carp IP balanced address." in carp_input().
>>>
>>> Could you describe the problem?  Which Destination MAC the packet
>>> contains and why it doesn't match the interface?  I still don't grok why
>>> a carp(4) interface in balancing mode cannot have the right MAC
>>> configured...  Do you know?  But if what you want is the parent's MAC,
>>> then I'd rather go with the symmetric of your other change:  modify
>>> carp_input() to overwrite ``ether_dhost''.
>>
>> If using carp balancing, incoming packets contain the destination mac address
>> of the carp interface, let's say: 01:00:5e:00:01:01.
>> The MCAST-Bit is set here!
>>
>> carp_input() removes the MCAST-Bit:
>>  /*
>>   * Clear mcast if received on a carp IP balanced address.
>>   */
>>  if (sc->sc_balancing == CARP_BAL_IP &&
>>  ETHER_IS_MULTICAST(eh->ether_dhost))
>>  *(eh->ether_dhost) &= ~0x01;
>>
>> Then we run into ether_input() and 
>> memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)
>> evaluates to false because 
>> ac->ac_enaddr (01:00:5e:00:01:01) != eh->ether_dhost (00:00:5e:00:01:01)
> 
> Taking a step back why do you want to clear the MULTICAST bit?
> 
> If it is just to differentiate between advertisement and normal traffic
> couldn't we use a mbuf_tag(9) for CARP advertisement instead?  The tag
> could be set in carp_input() and checked in carp_lsdrop() in the input
> path.  It could also be used in the output path to get rid of the
> horrible ``cur_vhe'' hack.
> 
Thanks for your comments. I took a look to the big picture:
The idea behind carp balancing is that clients send all their packets to layer 2
multicast addresses. Switches flood those packets to all ports and every
CARP-System receives them. Only one system processes the packet, the other
systems drop it. The processing system handles the packet like a normal unicast
packet. The packet distribution described above is the only reason why the
multicast bit is required. We don't want further mult

Re: Route priorities missing in route messages

2016-03-24 Thread Florian Riehm
On 03/24/16 15:47, Alexander Bluhm wrote:
> On Thu, Mar 24, 2016 at 03:03:18PM +0100, Florian Riehm wrote:
>> -void rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
>> +void rt_missmsg(int, struct rt_addrinfo *, int, u_char, u_int, int, 
>> u_int);
> 
>> -rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, u_int ifidx,
>> -int error, u_int tableid)
>> +rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, uint8_t prio,
>> +u_int ifidx, int error, u_int tableid)
> 
> You should use uint8_t in both prototype and function definition.
> 
> otherwise OK bluhm@
> 

Thanks,

Updated patch below.

Florian

Index: net/route.c
===
RCS file: /openbsd//src/sys/net/route.c,v
retrieving revision 1.296
diff -u -p -r1.296 route.c
--- net/route.c 7 Mar 2016 18:44:00 -   1.296
+++ net/route.c 24 Mar 2016 14:55:12 -
@@ -248,8 +248,8 @@ rt_match(struct sockaddr *dst, uint32_t 
error = rtrequest(RTM_RESOLVE, , RTP_DEFAULT,
, tableid);
if (error) {
-   rt_missmsg(RTM_MISS, , 0, 0, error,
-   tableid);
+   rt_missmsg(RTM_MISS, , 0, RTP_NONE, 0,
+   error, tableid);
} else {
/* Inform listeners of the new route */
rt_sendmsg(rt, RTM_ADD, tableid);
@@ -488,7 +488,8 @@ rt_sendmsg(struct rtentry *rt, int cmd, 
info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
}
 
-   rt_missmsg(cmd, , rt->rt_flags, rt->rt_ifidx, 0, rtableid);
+   rt_missmsg(cmd, , rt->rt_flags, rt->rt_priority, rt->rt_ifidx, 0,
+   rtableid);
if_put(ifp);
 }
 
@@ -522,6 +523,7 @@ rtredirect(struct sockaddr *dst, struct 
struct ifaddr   *ifa;
unsigned int ifidx = 0;
int  flags = RTF_GATEWAY|RTF_HOST;
+   uint8_t  prio = RTP_NONE;
 
splsoftassert(IPL_SOFTNET);
 
@@ -578,8 +580,10 @@ create:
rt = NULL;
error = rtrequest(RTM_ADD, , RTP_DEFAULT, ,
rdomain);
-   if (error == 0)
+   if (error == 0) {
flags = rt->rt_flags;
+   prio = rt->rt_priority;
+   }
stat = _dynamic;
} else {
/*
@@ -588,6 +592,7 @@ create:
 */
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
+   prio = rt->rt_priority;
stat = _newgateway;
rt_setgate(rt, gateway);
}
@@ -609,7 +614,7 @@ out:
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_AUTHOR] = src;
-   rt_missmsg(RTM_REDIRECT, , flags, ifidx, error, rdomain);
+   rt_missmsg(RTM_REDIRECT, , flags, prio, ifidx, error, rdomain);
 }
 
 /*
@@ -638,7 +643,8 @@ rtdeletemsg(struct rtentry *rt, struct i
info.rti_flags = rt->rt_flags;
ifidx = rt->rt_ifidx;
error = rtrequest_delete(, rt->rt_priority, ifp, , tableid);
-   rt_missmsg(RTM_DELETE, , info.rti_flags, ifidx, error, tableid);
+   rt_missmsg(RTM_DELETE, , info.rti_flags, rt->rt_priority, ifidx,
+   error, tableid);
if (error == 0)
rtfree(rt);
return (error);
Index: net/route.h
===
RCS file: /openbsd//src/sys/net/route.h,v
retrieving revision 1.132
diff -u -p -r1.132 route.h
--- net/route.h 24 Feb 2016 22:41:53 -  1.132
+++ net/route.h 24 Mar 2016 14:55:12 -
@@ -360,7 +360,7 @@ void rt_maskedcopy(struct sockaddr *,
 struct sockaddr *rt_plen2mask(struct rtentry *, struct sockaddr_in6 *);
 voidrt_sendmsg(struct rtentry *, int, u_int);
 voidrt_sendaddrmsg(struct rtentry *, int);
-voidrt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
+voidrt_missmsg(int, struct rt_addrinfo *, int, uint8_t, u_int, int, u_int);
 int rt_setgate(struct rtentry *, struct sockaddr *);
 int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
unsigned int, struct rtentry **);
Index: net/rtsock.c
===
RCS file: /openbsd//src/sys/net/rtsock.c,v
retrieving revision 1.186
diff -u -p -r1.186 rtsock.c
--- net/rtsock.c12 Jan 2016 09:27:46 -  1.186
+++ net/rtsock.c24 Mar 2016 14:55:12 -
@@ -1063,8 +106

Route priorities missing in route messages

2016-03-24 Thread Florian Riehm
Hi,

Routing daemons like ospfd use two interfaces to obtain route information from
the kernel:
- sysctl
- route messages

Route information from sysctl contain correct route priorities, route messages
not. This can lead to incorrect routes inside the daemons. If daemons see routes
with different priorities, they assume to see different routes.
My attached patch adds route priorities to route messages to offer a consistent
view to kernel routes with both interfaces.

Here an example of a problem, caused by this bug:

/etc/hostname.em3:
rtlabel EXPORT_DIRECT
inet 192.168.58.57 255.255.255.0 192.168.58.255
inet alias 1.0.0.1 255.255.255.0 192.168.58.255

/etc/ospfd.conf:
area 10.188.0.0 {
interface em3:192.168.58.57 {
}
}
redistribute rtlabel EXPORT_DIRECT

If the interface address 1.0.0.1 is set before starting the ospfd, we have ha
route with a priority of 4:
$ ospfctl show fib | grep 1.0.0
*C4 1.0.0.0/24   link#4

Now we delete the route with ifconfig and take a look to the generated route
message:
$ ifconfig em3 inet delete 1.0.0.1

RTM_DELETE: Delete Route: len 224, priority 0, table 0, ifidx 4, pid: 0, seq 0,
errno 0
flags:
use:0   mtu:0expire:0 
locks:  inits: 
sockaddrs: 
 1.0.0.0 1.0.0.1 255.255.255.0 08:00:27:fd:b8:81 1.0.0.1 EXPORT_DIRECT

Because the priority 0 of the deleted cloning route doesn't match to the
priority of the fib route, ospfd is still seeing/announcing the network:

$ ospfctl show fib | grep 1.0.0
*C4 1.0.0.0/24   link#4

Please take a look to my patch.

Thanks and Regards

Florian

Index: net/route.c
===
RCS file: /openbsd//src/sys/net/route.c,v
retrieving revision 1.296
diff -u -p -r1.296 route.c
--- net/route.c 7 Mar 2016 18:44:00 -   1.296
+++ net/route.c 24 Mar 2016 08:36:17 -
@@ -248,8 +248,8 @@ rt_match(struct sockaddr *dst, uint32_t 
error = rtrequest(RTM_RESOLVE, , RTP_DEFAULT,
, tableid);
if (error) {
-   rt_missmsg(RTM_MISS, , 0, 0, error,
-   tableid);
+   rt_missmsg(RTM_MISS, , 0, RTP_NONE, 0,
+   error, tableid);
} else {
/* Inform listeners of the new route */
rt_sendmsg(rt, RTM_ADD, tableid);
@@ -488,7 +488,8 @@ rt_sendmsg(struct rtentry *rt, int cmd, 
info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
}
 
-   rt_missmsg(cmd, , rt->rt_flags, rt->rt_ifidx, 0, rtableid);
+   rt_missmsg(cmd, , rt->rt_flags, rt->rt_priority, rt->rt_ifidx, 0,
+   rtableid);
if_put(ifp);
 }
 
@@ -522,6 +523,7 @@ rtredirect(struct sockaddr *dst, struct 
struct ifaddr   *ifa;
unsigned int ifidx = 0;
int  flags = RTF_GATEWAY|RTF_HOST;
+   uint8_t  prio = RTP_NONE;
 
splsoftassert(IPL_SOFTNET);
 
@@ -578,8 +580,10 @@ create:
rt = NULL;
error = rtrequest(RTM_ADD, , RTP_DEFAULT, ,
rdomain);
-   if (error == 0)
+   if (error == 0) {
flags = rt->rt_flags;
+   prio = rt->rt_priority;
+   }
stat = _dynamic;
} else {
/*
@@ -588,6 +592,7 @@ create:
 */
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
+   prio = rt->rt_priority;
stat = _newgateway;
rt_setgate(rt, gateway);
}
@@ -609,7 +614,7 @@ out:
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_AUTHOR] = src;
-   rt_missmsg(RTM_REDIRECT, , flags, ifidx, error, rdomain);
+   rt_missmsg(RTM_REDIRECT, , flags, prio, ifidx, error, rdomain);
 }
 
 /*
@@ -638,7 +643,8 @@ rtdeletemsg(struct rtentry *rt, struct i
info.rti_flags = rt->rt_flags;
ifidx = rt->rt_ifidx;
error = rtrequest_delete(, rt->rt_priority, ifp, , tableid);
-   rt_missmsg(RTM_DELETE, , info.rti_flags, ifidx, error, tableid);
+   rt_missmsg(RTM_DELETE, , info.rti_flags, rt->rt_priority, ifidx,
+   error, tableid);
if (error == 0)
rtfree(rt);
return (error);
Index: net/route.h
===
RCS file: /openbsd//src/sys/net/route.h,v
retrieving revision 1.132
diff -u -p -r1.132 route.h
--- net/route.h 24 Feb 2016 22:41:53 -  1.132
+++ net/route.h 

Re: [patch] Fix carp(4) with balancing ip / ip-stealth

2016-02-18 Thread Florian Riehm
On 02/16/16 11:23, Martin Pieuchot wrote:
> On 12/02/16(Fri) 16:33, Florian Riehm wrote:
>> Hi Tech,
>>
>> I have noticed that CARP IP-Balancing is broken, so I am testing and
>> fixing it.
>>
>> The first problem came in with this commit:
>> http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/net/if_ethersubr.c.diff?r1=1.176=1.177
>> It enforces that outgoing packets use the src mac of the carp interface 
>> instead
>> the mac of its carpdev. That's a good idea for failover carp but it breaks
>> balancing ip(-stealth), because the same source MAC is seen by multiple
>> switchports now. That leads to continues MAC flapping at switches.
>> My attached patch fixes the problem. Thanks Martin's refactoring, we can
>> enforce the right MAC on a central place in carp_start() now.
> 
> Can't you completely get rid of the else chunk then?  It looks like a
> bad refactoring.

I was also thinking about deleting the else chunk. At the end I decided to keep
it, because I wasn't sure about its purpose.
My intention was:
Fix the more or less obviously broken balancing case and don't change the
behavior of other CARP modes.
I will remove the else chunk and do some more testing to determine if it's
really unnecessary.

> I'm also wondering can't we use "sc_realmac" here?

I have not know "sc_realmac" until now. As far as I understand the intention of
the sc_realmac flag is, to indicate that the user has overwritten the mac
address of the carp interface with the mac of its parent. I have never had a
use case to do this. Without overwriting the mac by hand sc_realmac is 0 for
carp with balancing ip/ip-stealth AND carp without balancing.

I don't see how sc_realmac could help us by this problem?!
Can you give me a hint please?

> 
>> A second problem was introduced two weeks ago. Since we always check the
>> destination MAC address of received unicast packets, not only when in
>> promiscuous mode, carp balancing ip is always broken, not only when in
>> promiscuous mode. The new check collides with "Clear mcast if received on a
>> carp IP balanced address." in carp_input().
> 
> Could you describe the problem?  Which Destination MAC the packet
> contains and why it doesn't match the interface?  I still don't grok why
> a carp(4) interface in balancing mode cannot have the right MAC
> configured...  Do you know?  But if what you want is the parent's MAC,
> then I'd rather go with the symmetric of your other change:  modify
> carp_input() to overwrite ``ether_dhost''.

If using carp balancing, incoming packets contain the destination mac address
of the carp interface, let's say: 01:00:5e:00:01:01.
The MCAST-Bit is set here!

carp_input() removes the MCAST-Bit:
/*
 * Clear mcast if received on a carp IP balanced address.
 */
if (sc->sc_balancing == CARP_BAL_IP &&
ETHER_IS_MULTICAST(eh->ether_dhost))
*(eh->ether_dhost) &= ~0x01;

Then we run into ether_input() and 
memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)
evaluates to false because 
ac->ac_enaddr (01:00:5e:00:01:01) != eh->ether_dhost (00:00:5e:00:01:01)




[patch] Fix carp(4) with balancing ip / ip-stealth

2016-02-12 Thread Florian Riehm
Hi Tech,

I have noticed that CARP IP-Balancing is broken, so I am testing and
fixing it.

The first problem came in with this commit:
http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/net/if_ethersubr.c.diff?r1=1.176=1.177
It enforces that outgoing packets use the src mac of the carp interface instead
the mac of its carpdev. That's a good idea for failover carp but it breaks
balancing ip(-stealth), because the same source MAC is seen by multiple
switchports now. That leads to continues MAC flapping at switches.
My attached patch fixes the problem. Thanks Martin's refactoring, we can
enforce the right MAC on a central place in carp_start() now.

A second problem was introduced two weeks ago. Since we always check the
destination MAC address of received unicast packets, not only when in
promiscuous mode, carp balancing ip is always broken, not only when in
promiscuous mode. The new check collides with "Clear mcast if received on a
carp IP balanced address." in carp_input().
My fix in net/if_ethersubr.c is probably the wrong place to fix the problem,
but I don't have a better idea for now. Maybe the fix is ok for now, because we
don't have too much time before tagging 5.9.

Both of my fixes are independent from each other. They can be separately
committed.

Thanks and Regards,

Florian



Index: net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.233
diff -u -p -r1.233 if_ethersubr.c
--- net/if_ethersubr.c  22 Jan 2016 17:09:05 -  1.233
+++ net/if_ethersubr.c  6 Feb 2016 08:15:44 -
@@ -354,7 +354,8 @@ ether_input(struct ifnet *ifp, struct mb
 * This check is required in promiscous mode, and for some hypervisors
 * where the MAC filter is 'best effort' only.
 */
-   if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
+   if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
+   ifp->if_type != IFT_CARP) {
if (memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)) {
m_freem(m);
return (1);
Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.286
diff -u -p -r1.286 ip_carp.c
--- netinet/ip_carp.c   21 Jan 2016 11:23:48 -  1.286
+++ netinet/ip_carp.c   6 Feb 2016 08:15:46 -
@@ -2275,9 +2275,15 @@ carp_start(struct ifnet *ifp)
 * advertisements in 'ip' and 'ip-stealth' balacing
 * modes.
 */
-   if (sc->sc_balancing != CARP_BAL_IPSTEALTH &&
-   sc->sc_balancing != CARP_BAL_IP &&
-   (sc->cur_vhe && !sc->cur_vhe->vhe_leader)) {
+   if (sc->sc_balancing == CARP_BAL_IP ||
+   sc->sc_balancing == CARP_BAL_IPSTEALTH) {
+   struct ether_header *eh;
+   uint8_t *esrc;
+
+   eh = mtod(m, struct ether_header *);
+   esrc = ((struct arpcom*) ifp->if_carpdev)->ac_enaddr;
+   memcpy(eh->ether_shost, esrc, sizeof(eh->ether_shost));
+   } else if (sc->cur_vhe && !sc->cur_vhe->vhe_leader) {
struct ether_header *eh;
uint8_t *esrc;
 





Re: libssl patch available

2015-03-11 Thread Florian Riehm
On 03/11/15 21:42, Ted Unangst wrote:
 
 Thanks to Florian Riehm for pointing out that 5.6 was still vulnerable to
 FREAK.
 

Thanks to Steffen Ulrich for testing. He has found the problem.
Also thanks to tedu@ for the fix.

Florian



Re: Sending route messages for local routes or cloning routes

2015-01-07 Thread Florian Riehm
Hi Martin,

Thanks for your diff! Regardless of my problem it makes our code
more clear. The loop in rt_newaddrmsg() was ugly.

 
 Here's a diff that should generate a RTM_ADD message for every CLONING
 route added while keeping the existing RTM_NEWADDR/RTM_DELADDR logic.
 
 dhclient(8) is happy with this change, does it fix your use case too?

There is a small bug in your diff of route.c, because rt_ifa_del() is 
never called with flag RTF_CLONING, so ospfd doesn't notice if an address
gets deleted.

I was thinking about this three options to fix the problem:
1.) Check with rt-rt_flags instead of flags for RTF_CLONING flag:
-if (flags  (RTF_LOCAL|RTF_CLONING))
+if (rt-rt_flags  (RTF_LOCAL|RTF_CLONING))

2) Add RTF_CLONING to flags argument at certain calls of rt_ifa_add() /
rt_ifa_delete()

3.) Remove the check completely and call always rt_sendmsg()

At the moment I would prefer the 3. solution. After rtrequest1() has been
called, we check for error and check if rt ist not NULL. If this conditions are
true we have added or deleted a route. Why should we not send a route message
then? 
Below an updated diff for route.c with my fix.

I noticed that the RTM_ADD/RTM_DELETE routing messages doesn't contain a 
priority anymore with your diff. I guess this is not a problem.
If nobody needs the priority I prefer the new behavior.
If something needs it, we had to add another argument to rt_missmsg().

Regards

Florian

Index: net/route.c
===
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.196
diff -u -p -r1.196 route.c
--- net/route.c 29 Dec 2014 11:53:58 -  1.196
+++ net/route.c 7 Jan 2015 17:29:56 -
@@ -382,11 +382,13 @@ void
 rt_sendmsg(struct rtentry *rt, int cmd, u_int rtableid)
 {
struct rt_addrinfo info;
+   struct sockaddr_rtlabel sa_rl;
 
-   bzero(info, sizeof(info));
+   memset(info, 0, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt-rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
+   info.rti_info[RTAX_LABEL] = rtlabel_id2sa(rt-rt_labelid, sa_rl);
if (rt-rt_ifp != NULL) {
info.rti_info[RTAX_IFP] =(struct sockaddr *)rt-rt_ifp-if_sadl;
info.rti_info[RTAX_IFA] = rt-rt_ifa-ifa_addr;
@@ -1098,7 +1100,8 @@ rt_ifa_add(struct ifaddr *ifa, int flags
 * userland that a new address has been added.
 */
if (flags  RTF_LOCAL)
-   rt_newaddrmsg(RTM_ADD, ifa, error, nrt);
+   rt_sendaddrmsg(nrt, RTM_NEWADDR);
+   rt_sendmsg(nrt, RTM_ADD, rtableid);
}
return (error);
 }
@@ -1153,7 +1156,8 @@ rt_ifa_del(struct ifaddr *ifa, int flags
error = rtrequest1(RTM_DELETE, info, prio, nrt, rtableid);
if (error == 0  (rt = nrt) != NULL) {
if (flags  RTF_LOCAL)
-   rt_newaddrmsg(RTM_DELETE, ifa, error, nrt);
+   rt_sendaddrmsg(nrt, RTM_DELADDR);
+   rt_sendmsg(nrt, RTM_DELETE, rtableid);
if (rt-rt_refcnt = 0) {
rt-rt_refcnt++;
rtfree(rt);



Sending route messages for local routes or cloning routes

2014-12-23 Thread Florian Riehm
Hi Martin,

as requested in your commit message I would like to tell you about a regression
with the introduced local routes:

Before OpenBSD 5.6 it was possible to add route labels to interfaces and tell
ospfd to redistribute all labeled routes. After adding an address to a labeled
interface a new route was announced via ospf.

Now the ospfd can't detect added or deleted addresses anymore, because the
kernel informs about local routes instead of cloning routes.

route -nv monitor:

OPENBSD_5_5:
RTM_NEWADDR: address being added to iface: len 96, metric 0, flags:CLONING
sockaddrs: NETMASK,IFP,IFA,BRD
 255.255.255.0 08:00:27:ad:dd:7c 5.5.5.5 5.5.5.255

RTM_ADD: Add Route: len 192, priority 4, table 0, pid: 0, seq 0, errno 0
flags:UP,CLONING
use:0   mtu:0expire:0 
locks:  inits: 
sockaddrs: DST,GATEWAY,NETMASK,LABEL
 5.5.5.0 link#3 255.255.255.0 EXPORT

OPENBSD_5_6:
RTM_NEWADDR: address being added to iface: len 96, metric 0, flags:UP
sockaddrs: NETMASK,IFP,IFA,BRD
 255.255.255.0 08:00:27:fd:b8:81 5.5.5.5 5.5.5.255

RTM_ADD: Add Route: len 184, priority 1, table 0, pid: 0, seq 0, errno 0
flags:UP,HOST,LLINFO,LOCAL
use:0   mtu:0expire:0 
locks:  inits: 
sockaddrs: DST,GATEWAY,LABEL
 5.5.5.5 08:00:27:fd:b8:81 EXPORT

In the commit message of route.c r1.172 you said always generating one message
per locally configured address.
Are any userland tools needing this message?

Would it make sense to remove the loop in rt_newaddrmsg which generates the two
route messages? Instead of this rt_newaddrmsg sends only the RTM_NEWADDR /
RTM_DELADDR message and the other message gets send after creating/deleting the
cloning route.

By the way if rt_newaddrmsg() sends RTM_NEWADDR and RTM_DELADDR we should
rename it to rt_addrmsg().

Regards,

Florian



Re: Sending route messages for local routes or cloning routes

2014-12-23 Thread Florian Riehm
On 12/23/14 11:59, Martin Pieuchot wrote:
 Would it make sense to remove the loop in rt_newaddrmsg which generates the 
 two
  route messages? Instead of this rt_newaddrmsg sends only the RTM_NEWADDR /
  RTM_DELADDR message and the other message gets send after 
  creating/deleting the
  cloning route.
 I think it does make sense.  It would restore the RTM_ADD for
 RTF_CLONING routes and keep one RTM_NEWADDR for RTF_LOCAL routes.
 Apart from your scenario with ospfd/ospf6d, dhclient should be happy
 with this change and I can think of a third case.  If  you configure
 two addresses of the same subnet you should see 2 RTM_NEWADDR but only
 one RTM_ADD since only the first address will get a cloning route.
 
  By the way if rt_newaddrmsg() sends RTM_NEWADDR and RTM_DELADDR we should
  rename it to rt_addrmsg().
 If you remove the loop and generate only one message, I think that you can
 simply use rt_sendmsg() and kill rt_newaddrmsg().


ok, thanks for your advice. I will try it and let you know if it works.



Fix some vulnerabilities in file(1)

2014-12-14 Thread Florian Riehm
Hi,

the attached patch fixes two vulnerabilities in file(1):

CVE-2014-2270: A specifically crafted Portable Executable (PE) can trigger
out-of-bounds read.

CVE-2014-1943: A malicious input file could trigger infinite recursion in
libmagic(3).

The patch is based on a FreeBSD security advisory and fixes from the file
developers upstream. I had to do some adaptions because our version of file is a
bit older. We are not affected by the two other CVEs (CVE-2012-1571,
CVE-2012-1571) referred by the FreeBSD SA.

For further Information see:
https://www.freebsd.org/security/advisories/FreeBSD-SA-14:16.file.asc
http://security.FreeBSD.org/patches/SA-14:16/file-8.4.patch

I have ignored the 80 characters limit sometimes to keep the diff to upstream
smaller.
The regression tests for file were successful.

I have another patch which fixes the vulnerabilities described in
https://www.freebsd.org/security/advisories/FreeBSD-SA-14:28.file.asc .
I will submit it if the first part is committed to make reviewers job easier.

Regards

Florian Riehm

Index: ascmagic.c
===
RCS file: /cvs/src/usr.bin/file/ascmagic.c,v
retrieving revision 1.12
diff -u -p -r1.12 ascmagic.c
--- ascmagic.c  18 May 2014 17:50:11 -  1.12
+++ ascmagic.c  14 Dec 2014 14:10:55 -
@@ -175,7 +175,8 @@ file_ascmagic(struct magic_set *ms, cons
}
if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
goto done;
-   if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
+   if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf,
+   0, TEXTTEST) != 0) {
rv = 1;
goto done;
}
Index: file.h
===
RCS file: /cvs/src/usr.bin/file/file.h,v
retrieving revision 1.24
diff -u -p -r1.24 file.h
--- file.h  18 May 2014 17:50:11 -  1.24
+++ file.h  14 Dec 2014 14:10:55 -
@@ -332,7 +332,8 @@ protected int file_zmagic(struct magic_s
 const unsigned char *, size_t);
 protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
 protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
-protected int file_softmagic(struct magic_set *, const unsigned char *, 
size_t, int);
+protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
+size_t, int);
 protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
 protected uint64_t file_signextend(struct magic_set *, struct magic *,
 uint64_t);
Index: funcs.c
===
RCS file: /cvs/src/usr.bin/file/funcs.c,v
retrieving revision 1.8
diff -u -p -r1.8 funcs.c
--- funcs.c 18 May 2014 17:50:11 -  1.8
+++ funcs.c 14 Dec 2014 14:10:55 -
@@ -181,7 +181,7 @@ file_buffer(struct magic_set *ms, int fd
(m = file_is_tar(ms, buf, nb)) == 0) {
/* try tests in /etc/magic (or surrogate magic file) */
if ((ms-flags  MAGIC_NO_CHECK_SOFT) != 0 ||
-   (m = file_softmagic(ms, buf, nb, BINTEST)) == 0) {
+   (m = file_softmagic(ms, buf, nb, 0, BINTEST)) == 0) {
/* try known keywords, check whether it is ASCII */
if ((ms-flags  MAGIC_NO_CHECK_ASCII) != 0 ||
(m = file_ascmagic(ms, buf, nb)) == 0) {
Index: softmagic.c
===
RCS file: /cvs/src/usr.bin/file/softmagic.c,v
retrieving revision 1.17
diff -u -p -r1.17 softmagic.c
--- softmagic.c 17 Apr 2013 15:01:26 -  1.17
+++ softmagic.c 14 Dec 2014 14:10:56 -
@@ -39,9 +39,9 @@
 
 
 private int match(struct magic_set *, struct magic *, uint32_t,
-const unsigned char *, size_t, int);
+const unsigned char *, size_t, int, int);
 private int mget(struct magic_set *, const unsigned char *,
-struct magic *, size_t, unsigned int);
+struct magic *, size_t, unsigned int, int);
 private int magiccheck(struct magic_set *, struct magic *);
 private int32_t mprint(struct magic_set *, struct magic *);
 private void mdebug(uint32_t, const char *, size_t);
@@ -54,6 +54,7 @@ private void cvt_16(union VALUETYPE *, c
 private void cvt_32(union VALUETYPE *, const struct magic *);
 private void cvt_64(union VALUETYPE *, const struct magic *);
 
+#define OFFSET_OOB(n, o, i)((n)  (o) || (i)  ((n) - (o)))
 /*
  * Macro to give description string according to whether we want plain
  * text or MIME type
@@ -66,12 +67,13 @@ private void cvt_64(union VALUETYPE *, c
  */
 /*ARGSUSED1*/  /* nbytes passed for regularity, maybe need later */
 protected int
-file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, 
int mode)
+file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes,
+  size_t level, int mode

Disable NOINET6 for carp parent interfaces automatically

2014-11-28 Thread Florian Riehm
Hi,

in OpenBSD 5.6 NOINET6 gets disabled on a carp interface after configuring
an ipv6 address. Additionally you have to disable NOINET6 at the physical
interface or ip6_output() will fail because carp_send_ad() could not determine
an ipv6 source address via ifaof_ifpforaddr().

The attached patch enables IPv6 also at the carp parent interface if enabling
it at a carp interface.

Are there other interfaces which have the same problem?

Regards

Florian

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.304
diff -u -p -r1.304 if.c
--- net/if.c23 Nov 2014 07:39:02 -  1.304
+++ net/if.c28 Nov 2014 19:39:58 -
@@ -1323,6 +1323,18 @@ ifioctl(struct socket *so, u_long cmd, c
if (ifp-if_xflags  IFXF_NOINET6 
!(ifr-ifr_flags  IFXF_NOINET6)) {
ifp-if_xflags = ~IFXF_NOINET6;
+#if NCARP  0
+   /* also enable ipv6 on carp parent interface */
+   if (ifp-if_type == IFT_CARP  ifp-if_carpdev 
+   ifp-if_carpdev-if_xflags  IFXF_NOINET6) {
+   ifp-if_carpdev-if_xflags = ~IFXF_NOINET6;
+   if (ifp-if_carpdev-if_flags  IFF_UP) {
+   s = splnet();
+   in6_if_up(ifp-if_carpdev);
+   splx(s);
+   }
+   }
+#endif
if (ifp-if_flags  IFF_UP) {
/* configure link-local address */
s = splnet();



Behavior of changing routes on OpenBSD 5.6

2014-11-25 Thread Florian Riehm
Hi tech,

since OpenBSD 5.6 route change messages can change the interface of a route
(rt_ifa) even if a message doesn't seem to require it because of a changed
gateway or stuff like that.
I would like to ask if it's a regression or if the new behavior is intended.

Example: (only for testing - it doesn't represent my network topology)
ifconfig em0 inet6 fd88::1/64
ifconfig em1 inet6 fd99::1/64
route add -inet6 fd88::666 fd99::1
route get fd88::666
interface: em1 (as expected)
route change fd88::666 -mtu 1500
route get fd88::666
interface: em0 (broken - trying to ping the target results in No route to
host)

In the example I can workaround the problem with adding a gateway while changing
the mtu:
route change fd88::666 fd99::1 -mtu 1500

A comment in route_output (rtsock.c) says
/*
* new gateway could require new ifaddr, ifp;
* flags may also be different; ifp may be specified
* by ll sockaddr when protocol address is ambiguous
*/
but their is no check for a 'new gateway'.

With OpenBSD 5.5 some checks with RTAX_GATEWAY, RTAX_IFP and RTAX_IFA were made,
before
rt_ifa changed.

Would it make sense to add a check like follow or is the new behavior intended 
and
'route change' has to be called with destination and gateway now?
The patch is not tested very well yet, it's a proposal awaiting your comments.

Regards,

Florian

--- sys/net/rtsock.c2014-11-26 06:08:59.0 +0100
+++ sys/net/rtsock.c.new2014-11-26 06:08:24.0 +0100
@@ -763,7 +763,7 @@ report:
}
ifa = info.rti_ifa;
if (ifa) {
-   if (rt-rt_ifa != ifa) {
+   if (rt-rt_ifa != ifa  newgate) {
if (rt-rt_ifa-ifa_rtrequest)
rt-rt_ifa-ifa_rtrequest(
RTM_DELETE, rt);



Re: openospfd router-priority

2014-08-19 Thread Florian Riehm
On 08/19/14 21:45, Tim Epkes wrote:
 All,
 
 I had implemented a network using openospf and initially left
 router-priorities off.  Problem is I kept coming up FULL/OTHER and would
 not get routes.  I changed the router priority values (not to match as when
 I matched got the same).  I changed one side of a line to 10, while the
 other was 5.  When I reloaded the side I changed, it stayed in FULL/OTHER
 state when it came up again.  To get it to change to FULL/DR or FULL/BCKUP
 I needed to reload both systems ospfd daemons.  Then it recalced fine.
  This leads me to believe that their isn't a full renegotiation if the
 other side is still running.  That would be hard to do in an environment
 where you have to bring down the entire network to make a change.  Is this
 a bug or was this the intended deployment.  Also if priorities match,
 shouldn't they then move to see who has the highest RID to determine DR and
 Backup.  Cause coming up as OTHER by default was a real pain.  Thanks
 
 P.S. once all reloaded it works fine.
 
 Tim
 

Hi Tim,

If you start two ospfd without any special router priority, the default priority
of both routers is 1, one router becomes DR and the other one BDR.
That's how it supposed to work and if it doesn't something is wrong. I have
tried it with a very simple configuration and it works for me.

If your network is up alreade and their is a DR in it and another ospfd goes
up, the DR won't change regardless of it's priority.
See RFC 2328 section 7.3:
In general, when a router's interface to a network first becomes functional,
it checks to see whether there is currently a Designated Router for the
network. If there is, it accepts that Designated Router, regardless of its
Router Priority.

Why will you enforce a new DR if their is already one?

Florian



[Patch] Add router alert option to igmp packets

2014-04-26 Thread Florian Riehm
Hi tech,

our IGMP packets don't contain router alert options.
According rfc 2236 (Internet Group Management Protocol, Version 2)
packets without this option have to be ignored. Some layer 3 switches
are blocking our igmp packets because of that.
The following patch is based on a FreeBSD patch long years ago:
http://svnweb.freebsd.org/base?view=revisionrevision=14622

I would be happy if anybody could commit this.

Regards,

Florian


Index: igmp.c
===
RCS file: /cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.39
diff -u -p -r1.39 igmp.c
--- igmp.c  21 Apr 2014 12:22:26 -  1.39
+++ igmp.c  26 Apr 2014 14:46:31 -
@@ -103,6 +103,7 @@ int *igmpctl_vars[IGMPCTL_MAXID] = IGMPC

 intigmp_timers_are_running;
 static struct router_info *rti_head;
+static struct mbuf *router_alert;
 struct igmpstat igmpstat;

 void igmp_checktimer(struct ifnet *);
@@ -113,12 +114,24 @@ struct router_info * rti_find(struct ifn
 void
 igmp_init(void)
 {
+   struct ipoption *ra;

/*
 * To avoid byte-swapping the same value over and over again.
 */
igmp_timers_are_running = 0;
rti_head = 0;
+   /*
+   * Construct a Router Alert option to use in outgoing packets
+   */
+   router_alert = m_get(M_DONTWAIT, MT_DATA);
+   ra = mtod(router_alert, struct ipoption *);
+   ra-ipopt_dst.s_addr = 0;
+   ra-ipopt_list[0] = IPOPT_RA;   /* Router Alert Option */
+   ra-ipopt_list[1] = 0x04;   /* 4 bytes long */
+   ra-ipopt_list[2] = 0x00;
+   ra-ipopt_list[3] = 0x00;
+   router_alert-m_len = sizeof(ra-ipopt_dst) + ra-ipopt_list[1];
 }

 /* Return -1 for error. */
@@ -634,7 +647,7 @@ igmp_sendpkt(struct in_multi *inm, int t
imo.imo_multicast_loop = 0;
 #endif /* MROUTING */

-   ip_output(m, NULL, NULL, IP_MULTICASTOPTS, imo, NULL, 0);
+   ip_output(m, router_alert, NULL, IP_MULTICASTOPTS, imo, NULL, 0);

++igmpstat.igps_snd_reports;
 }
Index: ip.h
===
RCS file: /cvs/src/sys/netinet/ip.h,v
retrieving revision 1.14
diff -u -p -r1.14 ip.h
--- ip.h24 Oct 2013 15:21:21 -  1.14
+++ ip.h26 Apr 2014 14:46:31 -
@@ -150,6 +150,7 @@ struct ip {
 #defineIPOPT_LSRR  131 /* loose source route */
 #defineIPOPT_SATID 136 /* satnet id */
 #defineIPOPT_SSRR  137 /* strict source route 
*/
+#defineIPOPT_RA148 /* router alert */

 /*
  * Offsets to fields in options other than EOL and NOP.



Re: [Patch] Add router alert option to igmp packets

2014-04-26 Thread Florian Riehm
On 04/26/14 20:35, Alexander Bluhm wrote:
 
 /*
  * To avoid byte-swapping the same value over and over again.
  */
 
 FreeBSD has code matching this comment.  In OpenBSD the code is
 gone and so should the comment.  Of course that is unrelated to
 this diff.
 

I have removed the comment in my new diff also.

 +   /*
 +   * Construct a Router Alert option to use in outgoing packets
 +   */
 
 The * must be aligned.
 

fixed.

 +   ra-ipopt_dst.s_addr = 0;
 
 Use INADDR_ANY instead of 0 as it is an IP address.
 

fixed.

 Otherwise OK bluhm@
 

Thanks.

Regards

Florian

Index: igmp.c
===
RCS file: /cvs/src/sys/netinet/igmp.c,v
retrieving revision 1.39
diff -u -p -r1.39 igmp.c
--- igmp.c  21 Apr 2014 12:22:26 -  1.39
+++ igmp.c  26 Apr 2014 20:10:54 -
@@ -103,6 +103,7 @@ int *igmpctl_vars[IGMPCTL_MAXID] = IGMPC

 intigmp_timers_are_running;
 static struct router_info *rti_head;
+static struct mbuf *router_alert;
 struct igmpstat igmpstat;

 void igmp_checktimer(struct ifnet *);
@@ -113,12 +114,21 @@ struct router_info * rti_find(struct ifn
 void
 igmp_init(void)
 {
+   struct ipoption *ra;

-   /*
-* To avoid byte-swapping the same value over and over again.
-*/
igmp_timers_are_running = 0;
rti_head = 0;
+   /*
+* Construct a Router Alert option to use in outgoing packets
+*/
+   router_alert = m_get(M_DONTWAIT, MT_DATA);
+   ra = mtod(router_alert, struct ipoption *);
+   ra-ipopt_dst.s_addr = INADDR_ANY;
+   ra-ipopt_list[0] = IPOPT_RA;   /* Router Alert Option */
+   ra-ipopt_list[1] = 0x04;   /* 4 bytes long */
+   ra-ipopt_list[2] = 0x00;
+   ra-ipopt_list[3] = 0x00;
+   router_alert-m_len = sizeof(ra-ipopt_dst) + ra-ipopt_list[1];
 }

 /* Return -1 for error. */
@@ -634,7 +644,7 @@ igmp_sendpkt(struct in_multi *inm, int t
imo.imo_multicast_loop = 0;
 #endif /* MROUTING */

-   ip_output(m, NULL, NULL, IP_MULTICASTOPTS, imo, NULL, 0);
+   ip_output(m, router_alert, NULL, IP_MULTICASTOPTS, imo, NULL, 0);

++igmpstat.igps_snd_reports;
 }
Index: ip.h
===
RCS file: /cvs/src/sys/netinet/ip.h,v
retrieving revision 1.14
diff -u -p -r1.14 ip.h
--- ip.h24 Oct 2013 15:21:21 -  1.14
+++ ip.h26 Apr 2014 20:10:54 -
@@ -150,6 +150,7 @@ struct ip {
 #defineIPOPT_LSRR  131 /* loose source route */
 #defineIPOPT_SATID 136 /* satnet id */
 #defineIPOPT_SSRR  137 /* strict source route 
*/
+#defineIPOPT_RA148 /* router alert */

 /*
  * Offsets to fields in options other than EOL and NOP.




[patch] ospfd: exporting default gateway via route label (fix ROUNDUP)

2014-03-02 Thread Florian Riehm
Hi all,

ospfd can't export the default gateway via route label because
get_rtaddrs gets confused by a netmask (RTAX_NETMASK) of 0 because
sa-sa_len in get_rtaddrs is 0 and ROUNDUP then returns 0 also.

The bug has been fixed in ospf6d in the same way a couple of years ago.
Ospf6d uses the ROUNDUP macro from route/show.c now.
I think ospfd should do the same.

Regards,

Florian


Index: usr.sbin/ospfd/kroute.c
===
RCS file: /cvs/src/usr.sbin/ospfd/kroute.c,v
retrieving revision 1.93
diff -u -p -r1.93 kroute.c
--- usr.sbin/ospfd/kroute.c 30 Oct 2013 17:24:35 -  1.93
+++ usr.sbin/ospfd/kroute.c 2 Mar 2014 23:18:47 -
@@ -987,8 +987,8 @@ prefixlen2mask(u_int8_t prefixlen)
return (htonl(0x  (32 - prefixlen)));
 }

-#defineROUNDUP(a)  \
-(((a)  (sizeof(long) - 1)) ? (1 + ((a) | (sizeof(long) - 1))) : (a))
+#define ROUNDUP(a) \
+   ((a)  0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))

 void
 get_rtaddrs(int addrs, struct sockaddr *sa, struct sockaddr **rti_info)



[Patch] ospfd: allow router lsa with 0 links

2014-02-12 Thread Florian Riehm
Hi Claudio,

could you please have a look to the following patch.
It removes a check that tries to make sure, a router lsa has at
least one link.

I have seen a problem with the check in the following situation:
On a router with only one ospf speaking interface, the link on this
interface goes down. The ospfd notice that, but doesn't generate
a router lsa for itself, because nlinks is zero. So the router doesn't
realize that it can not reach learned ospf routes anymore. The routes
get not deleted from the kernel routing table.

Sometimes I have also seen multiple times the same ospf routes inside
the kernel routing table, after the interface went up again. But this
is not reproducable all the time.

Do you see any situations were a router lsa with nlinks 0 could cause
trouble?

Regards,

Florian


Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospfd/ospfe.c,v
retrieving revision 1.87
diff -u -p -r1.87 ospfe.c
--- ospfe.c 13 Nov 2013 20:43:00 -  1.87
+++ ospfe.c 12 Feb 2014 18:22:43 -
@@ -1020,7 +1020,7 @@ orig_rtr_lsa(struct area *area)
memcpy(ibuf_seek(buf, LS_CKSUM_OFFSET, sizeof(chksum)),
chksum, sizeof(chksum));

-   if (self  num_links)
+   if (self)
imsg_compose_event(iev_rde, IMSG_LS_UPD, self-peerid, 0,
-1, buf-buf, ibuf_size(buf));
else
Index: rde_lsdb.c
===
RCS file: /cvs/src/usr.sbin/ospfd/rde_lsdb.c,v
retrieving revision 1.49
diff -u -p -r1.49 rde_lsdb.c
--- rde_lsdb.c  14 Aug 2013 20:16:09 -  1.49
+++ rde_lsdb.c  12 Feb 2014 18:22:43 -
@@ -306,10 +306,6 @@ lsa_router_check(struct lsa *lsa, u_int1
}

nlinks = ntohs(lsa-data.rtr.nlinks);
-   if (nlinks == 0) {
-   log_warnx(lsa_check: invalid LSA router packet);
-   return (0);
-   }
for (i = 0; i  nlinks; i++) {
rtr_link = (struct lsa_rtr_link *)(buf + off);
off += sizeof(struct lsa_rtr_link);



[Patch] Add rtlabel to rt_newaddrmsg

2014-02-11 Thread Florian Riehm
Hi tech@,

I'm using the ospfd with redistribute rtlabel statements.
If I add new addresses to interfaces with a route label, ospfd will
not notice it, because the route messages don't contain the route
label. Please have a look to the attached patch. It adds the
route label, so ospfd can handle it properly.

Regards,

florian


Index: rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.137
diff -u -p -r1.137 rtsock.c
--- rtsock.c22 Jan 2014 06:28:09 -  1.137
+++ rtsock.c11 Feb 2014 19:43:17 -
@@ -1193,12 +1193,15 @@ rt_newaddrmsg(int cmd, struct ifaddr *if
if ((cmd == RTM_ADD  pass == 2) ||
(cmd == RTM_DELETE  pass == 1)) {
struct rt_msghdr *rtm;
+   struct sockaddr_rtlabel sa_rl;

if (rt == 0)
continue;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
info.rti_info[RTAX_DST] = sa = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt-rt_gateway;
+   info.rti_info[RTAX_LABEL] =
+   rtlabel_id2sa(rt-rt_labelid, sa_rl);
if ((m = rt_msg1(cmd, info)) == NULL)
continue;
rtm = mtod(m, struct rt_msghdr *);



rtsock: Why shouldn't rt flags be changeable in RTM_CHANGE messages?

2012-11-28 Thread Florian Riehm
Hi,

because of a bug in ospf6d I tried to allow ospf6d to change cloning
routes to gateway routes and the other way around. I've noticed there is
a check which tries to forbid this, but the check is broken. It allows to
toggle ALL rt flags, if ONE common bit in RTF_FMASK and rtm_fmask is set.
Because of this bug my modified ospf6d is working fine because it can set
proper route flags.

I'm asking me what is the reason for the check? In my opinion a process
which writes to a route socket, should know how it has to handle route
flags. If the process doesn't want to modify the route flags it has just
to specify an empty rtm_fmask. It's confusing for developers if there is
a bitmask (RTF_FMASK) that prohibit silently to change flags, which are
explicitly specified to be changeable by an other bitmask (rtm_fmask).

Does anybody knows a situation were changing route flags would break
something, which would make it necessary to delete a route first and add
the same route with different flags again? If not please remove the
check.

Regards,

Florian


Index: net/route.h
===
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.78
diff -u -p -r1.78 route.h
--- net/route.h 19 Sep 2012 16:14:01 -  1.78
+++ net/route.h 28 Nov 2012 20:52:19 -
@@ -146,11 +146,6 @@ struct rtentry {
 #define RTF_MPATH  0x4 /* multipath route or operation */
 #define RTF_MPLS   0x10/* MPLS additional infos */

-/* mask of RTF flags that are allowed to be modified by RTM_CHANGE */
-#define RTF_FMASK  \
-(RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_BLACKHOLE | \
- RTF_REJECT | RTF_STATIC)
-
 #ifndef _KERNEL
 /* obsoleted */
 #define RTF_SOURCE 0x2 /* this route has a source
selector */
Index: net/rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.123
diff -u -p -r1.123 rtsock.c
--- net/rtsock.c20 Sep 2012 20:53:13 -  1.123
+++ net/rtsock.c28 Nov 2012 20:52:20 -
@@ -836,11 +836,8 @@ report:
}
}
 #endif
-   /* Hack to allow some flags to be toggled */
-   if (rtm-rtm_fmask  RTF_FMASK)
-   rt-rt_flags = (rt-rt_flags 
-   ~rtm-rtm_fmask) |
-   (rtm-rtm_flags  rtm-rtm_fmask);
+   rt-rt_flags = (rt-rt_flags  ~rtm-rtm_fmask) |
+   (rtm-rtm_flags  rtm-rtm_fmask);

rt_setmetrics(rtm-rtm_inits, rtm-rtm_rmx,
rt-rt_rmx);