Re: validate addresses in routing message

2019-08-29 Thread Claudio Jeker
On Wed, Aug 28, 2019 at 11:58:36PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> The kernel may crash as there is not enough input validation in
> routing messages.
> 
> https://syzkaller.appspot.com/bug?id=e2076a6518b49730aefe64acf0a266f8e79685a5
> 
> Here the name of a routing label is not NUL terminated, but there
> are more things that can go wrong.  So I added some checks for
> incoming routing addresses from userland that the kernel actually
> uses.
> 
> It is not super strict as userland may provide incomplete addresses
> that work anyway.  I remember openvpn caused some problems in this
> area.  Could someone test it with this diff?
> 
> ok?

I don't think this is the right way to do this. The consumers of rtinfo
need to check the values based on their needs. Ideally we add some helpers
to make that easier. I think it is close to impossible to properly
validate the sockaddrs in rtm_xaddrs() since that function is missing
the needed context.

 
> bluhm
> 
> Index: net/rtsock.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.290
> diff -u -p -w -r1.290 rtsock.c
> --- net/rtsock.c  28 Aug 2019 20:54:24 -  1.290
> +++ net/rtsock.c  28 Aug 2019 21:23:34 -
> @@ -1366,6 +1366,91 @@ rtm_xaddrs(caddr_t cp, caddr_t cplim, st
>   rtinfo->rti_info[i] = sa;
>   ADVANCE(cp, sa);
>   }
> + for (i = 0; i < RTAX_MAX; i++) {
> + size_t len, maxlen, size;
> +
> + sa = rtinfo->rti_info[i];
> + if (sa == NULL)
> + continue;
> + maxlen = size = 0;
> + switch (i) {
> + case RTAX_DST:
> + case RTAX_GATEWAY:
> + case RTAX_SRC:
> + switch (sa->sa_family) {
> + case AF_INET:
> + size = sizeof(struct sockaddr_in);
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + size = sizeof(struct sockaddr_in6);
> + break;
> +#endif
> +#ifdef MPLS
> + case AF_MPLS:
> + size = sizeof(struct sockaddr_mpls);
> + break;
> +#endif
> + }
> + break;
> + case RTAX_IFP:
> + if (sa->sa_family != AF_LINK)
> + return (EAFNOSUPPORT);
> + /*
> +  * XXX Should be sizeof(struct sockaddr_dl), but
> +  * route(8) has a bug and provides less memory.
> +  */
> + size = 16;
> + break;
> + case RTAX_IFA:
> + switch (sa->sa_family) {
> + case AF_INET:
> + size = sizeof(struct sockaddr_in);
> + break;
> +#ifdef INET6
> + case AF_INET6:
> + size = sizeof(struct sockaddr_in6);
> + break;
> +#endif
> + default:
> + return (EAFNOSUPPORT);
> + }
> + break;
> + case RTAX_LABEL:
> + maxlen = RTLABEL_LEN;
> + size = sizeof(struct sockaddr_rtlabel);
> + break;
> +#ifdef BFD
> + case RTAX_BFD:
> + size = sizeof(struct sockaddr_bfd);
> + break;
> +#endif
> + case RTAX_DNS:
> + maxlen = RTDNS_LEN;
> + size = sizeof(struct sockaddr_rtdns);
> + break;
> + case RTAX_STATIC:
> + maxlen = RTSTATIC_LEN;
> + size = sizeof(struct sockaddr_rtstatic);
> + break;
> + case RTAX_SEARCH:
> + maxlen = RTSEARCH_LEN;
> + size = sizeof(struct sockaddr_rtsearch);
> + break;
> + }
> + if (size) {
> + if (sa->sa_len < size)
> + return (EINVAL);
> + }
> + if (maxlen) {
> + if (2 + maxlen >= size)
> + return (EINVAL);
> + len = strnlen(sa->sa_data, maxlen);
> + if (len >= maxlen || 2 + len >= sa->sa_len)
> + return (EINVAL);
> + break;
> + }
> + }
>   return (0);
>  }
> 

-- 
:wq Claudio



Re: route address order

2019-08-29 Thread Claudio Jeker
On Thu, Aug 29, 2019 at 10:55:44PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> The kernel uses rtm_addrs as a bit field for addresses that are
> included in the routing message.  The significance of the bits has
> to be consistent with the order of the addresses.  In route(8) store
> addresses in ascending order of RTA values.  This allows to use
> MPLS routes together with route labels.
> 
> ok?

OK claudio@
 
> bluhm
> 
> Index: sbin/route/route.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/route/route.c,v
> retrieving revision 1.231
> diff -u -p -r1.231 route.c
> --- sbin/route/route.c29 Aug 2019 14:28:34 -  1.231
> +++ sbin/route/route.c29 Aug 2019 20:44:35 -
> @@ -68,7 +68,7 @@
>  const struct if_status_description
>   if_status_descriptions[] = LINK_STATE_DESCRIPTIONS;
> 
> -union sockunion so_dst, so_gate, so_mask, so_ifa, so_ifp, so_label, so_src;
> +union sockunion so_dst, so_gate, so_mask, so_ifa, so_ifp, so_src, so_label;
> 
>  typedef union sockunion *sup;
>  pid_tpid;
> @@ -1087,13 +1087,14 @@ rtmsg(int cmd, int flags, int fmask, uin
> 
>   if (rtm_addrs & RTA_NETMASK)
>   mask_addr(&so_dst, &so_mask, RTA_DST);
> + /* store addresses in ascending order of RTA values */
>   NEXTADDR(RTA_DST, so_dst);
>   NEXTADDR(RTA_GATEWAY, so_gate);
>   NEXTADDR(RTA_NETMASK, so_mask);
>   NEXTADDR(RTA_IFP, so_ifp);
>   NEXTADDR(RTA_IFA, so_ifa);
> - NEXTADDR(RTA_LABEL, so_label);
>   NEXTADDR(RTA_SRC, so_src);
> + NEXTADDR(RTA_LABEL, so_label);
>   rtm.rtm_msglen = l = cp - (char *)&m_rtmsg;
>   if (verbose)
>   print_rtmsg(&rtm, l);
> 

-- 
:wq Claudio



Re: route, arp, ndp padding

2019-08-30 Thread Claudio Jeker
On Fri, Aug 30, 2019 at 04:44:56PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> The algorithm in route(8) and arp(8) is still not correct.  While
> the values written to the kernel are fine, the bytes for padding
> are taken from memory after the sockaddr structs.
> 
> In route(8) the union of sockaddr can be made larger so that the
> padding is taken from there.
> 
> In arp(8) we know the size of the struct.  Copy only the struct and
> advance over the padding.  The memory has been zeroed before.
> 
> ndp(8) can take all the fixes from arp(8).
> 
> ok?
> 
> bluhm
> 
> Index: sbin/route/route.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/route/route.c,v
> retrieving revision 1.232
> diff -u -p -r1.232 route.c
> --- sbin/route/route.c29 Aug 2019 22:42:16 -  1.232
> +++ sbin/route/route.c30 Aug 2019 13:24:31 -
> @@ -137,10 +137,6 @@ usage(char *cp)
>   exit(1);
>  }
> 
> -#define ROUNDUP(a) \
> - ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
> -#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
> -
>  int
>  main(int argc, char **argv)
>  {
> Index: sbin/route/show.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/route/show.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 show.h
> --- sbin/route/show.h 1 May 2018 18:13:21 -   1.14
> +++ sbin/route/show.h 30 Aug 2019 13:22:57 -
> @@ -19,6 +19,18 @@
>  #ifndef __SHOW_H__
>  #define __SHOW_H__
> 
> +#define ROUNDUP(a) \
> + ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
> +#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
> +#define MAXIMUM(a, b) ((a) > (b) ? (a) : (b))
> +#define SAMAXSIZE\
> + MAXIMUM(sizeof(struct sockaddr),\
> + MAXIMUM(sizeof(struct sockaddr_in), \
> + MAXIMUM(sizeof(struct sockaddr_in6),\
> + MAXIMUM(sizeof(struct sockaddr_dl), \
> + MAXIMUM(sizeof(struct sockaddr_rtlabel),\
> + sizeof(struct sockaddr_mpls))
> +
>  union sockunion {
>   struct sockaddr sa;
>   struct sockaddr_in  sin;
> @@ -26,6 +38,7 @@ union sockunion {
>   struct sockaddr_dl  sdl;
>   struct sockaddr_rtlabel rtlabel;
>   struct sockaddr_mplssmpls;
> + charpadding[ROUNDUP(SAMAXSIZE)];

Just throw a struct sockaddr_storage in that union. It will make sure
there is enough space for everything and then you can skip the MAXIMUM
dance you do now.

>  };
> 
>  void  get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
> Index: usr.sbin/arp/arp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/arp/arp.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 arp.c
> --- usr.sbin/arp/arp.c29 Aug 2019 19:11:15 -  1.85
> +++ usr.sbin/arp/arp.c30 Aug 2019 13:57:15 -
> @@ -675,9 +675,8 @@ rtmsg(int cmd)
> 
>  #define NEXTADDR(w, s)   
> \
>   if (rtm->rtm_addrs & (w)) { \
> - l = ROUNDUP(((struct sockaddr *)&(s))->sa_len); \
> - memcpy(cp, &(s), l);\
> - cp += l;\
> + memcpy(cp, &(s), sizeof(s));\
> + ADVANCE(cp, (struct sockaddr *)&(s));   \
>   }
> 
>   NEXTADDR(RTA_DST, sin_m);
> Index: usr.sbin/ndp/ndp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/ndp/ndp.c,v
> retrieving revision 1.97
> diff -u -p -r1.97 ndp.c
> --- usr.sbin/ndp/ndp.c27 Aug 2019 20:42:40 -  1.97
> +++ usr.sbin/ndp/ndp.c30 Aug 2019 14:08:52 -
> @@ -108,6 +108,7 @@
>  /* packing rule for routing socket */
>  #define ROUNDUP(a) \
>   ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
> +#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
> 
>  static pid_t pid;
>  static int nflag;
> @@ -323,7 +324,7 @@ parse_host(const char *host, struct in6_
>  struct   sockaddr_in6 so_mask = {sizeof(so_mask), AF_INET6 };
>  struct   sockaddr_in6 blank_sin = {sizeof(blank_sin), AF_INET6 }, sin_m;
>  struct   sockaddr_dl blank_sdl = {sizeof(blank_sdl), AF_LINK }, sdl_m;
> -struct   sockaddr_dl ifp_m = { sizeof(&ifp_m), AF_LINK };
> +struct   sockaddr_dl ifp_m = { sizeof(ifp_m), AF_LINK };
>  time_t   expire_time;
>  int  flags, found_entry;
>  struct   {
> @@ -766,9 +767,12 @@ rtmsg(int cmd)
>   case RTM_GET:
>   rtm->rtm_addrs |= (RTA_DST | RTA_IFP);
>   }
> -#define NEXTADDR(w, s) \
> - if (rtm->rtm_addrs & (w)) { \
> - bcopy((char *)&s, cp, sizeof(s)

Re: route, arp, ndp padding

2019-08-30 Thread Claudio Jeker
On Sat, Aug 31, 2019 at 12:07:21AM +0200, Alexander Bluhm wrote:
> On Fri, Aug 30, 2019 at 09:54:49PM +0200, Claudio Jeker wrote:
> > Just throw a struct sockaddr_storage in that union. It will make sure
> > there is enough space for everything and then you can skip the MAXIMUM
> > dance you do now.
> 
> Yes, that is much nicer.  Although I have to work around this
> compiler warning.
> 
> /home/bluhm/openbsd/cvs/src/sbin/route/route.c:911:19: warning: implicit
>   conversion from 'unsigned long' to '__uint8_t' (aka 'unsigned char')
>   changes value from 256 to 0 [-Wconstant-conversion]
> su->sa.sa_len = sizeof(*su);
>   ~ ^~~
> ok?
> 
> bluhm
> 
> Index: sbin/route/route.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/route/route.c,v
> retrieving revision 1.232
> diff -u -p -r1.232 route.c
> --- sbin/route/route.c29 Aug 2019 22:42:16 -  1.232
> +++ sbin/route/route.c30 Aug 2019 21:58:13 -
> @@ -484,7 +484,7 @@ newroute(int argc, char **argv)
>   break;
>   case K_SA:
>   af = PF_ROUTE;
> - aflen = sizeof(union sockunion);
> + aflen = sizeof(struct sockaddr_storage) - 1;
>   break;
>   case K_MPLS:
>   af = AF_MPLS;
> @@ -908,7 +908,7 @@ getaddr(int which, int af, char *s, stru
>   case AF_MPLS:
>   errx(1, "mpls labels require -in or -out switch");
>   case PF_ROUTE:
> - su->sa.sa_len = sizeof(*su);
> + su->sa.sa_len = sizeof(struct sockaddr_storage) - 1;
>   sockaddr(s, &su->sa);
>   return (1);
> 

Ugh, this could be cleaned up but that is a different diff.
I think aflen should be killed and instead the sa_len should be set based
on the af. Also sockaddr() could itself know about the max size and not
have that passed in via su->sa.sa_len.

Guess that is a different diff to make this not more insane.
Also when was the last time someone used -sa in route(8)?

> Index: sbin/route/show.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/route/show.h,v
> retrieving revision 1.14
> diff -u -p -r1.14 show.h
> --- sbin/route/show.h 1 May 2018 18:13:21 -   1.14
> +++ sbin/route/show.h 30 Aug 2019 21:46:34 -
> @@ -26,6 +26,7 @@ union sockunion {
>   struct sockaddr_dl  sdl;
>   struct sockaddr_rtlabel rtlabel;
>   struct sockaddr_mplssmpls;
> + struct sockaddr_storage padding;
>  };
> 
>  void  get_rtaddrs(int, struct sockaddr *, struct sockaddr **);
> Index: usr.sbin/arp/arp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/arp/arp.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 arp.c
> --- usr.sbin/arp/arp.c29 Aug 2019 19:11:15 -  1.85
> +++ usr.sbin/arp/arp.c30 Aug 2019 13:57:15 -
> @@ -675,9 +675,8 @@ rtmsg(int cmd)
> 
>  #define NEXTADDR(w, s)   
> \
>   if (rtm->rtm_addrs & (w)) { \
> - l = ROUNDUP(((struct sockaddr *)&(s))->sa_len); \
> - memcpy(cp, &(s), l);\
> - cp += l;\
> + memcpy(cp, &(s), sizeof(s));\
> + ADVANCE(cp, (struct sockaddr *)&(s));   \
>   }
> 
>   NEXTADDR(RTA_DST, sin_m);
> Index: usr.sbin/ndp/ndp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/ndp/ndp.c,v
> retrieving revision 1.97
> diff -u -p -r1.97 ndp.c
> --- usr.sbin/ndp/ndp.c27 Aug 2019 20:42:40 -  1.97
> +++ usr.sbin/ndp/ndp.c30 Aug 2019 14:08:52 -
> @@ -108,6 +108,7 @@
>  /* packing rule for routing socket */
>  #define ROUNDUP(a) \
>   ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
> +#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
> 
>  static pid_t pid;
>  static int nflag;
> @@ -323,7 +324,7 @@ parse_host(const char *host, struct in6_
>  struct   sockaddr_in6 so_mask = {sizeof(so_mask), AF_INET6 };
>  struct   sockaddr_in6 blank_sin = {sizeof(blank_sin), AF_INET6 }, sin_m;
>  str

Re: don't hand-roll bpf_mtap_hdr for radiotap in wireless drivers

2019-09-16 Thread Claudio Jeker
On Thu, Sep 12, 2019 at 01:21:46PM +1000, David Gwynne wrote:
> radiotap code puts an mbuf on the stack so it can build a chain that
> includes a radiotap header for bpf_mtap to chew on. bpf_mtap_hdr can do
> that for you though.
> 
> bpf_mtap_hdr also cheats by using an m_hdr instead of a full mbuf,
> which makes the stack usage less, but im arguing for the diff because it
> makes the calling code simpler.
> 
> this only tweaks the intel wireless drivers, there's a handful of other
> ones that could be fixed too if this diff goes in.
> 
> tests? ok?

OK claudio@
 
> Index: if_ipw.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_ipw.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 if_ipw.c
> --- if_ipw.c  25 Jul 2019 01:46:14 -  1.123
> +++ if_ipw.c  12 Sep 2019 03:14:03 -
> @@ -878,7 +878,6 @@ ipw_data_intr(struct ipw_softc *sc, stru
>  
>  #if NBPFILTER > 0
>   if (sc->sc_drvbpf != NULL) {
> - struct mbuf mb;
>   struct ipw_rx_radiotap_header *tap = &sc->sc_rxtap;
>  
>   tap->wr_flags = 0;
> @@ -886,13 +885,8 @@ ipw_data_intr(struct ipw_softc *sc, stru
>   tap->wr_chan_freq = htole16(ic->ic_ibss_chan->ic_freq);
>   tap->wr_chan_flags = htole16(ic->ic_ibss_chan->ic_flags);
>  
> - mb.m_data = (caddr_t)tap;
> - mb.m_len = sc->sc_rxtap_len;
> - mb.m_next = m;
> - mb.m_nextpkt = NULL;
> - mb.m_type = 0;
> - mb.m_flags = 0;
> - bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_IN);
> + bpf_mtap_hdr(sc->sc_drvbpf, tap, sc->sc_rxtap_len,
> + m, BPF_DIRECTION_IN, NULL);
>   }
>  #endif
>  
> @@ -1153,20 +1147,14 @@ ipw_tx_start(struct ifnet *ifp, struct m
>  
>  #if NBPFILTER > 0
>   if (sc->sc_drvbpf != NULL) {
> - struct mbuf mb;
>   struct ipw_tx_radiotap_header *tap = &sc->sc_txtap;
>  
>   tap->wt_flags = 0;
>   tap->wt_chan_freq = htole16(ic->ic_ibss_chan->ic_freq);
>   tap->wt_chan_flags = htole16(ic->ic_ibss_chan->ic_flags);
>  
> - mb.m_data = (caddr_t)tap;
> - mb.m_len = sc->sc_txtap_len;
> - mb.m_next = m;
> - mb.m_nextpkt = NULL;
> - mb.m_type = 0;
> - mb.m_flags = 0;
> - bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_OUT);
> + bpf_mtap_hdr(sc->sc_drvbpf, tap, sc->sc_txtap_len,
> + m, BPF_DIRECTION_OUT, NULL);
>   }
>  #endif
>  
> Index: if_iwi.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_iwi.c,v
> retrieving revision 1.140
> diff -u -p -r1.140 if_iwi.c
> --- if_iwi.c  25 Jul 2019 01:46:14 -  1.140
> +++ if_iwi.c  12 Sep 2019 03:14:03 -
> @@ -923,7 +923,6 @@ iwi_frame_intr(struct iwi_softc *sc, str
>  
>  #if NBPFILTER > 0
>   if (sc->sc_drvbpf != NULL) {
> - struct mbuf mb;
>   struct iwi_rx_radiotap_header *tap = &sc->sc_rxtap;
>  
>   tap->wr_flags = 0;
> @@ -937,13 +936,8 @@ iwi_frame_intr(struct iwi_softc *sc, str
>   if (frame->antenna & 0x40)
>   tap->wr_flags |= IEEE80211_RADIOTAP_F_SHORTPRE;
>  
> - mb.m_data = (caddr_t)tap;
> - mb.m_len = sc->sc_rxtap_len;
> - mb.m_next = m;
> - mb.m_nextpkt = NULL;
> - mb.m_type = 0;
> - mb.m_flags = 0;
> - bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_IN);
> + bpf_mtap_hdr(sc->sc_drvbpf, tap, sc->sc_rxtap_len,
> + m, BPF_DIRECTION_IN, NULL);
>   }
>  #endif
>  
> @@ -1265,20 +1259,14 @@ iwi_tx_start(struct ifnet *ifp, struct m
>  
>  #if NBPFILTER > 0
>   if (sc->sc_drvbpf != NULL) {
> - struct mbuf mb;
>   struct iwi_tx_radiotap_header *tap = &sc->sc_txtap;
>  
>   tap->wt_flags = 0;
>   tap->wt_chan_freq = htole16(ic->ic_bss->ni_chan->ic_freq);
>   tap->wt_chan_flags = htole16(ic->ic_bss->ni_chan->ic_flags);
>  
> - mb.m_data = (caddr_t)tap;
> - mb.m_len = sc->sc_txtap_len;
> - mb.m_next = m0;
> - mb.m_nextpkt = NULL;
> - mb.m_type = 0;
> - mb.m_flags = 0;
> - bpf_mtap(sc->sc_drvbpf, &mb, BPF_DIRECTION_OUT);
> + bpf_mtap_hdr(sc->sc_drvbpf, tap, sc->sc_txtap_len,
> + m0, BPF_DIRECTION_OUT, NULL);
>   }
>  #endif
>  
> Index: if_iwm.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
> retrieving revision 1.244
> diff -u -p -r1.244 if_iwm.c
> --- if_iwm.c  8 Aug 2019 13:56:56 -   1.244
> +++ if_iwm.c  12 Sep 2019 03:14:03 -
> @@ -3629,7 +3629,6 @@ iwm_rx_rx_mpdu(struct iwm_softc *sc, str
>  
>  #if NBPFILTER > 0
>   if (sc->sc_

Re: bpf_mtap_hdr copy function is redundant

2019-09-19 Thread Claudio Jeker
On Thu, Sep 19, 2019 at 09:02:09AM +0200, Klemens Nanni wrote:
> On Thu, Sep 19, 2019 at 01:04:03PM +1000, David Gwynne wrote:
> > this removes the cpfn argument from bpf_mtap_hdr since nothing uses it
> > anymore.
> OK kn if you update bpf_mtap(9) as well.

Same here. OK claudio and please update man page.

-- 
:wq Claudio



Re: bgpctl sh nei [group XX] terse: add peer address

2019-09-24 Thread Claudio Jeker
On Tue, Sep 24, 2019 at 10:06:51AM +0100, Stuart Henderson wrote:
> On 2019/09/23 22:48, Stuart Henderson wrote:
> > "bgpctl XX nei" functions can now take "group XX" - when used as
> > "show nei group XX terse" you can't tell which entry relates to each
> > neighbour.
> > 
> > OK to add the address to the end of the line where it's reasonably
> > out of the way of existing parsers?
> 
> missing free, pointed out by benno. (not that bgpctl will stick around
> for long anyway :)

This is fine with me. I wonder if other data e.g. the peer description or
peer AS number should be added as well.
 
> Index: bgpctl.8
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
> retrieving revision 1.89
> diff -u -p -r1.89 bgpctl.8
> --- bgpctl.8  28 Jun 2019 12:12:06 -  1.89
> +++ bgpctl.8  24 Sep 2019 09:06:13 -
> @@ -289,7 +289,8 @@ Show statistics in an easily parseable t
>  The printed numbers are the sent and received open, sent and received
>  notifications, sent and received updates, sent and received keepalives, and
>  sent and received route refresh messages plus the current and maximum
> -prefix count, the number of sent and received updates, and withdraws.
> +prefix count, the number of sent and received updates, sent and
> +received withdraws, and finally the neighbor's address.
>  .It Cm timers
>  Show the BGP timers.
>  .El
> Index: bgpctl.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
> retrieving revision 1.243
> diff -u -p -r1.243 bgpctl.c
> --- bgpctl.c  5 Aug 2019 12:51:32 -   1.243
> +++ bgpctl.c  24 Sep 2019 09:06:13 -
> @@ -601,12 +601,26 @@ int
>  show_neighbor_terse(struct imsg *imsg)
>  {
>   struct peer *p;
> + char*s;
>  
>   switch (imsg->hdr.type) {
>   case IMSG_CTL_SHOW_NEIGHBOR:
>   p = imsg->data;
> + if ((p->conf.remote_addr.aid == AID_INET &&
> + p->conf.remote_masklen != 32) ||
> + (p->conf.remote_addr.aid == AID_INET6 &&
> + p->conf.remote_masklen != 128)) {
> + if (asprintf(&s, "%s/%u",
> + log_addr(&p->conf.remote_addr),
> + p->conf.remote_masklen) == -1)
> + err(1, NULL);
> + } else
> + if ((s = strdup(log_addr(&p->conf.remote_addr))) ==
> + NULL)
> + err(1, "strdup");
> +
>   printf("%llu %llu %llu %llu %llu %llu %llu "
> - "%llu %llu %llu %u %u %llu %llu %llu %llu\n",
> + "%llu %llu %llu %u %u %llu %llu %llu %llu %s\n",
>   p->stats.msg_sent_open, p->stats.msg_rcvd_open,
>   p->stats.msg_sent_notification,
>   p->stats.msg_rcvd_notification,
> @@ -616,7 +630,8 @@ show_neighbor_terse(struct imsg *imsg)
>   p->stats.prefix_cnt, p->conf.max_prefix,
>   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
>   p->stats.prefix_sent_withdraw,
> - p->stats.prefix_rcvd_withdraw);
> + p->stats.prefix_rcvd_withdraw, s);
> + free(s);
>   break;
>   case IMSG_CTL_END:
>   return (1);
> 

-- 
:wq Claudio



Re: bgpctl sh nei [group XX] terse: add peer address

2019-09-24 Thread Claudio Jeker
On Tue, Sep 24, 2019 at 03:51:43PM +0100, Stuart Henderson wrote:
> On 2019/09/24 11:10, Claudio Jeker wrote:
> > On Tue, Sep 24, 2019 at 10:06:51AM +0100, Stuart Henderson wrote:
> > > On 2019/09/23 22:48, Stuart Henderson wrote:
> > > > "bgpctl XX nei" functions can now take "group XX" - when used as
> > > > "show nei group XX terse" you can't tell which entry relates to each
> > > > neighbour.
> > > > 
> > > > OK to add the address to the end of the line where it's reasonably
> > > > out of the way of existing parsers?
> > > 
> > > missing free, pointed out by benno. (not that bgpctl will stick around
> > > for long anyway :)
> > 
> > This is fine with me. I wonder if other data e.g. the peer description or
> > peer AS number should be added as well.
> 
> That might be useful, though as the peer description could contain
> spaces we'd want to be reasonably sure we have already included any
> other useful data first so that it can go right at the end of the line
> (so that parsing the output isn't too awkward).
> 

I was thinking the same; maybe even put the name in "" to make it look more
like a string. At the least, the AS number should be added.

-- 
:wq Claudio



Re: Argument order fix for MCLGETI

2019-09-25 Thread Claudio Jeker
On Wed, Sep 25, 2019 at 04:10:10PM +0800, Kevin Lo wrote:
> ok?

OK claudio@

How did that even work?
 
> Index: sys/dev/ic/ti.c
> ===
> RCS file: /cvs/src/sys/dev/ic/ti.c,v
> retrieving revision 1.25
> diff -u -p -u -p -r1.25 ti.c
> --- sys/dev/ic/ti.c   22 Jan 2017 10:17:38 -  1.25
> +++ sys/dev/ic/ti.c   25 Sep 2019 08:06:26 -
> @@ -576,7 +576,7 @@ ti_newbuf_std(struct ti_softc *sc, int i
>   sc->ti_cdata.ti_rx_std_map[i] = dmamap;
>  
>   if (m == NULL) {
> - m_new = MCLGETI(NULL, MCLBYTES, NULL, M_DONTWAIT);
> + m_new = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
>   if (m_new == NULL)
>   return (ENOBUFS);
>  
> @@ -695,7 +695,7 @@ ti_newbuf_jumbo(struct ti_softc *sc, int
>   bus_dmamap_unload(sc->sc_dmatag, dmamap);
>  
>   if (m == NULL) {
> - m_new = MCLGETI(NULL, TI_JUMBO_FRAMELEN, NULL, M_DONTWAIT);
> + m_new = MCLGETI(NULL, M_DONTWAIT, NULL, TI_JUMBO_FRAMELEN);
>   if (m_new == NULL)
>   return (ENOBUFS);
>  
> Index: sys/dev/pci/if_lge.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_lge.c,v
> retrieving revision 1.73
> diff -u -p -u -p -r1.73 if_lge.c
> --- sys/dev/pci/if_lge.c  22 Jan 2017 10:17:38 -  1.73
> +++ sys/dev/pci/if_lge.c  25 Sep 2019 08:06:26 -
> @@ -626,7 +626,7 @@ lge_newbuf(struct lge_softc *sc, struct 
>   struct mbuf *m_new = NULL;
>  
>   if (m == NULL) {
> - m_new = MCLGETI(NULL, LGE_JLEN, NULL, M_DONTWAIT);
> + m_new = MCLGETI(NULL, M_DONTWAIT, NULL, LGE_JLEN);
>   if (m_new == NULL)
>   return (ENOBUFS);
>   } else {
> Index: sys/dev/pci/if_nfe.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_nfe.c,v
> retrieving revision 1.120
> diff -u -p -u -p -r1.120 if_nfe.c
> --- sys/dev/pci/if_nfe.c  8 Sep 2017 05:36:52 -   1.120
> +++ sys/dev/pci/if_nfe.c  25 Sep 2019 08:06:26 -
> @@ -697,7 +697,7 @@ nfe_rxeof(struct nfe_softc *sc)
>* old mbuf. In the unlikely case that the old mbuf can't be
>* reloaded either, explicitly panic.
>*/
> - mnew = MCLGETI(NULL, MCLBYTES, NULL, M_DONTWAIT);
> + mnew = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
>   if (mnew == NULL) {
>   ifp->if_ierrors++;
>   goto skip;
> @@ -1210,7 +1210,7 @@ nfe_alloc_rx_ring(struct nfe_softc *sc, 
>   for (i = 0; i < NFE_RX_RING_COUNT; i++) {
>   data = &sc->rxq.data[i];
>  
> - data->m = MCLGETI(NULL, MCLBYTES, NULL, M_DONTWAIT);
> + data->m = MCLGETI(NULL, M_DONTWAIT, NULL, MCLBYTES);
>   if (data->m == NULL) {
>   printf("%s: could not allocate rx mbuf\n",
>   sc->sc_dev.dv_xname);
> Index: sys/dev/pci/if_nge.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_nge.c,v
> retrieving revision 1.92
> diff -u -p -u -p -r1.92 if_nge.c
> --- sys/dev/pci/if_nge.c  22 Jan 2017 10:17:38 -  1.92
> +++ sys/dev/pci/if_nge.c  25 Sep 2019 08:06:26 -
> @@ -962,7 +962,7 @@ nge_newbuf(struct nge_softc *sc, struct 
>   struct mbuf *m_new = NULL;
>  
>   if (m == NULL) {
> - m_new = MCLGETI(NULL, NGE_MCLBYTES, NULL, M_DONTWAIT);
> + m_new = MCLGETI(NULL, M_DONTWAIT, NULL, NGE_MCLBYTES);
>   if (m_new == NULL)
>   return (ENOBUFS);
>   } else {
> 

-- 
:wq Claudio



Re: bgpctl sh nei [group XX] terse: add peer address

2019-09-25 Thread Claudio Jeker
On Wed, Sep 25, 2019 at 12:19:23PM +0100, Stuart Henderson wrote:
> On 2019/09/24 22:06, Sebastian Benoit wrote:
> > Claudio Jeker(cje...@diehard.n-r-g.com) on 2019.09.24 17:01:21 +0200:
> > > On Tue, Sep 24, 2019 at 03:51:43PM +0100, Stuart Henderson wrote:
> > > > On 2019/09/24 11:10, Claudio Jeker wrote:
> > > > > On Tue, Sep 24, 2019 at 10:06:51AM +0100, Stuart Henderson wrote:
> > > > > > On 2019/09/23 22:48, Stuart Henderson wrote:
> > > > > > > "bgpctl XX nei" functions can now take "group XX" - when used as
> > > > > > > "show nei group XX terse" you can't tell which entry relates to 
> > > > > > > each
> > > > > > > neighbour.
> > > > > > > 
> > > > > > > OK to add the address to the end of the line where it's reasonably
> > > > > > > out of the way of existing parsers?
> > > > > > 
> > > > > > missing free, pointed out by benno. (not that bgpctl will stick 
> > > > > > around
> > > > > > for long anyway :)
> > > > > 
> > > > > This is fine with me. I wonder if other data e.g. the peer 
> > > > > description or
> > > > > peer AS number should be added as well.
> > > > 
> > > > That might be useful, though as the peer description could contain
> > > > spaces we'd want to be reasonably sure we have already included any
> > > > other useful data first so that it can go right at the end of the line
> > > > (so that parsing the output isn't too awkward).
> > > > 
> > > 
> > > I was thinking the same maybe even put the name in "" to make it look more
> > > like a string.
> > 
> > Yes, that makes it matchable again at least.
> > 
> > > At least adding the AS number should be done.
> > 
> > +1
> > 

OK claudio@, my diff looks the same.

> Index: bgpctl.8
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
> retrieving revision 1.90
> diff -u -p -r1.90 bgpctl.8
> --- bgpctl.8  24 Sep 2019 14:46:09 -  1.90
> +++ bgpctl.8  25 Sep 2019 11:18:28 -
> @@ -290,7 +290,8 @@ The printed numbers are the sent and rec
>  notifications, sent and received updates, sent and received keepalives, and
>  sent and received route refresh messages plus the current and maximum
>  prefix count, the number of sent and received updates, sent and
> -received withdraws, and finally the neighbor's address.
> +received withdraws, the neighbor's address (or subnet, for a template),
> +AS number, and finally description.
>  .It Cm timers
>  Show the BGP timers.
>  .El
> Index: bgpctl.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
> retrieving revision 1.244
> diff -u -p -r1.244 bgpctl.c
> --- bgpctl.c  24 Sep 2019 14:46:09 -  1.244
> +++ bgpctl.c  25 Sep 2019 11:18:28 -
> @@ -619,8 +619,8 @@ show_neighbor_terse(struct imsg *imsg)
>   NULL)
>   err(1, "strdup");
>  
> - printf("%llu %llu %llu %llu %llu %llu %llu "
> - "%llu %llu %llu %u %u %llu %llu %llu %llu %s\n",
> + printf("%llu %llu %llu %llu %llu %llu %llu %llu %llu "
> + "%llu %u %u %llu %llu %llu %llu %s %s \"%s\"\n",
>   p->stats.msg_sent_open, p->stats.msg_rcvd_open,
>   p->stats.msg_sent_notification,
>   p->stats.msg_rcvd_notification,
> @@ -630,7 +630,8 @@ show_neighbor_terse(struct imsg *imsg)
>   p->stats.prefix_cnt, p->conf.max_prefix,
>   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
>   p->stats.prefix_sent_withdraw,
> - p->stats.prefix_rcvd_withdraw, s);
> + p->stats.prefix_rcvd_withdraw, s,
> + log_as(p->conf.remote_as), p->conf.descr);
>   free(s);
>   break;
>   case IMSG_CTL_END:
> 

-- 
:wq Claudio



bgpd move pf table commits out of prefix_update and _withdraw

2019-09-26 Thread Claudio Jeker
This diff moves the rde_send_pftable_commit() out of the prefix update and
withdraw functions and up into the functions calling them.
This will result in bigger batches being committed to pf(4) but the main
reason is that this is needed to allow pipelining of RIB tables.

I move the early returns from rde_update_dispatch() up in the function so
that everything later will get to the end of the function where the commit
happens. The move should not change current behaviour though.
Also network_flush_upcall() was changed to call prefix_withdraw() to
respect the pftable setting on announced prefixes. Apart from that the
diff should be straightforward.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.487
diff -u -p -r1.487 rde.c
--- rde.c   14 Aug 2019 11:57:21 -  1.487
+++ rde.c   26 Sep 2019 11:59:52 -
@@ -568,6 +568,8 @@ badnetdel:
RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
NULL, NULL) == -1)
log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
+   /* Deletions were performed in network_flush_upcall */
+   rde_send_pftable_commit();
break;
case IMSG_FILTER_SET:
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
@@ -682,7 +684,7 @@ rde_dispatch_imsg_parent(struct imsgbuf 
static struct l3vpn *vpn;
struct imsg  imsg;
struct mrt   xmrt;
-   struct rde_rib   rn;
+   struct rde_rib   rr;
struct filterstate   state;
struct imsgbuf  *i;
struct filter_head  *nr;
@@ -787,18 +789,18 @@ rde_dispatch_imsg_parent(struct imsgbuf 
if (imsg.hdr.len - IMSG_HEADER_SIZE !=
sizeof(struct rde_rib))
fatalx("IMSG_RECONF_RIB bad len");
-   memcpy(&rn, imsg.data, sizeof(rn));
-   rib = rib_byid(rib_find(rn.name));
+   memcpy(&rr, imsg.data, sizeof(rr));
+   rib = rib_byid(rib_find(rr.name));
if (rib == NULL) {
-   rib = rib_new(rn.name, rn.rtableid, rn.flags);
-   } else if (rib->flags == rn.flags &&
-   rib->rtableid == rn.rtableid) {
+   rib = rib_new(rr.name, rr.rtableid, rr.flags);
+   } else if (rib->flags == rr.flags &&
+   rib->rtableid == rr.rtableid) {
/* no change to rib apart from filters */
rib->state = RECONF_KEEP;
} else {
/* reload rib because somehing changed */
-   rib->flags_tmp = rn.flags;
-   rib->rtableid_tmp = rn.rtableid;
+   rib->flags_tmp = rr.flags;
+   rib->rtableid_tmp = rr.rtableid;
rib->state = RECONF_RELOAD;
}
break;
@@ -1073,8 +1075,23 @@ rde_update_dispatch(struct imsg *imsg)
 
nlri_len =
imsg->hdr.len - IMSG_HEADER_SIZE - 4 - withdrawn_len - attrpath_len;
-   bzero(&mpa, sizeof(mpa));
 
+   if (attrpath_len == 0) {
+   /* 0 = no NLRI information in this message */
+   if (nlri_len != 0) {
+   /* crap at end of update which should not be there */
+   rde_update_err(peer, ERR_UPDATE,
+   ERR_UPD_ATTRLIST, NULL, 0);
+   return (-1);
+   }
+   if (withdrawn_len == 0) {
+   /* EoR marker */
+   peer_recv_eor(peer, AID_INET);
+   return (0);
+   }
+   }
+
+   bzero(&mpa, sizeof(mpa));
rde_filterstate_prep(&state, NULL, NULL, NULL, 0);
if (attrpath_len != 0) { /* 0 = no NLRI information in this message */
/* parse path attributes */
@@ -1110,12 +1127,20 @@ rde_update_dispatch(struct imsg *imsg)
}
}
 
+   /* aspath needs to be loop free. This is not a hard error. */
+   if (state.aspath.flags & F_ATTR_ASPATH &&
+   peer->conf.ebgp &&
+   peer->conf.enforce_local_as == ENFORCE_AS_ON &&
+   !aspath_loopfree(state.aspath.aspath, peer->conf.local_as))
+   state.aspath.flags |= F_ATTR_LOOP;
+
rde_reflector(peer, &state.aspath);
}
 
p = imsg->data;
len = withdrawn_len;
p += 2;
+
/* withdraw prefix */
   

bgp show covering routes aka or-shorter

2019-09-26 Thread Claudio Jeker
bgpctl can show all routes under a prefix. For example
bgpctl show rib 8/8 all
but there was no way to do the inverse, show all routes which cover a
certain prefix. Since bgpd.conf uses or-longer for what the all keyword
does in bgpctl, the idea was to call this or-shorter (and add or-longer
as an alias for all).

e.g.
bgpctl show rib 8.8.8.8 or-shorter
or
bgpctl show rib 8.8.8.0/24 or-shorter
flags: * = Valid, > = Selected, I = via IBGP, A = Announced,
   S = Stale, E = Error
origin validation state: N = not-found, V = valid, ! = invalid
origin: i = IGP, e = EGP, ? = Incomplete

flags ovs destination          gateway          lpref   med aspath origin
*>      N 8.0.0.0/9            62.48.0.253        100     0 1299 3356 i
*>      N 8.0.0.0/12           62.48.0.253        100     0 1299 3356 i
*>      N 8.8.8.0/24           62.48.0.253        100     0 15169 i

This was requested by Job and I agree that this is useful.
-- 
:wq Claudio

Index: bgpctl/bgpctl.8
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
retrieving revision 1.91
diff -u -p -r1.91 bgpctl.8
--- bgpctl/bgpctl.8 25 Sep 2019 15:35:50 -  1.91
+++ bgpctl/bgpctl.8 26 Sep 2019 19:29:46 -
@@ -223,7 +223,7 @@ Show routes from an MRT table dump file.
 .Ar filter
 can be an IP address, a CIDR prefix, an AS filter, a combination or nothing:
 .Pp
-.Bl -tag -width "address/len all" -compact
+.Bl -tag -width "address/len or-shorter" -compact
 .It Ar address
 Show best matching route for address.
 .It Ar address Ns Li / Ns Ar len
@@ -234,6 +234,11 @@ Show RIB entry for this CIDR prefix.
 .Xc
 Show all entries in the specified range.
 .\".It Ar address/len Cm longer-prefixes
+.It Xo
+.Ar address Ns Li / Ns Ar len
+.Cm or-shorter
+.Xc
+Show all entries covering and including the specified prefix.
 .It Cm as Ar as
 Show all entries with
 .Ar as
@@ -312,7 +317,7 @@ Routing Information Base.
 .Ar filter
 can be an IP address, a CIDR prefix, an AS filter or nothing:
 .Pp
-.Bl -tag -width "address/len all" -compact
+.Bl -tag -width "address/len or-shorter" -compact
 .It Ar address
 Show best matching route for address.
 .It Ar address Ns Li / Ns Ar len
@@ -323,6 +328,11 @@ Show RIB entry for this CIDR prefix.
 .Xc
 Show all entries in the specified range.
 .\".It Ar address/len Cm longer-prefixes
+.It Xo
+.Ar address Ns Li / Ns Ar len
+.Cm or-shorter
+.Xc
+Show all entries covering and including the specified prefix.
 .It Cm as Ar as
 Show all entries with
 .Ar as
Index: bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.245
diff -u -p -r1.245 bgpctl.c
--- bgpctl/bgpctl.c 25 Sep 2019 15:35:50 -  1.245
+++ bgpctl/bgpctl.c 26 Sep 2019 19:28:00 -
@@ -2132,15 +2132,25 @@ show_mrt_dump(struct mrt_rib *mr, struct
return;
/* filter by prefix */
if (req->prefix.aid != AID_UNSPEC) {
-   if (!prefix_compare(&req->prefix, &ctl.prefix,
-   req->prefixlen)) {
-   if (req->flags & F_LONGER) {
-   if (req->prefixlen > ctl.prefixlen)
-   return;
-   } else if (req->prefixlen != ctl.prefixlen)
+   if (req->flags & F_LONGER) {
+   if (req->prefixlen > ctl.prefixlen)
return;
-   } else
-   return;
+   if (prefix_compare(&req->prefix, &ctl.prefix,
+   req->prefixlen))
+   return;
+   } else if (req->flags & F_SHORTER) {
+   if (req->prefixlen < ctl.prefixlen)
+   return;
+   if (prefix_compare(&req->prefix, &ctl.prefix,
+   ctl.prefixlen))
+   return;
+   } else {
+   if (req->prefixlen != ctl.prefixlen)
+   return;
+   if (prefix_compare(&req->prefix, &ctl.prefix,
+   req->prefixlen))
+   return;
+   }
}
/* filter by AS */
if (req->as.type != AS_UNDEF &&
Index: bgpctl/parser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/parser.c,v
retrieving revision 1.98
diff -u -p -r1.98 parser.c
--- bgpctl/parser.c 28 Jun 2019 12:12:06 -  1.98
+++ bgpctl/parser.c 26 Sep 2019 17:47:59 -
@@ -305,6 +305,8 @@ static c

Re: remove custom mbuf copy function support from bpf internals

2019-09-29 Thread Claudio Jeker
On Mon, Sep 30, 2019 at 12:06:34PM +1000, David Gwynne wrote:
> the "public" bpf api no longer supports custom copy functions, so we can
> remove the plumbing for it internally in the bpf code.
> 
> ok?
> 
> Index: bpf.c
> ===
> RCS file: /cvs/src/sys/net/bpf.c,v
> retrieving revision 1.180
> diff -u -p -r1.180 bpf.c
> --- bpf.c 30 Sep 2019 01:53:05 -  1.180
> +++ bpf.c 30 Sep 2019 02:04:37 -
> @@ -94,8 +94,6 @@ LIST_HEAD(, bpf_d) bpf_d_list;
>  
>  int  bpf_allocbufs(struct bpf_d *);
>  void bpf_ifname(struct bpf_if*, struct ifreq *);
> -int  _bpf_mtap(caddr_t, const struct mbuf *, u_int,
> - void (*)(const void *, void *, size_t));
>  void bpf_mcopy(const void *, void *, size_t);
>  int  bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
>   struct sockaddr *);
> @@ -105,7 +103,7 @@ int   bpfkqfilter(dev_t, struct knote *);
>  void bpf_wakeup(struct bpf_d *);
>  void bpf_wakeup_cb(void *);
>  void bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
> - void (*)(const void *, void *, size_t), struct timeval *);
> + struct timeval *);
>  int  bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
>  int  bpf_setdlt(struct bpf_d *, u_int);
>  
> @@ -1241,12 +1239,8 @@ bpf_mcopy(const void *src_arg, void *dst
>   }
>  }
>  
> -/*
> - * like bpf_mtap, but copy fn can be given. used by various bpf_mtap*
> - */
>  int
> -_bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction,
> -void (*cpfn)(const void *, void *, size_t))
> +bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
>  {
>   struct bpf_if *bp = (struct bpf_if *)arg;
>   struct bpf_d *d;
> @@ -1259,9 +1253,6 @@ _bpf_mtap(caddr_t arg, const struct mbuf
>   if (m == NULL)
>   return (0);
>  
> - if (cpfn == NULL)
> - cpfn = bpf_mcopy;
> -
>   if (bp == NULL)
>   return (0);
>  
> @@ -1299,8 +1290,7 @@ _bpf_mtap(caddr_t arg, const struct mbuf
>   }
>  
>   mtx_enter(&d->bd_mtx);
> - bpf_catchpacket(d, (u_char *)m, pktlen, slen, cpfn,
> - &tv);
> + bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tv);
>   mtx_leave(&d->bd_mtx);
>   }
>   }
> @@ -1345,16 +1335,7 @@ bpf_tap_hdr(caddr_t arg, const void *hdr
>   *mp = (struct mbuf *)&md;
>   }
>  
> - return _bpf_mtap(arg, m0, direction, bpf_mcopy);
> -}
> -
> -/*
> - * Incoming linkage from device drivers, when packet is in an mbuf chain.
> - */
> -int
> -bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
> -{
> - return _bpf_mtap(arg, m, direction, NULL);
> + return bpf_mtap(arg, m0, direction);
>  }
>  
>  /*
> @@ -1382,7 +1363,7 @@ bpf_mtap_hdr(caddr_t arg, const void *da
>   } else 
>   m0 = m;
>  
> - return _bpf_mtap(arg, m0, direction, NULL);
> + return bpf_mtap(arg, m0, direction);
>  }
>  
>  /*
> @@ -1460,7 +1441,7 @@ bpf_mtap_ether(caddr_t arg, const struct
>   */
>  void
>  bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
> -void (*cpfn)(const void *, void *, size_t), struct timeval *tv)
> +struct timeval *tv)
>  {
>   struct bpf_hdr *hp;
>   int totlen, curlen;
> @@ -1513,10 +1494,12 @@ bpf_catchpacket(struct bpf_d *d, u_char 
>   hp->bh_tstamp.tv_usec = tv->tv_usec;
>   hp->bh_datalen = pktlen;
>   hp->bh_hdrlen = hdrlen;
> +
>   /*
>* Copy the packet data into the store buffer and update its length.
>*/
> - (*cpfn)(pkt, (u_char *)hp + hdrlen, (hp->bh_caplen = totlen - hdrlen));
> + bpf_mcopy(pkt, (u_char *)hp + hdrlen,
> + (hp->bh_caplen = totlen - hdrlen));

This new line is not really needed but also yuck on assigning the size in
a function call argument. Maybe do the hp->bh_caplen = totlen - hdrlen before
the call and pass hp->bh_caplen to bpf_mcopy().

>   d->bd_slen = curlen + totlen;
>  
>   if (d->bd_immediate) {
> 

OK claudio@

-- 
:wq Claudio



bgpd changes for portability

2019-09-30 Thread Claudio Jeker
The portable version returns -1 in kr_init() because then the fd is
skipped in the poll loop. Now the problem is I changed this some time ago
to exit bgpd. I changed the function to pass the fd via a pointer arg and so
the return -1 still works.

Additionally introduce a tcp_md5_unset() function which will be used by
the linux compat to unregister TCP MD5SUM keys from listening sockets when
a peer is removed.

Last but not least, remove the call to pfkey_remove() in session.c (the
session engine does not even have the pfkey socket to talk to) and also
use the right bgpd_config pointer in merge_peers (don't fall back to the
global conf).

OK?
-- 
:wq Claudio

Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.225
diff -u -p -r1.225 bgpd.c
--- bgpd.c  8 Aug 2019 11:33:08 -   1.225
+++ bgpd.c  30 Sep 2019 13:28:42 -
@@ -234,7 +234,7 @@ main(int argc, char *argv[])
imsg_init(ibuf_se, pipe_m2s[0]);
imsg_init(ibuf_rde, pipe_m2r[0]);
mrt_init(ibuf_rde, ibuf_se);
-   if ((rfd = kr_init()) == -1)
+   if (kr_init(&rfd) == -1)
quit = 1;
keyfd = pfkey_init();
 
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.393
diff -u -p -r1.393 bgpd.h
--- bgpd.h  27 Sep 2019 10:33:06 -  1.393
+++ bgpd.h  30 Sep 2019 13:28:21 -
@@ -1185,7 +1185,7 @@ int   prefixset_cmp(struct prefixset_item
 RB_PROTOTYPE(prefixset_tree, prefixset_item, entry, prefixset_cmp);
 
 /* kroute.c */
-int kr_init(void);
+int kr_init(int *);
 int ktable_update(u_int, char *, int, u_int8_t);
 voidktable_preload(void);
 voidktable_postload(u_int8_t);
Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.238
diff -u -p -r1.238 kroute.c
--- kroute.c8 Aug 2019 20:06:29 -   1.238
+++ kroute.c30 Sep 2019 13:28:13 -
@@ -213,7 +213,7 @@ RB_GENERATE(kif_tree, kif_node, entry, k
  */
 
 int
-kr_init(void)
+kr_init(int *fd)
 {
int opt = 0, rcvbuf, default_rcvbuf;
unsigned inttid = RTABLE_ANY;
@@ -257,7 +257,8 @@ kr_init(void)
if (fetchifs(0) == -1)
return (-1);
 
-   return (kr_state.fd);
+   *fd = kr_state.fd;
+   return (0);
 }
 
 int
Index: pfkey.c
===
RCS file: /cvs/src/usr.sbin/bgpd/pfkey.c,v
retrieving revision 1.59
diff -u -p -r1.59 pfkey.c
--- pfkey.c 30 Sep 2019 12:10:38 -  1.59
+++ pfkey.c 30 Sep 2019 14:05:16 -
@@ -866,3 +866,9 @@ tcp_md5_listen(struct listen_addr *la, s
}
return 0;
 }
+
+/* dummy function for portable */
+void
+tcp_md5_unset(struct bgpd_config *conf, struct peer *p)
+{
+}
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.391
diff -u -p -r1.391 session.c
--- session.c   30 Sep 2019 12:10:38 -  1.391
+++ session.c   30 Sep 2019 14:03:50 -
@@ -276,7 +276,7 @@ session_main(int debug, int verbose)
log_peer_warnx(&p->conf, "removed");
RB_REMOVE(peer_head, &conf->peers, p);
timer_remove_all(p);
-   pfkey_remove(p);
+   tcp_md5_unset(conf, p);
free(p);
peer_cnt--;
continue;
@@ -3170,7 +3170,7 @@ merge_peers(struct bgpd_config *c, struc
 {
struct peer *p, *np, *next;
 
-   RB_FOREACH(p, peer_head, &conf->peers) {
+   RB_FOREACH(p, peer_head, &c->peers) {
/* templates are handled specially */
if (p->template != NULL)
continue;
@@ -3203,7 +3203,7 @@ merge_peers(struct bgpd_config *c, struc
/* apply the config to all clones of a template */
if (p->conf.template) {
struct peer *xp;
-   RB_FOREACH(xp, peer_head, &conf->peers) {
+   RB_FOREACH(xp, peer_head, &c->peers) {
if (xp->template != p)
continue;
session_template_clone(xp, NULL, xp->conf.id,
@@ -3218,7 +3218,7 @@ merge_peers(struct bgpd_config *c, struc
/* pfkeys of new peers already loaded by the parent process */
RB_FOREACH_SAFE(np, peer_head, &nc->peers, next) {
RB_REMOVE(peer_head, &nc->peers, np);
-   if (RB_INSERT(peer_head, &conf->peers

bgpd tcp md5sum changes for portable

2019-10-01 Thread Claudio Jeker
OK, this diff is actually what linux is going to need to be able to
configure TCP MD5SUM on sockets. The listening socket turned out to be a
bit more tricky since the MD5 key for each peer needs to be added to the
socket where the session will be expected (based on local-address) and so
these additions and deletions need to happen when peers are changed or
when new listeners are added. This is why this needs a few more functions.

I renamed the functions a bit but in the end this does not change any
behaviour for bgpd.
-- 
:wq Claudio

? obj
? test
? test.c
Index: pfkey.c
===
RCS file: /cvs/src/usr.sbin/bgpd/pfkey.c,v
retrieving revision 1.59
diff -u -p -r1.59 pfkey.c
--- pfkey.c 30 Sep 2019 12:10:38 -  1.59
+++ pfkey.c 1 Oct 2019 09:13:22 -
@@ -805,6 +805,7 @@ pfkey_init(void)
return (pfkey_fd);
 }
 
+/* verify that connection is using TCP MD5SUM if required by config */
 int
 tcp_md5_check(int fd, struct peer *p)
 {
@@ -830,6 +831,7 @@ tcp_md5_check(int fd, struct peer *p)
return 0;
 }
 
+/* enable or set TCP MD5SIG on a new client connection */
 int
 tcp_md5_set(int fd, struct peer *p)
 {
@@ -850,8 +852,9 @@ tcp_md5_set(int fd, struct peer *p)
return 0;
 }
 
+/* enable or prepare a new listening socket for TCP MD5SIG usage */
 int
-tcp_md5_listen(struct listen_addr *la, struct peer_head *p)
+tcp_md5_prep_listener(struct listen_addr *la, struct peer_head *p)
 {
int opt = 1;
 
@@ -865,4 +868,16 @@ tcp_md5_listen(struct listen_addr *la, s
return -1;
}
return 0;
+}
+
+/* add md5 key to all listening sockets, dummy function for portable */
+void
+tcp_md5_add_listener(struct bgpd_config *conf, struct peer *p)
+{
+}
+
+/* delete md5 key from all listening sockets, dummy function for portable */
+void
+tcp_md5_del_listener(struct bgpd_config *conf, struct peer *p)
+{
 }
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.393
diff -u -p -r1.393 session.c
--- session.c   1 Oct 2019 09:03:43 -   1.393
+++ session.c   1 Oct 2019 09:13:22 -
@@ -152,8 +152,8 @@ setup_listeners(u_int *la_cnt)
continue;
}
 
-   if (tcp_md5_listen(la, &conf->peers) == -1)
-   fatal("tcp_md5_listen");
+   if (tcp_md5_prep_listener(la, &conf->peers) == -1)
+   fatal("tcp_md5_prep_listener");
 
/* set ttl to 255 so that ttl-security works */
if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
@@ -276,6 +276,7 @@ session_main(int debug, int verbose)
log_peer_warnx(&p->conf, "removed");
RB_REMOVE(peer_head, &conf->peers, p);
timer_remove_all(p);
+   tcp_md5_del_listener(conf, p);
free(p);
peer_cnt--;
continue;
@@ -3179,6 +3180,13 @@ merge_peers(struct bgpd_config *c, struc
continue;
}
 
+   /* peer no longer uses TCP MD5SIG so deconfigure */
+   if (p->conf.auth.method == AUTH_MD5SIG &&
+   np->conf.auth.method != AUTH_MD5SIG)
+   tcp_md5_del_listener(c, p);
+   else if (np->conf.auth.method == AUTH_MD5SIG)
+   tcp_md5_add_listener(c, np);
+
memcpy(&p->conf, &np->conf, sizeof(p->conf));
RB_REMOVE(peer_head, &nc->peers, np);
free(np);
@@ -3219,5 +3227,7 @@ merge_peers(struct bgpd_config *c, struc
RB_REMOVE(peer_head, &nc->peers, np);
if (RB_INSERT(peer_head, &c->peers, np) != NULL)
fatalx("%s: peer tree is corrupt", __func__);
+   if (np->conf.auth.method == AUTH_MD5SIG)
+   tcp_md5_add_listener(c, np);
}
 }
Index: session.h
===
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.140
diff -u -p -r1.140 session.h
--- session.h   30 Sep 2019 12:10:38 -  1.140
+++ session.h   1 Oct 2019 09:13:22 -
@@ -285,7 +285,9 @@ int pfkey_remove(struct peer *);
 intpfkey_init(void);
 inttcp_md5_check(int, struct peer *);
 inttcp_md5_set(int, struct peer *);
-inttcp_md5_listen(struct listen_addr *, struct peer_head *);
+inttcp_md5_prep_listener(struct listen_addr *, struct peer_head *);
+void   tcp_md5_add_listener(struct bgpd_config *, struct peer *);
+void   tcp_md5_del_listener(struct bgpd_config *, struct peer *);
 
 /* printconf.c */
 void   print_config(struct bgpd_config *, struct rib_names *);



minor bgpd cleanup

2019-10-02 Thread Claudio Jeker
When removing a peer currently there is an ugly warning:
pfkey reload: no such peer: id=3

Because the peer removal happens late in the SE the parent already removed
it and so the last IMSG_PFKEY_RELOAD call when stopping the session before
removal has no peer to match against. So silence the warning and ignore
it.

Additionally reorder the peer removal in the SE to do the log and
RB_REMOVE right before the free() call.

OK? 
-- 
:wq Claudio

Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.226
diff -u -p -r1.226 bgpd.c
--- bgpd.c  1 Oct 2019 08:57:47 -   1.226
+++ bgpd.c  1 Oct 2019 14:55:29 -
@@ -812,13 +812,12 @@ dispatch_imsg(struct imsgbuf *ibuf, int 
rv = -1;
break;
case IMSG_PFKEY_RELOAD:
-   if (idx != PFD_PIPE_SESSION)
+   if (idx != PFD_PIPE_SESSION) {
log_warnx("pfkey reload request not from SE");
-   else if ((p = getpeerbyid(conf, imsg.hdr.peerid)) ==
-   NULL)
-   log_warnx("pfkey reload: no such peer: id=%u",
-   imsg.hdr.peerid);
-   else {
+   break;
+   }
+   p = getpeerbyid(conf, imsg.hdr.peerid);
+   if (p != NULL) {
if (pfkey_establish(p) == -1)
log_peer_warnx(&p->conf,
"pfkey setup failed");
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.394
diff -u -p -r1.394 session.c
--- session.c   1 Oct 2019 11:05:30 -   1.394
+++ session.c   1 Oct 2019 14:53:52 -
@@ -273,10 +273,10 @@ session_main(int debug, int verbose)
session_demote(p, -1);
p->conf.demote_group[0] = 0;
session_stop(p, ERR_CEASE_PEER_UNCONF);
-   log_peer_warnx(&p->conf, "removed");
-   RB_REMOVE(peer_head, &conf->peers, p);
timer_remove_all(p);
tcp_md5_del_listener(conf, p);
+   log_peer_warnx(&p->conf, "removed");
+   RB_REMOVE(peer_head, &conf->peers, p);
free(p);
peer_cnt--;
continue;



umb(4) switch bpf to DLT_LOOP

2019-10-02 Thread Claudio Jeker
umb(4) is currently the only user of DLT_RAW. The problem with this is
that it only works for IPv4 packets and that is less than ideal.
This diff switches umb(4) to DLT_LOOP like lo(4), tun(4), gif(4), ...

To make this work the driver injects the af header in umb_decap() when
pulling the packet off the ring and pops that header in umb_input(); this
way the common interface input handling can still be used.
For the outbound packets umb_output() will now set ph_family header in the
mbuf so that umb_start() can use this in bpf_mtap_af().

With this tcpdump should be able to show also IPv6 packets. 
My provider does not do IPv6 (neither via umb MBIM_CID_IP_CONFIGURATION nor
via slaac) but maybe someone else has a more tech competent provider than
me and is willing to make IPv6 go on umb(4).
-- 
:wq Claudio

Index: dev/usb/if_umb.c
===
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.26
diff -u -p -r1.26 if_umb.c
--- dev/usb/if_umb.c29 Sep 2019 15:31:16 -  1.26
+++ dev/usb/if_umb.c1 Oct 2019 11:00:10 -
@@ -516,7 +516,7 @@ umb_attach(struct device *parent, struct
if_alloc_sadl(ifp);
ifp->if_softc = sc;
 #if NBPFILTER > 0
-   bpfattach(&ifp->if_bpf, ifp, DLT_RAW, 0);
+   bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
 #endif
/*
 * Open the device now so that we are able to query device information.
@@ -759,19 +759,20 @@ umb_output(struct ifnet *ifp, struct mbu
m_freem(m);
return ENETDOWN;
}
+   m->m_pkthdr.ph_family = dst->sa_family;
return if_enqueue(ifp, m);
 }
 
 int
 umb_input(struct ifnet *ifp, struct mbuf *m, void *cookie)
 {
-   uint8_t ipv;
+   uint32_t af;
 
if ((ifp->if_flags & IFF_UP) == 0) {
m_freem(m);
return 1;
}
-   if (m->m_pkthdr.len < sizeof (struct ip)) {
+   if (m->m_pkthdr.len < sizeof (struct ip) + sizeof(af)) {
ifp->if_ierrors++;
DPRINTFN(4, "%s: dropping short packet (len %d)\n", __func__,
m->m_pkthdr.len);
@@ -779,16 +780,19 @@ umb_input(struct ifnet *ifp, struct mbuf
return 1;
}
m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
-   m_copydata(m, 0, sizeof (ipv), &ipv);
-   ipv >>= 4;
+
+   /* pop of DLT_LOOP header, no longer needed */
+   af = *mtod(m, uint32_t *);
+   m_adj(m, sizeof (af));
+   af = ntohl(af);
 
ifp->if_ibytes += m->m_pkthdr.len;
-   switch (ipv) {
-   case 4:
+   switch (af) {
+   case AF_INET:
ipv4_input(ifp, m);
return 1;
 #ifdef INET6
-   case 6:
+   case AF_INET6:
ipv6_input(ifp, m);
return 1;
 #endif /* INET6 */
@@ -878,7 +882,8 @@ umb_start(struct ifnet *ifp)
 
 #if NBPFILTER > 0
if (ifp->if_bpf)
-   bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
+   bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, m,
+   BPF_DIRECTION_OUT);
 #endif
}
if (ml_empty(&sc->sc_tx_ml))
@@ -1916,7 +1921,7 @@ umb_decap(struct umb_softc *sc, struct u
struct ifnet *ifp = GET_IFP(sc);
int  s;
void*buf;
-   uint32_t len;
+   uint32_t len, af = 0;
char*dp;
struct ncm_header16 *hdr16;
struct ncm_header32 *hdr32;
@@ -2033,12 +2038,25 @@ umb_decap(struct umb_softc *sc, struct u
 
dp = buf + doff;
DPRINTFN(3, "%s: decap %d bytes\n", DEVNAM(sc), dlen);
-   m = m_devget(dp, dlen, 0);
+   m = m_devget(dp, dlen, sizeof(uint32_t));
if (m == NULL) {
ifp->if_iqdrops++;
continue;
}
-
+   m = m_prepend(m, sizeof(uint32_t), M_DONTWAIT);
+   if (m == NULL) {
+   ifp->if_iqdrops++;
+   continue;
+   }
+   switch (*dp & 0xf0) {
+   case 4 << 4:
+   af = htonl(AF_INET);
+   break;
+   case 6 << 4:
+   af = htonl(AF_INET6);
+   break;
+   }
+   *mtod(m, uint32_t *) = af;
ml_enqueue(&ml, m);
}
 done:



Re: rpki-client patch submission

2019-10-02 Thread Claudio Jeker
On Wed, Oct 02, 2019 at 03:41:06PM -0300, Alexandre Hamada wrote:
> Hi Tech,
> 
> I've found a missing initialization at ip.c (rpki-client project), and I
> would like to share this patch with the repository maintainer.
> 
> Kind regards,
> Alexandre Hamada

Thanks for this patch. The memset() of addr makes sense and I will commit
this part tomorrow if nobody else does it before me.
I will skip the first hunk checking the addr argument for NULL. The right
use of this function is to pass a valid pointer to a struct ip_addr. There
is no need to check for one particular case of misuse here. Let the
program crash if one of the callers is doing it wrong and the person
introducing the bug will find it quickly. Currently all callers are safe.
 
> https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/8.patch
> 
> From e74a5c02fbab9172dd856fc7214be42ef1dc6b65 Mon Sep 17 00:00:00 2001
> From: dev-gto <43351700+dev-...@users.noreply.github.com>
> Date: Wed, 2 Oct 2019 15:34:28 -0300
> Subject: [PATCH] Fix non-initialized addr
> 
> Running test-roa several times on the same .roa containing /24 ipV4 block 
> gives different output in the last octet.
> ---
>  ip.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/ip.c b/ip.c
> index 8587c42..968a474 100644
> --- a/ip.c
> +++ b/ip.c
> @@ -205,6 +205,9 @@ ip_addr_parse(const ASN1_BIT_STRING *p,
>   warnx("%s: RFC 3779 section 2.2.3.8: "
>   "unused bit count must be zero if length is zero", fn);
>   return 0;
> + } else if (addr == NULL) {
> + warnx("%s: Invalid param addr", fn);
> + return 0;
>   }
>   /*
> @@ -229,6 +232,7 @@ ip_addr_parse(const ASN1_BIT_STRING *p,
>   return 0;
>   }
> + memset (addr, 0, sizeof(struct ip_addr));
>   addr->prefixlen = p->length * 8 - unused;
>   memcpy(addr->addr, p->data, p->length);
>   return 1;
> 
> 

-- 
:wq Claudio



Re: snmp(1): Add set command

2019-10-03 Thread Claudio Jeker
On Thu, Oct 03, 2019 at 10:01:06AM +0200, Martijn van Duren wrote:
> On 10/3/19 9:21 AM, Sebastien Marie wrote:
> > On Thu, Sep 26, 2019 at 02:33:11PM +0200, Martijn van Duren wrote:
> >> On 9/26/19 9:54 AM, Martijn van Duren wrote:
> >>> Hello,
> >>>
> >>> I reckon this will be on of the last major additions.
> >>> Adding "snmp set" allows us to run snmpd's regress without installing
> >>> netsnmp. :-)
> >>>
> >>> Tested with snmpd's regress test.
> >>>
> >>> Majority of diff is moving oid/value parsing from snmp trap to a
> >>> separate function.
> >>>
> >>> OK?
> >>>
> >>> martijn@
> > 
> > few comments.
> > 
> > thanks.
> > 
> >> Index: snmpc.c
> >> ===
> >> RCS file: /cvs/src/usr.bin/snmp/snmpc.c,v
> >> retrieving revision 1.11
> >> diff -u -p -r1.11 snmpc.c
> >> --- snmpc.c18 Sep 2019 09:54:36 -  1.11
> >> +++ snmpc.c26 Sep 2019 12:32:38 -
> >> @@ -666,19 +669,54 @@ snmpc_walk(int argc, char *argv[])
> >>  }
> >>  
> >>  int
> >> +snmpc_set(int argc, char *argv[])
> >> +{
> >> +  struct snmp_agent *agent;
> >> +  struct ber_element *pdu, *varbind;
> >> +  int errorstatus, errorindex;
> >> +  int class;
> >> +  unsigned type;
> >> +
> >> +  if (argc < 4)
> >> +  usage();
> >> +  if ((agent = snmpc_connect(argv[0], "161")) == NULL)
> >> +  err(1, "%s", snmp_app->name);
> >> +  argc--;
> >> +  argv++;
> >> +
> >> +  if (argc < 3 || argc % 3 != 0)
> >> +  usage();
> >> +
> >> +  if (pledge("stdio", NULL) == -1)
> >> +  err(1, "pledge");
> >> +
> >> +  pdu = snmp_set(agent, snmpc_varbindparse(argc, argv));
> >> +
> >> +  (void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type, &errorstatus,
> >> +  &errorindex, &varbind);
> >> +  if (errorstatus != 0)
> >> +  snmpc_printerror((enum snmp_error) errorstatus,
> >> +  argv[errorindex - 1]);
> > 
> > is "errorindex - 1" the right index ?
> 
> Yes: RFC 3416 section 4.1:
> A variable binding is identified by its index value.  The first variable
> binding in a variable-binding list is index one, the second is index 
> two, etc.
> 
> I choose to use argv, because it's easier for the end user to see his
> own input for where he mistyped instead of a double parsed oid to -O
> based output.
> 
> > 
> > $ ./obj/snmp set 192.168.1.5 sysContact.0 s "test"
> > snmp: Can't parse oid 192.168.1.5: Not writable
> > 
> > Note that the same pattern is used in others commands.
> 
> Somehow the machines I test against don't return a not writable message.
> Tested against snmpd, net-snmpd, HP Laserjet.
> Could you show me a tcpdump -v output on this output?
> > 
> 
> >> +struct ber_element *
> >> +snmpc_varbindparse(int argc, char *argv[])
> >> +{
> >> +  struct ber_oid oid, oidval;
> >> +  struct in_addr addr4;
> >> +  char *addr = (char *)&addr4;
> >> +  char *str = NULL, *tmpstr, *endstr;
> >> +  const char *errstr = NULL;
> >> +  struct ber_element *varbind = NULL, *vblist = NULL;
> >> +  int i, ret;
> >> +  size_t strl, byte;
> >> +  long long lval;
> >> +
> >> +  if (argc % 3 != 0)
> >> +  usage();
> > 
> > if I don't mess myself, callers already checks that 'argc % 3 != 0'. So I 
> > think
> > the condition is more a defense for programmer error that user error, and I
> > would use an assert(argc % 3 == 0) or abort().
> 
> I removed these checks from snmpc_{set,trap}. Even less code. :-)
> 
> Index: snmp.1
> ===
> RCS file: /cvs/src/usr.bin/snmp/snmp.1,v
> retrieving revision 1.6
> diff -u -p -r1.6 snmp.1
> --- snmp.118 Sep 2019 09:54:36 -  1.6
> +++ snmp.13 Oct 2019 07:59:07 -
> @@ -110,6 +110,28 @@
>  .Ar agent
>  .Op Ar oid
>  .Nm
> +.Cm set
> +.Op Fl A Ar authpass
> +.Op Fl a Ar digest
> +.Op Fl c Ar community
> +.Op Fl E Ar ctxengineid
> +.Op Fl e Ar secengineid
> +.Op Fl K Ar localpriv
> +.Op Fl k Ar localauth
> +.Op Fl l Ar seclevel
> +.Op Fl n Ar ctxname
> +.Op Fl O Cm afnQqSvx
> +.Op Fl r Ar retries
> +.Op Fl t Ar timeout
> +.Op Fl u Ar user
> +.Op Fl v Ar version
> +.Op Fl X Ar privpass
> +.Op Fl x Ar cipher
> +.Op Fl Z Ar boots , Ns Ar time
> +.Ar agent
> +.Ar varoid type value
> +.Oo Ar varoid type value Oc ...
> +.Nm
>  .Cm trap
>  .Op Fl A Ar authpass
>  .Op Fl a Ar digest
> @@ -182,6 +204,12 @@ This uses the
>  subcommand internally to retrieve multiple MIBs at a time.
>  This command is not available for
>  .Fl v Cm 1 .
> +.It Cm set
> +Set one or more OID to a new value.
> +The triple
> +.Ar varoid , type , value
> +is described in
> +.Sx Data types .
>  .It Cm trap
>  Send a trap message to the
>  .Ar agent .
> @@ -194,8 +222,8 @@ The
>  .Ar trapoid
>  is the identification OID used by the trap handler to determine its action.
>  The triple
> -.Op Ar varoid , type, value
> -is described below
> +.Op Ar varoid , type , value
> +is described in
>  .Sx Data types .
>  This command is not available for
>  .Fl v Cm 1 .
> Ind

Re: snmpd allow walk on agentx [2/2]

2019-10-03 Thread Claudio Jeker
On Thu, Oct 03, 2019 at 08:28:02AM +0200, Martijn van Duren wrote:
> Any feedback on the relayd part?
> 
> On 9/25/19 8:58 AM, Martijn van Duren wrote:
> > On 9/25/19 8:55 AM, Martijn van Duren wrote:
> >> Hello,
> >>
> >> Mischa found that relayd's agentx support is pretty much unusable for 
> >> the uninitiated, because you have to know the tables beforehand.
> >>
> >> I managed to track it down to two issue with both snmpd and relayd.
> >> The fix is far from proper support for agentx, but it's good enough
> >> for going on a walk.
> >>
> > Part 2:
> > If the requested OID is a predecessor of what we support and the request
> > is getnext we should retrieve the first available mib.
> > 
> > OK?
> > 
> > martijn@
> > 
> > Index: snmp.c
> > ===
> > RCS file: /cvs/src/usr.sbin/relayd/snmp.c,v
> > retrieving revision 1.29
> > diff -u -p -r1.29 snmp.c
> > --- snmp.c  28 May 2017 10:39:15 -  1.29
> > +++ snmp.c  25 Sep 2019 06:58:21 -
> > @@ -321,21 +321,27 @@ snmp_agentx_process(struct agentx_handle
> >  
> > bcopy(&sr.start, &oid, sizeof(oid));
> >  
> > -   /*
> > -* If the requested OID is not part of the registered
> > -* MIB, return "no such object", per RFC
> > -*/
> > if (snmp_oid_cmp(&relaydinfooid, &oid) == -1) {
> > -   if (snmp_agentx_varbind(resp, &sr.start,
> > -   AGENTX_NO_SUCH_OBJECT, NULL, 0) == -1) {
> > -   log_warn("%s: unable to generate"
> > -   " response", __func__);
> > -   snmp_agentx_pdu_free(resp);
> > -   resp = NULL;
> > +   /*
> > +* If the requested OID is not part of the 
> > registered
> > +* MIB, return "no such object", per RFC
> > +*/
> > +   if (pdu->hdr->type == AGENTX_GET) {
> > +   if (snmp_agentx_varbind(resp, &sr.start,
> > +   AGENTX_NO_SUCH_OBJECT, NULL, 0) == 
> > -1) {
> > +   log_warn("%s: unable to 
> > generate"
> > +   " response", __func__);
> > +   snmp_agentx_pdu_free(resp);
> > +   resp = NULL;
> > +   }
> > +   goto reply;
> > }
> > -   goto reply;
> > +   bcopy(&relaydinfooid, &oid, sizeof(oid));
> > +   }

Up to here it mostly makes sense but...

> > +   if (oid.o_n == 9) {
> > +   oid.o_id[9] = 1;
> > +   oid.o_n++;
> > }

It is really hard to convince myself that this check is right. I'm also
surprised that none of the other possible snmp_oid_cmp() return values are
checked (I assume that 2 would be the usual case for a GET request and a
return of 1 should trigger an error). 

I wonder if there is a better way to write this to be less magic. My
argument here is that this is example code on how to integrate agentx
support into other daemons and looking at this I have to say I'm sceptical
if I want such code in e.g. bgpd.

It is however OK claudio@

> > -
> > if (oid.o_n != OIDIDX_relaydInfo + 2 + 1) {
> > /* GET requests require the exact OID */
> > if (pdu->hdr->type == AGENTX_GET)
> > 
> 

-- 
:wq Claudio



Re: snmp(1): Add set command

2019-10-03 Thread Claudio Jeker
On Thu, Oct 03, 2019 at 10:42:42AM +0200, Martijn van Duren wrote:
> On 10/3/19 10:19 AM, Claudio Jeker wrote:
> > On Thu, Oct 03, 2019 at 10:01:06AM +0200, Martijn van Duren wrote:
> >> On 10/3/19 9:21 AM, Sebastien Marie wrote:
> >>> On Thu, Sep 26, 2019 at 02:33:11PM +0200, Martijn van Duren wrote:
> >>>> On 9/26/19 9:54 AM, Martijn van Duren wrote:
> >>>>> Hello,
> >>>>>
> >>>>> I reckon this will be on of the last major additions.
> >>>>> Adding "snmp set" allows us to run snmpd's regress without installing
> >>>>> netsnmp. :-)
> >>>>>
> >>>>> Tested with snmpd's regress test.
> >>>>>
> >>>>> Majority of diff is moving oid/value parsing from snmp trap to a
> >>>>> separate function.
> >>>>>
> >>>>> OK?
> >>>>>
> >>>>> martijn@
> >>>
> >>> few comments.
> >>>
> >>> thanks.
> >>>
> >>>> Index: snmpc.c
> >>>> ===
> >>>> RCS file: /cvs/src/usr.bin/snmp/snmpc.c,v
> >>>> retrieving revision 1.11
> >>>> diff -u -p -r1.11 snmpc.c
> >>>> --- snmpc.c  18 Sep 2019 09:54:36 -  1.11
> >>>> +++ snmpc.c  26 Sep 2019 12:32:38 -
> >>>> @@ -666,19 +669,54 @@ snmpc_walk(int argc, char *argv[])
> >>>>  }
> >>>>  
> >>>>  int
> >>>> +snmpc_set(int argc, char *argv[])
> >>>> +{
> >>>> +struct snmp_agent *agent;
> >>>> +struct ber_element *pdu, *varbind;
> >>>> +int errorstatus, errorindex;
> >>>> +int class;
> >>>> +unsigned type;
> >>>> +
> >>>> +if (argc < 4)
> >>>> +usage();
> >>>> +if ((agent = snmpc_connect(argv[0], "161")) == NULL)
> >>>> +err(1, "%s", snmp_app->name);
> >>>> +argc--;
> >>>> +argv++;
> >>>> +
> >>>> +if (argc < 3 || argc % 3 != 0)
> >>>> +usage();
> >>>> +
> >>>> +if (pledge("stdio", NULL) == -1)
> >>>> +err(1, "pledge");
> >>>> +
> >>>> +pdu = snmp_set(agent, snmpc_varbindparse(argc, argv));
> >>>> +
> >>>> +(void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type, 
> >>>> &errorstatus,
> >>>> +&errorindex, &varbind);
> >>>> +if (errorstatus != 0)
> >>>> +snmpc_printerror((enum snmp_error) errorstatus,
> >>>> +argv[errorindex - 1]);
> >>>
> >>> is "errorindex - 1" the right index ?
> >>
> >> Yes: RFC 3416 section 4.1:
> >> A variable binding is identified by its index value.  The first variable
> >> binding in a variable-binding list is index one, the second is index 
> >> two, etc.
> >>
> >> I choose to use argv, because it's easier for the end user to see his
> >> own input for where he mistyped instead of a double parsed oid to -O
> >> based output.
> >>
> >>>
> >>> $ ./obj/snmp set 192.168.1.5 sysContact.0 s "test"
> >>> snmp: Can't parse oid 192.168.1.5: Not writable
> >>>
> >>> Note that the same pattern is used in others commands.
> >>
> >> Somehow the machines I test against don't return a not writable message.
> >> Tested against snmpd, net-snmpd, HP Laserjet.
> >> Could you show me a tcpdump -v output on this output?
> >>>
> >>
> >>>> +struct ber_element *
> >>>> +snmpc_varbindparse(int argc, char *argv[])
> >>>> +{
> >>>> +struct ber_oid oid, oidval;
> >>>> +struct in_addr addr4;
> >>>> +char *addr = (char *)&addr4;
> >>>> +char *str = NULL, *tmpstr, *endstr;
> >>>> +const char *errstr = NULL;
> >>>> +struct ber_element *varbind = NULL, *vblist = NULL;
> >>>> +int i, ret;
> >&

bgpd fix for pftable panic

2019-10-04 Thread Claudio Jeker
Because of delaying the commits in the RDE it is now possible that table
additions and deletions mix. This triggers a fatal("attempt to mix pf table
additions/deletions") in the parent. Instead of the fatal() it is actually
safe to just commit the pending work and start with a fresh worklist
afterwards.

-- 
:wq Claudio

Index: pftable.c
===
RCS file: /cvs/src/usr.sbin/bgpd/pftable.c,v
retrieving revision 1.14
diff -u -p -r1.14 pftable.c
--- pftable.c   8 Aug 2019 20:06:29 -   1.14
+++ pftable.c   4 Oct 2019 10:22:15 -
@@ -191,10 +191,13 @@ pftable_add_work(const char *table, stru
return (-1);
}
 
-   /* Only one type of work on the list at a time */
+   /*
+* Only one type of work on the list at a time,
+* commit pending work first before adding new work
+*/
what = del ? DIOCRDELADDRS : DIOCRADDADDRS;
if (pft->naddrs != 0 && pft->what != what)
-   fatal("attempt to mix pf table additions/deletions");
+   pftable_commit();
 
if (pft->nalloc <= pft->naddrs)
pft->nalloc = pft->nalloc == 0 ? 1 : pft->nalloc * 2;



Re: snmp(1): Better index checking on pdu error.

2019-10-08 Thread Claudio Jeker
On Tue, Oct 08, 2019 at 09:36:27AM +0200, Martijn van Duren wrote:
> ping. I think this (or similar) fix should go in before release.
> 
> On 10/3/19 5:33 PM, Martijn van Duren wrote:
> > As discussed with semarie@ this morning: We're a bit too loose when it
> > comes to accessing indexes based directly from the pdu.
> > 
> > Diff below allows only indexes within the range of the pdu.
> > If user supplied oid and returned oid on index match the user supplied
> > input is shown (for easier reference). If they don't match then show
> > the oid based on the -O flag. If the index is out of range throw an
> > additional error message and show a "?".
> > 
> > Since it apparently is hard to get a proper error status with the
> > daemons available to me, this is only compile tested. Testers welcome.
> > 
> > This also adds a missing NULL-check, which could be committed
> > separately.
> > 
> > OK?

A little bit of bikeshedding below.

> > martijn@
> > 
> > Index: snmpc.c
> > ===
> > RCS file: /cvs/src/usr.bin/snmp/snmpc.c,v
> > retrieving revision 1.13
> > diff -u -p -r1.13 snmpc.c
> > --- snmpc.c 3 Oct 2019 11:02:26 -   1.13
> > +++ snmpc.c 3 Oct 2019 15:30:50 -
> > @@ -52,7 +52,8 @@ int snmpc_mibtree(int, char *[]);
> >  struct snmp_agent *snmpc_connect(char *, char *);
> >  int snmpc_parseagent(char *, char *);
> >  int snmpc_print(struct ber_element *);
> > -__dead void snmpc_printerror(enum snmp_error, char *);
> > +__dead void snmpc_printerror(enum snmp_error, struct ber_element *, int,
> > +const char *);
> >  char *snmpc_hex2bin(char *, size_t *);
> >  struct ber_element *snmpc_varbindparse(int, char *[]);
> >  void usage(void);
> > @@ -480,6 +481,7 @@ snmpc_get(int argc, char *argv[])
> > int i;
> > int class;
> > unsigned type;
> > +   char *hint = NULL;
> >  
> > if (argc < 2)
> > usage();
> > @@ -519,9 +521,12 @@ snmpc_get(int argc, char *argv[])
> >  
> > (void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type, &errorstatus,
> > &errorindex, &varbind);
> > -   if (errorstatus != 0)
> > -   snmpc_printerror((enum snmp_error) errorstatus,
> > -   argv[errorindex - 1]);
> > +   if (errorstatus != 0) {
> > +   if (errorindex >= 1 && errorindex <= argc)
> > +   hint = argv[errorindex - 1];
> > +   snmpc_printerror((enum snmp_error) errorstatus, varbind,
> > +   errorindex, hint);
> > +   }
> >  
> > if (class == BER_CLASS_CONTEXT && type == SNMP_C_REPORT)
> > printf("Received report:\n");
> > @@ -542,7 +547,6 @@ snmpc_walk(int argc, char *argv[])
> > struct timespec start, finish;
> > struct snmp_agent *agent;
> > const char *oids;
> > -   char oidstr[SNMP_MAX_OID_STRLEN];
> > int n = 0, prev_cmp;
> > int errorstatus, errorindex;
> > int class;
> > @@ -574,8 +578,8 @@ snmpc_walk(int argc, char *argv[])
> > (void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type,
> > &errorstatus, &errorindex, &varbind);
> > if (errorstatus != 0)
> > -   snmpc_printerror((enum snmp_error) errorstatus,
> > -   argv[errorindex - 1]);
> > +   snmpc_printerror((enum snmp_error) errorstatus, varbind,
> > +   errorindex, oids);
> >  
> > if (class == BER_CLASS_CONTEXT && type == SNMP_C_REPORT)
> > printf("Received report:\n");
> > @@ -600,9 +604,8 @@ snmpc_walk(int argc, char *argv[])
> > (void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type,
> > &errorstatus, &errorindex, &varbind);
> > if (errorstatus != 0) {
> > -   smi_oid2string(&noid, oidstr, sizeof(oidstr),
> > -   oid_lookup);
> > -   snmpc_printerror((enum snmp_error) errorstatus, oidstr);
> > +   snmpc_printerror((enum snmp_error) errorstatus, varbind,
> > +   errorindex, NULL);
> > }
> >  
> > if (class == BER_CLASS_CONTEXT && type == SNMP_C_REPORT)
> > @@ -639,8 +642,8 @@ snmpc_walk(int argc, char *argv[])
> > (void) ber_scanf_elements(pdu, "t{Sdd{e", &class, &type,
> > &errorstatus, &errorindex, &varbind);
> > if (errorstatus != 0)
> > -   snmpc_printerror((enum snmp_error) errorstatus,
> > -   argv[errorindex - 1]);
> > +   snmpc_printerror((enum snmp_error) errorstatus, varbind,
> > +   errorindex, oids);
> >  
> > if (class == BER_CLASS_CONTEXT && type == SNMP_C_REPORT)
> > printf("Received report:\n");
> > @@ -676,6 +679,7 @@ snmpc_set(int argc, char *argv[])
> > int errorstatus, errorindex;
> > int class;
> > unsigned type;
> > +   char *hint = NULL;
> >  
> > if (argc < 4)
> > usa

Re: rpki-client patch submission

2019-10-16 Thread Claudio Jeker
On Wed, Oct 16, 2019 at 07:26:25AM -0300, Alexandre Hamada wrote:
> Hi Tech,
> I would like to suggest to use UTC functions on all date/time conversions,
> to avoid some clock drift errors.

I came to the conclusion that more is needed in this function.
Converting time using ASN1_GENERALIZEDTIME_print(), strptime() and
whatever version of mktime() is not going to work well.
I think this code should be rewritten using X509_cmp_time() instead.
In libressl there is even ASN1_time_parse() but since this code still
needs openssl code for the CMS functions it is better to stick to
X509_cmp_time().

I put it on my list of things to look at in rpki-client.
 
> Kind regards,
> Alexandre Hamada
> 
> https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/9.patch
> 
> From a463f8cb23375f15b74eff49a06e8934423e3dbf Mon Sep 17 00:00:00 2001
> From: dev-gto <43351700+dev-...@users.noreply.github.com>
> Date: Wed, 16 Oct 2019 07:22:46 -0300
> Subject: [PATCH] Avoid local time conversion
> 
> ---
>  mft.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/mft.c b/mft.c
> index f9176b4..738f3ff 100644
> --- a/mft.c
> +++ b/mft.c
> @@ -75,8 +75,8 @@ gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME 
> *tp)
>   memset(&tm, 0, sizeof(struct tm));
>   if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
>   errx(EXIT_FAILURE, "%s: strptime", buf);
> - if ((t = mktime(&tm)) == -1)
> - errx(EXIT_FAILURE, "%s: mktime", buf);
> + if ((t = timegm(&tm)) == -1)
> + errx(EXIT_FAILURE, "%s: timegm", buf);
>   return t;
>  }
> 
> 

-- 
:wq Claudio



Re: rpki-client patch submission

2019-10-16 Thread Claudio Jeker
On Wed, Oct 16, 2019 at 07:26:25AM -0300, Alexandre Hamada wrote:
> Hi Tech,
> I would like to suggest to use UTC functions on all date/time conversions,
> to avoid some clock drift errors.
> 
> Kind regards,
> Alexandre Hamada
> 
> https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/9.patch
> 
> From a463f8cb23375f15b74eff49a06e8934423e3dbf Mon Sep 17 00:00:00 2001
> From: dev-gto <43351700+dev-...@users.noreply.github.com>
> Date: Wed, 16 Oct 2019 07:22:46 -0300
> Subject: [PATCH] Avoid local time conversion
> 
> ---
>  mft.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/mft.c b/mft.c
> index f9176b4..738f3ff 100644
> --- a/mft.c
> +++ b/mft.c
> @@ -75,8 +75,8 @@ gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME 
> *tp)
>   memset(&tm, 0, sizeof(struct tm));
>   if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
>   errx(EXIT_FAILURE, "%s: strptime", buf);
> - if ((t = mktime(&tm)) == -1)
> - errx(EXIT_FAILURE, "%s: mktime", buf);
> + if ((t = timegm(&tm)) == -1)
> + errx(EXIT_FAILURE, "%s: timegm", buf);
>   return t;
>  }
> 

Hi Alexandre,

How about this diff instead. This is inspired by OCSP_check_validity() and
uses ASN1_GENERALIZEDTIME_check() and X509_cmp_time() to do the validity
check. I think this has a way better chance to produce the expected
results. My quick testing seems to indicate that it works but review and
testing is very welcome.

-- 
:wq Claudio

Index: mft.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/mft.c,v
retrieving revision 1.7
diff -u -p -r1.7 mft.c
--- mft.c   13 Aug 2019 13:27:26 -  1.7
+++ mft.c   16 Oct 2019 20:39:36 -
@@ -35,49 +35,57 @@ struct  parse {
struct mft  *res; /* result object */
 };
 
-/*
- * Convert from the ASN.1 generalised time to a time_t.
- * Return the time.
- * This is a stupid requirement due to using ASN1_GENERALIZEDTIME
- * instead of the native ASN1_TIME functions for comparing time.
- */
-static time_t
-gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME *tp)
+static const char *
+gentime2str(const ASN1_GENERALIZEDTIME *time)
 {
+   static char buf[64];
BIO *mem;
-   char*pp;
-   char buf[64];
-   long len;
-   struct tmtm;
-   time_t   t;
 
if ((mem = BIO_new(BIO_s_mem())) == NULL)
cryptoerrx("BIO_new");
-   if (!ASN1_GENERALIZEDTIME_print(mem, tp))
+   if (!ASN1_GENERALIZEDTIME_print(mem, time))
cryptoerrx("ASN1_GENERALIZEDTIME_print");
+   if (BIO_gets(mem, buf, sizeof(buf)) < 0)
+   cryptoerrx("BIO_gets");
 
-   /*
-* The manpage says nothing about being NUL terminated and
-* strptime(3) needs a string.
-* So convert into a static buffer of decent size and NUL
-* terminate in that way.
-*/
-
-   len = BIO_get_mem_data(mem, &pp);
-   if (len < 0 || (size_t)len > sizeof(buf) - 1)
-   errx(EXIT_FAILURE, "BIO_get_mem_data");
-
-   memcpy(buf, pp, len);
-   buf[len] = '\0';
BIO_free(mem);
+   return buf;
+}
+
+/*
+ * Validate and verify the time validity of the mft.
+ * Returns 1 if all is good, 0 if mft is stale, any other case -1.
+ */
+static time_t
+check_validity(const ASN1_GENERALIZEDTIME *from,
+const ASN1_GENERALIZEDTIME *until, const char *fn, int force)
+{
+   time_t now = time(NULL);
 
-   memset(&tm, 0, sizeof(struct tm));
-   if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
-   errx(EXIT_FAILURE, "%s: strptime", buf);
-   if ((t = mktime(&tm)) == -1)
-   errx(EXIT_FAILURE, "%s: mktime", buf);
+   if (!ASN1_GENERALIZEDTIME_check(from) ||
+   !ASN1_GENERALIZEDTIME_check(until)) {
+   warnx("%s: embedded time format invalid", fn);
+   return -1;
+   }
+   /* check that until is not before from */
+   if (ASN1_STRING_cmp(until, from) < 0) {
+   warnx("%s: bad update interval", fn);
+   return -1;
+   }
+   /* check that now is not before from */
+   if (X509_cmp_time(from, &now) > 0) {
+   warnx("%s: mft not yet valid %s", fn, gentime2str(from));
+   return -1;
+   }
+   /* check that now is not after until */
+   if (X509_cmp_time(until, &now) < 0) {
+   warnx("%s: mft expired on %s%s", fn, gentime2str(until),
+   force ? " (ignoring)" : "");
+   if (!force)
+   return 0;
+   }
 
-   return t;
+   return 1;
 }
 
 /*
@@ -229,6 +237,7 @@ mft_parse_econtent(const unsigned char *
 {
ASN1_SEQUENCE_ANY   *seq;
const ASN1_TYPE *t;
+   const ASN1_GENERALIZEDTIME *from, *until;
int  i, rc = -1;
 

Re: rpki-client patch submission

2019-10-17 Thread Claudio Jeker
On Thu, Oct 17, 2019 at 08:07:18AM -0300, Alexandre Hamada wrote:
> Hi Claudio,
> 
> Thanks for this, much appreciated.
> 
> This is a much cleaner solution, I really liked it.
> 
> I've tested both versions against a brand new manifest, and it is working
> well.

Thanks for testing. Did you manage to trigger warnings with the original
version (the one using mktime())? I was not really able to trigger them
but my guess is that CEST is just too close to GMT.
 
> One thing about this patch, at mft_parse_econtent() there are some unused
> variables left (this, next, now and buf[64]).

Good point. I removed them in my tree.
 
> Thanks again and regards,
> Alexandre Hamada
> 
> 
> On 16/10/2019 17:47, Claudio Jeker wrote:
> > On Wed, Oct 16, 2019 at 07:26:25AM -0300, Alexandre Hamada wrote:
> > > Hi Tech,
> > > > I would like to suggest to use UTC functions on all date/time conversions,
> > > to avoid some clock drift errors.
> > > 
> > > Kind regards,
> > > Alexandre Hamada
> > > 
> > > https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/9.patch
> > > 
> > >  From a463f8cb23375f15b74eff49a06e8934423e3dbf Mon Sep 17 00:00:00 2001
> > > From: dev-gto <43351700+dev-...@users.noreply.github.com>
> > > Date: Wed, 16 Oct 2019 07:22:46 -0300
> > > Subject: [PATCH] Avoid local time conversion
> > > 
> > > ---
> > >   mft.c | 4 ++--
> > >   1 file changed, 2 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/mft.c b/mft.c
> > > index f9176b4..738f3ff 100644
> > > --- a/mft.c
> > > +++ b/mft.c
> > > @@ -75,8 +75,8 @@ gentime2time(struct parse *p, const 
> > > ASN1_GENERALIZEDTIME *tp)
> > >   memset(&tm, 0, sizeof(struct tm));
> > >   if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
> > >   errx(EXIT_FAILURE, "%s: strptime", buf);
> > > - if ((t = mktime(&tm)) == -1)
> > > - errx(EXIT_FAILURE, "%s: mktime", buf);
> > > + if ((t = timegm(&tm)) == -1)
> > > + errx(EXIT_FAILURE, "%s: timegm", buf);
> > >   return t;
> > >   }
> > > 
> > Hi Alexandre,
> > 
> > How about this diff instead. This is inspired by OCSP_check_validity() and
> > uses ASN1_GENERALIZEDTIME_check() and X509_cmp_time() to do the validity
> > check. I think this has a way better chance to produce the expected
> > results. My quick testing seems to indicate that it works but review and
> > testing is very welcome.
> > 
> 

-- 
:wq Claudio



Re: netstart diff for aggr(4) handling

2019-10-18 Thread Claudio Jeker
On Thu, Oct 17, 2019 at 11:24:24PM +0200, Alexander Bluhm wrote:
> On Thu, Oct 17, 2019 at 04:47:27PM -0400, Brad Smith wrote:
> > Shouldn't aggr(4) be handled in the same manner as trunk(4)?
> 
> I guess so, OK bluhm@

+1 OK claudio
 
> > Index: netstart
> > ===
> > RCS file: /home/cvs/src/etc/netstart,v
> > retrieving revision 1.200
> > diff -u -p -u -p -r1.200 netstart
> > --- netstart29 Aug 2018 11:30:48 -  1.200
> > +++ netstart17 Oct 2019 20:45:29 -
> > @@ -300,12 +300,12 @@ vifscreate
> >
> >  # Configure all the non-loopback interfaces which we know about, but
> >  # do not start interfaces which must be delayed. Refer to hostname.if(5)
> > -ifmstart "" "trunk svlan vlan carp pppoe tun tap gif etherip gre egre 
> > mobileip pflow"
> > +ifmstart "" "aggr trunk svlan vlan carp pppoe tun tap gif etherip gre egre 
> > mobileip pflow"
> >
> >  # The trunk interfaces need to come up first in this list.
> >  # The (s)vlan interfaces need to come up after trunk.
> >  # Configure all the carp interfaces which we know about before default 
> > route.
> > -ifmstart "trunk svlan vlan carp pppoe"
> > +ifmstart "aggr trunk svlan vlan carp pppoe"
> >
> >  # Set default routes for IPv4 and IPv6.
> >  defaultroute
> 

-- 
:wq Claudio



bgpctl(8) shutdown communication

2019-10-19 Thread Claudio Jeker
bgpd supports up to 255 byte shutdown communications. So the manpage is
not telling the truth. Also I don't think it is helpful to mention the
limit at all. bgpctl will exit with 'shutdown reason too long' if the text
is too long which is good enough. For best interop people should keep the
shutdown message as simple and short as possible.

-- 
:wq Claudio

Index: bgpctl.8
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
retrieving revision 1.92
diff -u -p -r1.92 bgpctl.8
--- bgpctl.827 Sep 2019 10:34:54 -  1.92
+++ bgpctl.819 Oct 2019 09:01:20 -
@@ -87,9 +87,6 @@ If a
 is provided, the
 .Ar reason
 is sent as Administrative Shutdown Communication to the neighbor.
-The
-.Ar reason
-cannot exceed 128 octets.
 .Ar peer
 may be the neighbor's address, description or the word
 .Cm group
@@ -108,9 +105,6 @@ If a
 is provided, the
 .Ar reason
 is sent as Administrative Shutdown Communication to the neighbor.
-The
-.Ar reason
-cannot exceed 128 octets.
 .Ar peer
 may be the neighbor's address, description or the word
 .Cm group



rpki-client change way TAL are loaded

2019-10-20 Thread Claudio Jeker
Change the way TAL files are loaded into rpki-client. Instead of passing
the filename to the parser process and having that one open the file, do it
in the main process and pass the buffer to the parser. The benefit of this
is that TAL files are not read by the parser and therefore the unveil can
be locked to the base directory early on. In my opinion this is a good
thing.

-- 
:wq Claudio


Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.9
diff -u -p -r1.9 extern.h
--- extern.h16 Oct 2019 17:43:29 -  1.9
+++ extern.h20 Oct 2019 10:17:11 -
@@ -229,7 +229,7 @@ extern int verbose;
 
 voidtal_buffer(char **, size_t *, size_t *, const struct tal *);
 voidtal_free(struct tal *);
-struct tal *tal_parse(const char *);
+struct tal *tal_parse(const char *, char *);
 struct tal *tal_read(int);
 
 voidcert_buffer(char **, size_t *, size_t *, const struct cert *);
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.20
diff -u -p -r1.20 main.c
--- main.c  16 Oct 2019 21:43:41 -  1.20
+++ main.c  20 Oct 2019 11:18:18 -
@@ -22,6 +22,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -462,14 +463,28 @@ queue_add_from_mft_set(int fd, struct en
 static void
 queue_add_tal(int fd, struct entityq *q, const char *file, size_t *eid)
 {
+   static unsigned char buf[4096];
char*nfile;
+   ssize_t  n, i;
+   int  tfd;
+
+   if ((tfd = open(file, O_RDONLY)) == -1)
+   err(EXIT_FAILURE, "open: %s", file);
+   n = read(tfd, buf, sizeof(buf));
+   if (n == -1)
+   err(EXIT_FAILURE, "read: %s", file);
+   if (n == sizeof(buf))
+   errx(EXIT_FAILURE, "read: %s: file too big", file);
+   for (i = 0; i < n; i++)
+   if (!isprint(buf[i]) && !isspace(buf[i]))
+   errx(EXIT_FAILURE, "read: %s: invalid content", file);
+   buf[n] = '\0';
 
if ((nfile = strdup(file)) == NULL)
err(EXIT_FAILURE, "strdup");
 
/* Not in a repository, so directly add to queue. */
-
-   entityq_add(fd, q, nfile, RTYPE_TAL, NULL, NULL, NULL, 0, NULL, eid);
+   entityq_add(fd, q, nfile, RTYPE_TAL, NULL, NULL, NULL, 0, buf, eid);
 }
 
 /*
@@ -1020,7 +1035,6 @@ proc_parser(int fd, int force, int norev
X509_STORE  *store;
X509_STORE_CTX  *ctx;
struct auth *auths = NULL;
-   int  first_tals = 1;
 
ERR_load_crypto_strings();
OpenSSL_add_all_ciphers();
@@ -1102,31 +1116,12 @@ proc_parser(int fd, int force, int norev
entp = TAILQ_FIRST(&q);
assert(entp != NULL);
 
-   /*
-* Extra security.
-* Our TAL files may be anywhere, but the repository
-* resources may only be in BASE_DIR.
-* When we've finished processing TAL files, make sure
-* that we can only see what's under that.
-*/
-
-   if (entp->type != RTYPE_TAL && first_tals) {
-   if (unveil(BASE_DIR, "r") == -1)
-   err(EXIT_FAILURE, "%s: unveil", BASE_DIR);
-   if (unveil(NULL, NULL) == -1)
-   err(EXIT_FAILURE, "unveil");
-   first_tals = 0;
-   } else if (entp->type != RTYPE_TAL) {
-   assert(!first_tals);
-   } else if (entp->type == RTYPE_TAL)
-   assert(first_tals);
-
entity_buffer_resp(&b, &bsz, &bmax, entp);
 
switch (entp->type) {
case RTYPE_TAL:
assert(!entp->has_dgst);
-   if ((tal = tal_parse(entp->uri)) == NULL)
+   if ((tal = tal_parse(entp->uri, entp->descr)) == NULL)
goto out;
tal_buffer(&b, &bsz, &bmax, tal);
tal_free(tal);
@@ -1420,7 +1415,12 @@ main(int argc, char *argv[])
 
if (procpid == 0) {
close(fd[1]);
-   if (pledge("stdio rpath unveil", NULL) == -1)
+   /* Only allow access to BASE_DIR. */
+   if (unveil(BASE_DIR, "r") == -1)
+   err(EXIT_FAILURE, "%s: unveil", BASE_DIR);
+   if (unveil(NULL, NULL) == -1)
+   err(EXIT_FAILURE, "unveil");
+   if (pledge("stdio rpath", NULL) == -1)
err(EXIT_FAILURE, "pledge");
proc_parser(fd[0], force, norev);
/* NOTREACHED */
@@ -1460,13 +1460,7 @@ main(int argc, char *argv[])
 
assert(rsync != proc);
 
-   

Re: rpki-client change way TAL are loaded

2019-10-21 Thread Claudio Jeker
On Sun, Oct 20, 2019 at 12:46:44PM -0600, Theo de Raadt wrote:
> There has been an update to
> 
> https://www.ietf.org/rfcdiff?url2=draft-ietf-sidrops-https-tal-05
> 
> Which permits comments in the tal files.  I proposed this at nanog yvr
> as something which might help the arin tal hangup.
> 
>  queue_add_tal(int fd, struct entityq *q, const char *file, size_t *eid)
>  {
> + static unsigned char buf[4096];
>   char*nfile;
> + ssize_t  n, i;
> + int  tfd;
> +
> + if ((tfd = open(file, O_RDONLY)) == -1)
> + err(EXIT_FAILURE, "open: %s", file);
> + n = read(tfd, buf, sizeof(buf));
> + if (n == -1)
> + err(EXIT_FAILURE, "read: %s", file);
> + if (n == sizeof(buf))
> + errx(EXIT_FAILURE, "read: %s: file too big", file);
> + for (i = 0; i < n; i++)
> 
> A commented file might not fit in your buffer.  I suggest you minimally
> parse the file, discarding the comment lines.  The remaining tal contents
> will be small enough to pass to the other process.

I would have preferred to not parse anything in the parent but skipping
comments makes sense since it keeps the passed buffer small. 

Here is an updated diff.
-- 
:wq Claudio

Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.9
diff -u -p -r1.9 extern.h
--- extern.h16 Oct 2019 17:43:29 -  1.9
+++ extern.h20 Oct 2019 10:17:11 -
@@ -229,7 +229,7 @@ extern int verbose;
 
 voidtal_buffer(char **, size_t *, size_t *, const struct tal *);
 voidtal_free(struct tal *);
-struct tal *tal_parse(const char *);
+struct tal *tal_parse(const char *, char *);
 struct tal *tal_read(int);
 
 voidcert_buffer(char **, size_t *, size_t *, const struct cert *);
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.20
diff -u -p -r1.20 main.c
--- main.c  16 Oct 2019 21:43:41 -  1.20
+++ main.c  21 Oct 2019 12:10:15 -
@@ -22,6 +22,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -462,14 +463,64 @@ queue_add_from_mft_set(int fd, struct en
 static void
 queue_add_tal(int fd, struct entityq *q, const char *file, size_t *eid)
 {
-   char*nfile;
+   char*nfile, *nbuf, *line = NULL, *buf = NULL;
+   FILE*in;
+   ssize_t  n, i;
+   size_t   sz = 0, bsz = 0;
+   int  optcomment = 1;
+
+   if ((in = fopen(file, "r")) == NULL)
+   err(EXIT_FAILURE, "fopen: %s", file);
+
+   while ((n = getline(&line, &sz, in)) != -1) {
+   /* replace CRLF with just LF */
+   if (n > 1 && line[n - 1] == '\n' && line[n - 2] == '\r') {
+   line[n - 2] = '\n';
+   line[n - 1] = '\0';
+   n--;
+   }
+   if (optcomment) {
+   /* if this is comment, just eat the line */
+   if (line[0] == '#')
+   continue;
+   optcomment = 0;
+   /*
+* Empty line is end of section and needs
+* to be eaten as well.
+*/
+   if (line[0] == '\n')
+   continue;
+   }
+
+   /* make sure every line is valid ascii */
+   for (i = 0; i < n; i++)
+   if (!isprint(line[i]) && !isspace(line[i]))
+   errx(EXIT_FAILURE, "getline: %s: "
+   "invalid content", file);
+
+   /* concat line to buf */
+   if ((nbuf = realloc(buf, bsz + n + 1)) == NULL)
+   err(EXIT_FAILURE, NULL);
+   buf = nbuf;
+   bsz += n + 1;
+   strlcat(buf, line, bsz);
+   /* limit the buffer size */
+   if (bsz > 4096)
+   errx(EXIT_FAILURE, "%s: file too big", file);
+   }
+
+   free(line);
+   if (ferror(in))
+   err(EXIT_FAILURE, "getline: %s", file);
+   fclose(in);
 
if ((nfile = strdup(file)) == NULL)
err(EXIT_FAILURE, "strdup");
 
/* Not in a repository, so directly add to queue. */
-
-   entityq_add(fd, q, nfile, RTYPE_TAL, NULL, NULL, NULL, 0, NULL, eid);
+   entityq_add(fd, q, nfile, RTYPE_TAL, NULL, NULL, NULL, 0, buf, eid);
+   /* entityq_add makes a copy of buf */
+   free(buf);
 }
 
 /*
@@ -1020,7 +1071,6 @@ proc_parser(int fd, int force, int norev
X509_STORE  *store;
X509_STORE_CTX  *ctx;
struct auth *auths = NULL;
-   int  first_tals = 1;
 
ER

Re: rpki-client patch submission

2019-10-22 Thread Claudio Jeker
On Wed, Oct 16, 2019 at 10:47:25PM +0200, Claudio Jeker wrote:
> On Wed, Oct 16, 2019 at 07:26:25AM -0300, Alexandre Hamada wrote:
> > Hi Tech,
> > > I would like to suggest to use UTC functions on all date/time conversions,
> > to avoid some clock drift errors.
> > 
> > Kind regards,
> > Alexandre Hamada
> > 
> > https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/9.patch
> > 
> > From a463f8cb23375f15b74eff49a06e8934423e3dbf Mon Sep 17 00:00:00 2001
> > From: dev-gto <43351700+dev-...@users.noreply.github.com>
> > Date: Wed, 16 Oct 2019 07:22:46 -0300
> > Subject: [PATCH] Avoid local time conversion
> > 
> > ---
> >  mft.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/mft.c b/mft.c
> > index f9176b4..738f3ff 100644
> > --- a/mft.c
> > +++ b/mft.c
> > @@ -75,8 +75,8 @@ gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME 
> > *tp)
> > memset(&tm, 0, sizeof(struct tm));
> > if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
> > errx(EXIT_FAILURE, "%s: strptime", buf);
> > -   if ((t = mktime(&tm)) == -1)
> > -   errx(EXIT_FAILURE, "%s: mktime", buf);
> > +   if ((t = timegm(&tm)) == -1)
> > +   errx(EXIT_FAILURE, "%s: timegm", buf);
> > return t;
> >  }
> > 
> 
> Hi Alexandre,
> 
> How about this diff instead. This is inspired by OCSP_check_validity() and
> uses ASN1_GENERALIZEDTIME_check() and X509_cmp_time() to do the validity
> check. I think this has a way better chance to produce the expected
> results. My quick testing seems to indicate that it works but review and
> testing is very welcome.

I will commit this diff later today unless somebody speaks up.
 
> -- 
> :wq Claudio
> 
> Index: mft.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/mft.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 mft.c
> --- mft.c 13 Aug 2019 13:27:26 -  1.7
> +++ mft.c 16 Oct 2019 20:39:36 -
> @@ -35,49 +35,57 @@ structparse {
>   struct mft  *res; /* result object */
>  };
>  
> -/*
> - * Convert from the ASN.1 generalised time to a time_t.
> - * Return the time.
> - * This is a stupid requirement due to using ASN1_GENERALIZEDTIME
> - * instead of the native ASN1_TIME functions for comparing time.
> - */
> -static time_t
> -gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME *tp)
> +static const char *
> +gentime2str(const ASN1_GENERALIZEDTIME *time)
>  {
> + static char buf[64];
>   BIO *mem;
> - char*pp;
> - char buf[64];
> - long len;
> - struct tmtm;
> - time_t   t;
>  
>   if ((mem = BIO_new(BIO_s_mem())) == NULL)
>   cryptoerrx("BIO_new");
> - if (!ASN1_GENERALIZEDTIME_print(mem, tp))
> + if (!ASN1_GENERALIZEDTIME_print(mem, time))
>   cryptoerrx("ASN1_GENERALIZEDTIME_print");
> + if (BIO_gets(mem, buf, sizeof(buf)) < 0)
> + cryptoerrx("BIO_gets");
>  
> - /*
> -  * The manpage says nothing about being NUL terminated and
> -  * strptime(3) needs a string.
> -  * So convert into a static buffer of decent size and NUL
> -  * terminate in that way.
> -  */
> -
> - len = BIO_get_mem_data(mem, &pp);
> - if (len < 0 || (size_t)len > sizeof(buf) - 1)
> - errx(EXIT_FAILURE, "BIO_get_mem_data");
> -
> - memcpy(buf, pp, len);
> - buf[len] = '\0';
>   BIO_free(mem);
> + return buf;
> +}
> +
> +/*
> + * Validate and verify the time validity of the mft.
> + * Returns 1 if all is good, 0 if mft is stale, any other case -1.
> + */
> +static time_t
> +check_validity(const ASN1_GENERALIZEDTIME *from,
> +const ASN1_GENERALIZEDTIME *until, const char *fn, int force)
> +{
> + time_t now = time(NULL);
>  
> - memset(&tm, 0, sizeof(struct tm));
> - if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
> - errx(EXIT_FAILURE, "%s: strptime", buf);
> - if ((t = mktime(&tm)) == -1)
> - errx(EXIT_FAILURE, "%s: mktime", buf);
> + if (!ASN1_GENERALIZEDTIME_check(from) ||
> + !ASN1_GENERALIZEDTIME_check(until)) {
> + warnx("%s: embedded time format invalid", fn);
> + return -1;
> + }
> + /* check that un

Re: rpki-client patch submission

2019-10-23 Thread Claudio Jeker
On Wed, Oct 23, 2019 at 08:47:25AM +0200, Theo Buehler wrote:
> On Wed, Oct 23, 2019 at 08:04:26AM +0200, Claudio Jeker wrote:
> > On Wed, Oct 16, 2019 at 10:47:25PM +0200, Claudio Jeker wrote:
> > > On Wed, Oct 16, 2019 at 07:26:25AM -0300, Alexandre Hamada wrote:
> > > > Hi Tech,
> > > > I would like to suggest to use UTC functions on all date/time 
> > > > conversions,
> > > > to avoid some clock drift errors.
> > > > 
> > > > Kind regards,
> > > > Alexandre Hamada
> > > > 
> > > > https://patch-diff.githubusercontent.com/raw/kristapsdz/rpki-client/pull/9.patch
> > > > 
> > > > From a463f8cb23375f15b74eff49a06e8934423e3dbf Mon Sep 17 00:00:00 2001
> > > > From: dev-gto <43351700+dev-...@users.noreply.github.com>
> > > > Date: Wed, 16 Oct 2019 07:22:46 -0300
> > > > Subject: [PATCH] Avoid local time conversion
> > > > 
> > > > ---
> > > >  mft.c | 4 ++--
> > > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/mft.c b/mft.c
> > > > index f9176b4..738f3ff 100644
> > > > --- a/mft.c
> > > > +++ b/mft.c
> > > > @@ -75,8 +75,8 @@ gentime2time(struct parse *p, const 
> > > > ASN1_GENERALIZEDTIME *tp)
> > > > memset(&tm, 0, sizeof(struct tm));
> > > > if (strptime(buf, "%b %d %T %Y %Z", &tm) == NULL)
> > > > errx(EXIT_FAILURE, "%s: strptime", buf);
> > > > -   if ((t = mktime(&tm)) == -1)
> > > > -   errx(EXIT_FAILURE, "%s: mktime", buf);
> > > > +   if ((t = timegm(&tm)) == -1)
> > > > +   errx(EXIT_FAILURE, "%s: timegm", buf);
> > > > return t;
> > > >  }
> > > > 
> > > 
> > > Hi Alexandre,
> > > 
> > > How about this diff instead. This is inspired by OCSP_check_validity() and
> > > uses ASN1_GENERALIZEDTIME_check() and X509_cmp_time() to do the validity
> > > check. I think this has a way better chance to produce the expected
> > > results. My quick testing seems to indicate that it works but review and
> > > testing is very welcome.
> > 
> > I will commit this diff later today unless somebody speaks up.
> 
> I am a bit worried about the ASN1_STRING_cmp(until, from) line.
> According to RFC 6486 4.2.1, the times are to be encoded as for
> the corresponding fields in RFC 5280, which normalizes UTCTime (for
> dates until 2049) to the form YYMMDDHHMMSSZ and GeneralizedTime to
> YYYYMMDDHHMMSSZ (for dates from 2050 on), so this comparison is iffy.

Ha, I stole that from openssl. I think once rpki-client can use our
libcrypto then this code can be rewritten with ASN1_time_parse() and
ASN1_time_tm_cmp(). I hope this happens before 2049 :)
 
> If you think that it's not the right moment to worry about this, I'm ok
> with your diff: apart from that, it looks good.

Thanks for checking.

> >  
> > > -- 
> > > :wq Claudio
> > > 
> > > Index: mft.c
> > > ===
> > > RCS file: /cvs/src/usr.sbin/rpki-client/mft.c,v
> > > retrieving revision 1.7
> > > diff -u -p -r1.7 mft.c
> > > --- mft.c 13 Aug 2019 13:27:26 -  1.7
> > > +++ mft.c 16 Oct 2019 20:39:36 -
> > > @@ -35,49 +35,57 @@ structparse {
> > >   struct mft  *res; /* result object */
> > >  };
> > >  
> > > -/*
> > > - * Convert from the ASN.1 generalised time to a time_t.
> > > - * Return the time.
> > > - * This is a stupid requirement due to using ASN1_GENERALIZEDTIME
> > > - * instead of the native ASN1_TIME functions for comparing time.
> > > - */
> > > -static time_t
> > > -gentime2time(struct parse *p, const ASN1_GENERALIZEDTIME *tp)
> > > +static const char *
> > > +gentime2str(const ASN1_GENERALIZEDTIME *time)
> > >  {
> > > + static char buf[64];
> > >   BIO *mem;
> > > - char*pp;
> > > - char buf[64];
> > > - long len;
> > > - struct tmtm;
> > > - time_t   t;
> > >  
> > >   if ((mem = BIO_new(BIO_s_mem())) == NULL)
> > >   cryptoerrx("BIO_new");
> > > - if (!ASN1_GENERALIZEDTIME_print(mem, tp))
> > > + if (!ASN1_GENERALIZEDTIME_print(mem, time

bgpd fix graceful restart and route refresh

2019-10-29 Thread Claudio Jeker
While investigating the cause of bgpd crashing for some people I noticed
that both graceful restart and route refresh were broken.
This diff fixes this by a) marking the adj-rib-out as stale when a peer
goes stale during graceful restart and b) changing peer_dump to force all
entries in the Adj-RIB-Out into pending UPDATES. Stale entries will be
removed in the latter case.

Additionally in prefix_adjout_withdraw() the check if an element is on the
update or withdraw tree was not correct which could lead to a call to
RB_REMOVE for an element that is not part of the RB tree.

Please test and review
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.489
diff -u -p -r1.489 rde.c
--- rde.c   27 Sep 2019 14:50:39 -  1.489
+++ rde.c   29 Oct 2019 06:52:10 -
@@ -2828,15 +2828,44 @@ rde_up_flush_upcall(struct prefix *p, vo
 }
 
 static void
-rde_up_dump_done(void *ptr, u_int8_t aid)
+rde_up_adjout_force_upcall(struct prefix *p, void *ptr)
+{
+   if (p->flags & PREFIX_FLAG_STALE) {
+   /* remove stale entries */
+   prefix_adjout_destroy(p);
+   } else if (p->flags & PREFIX_FLAG_DEAD) {
+   /* ignore dead prefixes, they will go away soon */
+   } else if ((p->flags & PREFIX_FLAG_MASK) == 0) {
+   /* put entries on the update queue if not allready on a queue */
+   p->flags |= PREFIX_FLAG_UPDATE;
+   if (RB_INSERT(prefix_tree, &prefix_peer(p)->updates[p->pt->aid],
+   p) != NULL)
+   fatalx("%s: RB tree invariant violated", __func__);
+   }
+}
+
+static void
+rde_up_adjout_force_done(void *ptr, u_int8_t aid)
 {
struct rde_peer *peer = ptr;
 
+   /* Adj-RIB-Out ready, unthrottle peer and inject EOR */
peer->throttled = 0;
if (peer->capa.grestart.restart)
prefix_add_eor(peer, aid);
 }
 
+static void
+rde_up_dump_done(void *ptr, u_int8_t aid)
+{
+   struct rde_peer *peer = ptr;
+
+   /* force out all updates of Adj-RIB-Out for this peer */
+   if (prefix_dump_new(peer, aid, 0, peer, rde_up_adjout_force_upcall,
+   rde_up_adjout_force_done, NULL) == -1)
+   fatal("%s: prefix_dump_new", __func__);
+}
+
 u_char queue_buf[4096];
 
 int
@@ -3387,16 +3416,17 @@ rde_softreconfig_in(struct rib_entry *re
 static void
 rde_softreconfig_out(struct rib_entry *re, void *bula)
 {
-   struct prefix   *new = re->active;
+   struct prefix   *p = re->active;
struct rde_peer *peer;
 
-   if (new == NULL)
+   if (p == NULL)
+   /* no valid path for prefix */
return;
 
LIST_FOREACH(peer, &peerlist, peer_l) {
if (peer->loc_rib_id == re->rib_id && peer->reconf_out)
/* Regenerate all updates. */
-   up_generate_updates(out_rules, peer, new, new);
+   up_generate_updates(out_rules, peer, p, p);
}
 }
 
@@ -3668,6 +3698,22 @@ peer_adjout_clear_upcall(struct prefix *
prefix_adjout_destroy(p);
 }
 
+static void
+peer_adjout_stale_upcall(struct prefix *p, void *arg)
+{
+   if (p->flags & PREFIX_FLAG_DEAD) {
+   return;
+   } else if (p->flags & PREFIX_FLAG_WITHDRAW) {
+   /* no need to keep stale withdraws, they miss all attributes */
+   prefix_adjout_destroy(p);
+   return;
+   } else if (p->flags & PREFIX_FLAG_UPDATE) {
+   RB_REMOVE(prefix_tree, &prefix_peer(p)->updates[p->pt->aid], p);
+   p->flags &= ~PREFIX_FLAG_UPDATE;
+   }
+   p->flags |= PREFIX_FLAG_STALE;
+}
+
 void
 peer_up(u_int32_t id, struct session_up *sup)
 {
@@ -3680,8 +3726,7 @@ peer_up(u_int32_t id, struct session_up 
return;
}
 
-   if (peer->state != PEER_DOWN && peer->state != PEER_NONE &&
-   peer->state != PEER_UP) {
+   if (peer->state == PEER_ERR) {
/*
 * There is a race condition when doing PEER_ERR -> PEER_DOWN.
 * So just do a full reset of the peer here.
@@ -3831,12 +3876,18 @@ peer_stale(u_int32_t id, u_int8_t aid)
/* flush the now even staler routes out */
if (peer->staletime[aid])
peer_flush(peer, aid, peer->staletime[aid]);
+
peer->staletime[aid] = now = time(NULL);
+   peer->state = PEER_DOWN;
+
+   /* mark Adj-RIB-Out stale for this peer */
+   if (prefix_dump_new(peer, AID_UNSPEC, 0, NULL,
+   peer_adjout_stale_upcall, NULL, NULL) == -1)
+   fatal("%s: prefix_dump_new", __func__);
 
/* make sure new prefixes start on a higher timestamp */
-   do {
+   while (now >= time(NULL))
sleep(1);
-   } while (now >= time(NULL));
 }
 
 void
@@ -3856,13 +3907,13 @@ peer_dump

Re: rpki-client: check certificate policies

2022-02-04 Thread Claudio Jeker
On Fri, Feb 04, 2022 at 10:41:03AM +0100, Theo Buehler wrote:
> It was pointed out to Claudio that rpki-client does not enforce
> certificate policies.
> 
> The diff below does that. It has two parts.
> 
> In cert.c we check that the certificate policy extension matches the
> specification in RFC 6487, section 4.8.9, as amended by RFC 7318
> section 2. That's maybe a bit lengthy but completely straightforward.
> If you're curious what's in a CPS URI, it might be this:
> https://www.arin.net/resources/manage/rpki/cps.pdf
> 
> The second bit is in parser.c and makes sure that the verifier builds a
> policy tree enforcing that the certification path uses a policy OID of
> id-cp-ipAddr-asNumber (see RFC 6484). This is a bit trickier since it
> involves X509_policy_check() under the hood. In short, we set up a
> policy containing that OID in the verify parameters and set verifier
> flags that ensure that the user set policy is enforced.
> 
> If you will, the second part improves the validation. The verifier
> doesn't have a mechanism to enforce things like there's exactly one
> policy identifier, etc. That's what's done in cert.c
> 
> This works for me. Please test.

Looks good to me. OK claudio@
Few questions below.
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 cert.c
> --- cert.c20 Jan 2022 16:36:19 -  1.53
> +++ cert.c4 Feb 2022 09:09:38 -
> @@ -29,6 +29,7 @@
>  
>  #include 
>  #include 
> +#include 
>  
>  #include "extern.h"
>  
> @@ -969,6 +970,77 @@ out:
>   return rc;
>  }
>  
> +static int
> +certificate_policies(struct parse *p, X509_EXTENSION *ext)
> +{
> + STACK_OF(POLICYINFO)*policies = NULL;
> + POLICYINFO  *policy;
> + STACK_OF(POLICYQUALINFO)*qualifiers;
> + POLICYQUALINFO  *qualifier;
> + int  nid;
> + int  rc = 0;
> +
> + if (!X509_EXTENSION_get_critical(ext)) {
> + cryptowarnx("%s: RFC 6487 section 4.8.9: certificatePolicies: "
> + "extension not critical", p->fn);
> + goto out;
> + }
> +
> + if ((policies = X509V3_EXT_d2i(ext)) == NULL) {
> + cryptowarnx("%s: RFC 6487 section 4.8.9: certificatePolicies: "
> + "failed extension parse", p->fn);
> + goto out;
> + }
> +
> + if (sk_POLICYINFO_num(policies) != 1) {
> + warnx("%s: RFC 6487 section 4.8.9: certificatePolicies: "
> + "want 1 policy, got %d", p->fn,
> + sk_POLICYINFO_num(policies));
> + goto out;
> + }
> +
> + policy = sk_POLICYINFO_value(policies, 0);
> + assert(policy != NULL && policy->policyid != NULL);

Should this be an assert() or a proper error check?
I guess sk_POLICYINFO_value() can not really fail because of the check
above. What about policy->policyid? I guess X509V3_EXT_d2i() sets this
correctly or it failed above.

> + if ((nid = OBJ_obj2nid(policy->policyid)) != NID_ipAddr_asNumber) {
> + warnx("%s: RFC 6487 section 4.8.9: certificatePolicies: "
> + "unexpected policy identifier %d (%s)", p->fn, nid,
> + OBJ_nid2sn(nid));
> + goto out;
> + }
> +
> + /* Policy qualifiers are optional. If they're absent, we're done. */
> + if ((qualifiers = policy->qualifiers) == NULL) {
> + rc = 1;
> + goto out;
> + }
> +
> + if (sk_POLICYQUALINFO_num(qualifiers) != 1) {
> + warnx("%s: RFC 7318 section 2: certificatePolicies: "
> + "want 1 policy qualifier, got %d", p->fn,
> + sk_POLICYQUALINFO_num(qualifiers));
> + goto out;
> + }
> +
> + qualifier = sk_POLICYQUALINFO_value(qualifiers, 0);
> + assert(qualifier != NULL && qualifier->pqualid != NULL);

Same as above.

> + if ((nid = OBJ_obj2nid(qualifier->pqualid)) != NID_id_qt_cps) {
> + warnx("%s: RFC 7318 section 2: certificatePolicies: "
> + "want CPS, got %d (%s)", p->fn, nid, OBJ_nid2sn(nid));
> + goto out;
> + }
> +
> + if (verbose > 1)
> + warnx("%s: CPS %.*s", p->fn, qualifier->d.cpsuri->length,
> + qualifier->d.cpsuri->data);
> +
> + rc = 1;
> + out:
> + sk_POLICYINFO_pop_free(policies, POLICYINFO_free);
> + return rc;
> +}
> +
>  /*
>   * Parse and partially validate an RPKI X509 certificate (either a trust
>   * anchor or a certificate) as defined in RFC 6487.
> @@ -1024,6 +1096,9 @@ cert_parse_inner(const char *fn, const u
>   case NID_sinfo_access:
>   sia_present = 1;
>   c = sbgp_sia(&p, ext);
> + break;
> + case NID_certificate_policies:
> + c = certifi

openbgpd vs illumos

2022-02-04 Thread Claudio Jeker
On illumos sun is defined by some header so better not use sun as a
variable name. Rename variable to sa_un to reduce hacks in -portable.

-- 
:wq Claudio

Index: bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.273
diff -u -p -r1.273 bgpctl.c
--- bgpctl/bgpctl.c 9 Aug 2021 08:24:36 -   1.273
+++ bgpctl/bgpctl.c 4 Feb 2022 11:10:31 -
@@ -78,7 +78,7 @@ usage(void)
 int
 main(int argc, char *argv[])
 {
-   struct sockaddr_un   sun;
+   struct sockaddr_un   sa_un;
int  fd, n, done, ch, verbose = 0;
struct imsg  imsg;
struct network_confignet;
@@ -160,12 +160,12 @@ main(int argc, char *argv[])
if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
err(1, "control_init: socket");
 
-   bzero(&sun, sizeof(sun));
-   sun.sun_family = AF_UNIX;
-   if (strlcpy(sun.sun_path, sockname, sizeof(sun.sun_path)) >=
-   sizeof(sun.sun_path))
+   bzero(&sa_un, sizeof(sa_un));
+   sa_un.sun_family = AF_UNIX;
+   if (strlcpy(sa_un.sun_path, sockname, sizeof(sa_un.sun_path)) >=
+   sizeof(sa_un.sun_path))
errx(1, "socket name too long");
-   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1)
+   if (connect(fd, (struct sockaddr *)&sa_un, sizeof(sa_un)) == -1)
err(1, "connect: %s", sockname);
 
if (pledge("stdio", NULL) == -1)
Index: bgpd/control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.105
diff -u -p -r1.105 control.c
--- bgpd/control.c  27 Apr 2021 15:34:18 -  1.105
+++ bgpd/control.c  4 Feb 2022 11:07:25 -
@@ -42,19 +42,19 @@ ssize_t  imsg_read_nofd(struct imsgbuf 
 int
 control_check(char *path)
 {
-   struct sockaddr_un   sun;
+   struct sockaddr_un   sa_un;
int  fd;
 
-   bzero(&sun, sizeof(sun));
-   sun.sun_family = AF_UNIX;
-   strlcpy(sun.sun_path, path, sizeof(sun.sun_path));
+   bzero(&sa_un, sizeof(sa_un));
+   sa_un.sun_family = AF_UNIX;
+   strlcpy(sa_un.sun_path, path, sizeof(sa_un.sun_path));
 
if ((fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0)) == -1) {
log_warn("%s: socket", __func__);
return (-1);
}
 
-   if (connect(fd, (struct sockaddr *)&sun, sizeof(sun)) == 0) {
+   if (connect(fd, (struct sockaddr *)&sa_un, sizeof(sa_un)) == 0) {
log_warnx("control socket %s already in use", path);
close(fd);
return (-1);
@@ -68,7 +68,7 @@ control_check(char *path)
 int
 control_init(int restricted, char *path)
 {
-   struct sockaddr_un   sun;
+   struct sockaddr_un   sa_un;
int  fd;
mode_t   old_umask, mode;
 
@@ -78,10 +78,10 @@ control_init(int restricted, char *path)
return (-1);
}
 
-   bzero(&sun, sizeof(sun));
-   sun.sun_family = AF_UNIX;
-   if (strlcpy(sun.sun_path, path, sizeof(sun.sun_path)) >=
-   sizeof(sun.sun_path)) {
+   bzero(&sa_un, sizeof(sa_un));
+   sa_un.sun_family = AF_UNIX;
+   if (strlcpy(sa_un.sun_path, path, sizeof(sa_un.sun_path)) >=
+   sizeof(sa_un.sun_path)) {
log_warn("control_init: socket name too long");
close(fd);
return (-1);
@@ -102,7 +102,7 @@ control_init(int restricted, char *path)
mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP;
}
 
-   if (bind(fd, (struct sockaddr *)&sun, sizeof(sun)) == -1) {
+   if (bind(fd, (struct sockaddr *)&sa_un, sizeof(sa_un)) == -1) {
log_warn("control_init: bind: %s", path);
close(fd);
umask(old_umask);
@@ -159,12 +159,12 @@ control_accept(int listenfd, int restric
 {
int  connfd;
socklen_tlen;
-   struct sockaddr_un   sun;
+   struct sockaddr_un   sa_un;
struct ctl_conn *ctl_conn;
 
-   len = sizeof(sun);
+   len = sizeof(sa_un);
if ((connfd = accept4(listenfd,
-   (struct sockaddr *)&sun, &len,
+   (struct sockaddr *)&sa_un, &len,
SOCK_NONBLOCK | SOCK_CLOEXEC)) == -1) {
if (errno == ENFILE || errno == EMFILE) {
pauseaccept = getmonotime();



Re: rpki-client: check certificate policies

2022-02-04 Thread Claudio Jeker
On Fri, Feb 04, 2022 at 03:56:18PM +0100, Theo Buehler wrote:
> On Fri, Feb 04, 2022 at 12:03:41PM +0100, Claudio Jeker wrote:
> > On Fri, Feb 04, 2022 at 10:41:03AM +0100, Theo Buehler wrote:
> > > It was pointed out to Claudio that rpki-client does not enforce
> > > certificate policies.
> > > 
> > > The diff below does that. It has two parts.
> > > 
> > > In cert.c we check that the certificate policy extension matches the
> > > specification in RFC 6487, section 4.8.9, as amended by RFC 7318
> > > section 2. That's maybe a bit lengthy but completely straightforward.
> > > If you're curious what's in a CPS URI, it might be this:
> > > https://www.arin.net/resources/manage/rpki/cps.pdf
> > > 
> > > The second bit is in parser.c and makes sure that the verifier builds a
> > > policy tree enforcing that the certification path uses a policy OID of
> > > id-cp-ipAddr-asNumber (see RFC 6484). This is a bit trickier since it
> > > involves X509_policy_check() under the hood. In short, we set up a
> > > policy containing that OID in the verify parameters and set verifier
> > > flags that ensure that the user set policy is enforced.
> > > 
> > > If you will, the second part improves the validation. The verifier
> > > doesn't have a mechanism to enforce things like there's exactly one
> > > policy identifier, etc. That's what's done in cert.c
> > > 
> > > This works for me. Please test.
> > 
> > Looks good to me. OK claudio@
> 
> Unfortunately, NID_ipAddr_asNumber is only available in LibreSSL 3.3 and
> later and didn't make it into OpenSSL 1.1 so I had to do some things a
> bit differently. I added a certpol_oid to x509_init_oid() and use it
> instead of NID_ipAddr_asNumber(). Since we have this global already,
> it's easier to use X509_VERIFY_PARAM_add0_policy(). It also forces one
> check to use OBJ_cmp() instead of OBJ_obj2nid().

Sounds good to me. A comment below to that.
 
> > > + policy = sk_POLICYINFO_value(policies, 0);
> > > + assert(policy != NULL && policy->policyid != NULL);
> > 
> > Should this be an assert() or a proper error check?
> > I guess sk_POLICYINFO_value() can not really fail because of the check
> > > above. What about policy->policyid? I guess X509V3_EXT_d2i() sets this
> > correctly or it failed above.
> 
> Yes. Both != NULL are guaranteed. You're right about sk_value(). Also
> your guess is correct: policy->policyid != NULL is guaranteed by the
> templated ASN.1:
> 
> In POLICYINFO_seq_tt (lib/libcrypto/x509/x509_cpols.c:147), there is no
> ASN1_TFLG_OPTIONAL flag to indicate that policy->policyid can be NULL
> (whereas policy->qualifiers is optional). Therefore if policy != NULL
> and since X509V3_EXT_d2i didn't fail, it must have deserialized
> correctly, so policy->policyid != NULL.
> 
> I left the assert as it is for now, but I can change it into an error
> check or drop it if you prefer.

I think the assert() is OK. I just want to make sure we don't abuse
assert() for proper error checking. There were some of those in the
codebase and in the end less assert() calls are preferred.
 
> > > + qualifier = sk_POLICYQUALINFO_value(qualifiers, 0);
> > > + assert(qualifier != NULL && qualifier->pqualid != NULL);
> > 
> > Same as above.
> 
> The answer is the same again, this time arguing with the flags in
> POLICYQUALINFO_seq_tt.
> 
> > > + flags |= X509_V_FLAG_EXPLICIT_POLICY;
> > > + flags |= X509_V_FLAG_INHIBIT_MAP;
> > 
> > I do not really understand the meaning of X509_V_FLAG_INHIBIT_MAP. Neither
> > the manpage nor the referenced RFC really help to explain what is
> > inhibited and if we really want that or not. It seems to be the more
> > strict option so I think it is the right thing to do but wow this is
> > complicated.
> 
> It is super complicated indeed. Policy mappings are explained here:
> https://datatracker.ietf.org/doc/html/rfc5280#section-4.2.1.5
> It's another extension that allows translation between policy OIDs.
> I'm pretty sure we don't want to allow this.

Yes, I came to a similar conclusion after reading too much of rfc5280 for
my taste. Maybe something to reconsider once rfc8360 is needed (I still
hope it will just die and instead the original policy will be updated).
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 cert.c
> --- cert.c20 Jan 2022 16:36:

Re: convert bgpd to stdint.h types

2022-02-04 Thread Claudio Jeker
On Fri, Feb 04, 2022 at 07:20:21PM +0100, Theo Buehler wrote:
> On Fri, Feb 04, 2022 at 03:59:34PM +0100, Claudio Jeker wrote:
> > This is something I wanted to do for a while. Switch from u_intXY_t to
> > uintXY_t from stdint.h. The diff is mostly mechanical and was done with
> > sed -i 's/u_intX_t/uintX_t/g' but uint8_t changes the tab spacing and so
> > I had a look over the code and reindented where it made sense.
> > Using stdint.h types will mostly help portability.
> 
> ok (the only object change on amd64 is in util.c - not sure why).
> 
> I was wondering if u_char shouldn't be replaced as well.

There are u_char, u_short and u_int in bgpd. I have not fully made up my
mind about those. There will be a follow up diff for those.

-- 
:wq Claudio



Re: wskbd_set_mixervolume

2022-02-05 Thread Claudio Jeker
On Sat, Feb 05, 2022 at 12:28:08PM +0100, Mark Kettenis wrote:
> > Date: Sat, 5 Feb 2022 09:29:42 +0100
> > From: Anton Lindqvist 
> > 
> > Hi,
> > I recently got a USB headset with physical volume buttons, handled by
> > ucc(4). However, after enabling the device in sndiod the volume buttons
> > do not cause the volume to change. Turns out wskbd_set_mixervolume()
> > is only propagating volume changes to the first attached audio device. Is
> > there any good reason not to consider all attached audio devices?
> 
> I think this is tricky.  The mixer values of different audio devices
> may start out differently and may have different granularity and
> probably operate on a different scale.  This may lead to situations
> where as you turn the volume up and down, the relative output volume
> between devices changes considerably.  I also think that your
> implementation will unmute all audio devices as soon as you touch the
> volume control buttons, which is probably not desirable.
> 
> Thinking about other ways to do this, we could:
> 
> - Add a knob that allows the user to control which audio device is
>   controlled by the volume control buttons.  The choice could include
>   "none" and "all" as well as the individual devices.
> 
> - Add infrastructure to bind specific keyboards to specific audio
>   devices, a bit like how we support binding specific wskbd devices to
>   specific wsdisplay devices.
> 
> The first suggestion is probably relatively easy to achieve.  The
> implementation of the latter would definitely need more thought and
> discussion.
> 
> The "none" choice above would (partly) solve another issue where
> userland applications see the key presses and act upon them even
> though the kernel already did the volume adjustment.

There is a 3rd option of passing the information to sndiod and let it do
the volume scaling.
 
> Cheers,
> 
> Mark
> 
> > The diff below gives me the desired behavior by propagating volume
> > changes to all attached audio devices.
> > 
> > diff --git sys/dev/audio.c sys/dev/audio.c
> > index ec52ee6ef01..ca19557d39e 100644
> > --- sys/dev/audio.c
> > +++ sys/dev/audio.c
> > @@ -2452,10 +2452,6 @@ wskbd_mixer_init(struct audio_softc *sc)
> > };
> > int i;
> >  
> > -   if (sc->dev.dv_unit != 0) {
> > -   DPRINTF("%s: not configuring wskbd keys\n", DEVNAME(sc));
> > -   return;
> > -   }
> > for (i = 0; i < sizeof(spkr_names) / sizeof(spkr_names[0]); i++) {
> > if (wskbd_initvol(sc, &sc->spkr,
> > spkr_names[i].cn, spkr_names[i].dn))
> > @@ -2569,19 +2565,26 @@ wskbd_set_mixermute(long mute, long out)
> >  int
> >  wskbd_set_mixervolume(long dir, long out)
> >  {
> > -   struct audio_softc *sc;
> > -   struct wskbd_vol *vol;
> > +   int error = ENODEV;
> > +   int minor;
> >  
> > -   sc = (struct audio_softc *)device_lookup(&audio_cd, 0);
> > -   if (sc == NULL)
> > -   return ENODEV;
> > -   vol = out ? &sc->spkr : &sc->mic;
> > -   if (dir == 0)
> > -   vol->mute_pending ^= WSKBD_MUTE_TOGGLE;
> > -   else
> > -   vol->val_pending += dir;
> > -   if (!task_add(systq, &sc->wskbd_task))
> > -   device_unref(&sc->dev);
> > -   return 0;
> > +   for (minor = 0; minor < audio_cd.cd_ndevs; minor++) {
> > +   struct audio_softc *sc;
> > +   struct wskbd_vol *vol;
> > +
> > +   sc = (struct audio_softc *)device_lookup(&audio_cd, minor);
> > +   if (sc == NULL)
> > +   continue;
> > +   vol = out ? &sc->spkr : &sc->mic;
> > +   if (dir == 0)
> > +   vol->mute_pending ^= WSKBD_MUTE_TOGGLE;
> > +   else
> > +   vol->val_pending += dir;
> > +   if (!task_add(systq, &sc->wskbd_task))
> > +   device_unref(&sc->dev);
> > +   error = 0;
> > +   }
> > +
> > +   return error;
> >  }
> >  #endif /* NWSKBD > 0 */
> > 
> > 
> 

-- 
:wq Claudio



Re: rpki-client: check crl validity times

2022-02-09 Thread Claudio Jeker
On Wed, Feb 09, 2022 at 02:59:41PM +0100, Theo Buehler wrote:
> We should not use CRLs if now isn't between thisUpdate and nextUpdate.
> This also ensures that thisUpdate <= nextUpdate. While the verifier will
> catch all this, doing this early will often remove one of the two
> possible choices of a CRL to use for a MFT since these are typically
> short-lived. While there, let's simplify the exit of crl_parse().
> 
> I was pondering whether we should mark such CRLs stale and add them to
> the statistics as we do for MFTs, but I think it's not super
> interesting.

I'm not fully convinced by this. Mainly not returning a CRL will alter the
error reported by X509_verify_cert() and make it more confusing
(especially since the warnings in crl_parse are only printed if verbose > 1).

I would not mind to do this check in parse_load_crl_from_mft().
Another thing we should consider is that the CRL used to validate the MFT
should also be the one used to validate the rest. This is currently not
enforced.

Will need to think about this more.
 
> Index: crl.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/crl.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 crl.c
> --- crl.c 8 Feb 2022 14:53:03 -   1.13
> +++ crl.c 9 Feb 2022 06:23:30 -
> @@ -34,7 +34,7 @@ crl_parse(const char *fn, const unsigned
>   struct crl  *crl;
>   const ASN1_TIME *at;
>   struct tmissued_tm, expires_tm;
> - int  rc = 0;
> + time_t   now;
>  
>   /* just fail for empty buffers, the warning was printed elsewhere */
>   if (der == NULL)
> @@ -66,7 +66,6 @@ crl_parse(const char *fn, const unsigned
>   if ((crl->issued = mktime(&issued_tm)) == -1)
>   errx(1, "%s: mktime failed", fn);
>  
> - /* extract expire time for later use */
>   at = X509_CRL_get0_nextUpdate(crl->x509_crl);
>   if (at == NULL) {
>   warnx("%s: X509_CRL_get0_nextUpdate failed", fn);
> @@ -80,13 +79,25 @@ crl_parse(const char *fn, const unsigned
>   if ((crl->expires = mktime(&expires_tm)) == -1)
>   errx(1, "%s: mktime failed", fn);
>  
> - rc = 1;
> - out:
> - if (rc == 0) {
> - crl_free(crl);
> - crl = NULL;
> + now = time(NULL);
> + if (now < crl->issued) {
> + if (verbose > 1)
> + warnx("%s: crl not yet valid %s", fn,
> + time2str(crl->issued));
> + goto out;
> + }
> + if (now > crl->expires) {
> + if (verbose > 1)
> + warnx("%s: crl expired on %s", fn,
> + time2str(crl->expires));
> + goto out;
>   }
> +
>   return crl;
> +
> + out:
> + crl_free(crl);
> + return NULL;
>  }
>  
>  static inline int
> Index: extern.h
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> retrieving revision 1.118
> diff -u -p -r1.118 extern.h
> --- extern.h  8 Feb 2022 14:53:03 -   1.118
> +++ extern.h  9 Feb 2022 06:21:49 -
> @@ -502,6 +502,7 @@ void   entity_free(struct entity *);
>  void  entity_read_req(struct ibuf *, struct entity *);
>  void  entityq_flush(struct entityq *, struct repo *);
>  void  proc_parser(int) __attribute__((noreturn));
> +char *time2str(time_t);
>  
>  /* Rsync-specific. */
>  
> Index: parser.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 parser.c
> --- parser.c  8 Feb 2022 14:53:03 -   1.63
> +++ parser.c  9 Feb 2022 06:19:40 -
> @@ -94,7 +94,7 @@ repo_add(unsigned int id, char *path, ch
>   errx(1, "repository already added: id %d, %s", id, path);
>  }
>  
> -static char *
> +char *
>  time2str(time_t t)
>  {
>   static char buf[64];
> 

-- 
:wq Claudio



Re: rpki-client: plug leak in http_parse_header()

2022-02-10 Thread Claudio Jeker
On Thu, Feb 10, 2022 at 08:44:08AM +0100, Theo Buehler wrote:
> On Thu, Feb 10, 2022 at 07:51:45AM +0100, Theo Buehler wrote:
> > At this point conn->last_modified may or may not be allocated.
> > If it is, overwriting it will leak 30 bytes.
> 
> rrdp_input_handler() has a leak of the same kind.
> 
> Index: http.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/http.c,v
> retrieving revision 1.52
> diff -u -p -r1.52 http.c
> --- http.c23 Jan 2022 12:09:24 -  1.52
> +++ http.c10 Feb 2022 02:09:38 -
> @@ -1231,6 +1231,7 @@ http_parse_header(struct http_connection
>   } else if (strncasecmp(cp, LAST_MODIFIED,
>   sizeof(LAST_MODIFIED) - 1) == 0) {
>   cp += sizeof(LAST_MODIFIED) - 1;
> + free(conn->last_modified);
>   if ((conn->last_modified = strdup(cp)) == NULL)
>   err(1, NULL);
>   }

This one is OK claudio@

> Index: rrdp.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/rrdp.c,v
> retrieving revision 1.21
> diff -u -p -r1.21 rrdp.c
> --- rrdp.c23 Jan 2022 12:09:24 -  1.21
> +++ rrdp.c10 Feb 2022 07:41:54 -
> @@ -429,6 +429,7 @@ rrdp_input_handler(int fd)
>   errx(1, "%s: bad internal state", s->local);
>  
>   s->res = res;
> + free(s->last_mod);
>   s->last_mod = last_mod;
>   s->state |= RRDP_STATE_HTTP_DONE;
>   rrdp_finished(s);
> 

This one needs more thought. rrdp_finished() -> notification_done() moves
the last_mod into nxml->current->last_mod and so you can end up with
either a double free or use after free. That code needs a proper refactor.
I think it would be best to remove last_mod from the rrdp state and just
pass it as char * to rrdp_finished. Still need to look when and how to
free the value reliably.

-- 
:wq Claudio



Re: rpki-client: disk space warning on btrfs

2022-02-10 Thread Claudio Jeker
On Thu, Feb 10, 2022 at 09:13:25AM +0100, Theo Buehler wrote:
> This is purely cosmetic. I did some testing on fedora which ships with
> btrfs by default. btrfs is special in that df -i and other tools always
> report 0 inodes. As a consequence, each rpki-client run prints the disk
> space warning, which seems a bit silly. Should we special case the 0
> inodes case? If your disk is actually that full, you'll find out quickly
> enough.
> 
> On this box I see this:
> 
> WARNING: rpki-client may need more than the available disk space
> on the file-system holding /usr/local/var/cache/rpki-client.
> available space: 118878020kB, suggested minimum 512000kB
> available inodes 0, suggested minimum 30
> 
> Index: main.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> retrieving revision 1.187
> diff -u -p -r1.187 main.c
> --- main.c28 Jan 2022 15:30:23 -  1.187
> +++ main.c10 Feb 2022 08:06:29 -
> @@ -695,7 +695,8 @@ check_fs_size(int fd, const char *cached
>   if (fstatvfs(fd, &fs) == -1)
>   err(1, "statfs %s", cachedir);
>  
> - if (fs.f_bavail < minsize / fs.f_frsize || fs.f_favail < minnode) {
> + if (fs.f_bavail < minsize / fs.f_frsize ||
> + (fs.f_favail > 0 && fs.f_favail < minnode)) {
>   fprintf(stderr, "WARNING: rpki-client may need more than "
>   "the available disk space\n"
>   "on the file-system holding %s.\n", cachedir);
> 

Fine with me. Rather dumb default from Linux here.

-- 
:wq Claudio



Re: rpki-client: plug leak in http_parse_header()

2022-02-10 Thread Claudio Jeker
On Thu, Feb 10, 2022 at 11:45:06AM +0100, Theo Buehler wrote:
> > > Index: rrdp.c
> > > ===
> > > RCS file: /cvs/src/usr.sbin/rpki-client/rrdp.c,v
> > > retrieving revision 1.21
> > > diff -u -p -r1.21 rrdp.c
> > > --- rrdp.c23 Jan 2022 12:09:24 -  1.21
> > > +++ rrdp.c10 Feb 2022 07:41:54 -
> > > @@ -429,6 +429,7 @@ rrdp_input_handler(int fd)
> > >   errx(1, "%s: bad internal state", s->local);
> > >  
> > >   s->res = res;
> > > + free(s->last_mod);
> > >   s->last_mod = last_mod;
> > >   s->state |= RRDP_STATE_HTTP_DONE;
> > >   rrdp_finished(s);
> > > 
> > 
> > This one needs more thought. rrdp_finished() -> notification_done() moves
> > the last_mod into nxml->current->last_mod and so you can end up with
> > either a double free or use after free.
> 
> I saw that and thought ownership was transferred to
> s->nxml->current->last_mod since s->last_mod is nulled out right after
> 
>   s->task = notification_done(s->nxml, s->last_mod);
>   s->last_mod = NULL;
> 
> I can't see where that's freed though.

Indeed. I missed that somehow. So your diff is indeed OK.
The free happens in rrdp_free() which is OK. There is only one call to
notification_done() so that is fine.
 
So your diff is indeed OK and should go in. OK claudio@

> > That code needs a proper refactor.
> 
> That may well be...
> 
> > I think it would be best to remove last_mod from the rrdp state and just
> > pass it as char * to rrdp_finished. Still need to look when and how to
> > free the value reliably.

Actually that does not really work because the RRDP_HTTP_FIN could arrive
before all data has been handled by rrdp_data_handler(). That is why it
is in the state.

As said, OK on the original diff.
-- 
:wq Claudio



rpki-client print crl data

2022-02-10 Thread Claudio Jeker
This adds the needed bits to print CRL files.
Using ASN1_INTEGER_get() is probably bad; at least I think there is the
possibility the serial number won't fit in a long. I hope tb@ has a
better solution :)

I created x509_get_time() to streamline the ASN1_TIME to time_t
conversion and replaced a bunch of calls. mft.c uses ASN1_GENERALIZEDTIME
and can not be converted.

Apart from that it seems to work.
-- 
:wq Claudio

Index: crl.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/crl.c,v
retrieving revision 1.13
diff -u -p -r1.13 crl.c
--- crl.c   8 Feb 2022 14:53:03 -   1.13
+++ crl.c   10 Feb 2022 13:27:51 -
@@ -33,7 +33,6 @@ crl_parse(const char *fn, const unsigned
 {
struct crl  *crl;
const ASN1_TIME *at;
-   struct tmissued_tm, expires_tm;
int  rc = 0;
 
/* just fail for empty buffers, the warning was printed elsewhere */
@@ -58,27 +57,20 @@ crl_parse(const char *fn, const unsigned
warnx("%s: X509_CRL_get0_lastUpdate failed", fn);
goto out;
}
-   memset(&issued_tm, 0, sizeof(issued_tm));
-   if (ASN1_time_parse(at->data, at->length, &issued_tm, 0) == -1) {
+   if (x509_get_time(at, &crl->issued) == -1) {
warnx("%s: ASN1_time_parse failed", fn);
goto out;
}
-   if ((crl->issued = mktime(&issued_tm)) == -1)
-   errx(1, "%s: mktime failed", fn);
 
-   /* extract expire time for later use */
at = X509_CRL_get0_nextUpdate(crl->x509_crl);
if (at == NULL) {
warnx("%s: X509_CRL_get0_nextUpdate failed", fn);
goto out;
}
-   memset(&expires_tm, 0, sizeof(expires_tm));
-   if (ASN1_time_parse(at->data, at->length, &expires_tm, 0) == -1) {
+   if (x509_get_time(at, &crl->expires) == -1) {
warnx("%s: ASN1_time_parse failed", fn);
goto out;
}
-   if ((crl->expires = mktime(&expires_tm)) == -1)
-   errx(1, "%s: mktime failed", fn);
 
rc = 1;
  out:
Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.118
diff -u -p -r1.118 extern.h
--- extern.h8 Feb 2022 14:53:03 -   1.118
+++ extern.h10 Feb 2022 13:27:35 -
@@ -585,13 +585,16 @@ char  *x509_get_crl(X509 *, const char *
 char   *x509_crl_get_aki(X509_CRL *, const char *);
 char   *x509_get_pubkey(X509 *, const char *);
 enum cert_purpose   x509_get_purpose(X509 *, const char *);
+int x509_get_time(const ASN1_TIME *, time_t *);
 
 /* printers */
-void   tal_print(const struct tal *);
-void   cert_print(const struct cert *);
-void   mft_print(const struct mft *);
-void   roa_print(const struct roa *);
-void   gbr_print(const struct gbr *);
+char   *time2str(time_t);
+voidtal_print(const struct tal *);
+voidcert_print(const struct cert *);
+voidcrl_print(const struct crl *);
+voidmft_print(const struct mft *);
+voidroa_print(const struct roa *);
+voidgbr_print(const struct gbr *);
 
 /* Output! */
 
Index: parser.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
retrieving revision 1.63
diff -u -p -r1.63 parser.c
--- parser.c8 Feb 2022 14:53:03 -   1.63
+++ parser.c10 Feb 2022 13:42:12 -
@@ -94,19 +94,6 @@ repo_add(unsigned int id, char *path, ch
errx(1, "repository already added: id %d, %s", id, path);
 }
 
-static char *
-time2str(time_t t)
-{
-   static char buf[64];
-   struct tm tm;
-
-   if (gmtime_r(&t, &tm) == NULL)
-   return "could not convert time";
-
-   strftime(buf, sizeof(buf), "%h %d %T %Y %Z", &tm);
-   return buf;
-}
-
 /*
  * Build access path to file based on repoid, path, location and file values.
  */
@@ -1009,6 +996,7 @@ proc_parser_file(char *file, unsigned ch
static int num;
X509 *x509 = NULL;
struct cert *cert = NULL;
+   struct crl *crl = NULL;
struct mft *mft = NULL;
struct roa *roa = NULL;
struct gbr *gbr = NULL;
@@ -1044,6 +1032,12 @@ proc_parser_file(char *file, unsigned ch
if (X509_up_ref(x509) == 0)
errx(1, "%s: X509_up_ref failed", __func__);
break;
+   case RTYPE_CRL:
+   crl = crl_parse(file, buf, len);
+   if (crl == NULL)
+   break;
+   crl_print(crl);
+   break;
case RTYPE_MFT:
mft = mft_parse(&x509, file, buf, len);
if (mft == NULL)
@@ -1074,7 +1068,6 @@ proc_parser_file(char *file, unsigned ch
bre

Re: rpki-client print crl data

2022-02-10 Thread Claudio Jeker
On Thu, Feb 10, 2022 at 04:09:40PM +0100, Theo Buehler wrote:
> On Thu, Feb 10, 2022 at 03:02:15PM +0100, Claudio Jeker wrote:
> > This adds the needed bits to print CRL files.
> > Using ASN1_INTEGER_get() is probably bad at least I think there is the
> > possibility the serial number wont fit in the long. I hope tb@ has a
> > better solution :)
> 
> According to RFC 5280, issuer + serialNumber must identify the cert
> uniquely so applications should be able to handle serialNumbers of 
> at least 20 octets. The upper bound is 64 octets.
> 
> I don't have a particularly elegant solution. The options offered
> by libcrypto that come to mind are to convert to a BIGNUM and use
> BN_print_fp() or to use a BIO and i2a_ASN1_INTEGER. Neither is
> particularly appealing.

I would suggest we extract the code from mft.c to handle the manifest
number. The only difference is the limit of 20 vs 64 it seems.
Then we have a common function for serial numbers.
 
> I would suggest something along these lines:
> 
>   const ASN1_INTEGER  *serial;
>   char*hex_str;
> 
>   serial = X509_REVOKED_get0_serialNumber(rev);
>   if (serial != NULL && ASN1_STRING_length(serial) > 0)
>   hex_str = hex_encode(ASN1_STRING_get0_data(serial),
>   ASN1_STRING_length(serial));
>   else {
>   if ((hex_str = strdup("invalid")) == NULL)
>   err(1, NULL);
>   }
>   x509_get_time(X509_REVOKED_get0_revocationDate(rev), &t);
>   printf("Serial: %8s\tRevocation Date: %s\n", hex_str,
>   time2str(t));
>   free(hex_str);
> 
> That is, if you can live with leading zeros and uppercase hex digits.
> 
> > I created x509_get_time() to streamline the ASN1_TIME to time_t
> > conversion and replaced a bunch of calls. mft.c uses ASN1_GENERALIZEDTIME
> > and can not be converted.
> 
> We already check that the ASN.1 type is ASN1_GENERALIZEDTIME before
> calling mft_parse_time(). I'm not sure how much this being slightly
> stricter buys us.

Can we typecast an ASN1_GENERALIZEDTIME into an ASN1_TIME?
 
> > Apart from that it seems to work.
> 
> I like it. This line has a trailing tab:
> 
> > +   printf("Authority key identifier: %s\n", pretty_key_id(p->aki));

Fixed.
 
> I'm ok to land this as it is and we can bikeshed the ASN1_INTEGER
> conversion in tree.

Sure, let's bikeshed in the tree :)

-- 
:wq Claudio



Re: rewritten vxlan(4)

2022-02-15 Thread Claudio Jeker
On Tue, Feb 15, 2022 at 04:49:10PM +1000, David Gwynne wrote:
> On Fri, Feb 11, 2022 at 03:13:25PM +1000, David Gwynne wrote:
> > On Fri, Mar 05, 2021 at 05:09:29PM +1000, David Gwynne wrote:
> > > On Thu, Mar 04, 2021 at 03:36:19PM +1000, David Gwynne wrote:
> > > > as the subject says, this is a rewrite of vxlan(4).
> > > > 
> > > > vxlan(4) relies on bridge(4) to implement learning, but i want to be
> > > > able to remove bridge(4) one day. while working on veb(4), i wrote
> > > > the guts of a learning bridge implementation that is now used by veb(4),
> > > > bpe(4), and nvgre(4). that learning bridge code is now also used by
> > > > vxlan(4).
> > > > 
> > > > this means that a few of the modes that the manpage talks about are
> > > > different now. because vxlan doesnt need a bridge for learning, there's
> > > > no "multicast mode" anymore, it just does "dynamic mode" out of the box
> > > > when configured with a multicast destination address. there's no
> > > > multipoint mode now too.
> > > > 
> > > > another thing that's always bothered me about vxlan(4) is how it 
> > > > occupies
> > > > the "udp namespace" and gets how it steals packets from the udp stack.
> > > > the new code actually creates and bind udp sockets to handle the
> > > > vxlan packets. this means userland can't collide with a vxlan interface,
> > > > and you get to see that the port is in use in things like netstat. e.g.:
> > > > 
> > > > dlg@ikkaku ~$ ifconfig vxlan0
> > > > vxlan0: flags=8843 mtu 1500
> > > > lladdr fe:e1:ba:d1:17:2a
> > > > index 11 llprio 3
> > > > encap: vnetid none parent aggr0 txprio 0 rxprio outer
> > > > groups: vxlan
> > > > tunnel: inet 192.0.2.36 port 4789 --> 239.0.0.1 ttl 1 nodf
> > > > Addresses (max cache: 100, timeout: 240):
> > > > inet 100.64.1.36 netmask 0xff00 broadcast 100.64.1.255
> > > > dlg@ikkaku ~$ netstat -na -f inet -p udp
> > > > Active Internet connections (including servers)
> > > > Proto   Recv-Q Send-Q  Local Address  Foreign Address   
> > > > udp  0  0  130.102.96.36.29742129.250.35.250.123
> > > > udp  0  0  130.102.96.36.8965 162.159.200.123.123   
> > > > udp  0  0  130.102.96.36.13189162.159.200.1.123 
> > > > udp  0  0  130.102.96.36.46580220.158.215.20.123
> > > > udp  0  0  130.102.96.36.23109103.38.121.36.123 
> > > > udp  0  0  239.0.0.1.4789 *.*   
> > > > udp  0  0  192.0.2.36.4789*.*   
> > > > 
> > > > ive also added loop prevention, ie, sending an interfaces vxlan
> > > > packets over itself should fail rather than panic now.
> > > 
> > > here's an updated diff with a few fixes.
> > >
> > 
> > this diff better supports vxlan p2p and multicast vxlan configs that
> > share a UDP listener.
> 
> it doesn't look like anyone (else) cares about vxlan(4), so i'm
> going to commit this tomorrow unless anyone really objects.

I do not use vxlan(4) and I only glanced at the diff but I like what you
did. Especially removing the tentacles from udp by using proper socket
code.

You created a fair amount of static functions, did we change the rule of
no static in the kernel?

I'm OK with you to commit this.
 
> > Index: net/if_vxlan.c
> > ===
> > RCS file: /cvs/src/sys/net/if_vxlan.c,v
> > retrieving revision 1.83
> > diff -u -p -r1.83 if_vxlan.c
> > --- net/if_vxlan.c  10 Jan 2022 14:07:59 -  1.83
> > +++ net/if_vxlan.c  11 Feb 2022 05:11:13 -
> > @@ -1,7 +1,7 @@
> > -/* $OpenBSD: if_vxlan.c,v 1.83 2022/01/10 14:07:59 jan Exp $   */
> > +/* $OpenBSD$ */
> >  
> >  /*
> > - * Copyright (c) 2013 Reyk Floeter 
> > + * Copyright (c) 2021 David Gwynne 
> >   *
> >   * Permission to use, copy, modify, and distribute this software for any
> >   * purpose with or without fee is hereby granted, provided that the above
> > @@ -17,475 +17,781 @@
> >   */
> >  
> >  #include "bpfilter.h"
> > -#include "vxlan.h"
> > -#include "vlan.h"
> >  #include "pf.h"
> > -#include "bridge.h"
> >  
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> > -#include 
> >  #include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +
> > +#include 
> > +#include 
> >  
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> > +#include 
> >  #include 
> > -
> > -#if NBPFILTER > 0
> > -#include 
> > -#endif
> > +#include 
> >  
> >  #include 
> >  #include 
> >  #include 
> >  #include 
> > -#include 
> >  #include 
> > -#include 
> >  #include 
> > +#include 
> >  
> > -#if NPF > 0
> > -#include 
> > +#ifdef INET6
> > +#include 
> > +#include 
> > +#include 
> >  #endif
> >  
> > -#if NBRIDGE > 0
> > +/* for bridge stuff */
> >  #include 
> > +#include 
> > +
> > +#if NBPFILTER > 0
> > +#include 
> >  #endif
> >  
> > -#include 
> > +/*

Re: ifconfig(8): always print the mtu, don't hide it on "bridges"

2022-02-22 Thread Claudio Jeker
On Tue, Feb 22, 2022 at 03:46:05PM +1000, David Gwynne wrote:
> this lets ifconfig show the MTU on interfaces like nvgre, vxlan, etc.
> they currently don't show it because they also implement a bridge ioctl,
> so ifconfig thinks they're a bridge.
> 
> why ifconfig hides the mtu on bridges looks to be a hold over from when
> brconfig was merged into ifconfig. if we dont want bridge(4) to report
> an mtu, then i can make bridge(4) itself hide the mtu or stop setting
> the mtu.
> 
> found by jason tubnor.
> 
> ok?
> 
> Index: ifconfig.c
> ===
> RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
> retrieving revision 1.451
> diff -u -p -r1.451 ifconfig.c
> --- ifconfig.c23 Nov 2021 19:13:45 -  1.451
> +++ ifconfig.c22 Feb 2022 05:38:48 -
> @@ -1027,11 +1027,7 @@ getinfo(struct ifreq *ifr, int create)
>   metric = 0;
>   else
>   metric = ifr->ifr_metric;
> -#ifdef SMALL
>   if (ioctl(sock, SIOCGIFMTU, (caddr_t)ifr) == -1)
> -#else
> - if (is_bridge() || ioctl(sock, SIOCGIFMTU, (caddr_t)ifr) == -1)
> -#endif
>   mtu = 0;
>   else
>   mtu = ifr->ifr_mtu;
> 

OK claudio@

-- 
:wq Claudio



Re: acme-client: plug leak in ec_key_create()

2022-02-22 Thread Claudio Jeker
On Tue, Feb 22, 2022 at 02:01:26PM +0100, Theo Buehler wrote:
> EVP_PKEY_set1_EC_KEY() bumps eckey's refcount (that's what "set1" means),
> so eckey isn't freed when pkey is freed at the end of keyproc() or
> acctproc() (which means that secret data isn't wiped). Moving the
> freeing of eckey to the end of ec_key_create() decrements the refcount
> again which should fix this.
> 
> I don't currently have an easy way to test this, so I would appreciate
> if someone could try this.

I agree with the diff and tested this with an ecdsa domain key. No problems
found. OK claudio@
 
> Index: key.c
> ===
> RCS file: /cvs/src/usr.sbin/acme-client/key.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 key.c
> --- key.c 22 Feb 2022 12:38:30 -  1.5
> +++ key.c 22 Feb 2022 12:51:32 -
> @@ -116,10 +116,10 @@ ec_key_create(FILE *f, const char *fname
>   goto out;
>  
>  err:
> - EC_KEY_free(eckey);
>   EVP_PKEY_free(pkey);
>   pkey = NULL;
>  out:
> + EC_KEY_free(eckey);
>   return pkey;
>  }
>  
> 

-- 
:wq Claudio



allow bgpd to listen and connect to non common ports

2022-02-22 Thread Claudio Jeker
Sometimes (mainly for tests) it can be useful to run bgpd on something
different than port 179. The following diff does mostly that. It allows
to define a port with 'listen on' and makes it possible to set the port
on a neighbor like it is done for rtr sessions.

The only thing not working are IPsec flows. Those assume that the default
port is used and changing that is way too complex for the limited use case.

-- 
:wq Claudio

Index: bgpd.conf.5
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
retrieving revision 1.216
diff -u -p -r1.216 bgpd.conf.5
--- bgpd.conf.5 22 Feb 2022 12:08:22 -  1.216
+++ bgpd.conf.5 22 Feb 2022 16:30:59 -
@@ -237,8 +237,8 @@ The default is 90 seconds.
 The minimum acceptable holdtime in seconds.
 This value must be at least 3.
 .Pp
-.It Ic listen on Ar address
-Specify the local IP address for
+.It Ic listen on Ar address Op Ic port Ar port
+Specify the local IP address and optional port for
 .Xr bgpd 8
 to listen on.
 The default is to listen on all local addresses on the current default
@@ -1078,6 +1078,9 @@ aes-128-cbc 
 .Pp
 Keys must be given in hexadecimal format.
 After changing settings a session needs to be reset to use the new keys.
+The
+.Ic ipsec
+flows only work with session using the default port 179.
 .Pp
 .It Xo
 .Ic ipsec
@@ -1113,6 +1116,9 @@ and
 .Xr bgpd 8
 daemons on both sides, the session should be established.
 After changing settings a session needs to be reset to use the new keys.
+The
+.Ic ipsec
+flows only work with session using the default port 179.
 .Pp
 .It Ic local-address Ar address
 .It Ic no local-address
@@ -1183,6 +1189,11 @@ statement defines the maximum hops the n
 .Pp
 .It Ic passive
 Do not attempt to actively open a TCP connection to the neighbor system.
+.Pp
+.It Ic port Ar port
+Connect to the peer using
+.Ar port
+instead of the default BGP port 179.
 .Pp
 .It Xo
 .Ic reject Ic as-set
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.419
diff -u -p -r1.419 bgpd.h
--- bgpd.h  6 Feb 2022 09:51:19 -   1.419
+++ bgpd.h  22 Feb 2022 15:11:13 -
@@ -36,6 +36,7 @@
 
 #defineBGP_VERSION 4
 #defineBGP_PORT179
+#defineRTR_PORT323
 #defineCONFFILE"/etc/bgpd.conf"
 #defineBGPD_USER   "_bgpd"
 #definePEER_DESCR_LEN  32
@@ -402,6 +403,7 @@ struct peer_config {
uint16_t holdtime;
uint16_t min_holdtime;
uint16_t local_short_as;
+   uint16_t remote_port;
uint8_t  template;
uint8_t  remote_masklen;
uint8_t  ebgp;  /* 0 = ibgp else ebgp */
Index: config.c
===
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrieving revision 1.100
diff -u -p -r1.100 config.c
--- config.c6 Feb 2022 09:51:19 -   1.100
+++ config.c22 Feb 2022 11:00:43 -
@@ -447,30 +447,6 @@ prepare_listeners(struct bgpd_config *co
int  opt = 1;
int  r = 0;
 
-   if (TAILQ_EMPTY(conf->listen_addrs)) {
-   if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
-   fatal("setup_listeners calloc");
-   la->fd = -1;
-   la->flags = DEFAULT_LISTENER;
-   la->reconf = RECONF_REINIT;
-   la->sa_len = sizeof(struct sockaddr_in);
-   ((struct sockaddr_in *)&la->sa)->sin_family = AF_INET;
-   ((struct sockaddr_in *)&la->sa)->sin_addr.s_addr =
-   htonl(INADDR_ANY);
-   ((struct sockaddr_in *)&la->sa)->sin_port = htons(BGP_PORT);
-   TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
-
-   if ((la = calloc(1, sizeof(struct listen_addr))) == NULL)
-   fatal("setup_listeners calloc");
-   la->fd = -1;
-   la->flags = DEFAULT_LISTENER;
-   la->reconf = RECONF_REINIT;
-   la->sa_len = sizeof(struct sockaddr_in6);
-   ((struct sockaddr_in6 *)&la->sa)->sin6_family = AF_INET6;
-   ((struct sockaddr_in6 *)&la->sa)->sin6_port = htons(BGP_PORT);
-   TAILQ_INSERT_TAIL(conf->listen_addrs, la, entry);
-   }
-
for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL; la = next) {
next = TAILQ_NEXT(la, entry);
if (la->reconf != RECONF_REINIT)
Index: parse.y
===
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.420
diff -u -p -r1.420 parse.y
--- parse.y 15 Oct 2021 15:0

bgpd convert parse.y to uintXY_t

2022-02-22 Thread Claudio Jeker
In the big conversion I forgot to include parse.y in the files.
This diff fixes that.

-- 
:wq Claudio

Index: parse.y
===
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.420
diff -u -p -r1.420 parse.y
--- parse.y 15 Oct 2021 15:01:27 -  1.420
+++ parse.y 22 Feb 2022 16:57:03 -
@@ -161,10 +161,10 @@ intparsecommunity(struct community *,
 int parseextcommunity(struct community *, char *,
char *);
 static int  new_as_set(char *);
-static void add_as_set(u_int32_t);
+static void add_as_set(uint32_t);
 static void done_as_set(void);
 static struct prefixset*new_prefix_set(char *, int);
-static void add_roa_set(struct prefixset_item *, u_int32_t, u_int8_t,
+static void add_roa_set(struct prefixset_item *, uint32_t, uint8_t,
time_t);
 static struct rtr_config   *get_rtr(struct bgpd_addr *);
 static int  insert_rtr(struct rtr_config *);
@@ -174,7 +174,7 @@ typedef struct {
long longnumber;
char*string;
struct bgpd_addr addr;
-   u_int8_t u8;
+   uint8_t  u8;
struct filter_rib_l *filter_rib;
struct filter_peers_l   *filter_peers;
struct filter_match_lfilter_match;
@@ -185,14 +185,14 @@ typedef struct {
struct filter_set_head  *filter_set_head;
struct {
struct bgpd_addrprefix;
-   u_int8_tlen;
+   uint8_t len;
}   prefix;
struct filter_prefixlen prefixlen;
struct prefixset_item   *prefixset_item;
struct {
-   u_int8_tenc_alg;
+   uint8_t enc_alg;
+   uint8_t enc_key_len;
charenc_key[IPSEC_ENC_KEY_LEN];
-   u_int8_tenc_key_len;
}   encspec;
} v;
int lineno;
@@ -283,7 +283,7 @@ asnumber: NUMBER{
 as4number  : STRING{
const char  *errstr;
char*dot;
-   u_int32_tuvalh = 0, uval;
+   uint32_t uvalh = 0, uval;
 
if ((dot = strchr($1,'.')) != NULL) {
*dot++ = '\0';
@@ -315,7 +315,7 @@ as4number   : STRING{
| asnumber {
if ($1 == AS_TRANS || $1 == 0) {
yyerror("AS %u is reserved and may not be used",
-   (u_int32_t)$1);
+   (uint32_t)$1);
YYERROR;
}
$$ = $1;
@@ -325,7 +325,7 @@ as4number   : STRING{
 as4number_any  : STRING{
const char  *errstr;
char*dot;
-   u_int32_tuvalh = 0, uval;
+   uint32_t uvalh = 0, uval;
 
if ((dot = strchr($1,'.')) != NULL) {
*dot++ = '\0';
@@ -1063,7 +1063,7 @@ restricted: RESTRICTED{ $$ = 1; }
;
 
 address: STRING{
-   u_int8_tlen;
+   uint8_t len;
 
if (!host($1, &$$, &len)) {
yyerror("could not parse address spec \"%s\"",
@@ -1439,8 +1439,8 @@ peeropts  : REMOTEAS as4number{
curpeer->conf.min_holdtime = $3;
}
| ANNOUNCE family safi {
-   u_int8_taid, safi;
-   u_int16_t   afi;
+   uint8_t aid, safi;
+   uint16_tafi;
 
if ($3 == SAFI_NONE) {
for (aid = 0; aid < AID_MAX; aid++) {
@@ -1474,7 +1474,7 @@ peeropts  : REMOTEAS as4number{
}
| ANNOUNCE ADDPATH RECV yesno {
int8_t *ap = curpeer->conf.capabilities.add_path;
-   u_int8_t i;
+   uint8_t i;
 
for (i = 0; i < AID_MAX; i++)
if ($4)
@@ -1576,8 +1576,8 @@ peeropts  : REMOTEAS as4number{
curpeer->conf.auth.method = AUTH_IPSEC_IKE_AH;
  

Re: assert "sc->sc_dev == NUM" failed in if_tun.c (2)

2022-02-24 Thread Claudio Jeker
On Thu, Feb 24, 2022 at 07:39:54PM +1000, David Gwynne wrote:
> On Mon, Feb 21, 2022 at 03:00:01PM +1000, David Gwynne wrote:
> > On Sun, Feb 20, 2022 at 10:30:22AM +1000, David Gwynne wrote:
> > > 
> > > 
> > > > On 20 Feb 2022, at 09:46, David Gwynne  wrote:
> > > > 
> > > > On Sat, Feb 19, 2022 at 02:58:08PM -0800, Greg Steuck wrote:
> > > >> There's no reproducer, but maybe this race is approachable without one?
> > > >> 
> > > >> dev = sc->sc_dev;
> > > >> if (dev) {
> > > >> struct vnode *vp;
> > > >> 
> > > >> if (vfinddev(dev, VCHR, &vp))
> > > >> VOP_REVOKE(vp, REVOKEALL);
> > > >> 
> > > >> KASSERT(sc->sc_dev == 0);
> > > >> }
> > > > 
> > > > this was my last run at it:
> > > > https://marc.info/?l=openbsd-tech&m=164489981621957&w=2
> > > > 
> > > > maybe i need another dvthing thread to push it a bit harder...
> > > 
> > > adding another dvthing thread or two made it blow up pretty quickly :(
> > 
> > so it is this section:
> > 
> > /* kick userland off the device */
> > dev = sc->sc_dev;
> > if (dev) {
> > struct vnode *vp;
> > 
> > if (vfinddev(dev, VCHR, &vp))
> > VOP_REVOKE(vp, REVOKEALL);
> > 
> > KASSERT(sc->sc_dev == 0);
> > }
> > 
> > my assumption was/is that VOP_REVOKE would call tunclose (or tapclose)
> > on the currently open tun (or tap) device, and swap the vfs backend out
> > behind the scenes with deadfs.
> > 
> > the context behind this is that there isnt really a strong binding between an
> > open /dev entry (eg, /dev/tun0) and an instance of an interface (eg,
> > tun0). all the device entrypoints (eg, tunopen, tunread, tunwrite,
> > etc) pass a dev_t, and that's used to look up an interface instance to
> > work with.
> > 
> > the problem with this is an interface could be destroyed and recreated
> > in between calls to the device entrypoints. ie, you could do the
> > following high level steps:
> > 
> > - ifconfig tun0 create
> > - open /dev/tun0 -> fd 3
> > - ifconfig tun0 destroy
> > - ifconfig tun0 create
> > - write to fd 3
> > 
> > and that would send a packet on the newly created tun0 because it had
> > the same minor number as the previous one.
> > 
> > there was a lot of consensus that this was Not The Best(tm), and that if
> > a tun interface was destroyed while the /dev entry was open, it should
> > act like the interface was detached and the open /dev entry should stop
> > working. this is what VOP_REVOKE helps with. or it is supposed to help
> > with.
> > 
> > when we create a tun interface, it's added to a global list of tun
> > interfaces. when a tun device is opened, we look for the interface on
> > that list (and create it if it doesnt exist), and then check to see if
> > it is already open by looking at sc_dev. if sc_dev is 0, it's not open
> > and tunopen can set sc_dev to claim ownership of it. if sc_dev is
> > non-zero, then the device is busy and open fails.
> > 
> > tunclose clears sc_dev to say ownership is given up.
> > 
> > tun destroy checks sc_dev, and if it is != 0 then it knows something has
> > it open and will call VOP_REVOKE. VOP_REVOKE is supposed to do what i
> > described above, which is call tunclose on the programs behalf and swap
> > the vfs ops out.
> > 
> > what i'm seeing is that sometimes VOP_REVOKE gets called, it happily
> > returns 0, but tunclose is not called. this means sc_dev is not cleared,
> > and then this KASSERT fires.
> > 
> > ive tried changing it something like this in the destroy path:
> > 
> > -   KASSERT(sc->sc_dev == 0);
> > +   while (sc->sc_dev != 0)
> > +   tsleep_nsec(&sc->sc_dev, PWAIT, "tunclose", INFSLP);
> > 
> > with tunclose calling wakeup(&sc->dev) when it's finished, but this ends
> > up getting stuck in the tsleep.
> > 
> > however, if i cut the KASSERT out and let destroy keep going, i do see
> > tunclose get called against the now non-existent interface. this would
> > be fine, but now we're back where we started. if someone recreates tun0
> > after it's been destroyed, tunclose will find the new interface and try
> > to close it.
> > 
> > ive tried to follow what VOP_REVOKE actually does and how it finds
> > tunclose to call it, but it's pretty twisty and i got tired.
> > 
> > i guess my question at this point is are my assumptions about how
> > VOP_REVOKE works valid? specifically, should it reliably be calling
> > tunclose? if not, what causes tunclose to be called in the future and
> > why can't i wait for it in tun_clone_destroy?
> 
> claudio figured it out. his clue was that multiple concurrent calls
> to tunopen (or tapopen) will share a vnode. because tunopen can sleep,
> multiple programs can be inside tunopen for the same tun interface at
> the same time, all with references against the same vnode.
> 
> at the same time as this another thread/program can call VOP_REVOKE
> via tun_clone_destroy (eg, ifconfig tun1 destroy does this).
> VOP_REVOKE marks a vnode as ba

Re: assert "sc->sc_dev == NUM" failed in if_tun.c (2)

2022-02-24 Thread Claudio Jeker
On Thu, Feb 24, 2022 at 08:56:59PM +1000, David Gwynne wrote:
> On Thu, Feb 24, 2022 at 11:13:48AM +0100, Claudio Jeker wrote:
> > On Thu, Feb 24, 2022 at 07:39:54PM +1000, David Gwynne wrote:
> > > 
> > > here's the diff.
> > > 
> > > Index: if_tun.c
> > > ===
> > > RCS file: /cvs/src/sys/net/if_tun.c,v
> > > retrieving revision 1.234
> > > diff -u -p -r1.234 if_tun.c
> > > --- if_tun.c  16 Feb 2022 02:22:39 -  1.234
> > > +++ if_tun.c  24 Feb 2022 08:08:38 -
> > > @@ -374,10 +374,19 @@ tun_dev_open(dev_t dev, const struct if_
> > >   struct ifnet *ifp;
> > >   int error;
> > >   u_short stayup = 0;
> > > + struct vnode *vp;
> > >  
> > 
> > Why is there this empty line? It was there before but still wondering.
> 
> feng shui? laziness? i'll fix it later.
> 
> > >   char name[IFNAMSIZ];
> > >   unsigned int rdomain;
> > >  
> > > + /*
> > > +  * Find the vnode associated with this open before we sleep
> > > +  * and let something else revoke it. Our caller has a reference
> > > +  * to it so we don't need to account for it.
> > > +  */
> > > + if (!vfinddev(dev, VCHR, &vp))
> > > + panic("%s vfinddev failed", __func__);
> > > +
> > >   snprintf(name, sizeof(name), "%s%u", ifc->ifc_name, minor(dev));
> > >   rdomain = rtable_l2(p->p_p->ps_rtableid);
> > >  
> > > @@ -412,6 +421,12 @@ tun_dev_open(dev_t dev, const struct if_
> > >   /* XXX if_clone_destroy if stayup? */
> > >   goto done;
> > >   }
> > > + }
> > > +
> > > + /* Has tun_clone_destroy torn the rug out under us? */
> > > + if (vp->v_type == VBAD) {
> > > + error = ENXIO;
> > > + goto done;
> > >   }
> > >  
> > >   if (sc->sc_dev != 0) {
> > > 
> > 
> > OK claudio@
> > 
> > After sleeping over this I think this is the cleanest and simplest way
> > around this problem. A bit ugly that tun needs to peek into the vnode.
> > 
> > Another option is to split the clone destroy from the softc / device node.
> > Remove the VOP_REVOKE and actually allow tun to be destroyed and recreated
> > while open and in that case the device remains open and only the network
> > bits are destroyed and later recreated. So in your example from above:
> > 
> > > > - ifconfig tun0 create
> > > > - open /dev/tun0 -> fd 3
> > > > - ifconfig tun0 destroy
> > > > - ifconfig tun0 create
> > > > - write to fd 3
> > 
> > The write would be perfectly fine since the destroy did not destroy this
> > connection (only close(2) would do that). Actually a call to open
> > /dev/tun0 after the 2nd create would fail because the device is still
> > open. Doing this seems a lot more complex.
> 
> we talked about this a lot around the time of src/sys/net/if_tun.c
> r1.210. there seemed to be more weight on the side of the argument for
> VOP_REVOKE than against, and i still think that's the case now. tun
> going away and coming back in between open and write could go either
> way, but what about these:
> 
> - open() /dev/tun0, tun0 is destroyed, write() to tun0
> 
> should the write error? if we run with "only close can destroy the
> connection" does this mean the write will create tun0 again? in which
> rdomain should it be?

Normally, yes you would generate an error like EPIPE in such a case.
You may even need to send SIGPIPE but that is one of those nasty details.
 
> - open() /dev/tun0, tun0 is destroyed, read() from tun0
> 
> same as above?

Ideally you would signal EOF with a return of 0 (again similar to how pipes
behave).
 
> - begin blocking read of tun0, tun0 is destroyed, let's go shopping!
> 
> should the read wake up and return an error, or does it just block?

Again return 0.
 
> - poll on tun0, tun0 is destroyed
> 
> same as above?

poll should signal POLLHUP | POLLIN (iirc that's what pipes do).
 
> if we leave the /dev side of things operational if the interface goes
> away, then this would be inconsistent with something working with bpf on
> the same interfaces. wouldnt this be inconsistent with hotplug devices
> and their /dev things?

Maybe, on the other hand it is how pipes, socketpairs and unix sockets
behave. As I said going down this road is possible but much more complex
because all these edgecas

bgpd start using path_id_tx

2022-02-24 Thread Claudio Jeker
This is one small step closer to support add-path send side.
We store the path_id_tx on the prefix and we can adjust a few places to
make use of that field. Now it is always 0 so nothing changes in the end
apart from removing some XXX comments.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.534
diff -u -p -r1.534 rde.c
--- rde.c   6 Feb 2022 09:51:19 -   1.534
+++ rde.c   24 Feb 2022 13:01:13 -
@@ -2422,7 +2422,7 @@ rde_dump_rib_as(struct prefix *p, struct
}
} else {
if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) {
-   rib.path_id = 0;/* XXX add-path send */
+   rib.path_id = p->path_id_tx;
rib.flags |= F_PREF_PATH_ID;
}
}
@@ -2507,12 +2507,16 @@ rde_dump_filter(struct prefix *p, struct
if ((req->flags & F_CTL_INVALID) &&
(asp->flags & F_ATTR_PARSE_ERR) == 0)
return;
-   /*
-* XXX handle out specially since then we want to match against our
-* path ids.
-*/
-   if ((req->flags & F_CTL_HAS_PATHID) && req->path_id != p->path_id)
-   return;
+   if ((req->flags & F_CTL_HAS_PATHID)) {
+   /* Match against the transmit path id if adjout is used.  */
+   if (adjout) {
+   if (req->path_id != p->path_id_tx)
+   return;
+   } else {
+   if (req->path_id != p->path_id)
+   return;
+   }
+   }
if (req->as.type != AS_UNDEF &&
!aspath_match(asp->aspath, &req->as, 0))
return;
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.132
diff -u -p -r1.132 rde_update.c
--- rde_update.c6 Feb 2022 09:51:19 -   1.132
+++ rde_update.c24 Feb 2022 13:01:13 -
@@ -632,8 +632,7 @@ up_dump_prefix(u_char *buf, int len, str
if (peer_has_add_path(peer, p->pt->aid, CAPA_AP_SEND)) {
if (len <= wpos + (int)sizeof(pathid))
break;
-   /* XXX add-path send side */
-   pathid = 0;
+   pathid = htonl(p->path_id_tx);
memcpy(buf + wpos, &pathid, sizeof(pathid));
wpos += sizeof(pathid);
}



bgpd make adjout handle multiple paths per prefix

2022-02-25 Thread Claudio Jeker
For add-path send the Adj-RIB-Out needs to handle multiple paths per
prefix. The Adj-RIB-Out stores the prefixes on RB trees and so extend
the lookup function to include the path_id (which will be path_id_tx).

For now the path_id_tx in the Adj-RIB-Out is forced to 0 since
up_generate_updates() is not ready to handle more than one path per prefix.

This mainly adjusts the bgpctl interface and the internals. Some functions
are renamed to start with prefix_adjout_ like all other prefix functions
operating on the Adj-RIB-Out.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.535
diff -u -p -r1.535 rde.c
--- rde.c   24 Feb 2022 14:54:03 -  1.535
+++ rde.c   25 Feb 2022 09:26:43 -
@@ -2728,12 +2728,16 @@ rde_dump_ctx_new(struct ctl_show_rib_req
 
do {
if (req->prefixlen == hostplen)
-   p = prefix_match(peer, &req->prefix);
+   p = prefix_adjout_match(peer,
+   &req->prefix);
else
-   p = prefix_lookup(peer, &req->prefix,
-   req->prefixlen);
-   if (p)
+   p = prefix_adjout_lookup(peer,
+   &req->prefix, req->prefixlen);
+   /* dump all matching paths */
+   while (p != NULL) {
rde_dump_adjout_upcall(p, ctx);
+   p = prefix_adjout_next(peer, p);
+   };
} while ((peer = peer_match(&req->neighbor,
peer->conf.id)));
 
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.243
diff -u -p -r1.243 rde.h
--- rde.h   6 Feb 2022 09:51:19 -   1.243
+++ rde.h   25 Feb 2022 09:21:50 -
@@ -582,8 +582,13 @@ voidpath_put(struct rde_aspath *);
 #definePREFIX_SIZE(x)  (((x) + 7) / 8 + 1)
 struct prefix  *prefix_get(struct rib *, struct rde_peer *, uint32_t,
struct bgpd_addr *, int);
-struct prefix  *prefix_lookup(struct rde_peer *, struct bgpd_addr *, int);
+struct prefix  *prefix_adjout_get(struct rde_peer *, uint32_t,
+   struct bgpd_addr *, int);
 struct prefix  *prefix_match(struct rde_peer *, struct bgpd_addr *);
+struct prefix  *prefix_adjout_match(struct rde_peer *, struct bgpd_addr *);
+struct prefix  *prefix_adjout_lookup(struct rde_peer *, struct bgpd_addr *,
+int);
+struct prefix  *prefix_adjout_next(struct rde_peer *, struct prefix *);
 int prefix_update(struct rib *, struct rde_peer *, uint32_t,
 struct filterstate *, struct bgpd_addr *, int, uint8_t);
 int prefix_withdraw(struct rib *, struct rde_peer *, uint32_t,
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.225
diff -u -p -r1.225 rde_rib.c
--- rde_rib.c   6 Feb 2022 09:51:19 -   1.225
+++ rde_rib.c   25 Feb 2022 10:05:20 -
@@ -860,6 +860,21 @@ static void prefix_free(struct prefix 
 
 /* RB tree comparison function */
 static inline int
+prefix_index_cmp(struct prefix *a, struct prefix *b)
+{
+   int r;
+   r = pt_prefix_cmp(a->pt, b->pt);
+   if (r != 0)
+   return r;
+
+   if (a->path_id_tx > b->path_id_tx)
+   return 1;
+   if (a->path_id_tx < b->path_id_tx)
+   return -1;
+   return 0;
+}
+
+static inline int
 prefix_cmp(struct prefix *a, struct prefix *b)
 {
if (a->eor != b->eor)
@@ -876,22 +891,14 @@ prefix_cmp(struct prefix *a, struct pref
return (a->nexthop > b->nexthop ? 1 : -1);
if (a->nhflags != b->nhflags)
return (a->nhflags > b->nhflags ? 1 : -1);
-   /* XXX path_id ??? */
-   return pt_prefix_cmp(a->pt, b->pt);
-}
-
-static inline int
-prefix_index_cmp(struct prefix *a, struct prefix *b)
-{
-   /* XXX path_id ??? */
-   return pt_prefix_cmp(a->pt, b->pt);
+   return prefix_index_cmp(a, b);
 }
 
 RB_GENERATE(prefix_tree, prefix, entry.tree.update, prefix_cmp)
 RB_GENERATE_STATIC(prefix_index, prefix, entry.tree.index, prefix_index_cmp)
 
 /*
- * search for specified prefix of a peer. Returns NULL if not found.
+ * Search for specified prefix of a peer. Returns NULL if not found.
  */
 struct prefix *
 prefix_get(struct rib *rib, struct rde_peer *peer, uint32_t path_id,
@@ -906,11 +913,12 @@ prefix_get(struct rib *rib, struct rde_p
 }
 
 /*
- * lookup p

Re: bgpd make adjout handle multiple paths per prefix

2022-02-25 Thread Claudio Jeker
On Fri, Feb 25, 2022 at 11:55:08AM +0100, Theo Buehler wrote:
> On Fri, Feb 25, 2022 at 11:15:49AM +0100, Claudio Jeker wrote:
> > For add-path send the Adj-RIB-Out needs to handle multiple paths per
> > prefix. The Adj-RIB-Out stores the prefixes on RB trees and so extend
> > the lookup function to include the path_id (which will be path_id_tx).
> > 
> > For now the path_id_tx in the Adj-RIB-Out is forced to 0 since
> > up_generate_updates() is not ready to handle more than one path per prefix.
> > 
> > This mainly adjusts the bgpctl interface and the internals. Some functions
> > are renamed to start with prefix_adjout_ like all other prefix functions
> > operating on the Adj-RIB-Out.
> 
> ok
> 
> > +/*
> > + * Lookup a prefix without considering path_id in the peer prefix_index.
> > + * Returns NULL if not found.
> > + */
> > +struct prefix *
> > +prefix_adjout_lookup(struct rde_peer *peer, struct bgpd_addr *prefix,
> > +int prefixlen)
> > +{
> > +   struct prefix xp, *np;
> > +   struct pt_entry *pte;
> > +
> > +   memset(&xp, 0, sizeof(xp));
> > +   pte = pt_fill(prefix, prefixlen);
> > +   xp.pt = pte;
> 
> I don't understand the benefit of the pte variable, but other lookup
> functions do the same, so I guess it's better to be consistent.

How about this diff that removes this extra variable?
pt_fill() is a nasty function since it returns a pointer to some static
memory.

-- 
:wq Claudio

? obj
Index: rde_prefix.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_prefix.c,v
retrieving revision 1.40
diff -u -p -r1.40 rde_prefix.c
--- rde_prefix.c18 Jan 2021 12:15:36 -  1.40
+++ rde_prefix.c25 Feb 2022 11:39:52 -
@@ -171,8 +171,7 @@ pt_add(struct bgpd_addr *prefix, int pre
 {
struct pt_entry *p = NULL;
 
-   p = pt_fill(prefix, prefixlen);
-   p = pt_alloc(p);
+   p = pt_alloc(pt_fill(prefix, prefixlen));
 
if (RB_INSERT(pt_tree, &pttable, p) != NULL)
fatalx("pt_add: insert failed");
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.226
diff -u -p -r1.226 rde_rib.c
--- rde_rib.c   25 Feb 2022 11:36:54 -  1.226
+++ rde_rib.c   25 Feb 2022 11:39:52 -
@@ -297,11 +297,9 @@ struct rib_entry *
 rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
 {
struct rib_entry xre, *re;
-   struct pt_entry *pte;
 
-   pte = pt_fill(prefix, prefixlen);
memset(&xre, 0, sizeof(xre));
-   xre.prefix = pte;
+   xre.prefix = pt_fill(prefix, prefixlen);
 
re = RB_FIND(rib_tree, rib_tree(rib), &xre);
if (re && re->rib_id != rib->id)
@@ -921,11 +919,9 @@ prefix_adjout_get(struct rde_peer *peer,
 struct bgpd_addr *prefix, int prefixlen)
 {
struct prefix xp;
-   struct pt_entry *pte;
 
memset(&xp, 0, sizeof(xp));
-   pte = pt_fill(prefix, prefixlen);
-   xp.pt = pte;
+   xp.pt = pt_fill(prefix, prefixlen);
xp.path_id_tx = path_id;
 
return RB_FIND(prefix_index, &peer->adj_rib_out, &xp);
@@ -940,11 +936,9 @@ prefix_adjout_lookup(struct rde_peer *pe
 int prefixlen)
 {
struct prefix xp, *np;
-   struct pt_entry *pte;
 
memset(&xp, 0, sizeof(xp));
-   pte = pt_fill(prefix, prefixlen);
-   xp.pt = pte;
+   xp.pt = pt_fill(prefix, prefixlen);
 
np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
if (np != NULL && pt_prefix_cmp(np->pt, xp.pt) != 0)



bgpd more checks for adjout prefixes

2022-02-28 Thread Claudio Jeker
rde_dump_adjout_upcall() and rde_dump_adjout_prefix_upcall() work only
on prefixes that belong to the Adj-RIB-Out so check for the
PREFIX_FLAG_ADJOUT to make sure it is set.

Other code has the same 'assert' in rde_rib.c and I think it makes most
sense to put it here as well.
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.537
diff -u -p -r1.537 rde.c
--- rde.c   26 Feb 2022 11:48:50 -  1.537
+++ rde.c   28 Feb 2022 08:24:17 -
@@ -2574,6 +2574,8 @@ rde_dump_adjout_upcall(struct prefix *p,
 {
struct rde_dump_ctx *ctx = ptr;
 
+   if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
+   fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
return;
rde_dump_filter(p, &ctx->req, 1);
@@ -2585,6 +2587,8 @@ rde_dump_adjout_prefix_upcall(struct pre
struct rde_dump_ctx *ctx = ptr;
struct bgpd_addr addr;
 
+   if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
+   fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
if (p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD))
return;
 



bgpd rework prefix link/unlink

2022-02-28 Thread Claudio Jeker
From the start bgpd had prefix_link and prefix_unlink to link all the
various data objects together to build an actual prefix. Now prefix_move()
tries to be smart and reimplemented prefix_link and prefix_unlink as
inline versions (with minimal differences). Later the prefix_adjout_*
functions were added and again similar code to prefix_link / prefix_unlink
was added.

This diff removes the code duplication and uses prefix_link and
prefix_unlink in all those extra places. This removes a common error when
changing the object layout because one of the many codepaths was often
missed.

To make this happen prefix_link() gained an extra argument for the
pt_entry (the adjout has no rib_entry and so no way to get the pt from
there). It also moves the pftable handling and prefix_evaluate() call to
the currently only caller (prefix_add). Add an extra check in nexthop_link
to skip prefixes in the adj-rib-out since those are not linked up.
With this prefix_link can be used in all other places.

prefix_unlink can be used almost as is; only the pt_unref() needs attention
(or actually setting p->pt to NULL). In the Adj-RIB-Out prefixes are
unlinked but remain on the RB tree to act as withdraw requests. For this
prefix_adjout_update() adds an extra reference to the pt_entry so that
after calling prefix_unlink() the pt_entry is still valid.
prefix_adjout_destroy() takes this last reference before freeing the
prefix again.

The result is a much cleaner handling of prefixes and far fewer places to
introduce new bugs.
-- 
:wq Claudio

Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.227
diff -u -p -r1.227 rde_rib.c
--- rde_rib.c   25 Feb 2022 12:56:12 -  1.227
+++ rde_rib.c   28 Feb 2022 09:10:40 -
@@ -848,9 +848,9 @@ static int  prefix_move(struct prefix *, 
struct nexthop *, uint8_t, uint8_t);
 
 static voidprefix_link(struct prefix *, struct rib_entry *,
-struct rde_peer *, uint32_t, struct rde_aspath *,
-struct rde_community *, struct nexthop *,
-uint8_t, uint8_t);
+struct pt_entry *, struct rde_peer *, uint32_t,
+struct rde_aspath *, struct rde_community *,
+struct nexthop *, uint8_t, uint8_t);
 static voidprefix_unlink(struct prefix *);
 
 static struct prefix   *prefix_alloc(void);
@@ -941,18 +941,21 @@ prefix_adjout_lookup(struct rde_peer *pe
xp.pt = pt_fill(prefix, prefixlen);
 
np = RB_NFIND(prefix_index, &peer->adj_rib_out, &xp);
-   if (np != NULL && pt_prefix_cmp(np->pt, xp.pt) != 0)
+   if (np == NULL || pt_prefix_cmp(np->pt, xp.pt) != 0)
return NULL;
return np;
 }
 
+/*
+ * Return next prefix after a lookup that is actually an update.
+ */
 struct prefix *
 prefix_adjout_next(struct rde_peer *peer, struct prefix *p)
 {
struct prefix *np;
 
np = RB_NEXT(prefix_index, &peer->adj_rib_out, p);
-   if (np == NULL || p->pt != np->pt)
+   if (np == NULL || np->pt != p->pt)
return NULL;
return np;
 }
@@ -1060,7 +1063,14 @@ prefix_add(struct bgpd_addr *prefix, int
re = rib_add(rib, prefix, prefixlen);
 
p = prefix_alloc();
-   prefix_link(p, re, peer, path_id, asp, comm, nexthop, nhflags, vstate);
+   prefix_link(p, re, re->prefix, peer, path_id, asp, comm, nexthop,
+   nhflags, vstate);
+
+   /* add possible pftable reference from aspath */
+   if (asp && asp->pftableid)
+   rde_pftable_add(asp->pftableid, p);
+   /* make route decision */
+   prefix_evaluate(re, p, NULL);
return (1);
 }
 
@@ -1082,18 +1092,8 @@ prefix_move(struct prefix *p, struct rde
 
/* add new prefix node */
np = prefix_alloc();
-   /* add reference to new AS path and communities */
-   np->aspath = path_ref(asp);
-   np->communities = communities_ref(comm);
-   np->peer = peer;
-   np->entry.list.re = prefix_re(p);
-   np->pt = p->pt; /* skip refcnt update since ref is moved */
-   np->path_id = p->path_id;
-   np->validation_state = vstate;
-   np->nhflags = nhflags;
-   np->nexthop = nexthop_ref(nexthop);
-   nexthop_link(np);
-   np->lastchange = getmonotime();
+   prefix_link(np, prefix_re(p), p->pt, peer, p->path_id, asp, comm,
+   nexthop, nhflags, vstate);
 
/* add possible pftable reference from new aspath */
if (asp && asp->pftableid)
@@ -1105,21 +1105,12 @@ prefix_move(struct prefix *p, struct rde
 */
prefix_evaluate(prefix_re(np), np, p);
 
-   /* remove possible pftable reference from old path first */
+   /* remove possible pftable reference from old path */
if (p->aspath && p->aspath->pftableid)
rde_pftable_del(p->aspath->pftableid, p);
 
/* remove old prefix n

Re: bgpd rework prefix link/unlink

2022-02-28 Thread Claudio Jeker
On Mon, Feb 28, 2022 at 02:32:07PM +0100, Theo Buehler wrote:
> On Mon, Feb 28, 2022 at 12:35:09PM +0100, Claudio Jeker wrote:
> > From the start bgpd had prefix_link and prefix_unlink to link all the
> > various data objects together to build an actual prefix. Now prefix_move()
> > tries to be smart and reimplemented prefix_link and prefix_unlink as
> > inline versions (with minimal differences). Later the prefix_adjout_*
> > functions were added and again similar code to prefix_link / prefix_unlink
> > was added.
> > 
> > This diff removes the code duplication and uses prefix_link and
> > prefix_unlink in all those extra places. This removes a common error when
> > changing the object layout because one of the many codepaths was often
> > missed.
> > 
> > To make this happen prefix_link() gained an extra argument for the
> > pt_entry (the adjout has no rib_entry and so no way to get the pt from
> > there). It also moves the pftable handling and prefix_evaluate() call to
> > the currently only caller (prefix_add). Add an extra check in nexthop_link
> > to skip prefixes in the adj-rib-out since those are not linked up.
> > With this prefix_link can be used in all other places.
> > 
> > prefix_unlink can be almost used as is only the pt_unref() needs attention
> > (or actually setting p->pt to NULL). In the Adj-RIB-Out prefixes are
> > unlinked but remain on the RB tree to act as withdraw requests. For this
> > prefix_adjout_update() adds an extra reference to the pt_entry so that
> > after calling prefix_unlink() the pt_entry is still valid.
> > prefix_adjout_destroy() takes this last reference before freeing the
> > prefix again.
> > 
> > The result is a much cleaner handling of prefixes and far fewer places to
> > introduce new bugs.
> 
> It's a tricky diff to review, but it looks all correct. The result is
> indeed much cleaner

Thanks for the review, I know this is tricky stuff especially since there
are so many things struct prefix is tracking.

-- 
:wq Claudio



Re: [PATCH] httpd initialize kv structs on stack

2022-03-02 Thread Claudio Jeker
On Wed, Mar 02, 2022 at 10:15:07AM +0100, Florian Obser wrote:
> On 2022-03-01 10:22 -08, j...@bitminer.ca wrote:
> > Looking at the gz option, I noticed some kv structs allocated on
> > stack but not fully initialized.
> 
> Nice catch.
> 
> >
> > This patch initializes the kv struct to avoid randomly getting
> > KV_FLAG_GLOBBING in kv_find depending on stack contents, whenever
> > a kv struct is allocated.
> >
> > Only kv structs seem to be affected.
> 
> The diff is correct, but a bit inconsistent. in server_fcgi.c you set
> kv_flags every time kv_key is set, but you could just initialize it at
> the beginning of the function. In server_http.c server_log_http() you
> depend on kv_flags being initialized once.
> 
> TBH, I'd like my bikeshed green, how about we remove kv_flags, it's
> unused and already diverged from relayd(8).
> 
> OK?

I like this better. Less code is always good :)
Ok claudio@
 
> If people want to keep the functionality, OK florian for John's diff.
> 
> diff --git httpd.c httpd.c
> index 99687a18939..86d5ea9f96f 100644
> --- httpd.c
> +++ httpd.c
> @@ -1063,22 +1063,7 @@ kv_free(struct kv *kv)
>  struct kv *
>  kv_find(struct kvtree *keys, struct kv *kv)
>  {
> - struct kv   *match;
> - const char  *key;
> -
> - if (kv->kv_flags & KV_FLAG_GLOBBING) {
> - /* Test header key using shell globbing rules */
> - key = kv->kv_key == NULL ? "" : kv->kv_key;
> - RB_FOREACH(match, kvtree, keys) {
> - if (fnmatch(key, match->kv_key, FNM_CASEFOLD) == 0)
> - break;
> - }
> - } else {
> - /* Fast tree-based lookup only works without globbing */
> - match = RB_FIND(kvtree, keys, kv);
> - }
> -
> - return (match);
> + return (RB_FIND(kvtree, keys, kv));
>  }
>  
>  int
> diff --git httpd.h httpd.h
> index 692c5611bb5..1b37d87c6e6 100644
> --- httpd.h
> +++ httpd.h
> @@ -131,10 +131,6 @@ struct kv {
>   char*kv_key;
>   char*kv_value;
>  
> -#define KV_FLAG_INVALID   0x01
> -#define KV_FLAG_GLOBBING  0x02
> - uint8_t  kv_flags;
> -
>   struct kvlistkv_children;
>   struct kv   *kv_parent;
>   TAILQ_ENTRY(kv)  kv_entry;
> diff --git server_fcgi.c server_fcgi.c
> index 6542b1f1739..810b217ebe0 100644
> --- server_fcgi.c
> +++ server_fcgi.c
> @@ -702,9 +702,6 @@ server_fcgi_writeheader(struct client *clt, struct kv 
> *hdr, void *arg)
>   const char  *key;
>   int  ret;
>  
> - if (hdr->kv_flags & KV_FLAG_INVALID)
> - return (0);
> -
>   /* The key might have been updated in the parent */
>   if (hdr->kv_parent != NULL && hdr->kv_parent->kv_key != NULL)
>   key = hdr->kv_parent->kv_key;
> diff --git server_http.c server_http.c
> index d5d31fa03ef..40e202665b5 100644
> --- server_http.c
> +++ server_http.c
> @@ -1647,9 +1647,6 @@ server_writeheader_http(struct client *clt, struct kv 
> *hdr, void *arg)
>   char*ptr;
>   const char  *key;
>  
> - if (hdr->kv_flags & KV_FLAG_INVALID)
> - return (0);
> -
>   /* The key might have been updated in the parent */
>   if (hdr->kv_parent != NULL && hdr->kv_parent->kv_key != NULL)
>   key = hdr->kv_parent->kv_key;
> 
> 
> >
> >
> >
> > John
> >
> > Index: server_fcgi.c
> > ===
> > RCS file: /cvs/src/usr.sbin/httpd/server_fcgi.c,v
> > retrieving revision 1.89
> > diff -u -p -u -p -r1.89 server_fcgi.c
> > --- server_fcgi.c   23 Oct 2021 15:52:44 -  1.89
> > +++ server_fcgi.c   1 Mar 2022 15:35:41 -
> > @@ -629,6 +629,7 @@ server_fcgi_header(struct client *clt, u
> >  
> > /* But then we need a Content-Length unless method is HEAD... */
> > if (desc->http_method != HTTP_METHOD_HEAD) {
> > +   key.kv_flags = 0;
> > key.kv_key = "Content-Length";
> > if ((kv = kv_find(&resp->http_headers, &key)) == NULL) {
> > if (kv_add(&resp->http_headers,
> > @@ -641,6 +642,7 @@ server_fcgi_header(struct client *clt, u
> > /* Send chunked encoding header */
> > if (clt->clt_fcgi.chunked) {
> > /* but only if no Content-Length header is supplied */
> > +   key.kv_flags = 0;
> > key.kv_key = "Content-Length";
> > if ((kv = kv_find(&resp->http_headers, &key)) != NULL) {
> > clt->clt_fcgi.chunked = 0;
> > @@ -679,6 +681,7 @@ server_fcgi_header(struct client *clt, u
> > }
> >  
> > /* Date header is mandatory and should be added as late as possible */
> > +   key.kv_flags = 0;
> > key.kv_key = "Date";
> > if (kv_find(&resp->http_headers, &key) == NULL &&
> > (server_htt

bgpd refactor prefix_adjout_withdraw

2022-03-02 Thread Claudio Jeker
This diff changes prefix_adjout_withdraw() to take a prefix pointer
as argument. So instead of doing the lookup in the withdraw function the
caller may need to do it.

With this one call to up_generate_updates() can be replaced with a direct
call to prefix_adjout_withdraw(). rde_up_flush_upcall() tries to withdraw
every prefix in the Adj-RIB-Out of a peer. The indirection via
up_generate_updates() makes little sense here.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.538
diff -u -p -r1.538 rde.c
--- rde.c   28 Feb 2022 12:52:38 -  1.538
+++ rde.c   2 Mar 2022 11:50:19 -
@@ -3050,9 +3050,7 @@ rde_generate_updates(struct rib *rib, st
 static void
 rde_up_flush_upcall(struct prefix *p, void *ptr)
 {
-   struct rde_peer *peer = ptr;
-
-   up_generate_updates(out_rules, peer, NULL, p);
+   prefix_adjout_withdraw(p);
 }
 
 u_char queue_buf[4096];
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.244
diff -u -p -r1.244 rde.h
--- rde.h   25 Feb 2022 11:36:54 -  1.244
+++ rde.h   2 Mar 2022 11:49:59 -
@@ -596,8 +596,7 @@ int  prefix_withdraw(struct rib *, stru
 voidprefix_add_eor(struct rde_peer *, uint8_t);
 int prefix_adjout_update(struct rde_peer *, struct filterstate *,
struct bgpd_addr *, int, uint8_t);
-int prefix_adjout_withdraw(struct rde_peer *, struct bgpd_addr *,
-   int);
+int prefix_adjout_withdraw(struct prefix *);
 voidprefix_adjout_destroy(struct prefix *p);
 voidprefix_adjout_dump(struct rde_peer *, void *,
void (*)(struct prefix *, void *));
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.230
diff -u -p -r1.230 rde_rib.c
--- rde_rib.c   1 Mar 2022 09:39:36 -   1.230
+++ rde_rib.c   2 Mar 2022 11:49:39 -
@@ -1252,21 +1252,19 @@ prefix_adjout_update(struct rde_peer *pe
  * the prefix in the RIB linked to the peer withdraw list.
  */
 int
-prefix_adjout_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
-int prefixlen)
+prefix_adjout_withdraw(struct prefix *p)
 {
-   struct prefix *p;
-
-   p = prefix_adjout_get(peer, 0, prefix, prefixlen);
-   if (p == NULL)  /* Got a dummy withdrawn request. */
-   return (0);
+   struct rde_peer *peer = prefix_peer(p);
 
if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
 
-   /* already a withdraw, error */
-   if (p->flags & PREFIX_FLAG_WITHDRAW)
-   log_warnx("%s: prefix already withdrawed", __func__);
+   /* already a withdraw, shortcut */
+   if (p->flags & PREFIX_FLAG_WITHDRAW) {
+   p->lastchange = getmonotime();
+   p->flags &= ~PREFIX_FLAG_STALE;
+   return (0);
+   }
/* pending update just got withdrawn */
if (p->flags & PREFIX_FLAG_UPDATE)
RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
@@ -1279,7 +1277,7 @@ prefix_adjout_withdraw(struct rde_peer *
p->lastchange = getmonotime();
 
p->flags |= PREFIX_FLAG_WITHDRAW;
-   if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
+   if (RB_INSERT(prefix_tree, &peer->withdraws[p->pt->aid], p) != NULL)
fatalx("%s: RB tree invariant violated", __func__);
return (1);
 }
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.134
diff -u -p -r1.134 rde_update.c
--- rde_update.c1 Mar 2022 09:53:42 -   1.134
+++ rde_update.c2 Mar 2022 11:49:39 -
@@ -102,6 +102,7 @@ up_generate_updates(struct filter_head *
 {
struct filterstate  state;
struct bgpd_addraddr;
+   struct prefix   *p;
int need_withdraw;
uint8_t prefixlen;
 
@@ -119,7 +120,9 @@ up_generate_updates(struct filter_head *
 again:
if (new == NULL) {
/* withdraw prefix */
-   if (prefix_adjout_withdraw(peer, &addr, prefixlen) == 1) {
+   if ((p = prefix_adjout_get(peer, 0, &addr, prefixlen)) == NULL)
+   return;
+   if (prefix_adjout_withdraw(p) == 1) {
peer->prefix_out_cnt--;
peer->up_wcnt++;
}



Re: bgpd refactor prefix_adjout_withdraw

2022-03-02 Thread Claudio Jeker
On Wed, Mar 02, 2022 at 01:03:04PM +0100, Claudio Jeker wrote:
> This diff changes prefix_adjout_withdraw() to take a prefix pointer
> as argument. So instead of doing the lookup in the withdraw function the
> caller may need to do it.
> 
> With this one call to up_generate_updates() can be replaced with a direct
> call to prefix_adjout_withdraw(). rde_up_flush_upcall() tries to withdraw
> every prefix in the Adj-RIB-Out of a peer. The indirection via
> up_generate_updates() makes little sense here.

Forgot a hunk from the much bigger diff I wrestle with.
There is no need to pass the peer to rde_up_flush_upcall. This also fixes
a minor bug with rde_softreconfig_in_done() which expects arg to be
NULL.
 
-- 
:wq Claudio
 

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.538
diff -u -p -r1.538 rde.c
--- rde.c   28 Feb 2022 12:52:38 -  1.538
+++ rde.c   2 Mar 2022 12:03:49 -
@@ -3050,9 +3050,7 @@ rde_generate_updates(struct rib *rib, st
 static void
 rde_up_flush_upcall(struct prefix *p, void *ptr)
 {
-   struct rde_peer *peer = ptr;
-
-   up_generate_updates(out_rules, peer, NULL, p);
+   prefix_adjout_withdraw(p);
 }
 
 u_char queue_buf[4096];
@@ -3444,7 +3442,7 @@ rde_reload_done(void)
 
if (peer->reconf_rib) {
if (prefix_dump_new(peer, AID_UNSPEC,
-   RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
+   RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
rde_softreconfig_in_done, NULL) == -1)
fatal("%s: prefix_dump_new", __func__);
log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.244
diff -u -p -r1.244 rde.h
--- rde.h   25 Feb 2022 11:36:54 -  1.244
+++ rde.h   2 Mar 2022 11:49:59 -
@@ -596,8 +596,7 @@ int  prefix_withdraw(struct rib *, stru
 voidprefix_add_eor(struct rde_peer *, uint8_t);
 int prefix_adjout_update(struct rde_peer *, struct filterstate *,
struct bgpd_addr *, int, uint8_t);
-int prefix_adjout_withdraw(struct rde_peer *, struct bgpd_addr *,
-   int);
+int prefix_adjout_withdraw(struct prefix *);
 voidprefix_adjout_destroy(struct prefix *p);
 voidprefix_adjout_dump(struct rde_peer *, void *,
void (*)(struct prefix *, void *));
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.230
diff -u -p -r1.230 rde_rib.c
--- rde_rib.c   1 Mar 2022 09:39:36 -   1.230
+++ rde_rib.c   2 Mar 2022 11:49:39 -
@@ -1252,21 +1252,19 @@ prefix_adjout_update(struct rde_peer *pe
  * the prefix in the RIB linked to the peer withdraw list.
  */
 int
-prefix_adjout_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
-int prefixlen)
+prefix_adjout_withdraw(struct prefix *p)
 {
-   struct prefix *p;
-
-   p = prefix_adjout_get(peer, 0, prefix, prefixlen);
-   if (p == NULL)  /* Got a dummy withdrawn request. */
-   return (0);
+   struct rde_peer *peer = prefix_peer(p);
 
if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
 
-   /* already a withdraw, error */
-   if (p->flags & PREFIX_FLAG_WITHDRAW)
-   log_warnx("%s: prefix already withdrawed", __func__);
+   /* already a withdraw, shortcut */
+   if (p->flags & PREFIX_FLAG_WITHDRAW) {
+   p->lastchange = getmonotime();
+   p->flags &= ~PREFIX_FLAG_STALE;
+   return (0);
+   }
/* pending update just got withdrawn */
if (p->flags & PREFIX_FLAG_UPDATE)
RB_REMOVE(prefix_tree, &peer->updates[p->pt->aid], p);
@@ -1279,7 +1277,7 @@ prefix_adjout_withdraw(struct rde_peer *
p->lastchange = getmonotime();
 
p->flags |= PREFIX_FLAG_WITHDRAW;
-   if (RB_INSERT(prefix_tree, &peer->withdraws[prefix->aid], p) != NULL)
+   if (RB_INSERT(prefix_tree, &peer->withdraws[p->pt->aid], p) != NULL)
fatalx("%s: RB tree invariant violated", __func__);
return (1);
 }
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.134
diff -u -p -r1.134 rde_update.c
--- rde_update.c1 Mar 2022 09:53:42 -   1.134
+++ rde_update.c2 Mar 2022 11:49:39 -000

Re: bgpd refactor prefix_adjout_withdraw

2022-03-02 Thread Claudio Jeker
On Wed, Mar 02, 2022 at 01:25:42PM +0100, Theo Buehler wrote:
> On Wed, Mar 02, 2022 at 01:07:09PM +0100, Claudio Jeker wrote:
> > On Wed, Mar 02, 2022 at 01:03:04PM +0100, Claudio Jeker wrote:
> > > This diff changes prefix_adjout_withdraw() to take a prefix pointer
> > > as argument. So instead of doing the lookup in the withdraw function the
> > > caller may need to do it.
> > > 
> > > With this one call to up_generate_updates() can be replaced with a direct
> > > call to prefix_adjout_withdraw(). rde_up_flush_upcall() tries to withdraw
> > > every prefix in the Adj-RIB-Out of a peer. The indirection via
> > > up_generate_updates() makes little sense here.
> > 
> > Forgot a hunk from the much bigger diff I wrestle with.
> > There is no need to pass the peer to rde_up_flush_upcall. This also fixes
> > a minor bug with rde_softreconfig_in_done() which expects arg to be
> > NULL.
> 
> I'm ok with this, though I have one question.
> 
> > @@ -1252,21 +1252,19 @@ prefix_adjout_update(struct rde_peer *pe
> >   * the prefix in the RIB linked to the peer withdraw list.
> >   */
> >  int
> > -prefix_adjout_withdraw(struct rde_peer *peer, struct bgpd_addr *prefix,
> > -int prefixlen)
> > +prefix_adjout_withdraw(struct prefix *p)
> >  {
> > -   struct prefix *p;
> > -
> > -   p = prefix_adjout_get(peer, 0, prefix, prefixlen);
> > -   if (p == NULL)  /* Got a dummy withdrawn request. */
> > -   return (0);
> > +   struct rde_peer *peer = prefix_peer(p);
> >  
> > if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
> > fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
> >  
> > -   /* already a withdraw, error */
> > -   if (p->flags & PREFIX_FLAG_WITHDRAW)
> > -   log_warnx("%s: prefix already withdrawed", __func__);
> > +   /* already a withdraw, shortcut */
> > +   if (p->flags & PREFIX_FLAG_WITHDRAW) {
> > +   p->lastchange = getmonotime();
> > +   p->flags &= ~PREFIX_FLAG_STALE;
> > +   return (0);
> > +   }
> 
> I'm a bit confused by this part. You changed this to an error a few days
> ago, now you change it back to a shortcut. Why?
> 

I forgot to mention that. So during reconfigure rde_up_flush_upcall()
walks over the full adj_rib_out tree. This includes pending withdraws and
we need to properly handle them. I decided it is best to revert this back
to the way it was.

The current code is actually incorrect.  The RB_INSERT() at the end of
prefix_adjout_withdraw() would try to re-insert the prefix that is already in
the tree.

Looking at this again, I realized that the accounting is not quite right.
When prefix_adjout_withdraw is called on a PREFIX_FLAG_DEAD prefix the
prefix_out_cnt is lowered but it should not be. Again this is a very uncommon
case but it is wrong nonetheless.
up_wcnt and up_nlricnt need to be lowered where the corresponding RB_REMOVE()
calls are. The prefix_out_cnt needs to be lowered when prefix_unlink()
is called. A similar change should be done for prefix_adjout_update() but
I will do that as a next step.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.538
diff -u -p -r1.538 rde.c
--- rde.c   28 Feb 2022 12:52:38 -  1.538
+++ rde.c   2 Mar 2022 13:32:38 -
@@ -3050,9 +3050,7 @@ rde_generate_updates(struct rib *rib, st
 static void
 rde_up_flush_upcall(struct prefix *p, void *ptr)
 {
-   struct rde_peer *peer = ptr;
-
-   up_generate_updates(out_rules, peer, NULL, p);
+   prefix_adjout_withdraw(p);
 }
 
 u_char queue_buf[4096];
@@ -3444,7 +3442,7 @@ rde_reload_done(void)
 
if (peer->reconf_rib) {
if (prefix_dump_new(peer, AID_UNSPEC,
-   RDE_RUNNER_ROUNDS, peer, rde_up_flush_upcall,
+   RDE_RUNNER_ROUNDS, NULL, rde_up_flush_upcall,
rde_softreconfig_in_done, NULL) == -1)
fatal("%s: prefix_dump_new", __func__);
log_peer_info(&peer->conf, "flushing Adj-RIB-Out");
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.244
diff -u -p -r1.244 rde.h
--- rde.h   25 Feb 2022 11:36:54 -  1.244
+++ rde.h   2 Mar 2022 13:32:12 -
@@ -596,8 +596,7 @@ int  prefix_withdraw(struct rib *, stru
 voidprefix_add_eor(struct rde_peer *, uint8_t);
 int prefix_adjout_update(struct rde_peer *, struct filterstate *,

bgpd adjust prefix_adjout_update

2022-03-02 Thread Claudio Jeker
This moves the count adjustments into prefix_adjout_update() in a similar
way that was just done for prefix_adjout_withdraw().
Having the counts closer to the actual places where things are
added/removed makes the code a bit easier to grasp. The if cascade in
the prefix_adjout_get != NULL case can be made more similar to the code in
prefix_adjout_withdraw().

-- 
:wq Claudio

Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.246
diff -u -p -r1.246 rde.h
--- rde.h   2 Mar 2022 14:49:25 -   1.246
+++ rde.h   2 Mar 2022 15:03:27 -
@@ -594,7 +594,7 @@ int  prefix_update(struct rib *, struct
 int prefix_withdraw(struct rib *, struct rde_peer *, uint32_t,
struct bgpd_addr *, int);
 voidprefix_add_eor(struct rde_peer *, uint8_t);
-int prefix_adjout_update(struct rde_peer *, struct filterstate *,
+voidprefix_adjout_update(struct rde_peer *, struct filterstate *,
struct bgpd_addr *, int, uint8_t);
 voidprefix_adjout_withdraw(struct prefix *);
 voidprefix_adjout_destroy(struct prefix *);
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.231
diff -u -p -r1.231 rde_rib.c
--- rde_rib.c   2 Mar 2022 14:44:46 -   1.231
+++ rde_rib.c   2 Mar 2022 15:37:10 -
@@ -1163,29 +1163,27 @@ prefix_add_eor(struct rde_peer *peer, ui
 /*
  * Put a prefix from the Adj-RIB-Out onto the update queue.
  */
-int
+void
 prefix_adjout_update(struct rde_peer *peer, struct filterstate *state,
 struct bgpd_addr *prefix, int prefixlen, uint8_t vstate)
 {
-   struct prefix_tree *prefix_head = NULL;
struct rde_aspath *asp;
struct rde_community *comm;
struct prefix *p;
-   int created = 0;
 
if ((p = prefix_adjout_get(peer, 0, prefix, prefixlen)) != NULL) {
if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit",
__func__);
+
/* prefix is already in the Adj-RIB-Out */
if (p->flags & PREFIX_FLAG_WITHDRAW) {
-   created = 1;/* consider this a new entry */
+   RB_REMOVE(prefix_tree,
+   &peer->withdraws[prefix->aid], p);
peer->up_wcnt--;
-   prefix_head = &peer->withdraws[prefix->aid];
-   RB_REMOVE(prefix_tree, prefix_head, p);
-   } else if (p->flags & PREFIX_FLAG_DEAD) {
-   created = 1;/* consider this a new entry */
-   } else {
+   }
+   if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) ==
+   0) {
if (prefix_nhflags(p) == state->nhflags &&
prefix_nexthop(p) == state->nexthop &&
communities_equal(&state->communities,
@@ -1196,22 +1194,23 @@ prefix_adjout_update(struct rde_peer *pe
p->validation_state = vstate;
p->lastchange = getmonotime();
p->flags &= ~PREFIX_FLAG_STALE;
-   return 0;
+   return;
}
 
if (p->flags & PREFIX_FLAG_UPDATE) {
-   /* created = 0 so up_nlricnt is not increased */
-   prefix_head = &peer->updates[prefix->aid];
-   RB_REMOVE(prefix_tree, prefix_head, p);
+   RB_REMOVE(prefix_tree,
+   &peer->updates[prefix->aid], p);
+   peer->up_nlricnt--;
}
/* unlink prefix so it can be relinked below */
prefix_unlink(p);
+   peer->prefix_out_cnt--;
}
+   /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
p->flags &= ~PREFIX_FLAG_MASK;
} else {
p = prefix_alloc();
p->flags |= PREFIX_FLAG_ADJOUT;
-   created = 1;
 
p->pt = pt_get(prefix, prefixlen);
if (p->pt == NULL)
@@ -1237,14 +1236,14 @@ prefix_adjout_update(struct rde_peer *pe
 
prefix_link(p, NULL, p->pt, peer, 0, asp, comm, state->nexthop,
state->nhflags, vstate);
+   peer->prefix_out_cnt++;
 
if (p->flags & PREFIX_FLAG_MASK)
fatalx("%s: bad flags %x", __func__, p->flags);
p->flags |= PREFIX_FLAG_UPDATE;
if (RB_INSERT(prefix_tree, &peer->updates[prefix->aid], p) != NULL)
fatalx("%s: RB tr

bgpd, remove labelid from struct kroute_full

2022-03-03 Thread Claudio Jeker
struct kroute_full is the external representation of kroutes.
It includes the routing label as a string. For some reason there was also
a labelid field but that one is neither used nor needed; the labelid is an
internal id that has no value for any other process.

Just remove the field and the two places where it was set.
-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.420
diff -u -p -r1.420 bgpd.h
--- bgpd.h  23 Feb 2022 11:20:35 -  1.420
+++ bgpd.h  3 Mar 2022 10:08:55 -
@@ -674,7 +674,6 @@ struct kroute_full {
struct bgpd_addrprefix;
struct bgpd_addrnexthop;
charlabel[RTLABEL_LEN];
-   uint16_tlabelid;
uint16_tflags;
u_short ifindex;
uint8_t prefixlen;
Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.242
diff -u -p -r1.242 kroute.c
--- kroute.c6 Feb 2022 09:51:19 -   1.242
+++ kroute.c3 Mar 2022 10:12:39 -
@@ -1625,7 +1625,6 @@ kr_tofull(struct kroute *kr)
kf.nexthop.aid = AID_INET;
kf.nexthop.v4.s_addr = kr->nexthop.s_addr;
strlcpy(kf.label, rtlabel_id2name(kr->labelid), sizeof(kf.label));
-   kf.labelid = kr->labelid;
kf.flags = kr->flags;
kf.ifindex = kr->ifindex;
kf.prefixlen = kr->prefixlen;
@@ -1646,7 +1645,6 @@ kr6_tofull(struct kroute6 *kr6)
kf.nexthop.aid = AID_INET6;
memcpy(&kf.nexthop.v6, &kr6->nexthop, sizeof(struct in6_addr));
strlcpy(kf.label, rtlabel_id2name(kr6->labelid), sizeof(kf.label));
-   kf.labelid = kr6->labelid;
kf.flags = kr6->flags;
kf.ifindex = kr6->ifindex;
kf.prefixlen = kr6->prefixlen;



bgpd refactor rde_send_kroute

2022-03-03 Thread Claudio Jeker
Another day another cleanup.

This diff moves rde_send_kroute() out of rde_generate_update() and back
into prefix_evaluate(). rde_generate_update() should only track the RIBs.
rde_generate_update() is mainly called from prefix_evaluate().
The only other caller is in rde_softreconfig_sync_reeval() to clear a RIB
that switched to F_RIB_NOEVALUATE. In this case the reload code already
flushed the FIB earlier so no need to call rde_send_kroute().

While there, clean up some more code. There is no need for an extra struct
bgpd_addr; just pass kr.prefix to the pt_getaddr() call. Skip setting flags
and rtlabel for IMSG_KROUTE_DELETE, since only the prefix and prefixlen are
needed there. Also cache the rib in prefix_evaluate() to skip some extra
function calls.
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.539
diff -u -p -r1.539 rde.c
--- rde.c   2 Mar 2022 14:44:46 -   1.539
+++ rde.c   3 Mar 2022 11:16:03 -
@@ -2912,9 +2912,7 @@ void
 rde_send_kroute(struct rib *rib, struct prefix *new, struct prefix *old)
 {
struct kroute_full   kr;
-   struct bgpd_addr addr;
struct prefix   *p;
-   struct rde_aspath   *asp;
struct l3vpn*vpn;
enum imsg_type   type;
 
@@ -2934,21 +2932,21 @@ rde_send_kroute(struct rib *rib, struct 
p = new;
}
 
-   asp = prefix_aspath(p);
-   pt_getaddr(p->pt, &addr);
bzero(&kr, sizeof(kr));
-   memcpy(&kr.prefix, &addr, sizeof(kr.prefix));
+   pt_getaddr(p->pt, &kr.prefix);
kr.prefixlen = p->pt->prefixlen;
-   if (prefix_nhflags(p) == NEXTHOP_REJECT)
-   kr.flags |= F_REJECT;
-   if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE)
-   kr.flags |= F_BLACKHOLE;
-   if (type == IMSG_KROUTE_CHANGE)
+   if (type == IMSG_KROUTE_CHANGE) {
+   if (prefix_nhflags(p) == NEXTHOP_REJECT)
+   kr.flags |= F_REJECT;
+   if (prefix_nhflags(p) == NEXTHOP_BLACKHOLE)
+   kr.flags |= F_BLACKHOLE;
memcpy(&kr.nexthop, &prefix_nexthop(p)->true_nexthop,
sizeof(kr.nexthop));
-   strlcpy(kr.label, rtlabel_id2name(asp->rtlabelid), sizeof(kr.label));
+   strlcpy(kr.label, rtlabel_id2name(prefix_aspath(p)->rtlabelid),
+   sizeof(kr.label));
+   }
 
-   switch (addr.aid) {
+   switch (kr.prefix.aid) {
case AID_VPN_IPv4:
case AID_VPN_IPv6:
if (!(rib->flags & F_RIB_LOCAL))
@@ -3026,9 +3024,6 @@ rde_generate_updates(struct rib *rib, st
 */
if (old == NULL && new == NULL)
return;
-
-   if (!eval_all && (rib->flags & F_RIB_NOFIB) == 0)
-   rde_send_kroute(rib, new, old);
 
if (new)
aid = new->pt->aid;
Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.88
diff -u -p -r1.88 rde_decide.c
--- rde_decide.c1 Mar 2022 09:46:22 -   1.88
+++ rde_decide.c3 Mar 2022 11:17:32 -
@@ -454,8 +454,10 @@ void
 prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
 {
struct prefix   *xp;
+   struct rib  *rib;
 
-   if (re_rib(re)->flags & F_RIB_NOEVALUATE) {
+   rib = re_rib(re);
+   if (rib->flags & F_RIB_NOEVALUATE) {
/* decision process is turned off */
if (old != NULL)
LIST_REMOVE(old, entry.list.rib);
@@ -468,7 +470,7 @@ prefix_evaluate(struct rib_entry *re, st
 * active. Clean up now to ensure that the RIB
 * is consistant.
 */
-   rde_generate_updates(re_rib(re), NULL, re->active, 0);
+   rde_generate_updates(rib, NULL, re->active, 0);
re->active = NULL;
}
return;
@@ -494,7 +496,9 @@ prefix_evaluate(struct rib_entry *re, st
 * but remember that xp may be NULL aka ineligible.
 * Additional decision may be made by the called functions.
 */
-   rde_generate_updates(re_rib(re), xp, re->active, 0);
+   rde_generate_updates(rib, xp, re->active, 0);
+   if ((rib->flags & F_RIB_NOFIB) == 0)
+   rde_send_kroute(rib, xp, re->active);
re->active = xp;
return;
}
@@ -506,5 +510,5 @@ prefix_evaluate(struct rib_entry *re, st
 */
if (rde_evaluate_all())
if ((new != NULL && prefix_eligible(new)) || old != NULL)
-   rde_generate_updates(re_rib(re), re->active, NULL, 1);
+   rde_generate_updates(rib, re->active, NULL, 1);
 }


Re: bgpd: plug leaks in rtr_parse_ipv{4,6}_prefix()

2022-03-08 Thread Claudio Jeker
On Tue, Mar 08, 2022 at 01:33:01PM +0100, Theo Buehler wrote:
> If the length checks trigger, roa is leaked.  It makes more sense to me
> to copy the data into ip4 and ip6, check lengths and then calloc rather
> than the current order, so I moved the calloc down a bit. Alternatively,
> we could just add a free(roa) before the return -1 in the length checks.
> 
> Index: rtr_proto.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
> retrieving revision 1.5
> diff -u -p -U4 -r1.5 rtr_proto.c
> --- rtr_proto.c   6 Feb 2022 09:51:19 -   1.5
> +++ rtr_proto.c   8 Mar 2022 12:26:29 -
> @@ -441,23 +441,23 @@ rtr_parse_ipv4_prefix(struct rtr_session
>   return -1;
>   }
>  
>   memcpy(&ip4, buf + sizeof(struct rtr_header), sizeof(ip4));
> -
> - if ((roa = calloc(1, sizeof(*roa))) == NULL) {
> - log_warn("rtr %s: received %s",
> - log_rtr(rs), log_rtr_type(IPV4_PREFIX));
> - rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
> - return -1;
> - }
>   if (ip4.prefixlen > 32 || ip4.maxlen > 32 ||
>   ip4.prefixlen > ip4.maxlen) {
>   log_warnx("rtr: %s: received %s: bad prefixlen / maxlen",
>   log_rtr(rs), log_rtr_type(IPV4_PREFIX));
>   rtr_send_error(rs, CORRUPT_DATA, "bad prefixlen / maxlen",
>   buf, len);
>   return -1;
>   }
> +
> + if ((roa = calloc(1, sizeof(*roa))) == NULL) {
> + log_warn("rtr %s: received %s",
> + log_rtr(rs), log_rtr_type(IPV4_PREFIX));
> + rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
> + return -1;
> + }
>   roa->aid = AID_INET;
>   roa->prefixlen = ip4.prefixlen;
>   roa->maxlen = ip4.maxlen;
>   roa->asnum = ntohl(ip4.asnum);
> @@ -510,21 +510,21 @@ rtr_parse_ipv6_prefix(struct rtr_session
>   return -1;
>   }
>  
>   memcpy(&ip6, buf + sizeof(struct rtr_header), sizeof(ip6));
> -
> - if ((roa = calloc(1, sizeof(*roa))) == NULL) {
> - log_warn("rtr %s: received %s",
> - log_rtr(rs), log_rtr_type(IPV6_PREFIX));
> - rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
> - return -1;
> - }
>   if (ip6.prefixlen > 128 || ip6.maxlen > 128 ||
>   ip6.prefixlen > ip6.maxlen) {
>   log_warnx("rtr: %s: received %s: bad prefixlen / maxlen",
>   log_rtr(rs), log_rtr_type(IPV6_PREFIX));
>   rtr_send_error(rs, CORRUPT_DATA, "bad prefixlen / maxlen",
>   buf, len);
> + return -1;
> + }
> +
> + if ((roa = calloc(1, sizeof(*roa))) == NULL) {
> + log_warn("rtr %s: received %s",
> + log_rtr(rs), log_rtr_type(IPV6_PREFIX));
> + rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
>   return -1;
>   }
>   roa->aid = AID_INET6;
>   roa->prefixlen = ip6.prefixlen;
> 

OK claudio@

-- 
:wq Claudio



bgpd expand macros in strings

2022-03-08 Thread Claudio Jeker
bgpd's parse.y uses a lot of STRING tokens that are then further dissected
in the actual rule. A good example is the communities. If someone wants to
use macros in such arguments, they do not work in all cases, e.g.
large-community $someas:1:2 works but large-community 1:$someas:2 does
not.

Right now macro expansion only happens at the start of a token but not
inside a string token. The following diff changes this. It will also
expand:
large-community $someas:$otheras:42

This only works if the macro name is terminated by a character that is not
allowed in macro names ([^a-zA-Z0-9_]). So while 'descr v4_$name' or
'descr $name-v4' work, 'descr $name_v4' will not, since '_v4' is taken as
part of the macro name. Also, no expansion happens inside quoted strings
like 'descr "$name-v4"'.

-- 
:wq Claudio

Index: parse.y
===
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.422
diff -u -p -r1.422 parse.y
--- parse.y 23 Feb 2022 11:20:35 -  1.422
+++ parse.y 8 Mar 2022 09:25:55 -
@@ -48,6 +48,8 @@
 #include "rde.h"
 #include "log.h"
 
+#define MACRO_NAME_LEN 128
+
 TAILQ_HEAD(files, file) files = TAILQ_HEAD_INITIALIZER(files);
 static struct file {
TAILQ_ENTRY(file)entry;
@@ -74,6 +76,7 @@ intigetc(void);
 int lgetc(int);
 voidlungetc(int);
 int findeol(void);
+int expand_macro(void);
 
 TAILQ_HEAD(symhead, sym)symhead = TAILQ_HEAD_INITIALIZER(symhead);
 struct sym {
@@ -380,17 +383,25 @@ yesno :  STRING   {
 
 varset : STRING '=' string {
char *s = $1;
+   if (strlen($1) >= MACRO_NAME_LEN) {
+   yyerror("macro name to long, max %d characters",
+   MACRO_NAME_LEN - 1);
+   free($1);
+   free($3);
+   YYERROR;
+   }
+   do {
+   if (isalnum((unsigned char)*s) || *s == '_')
+   continue;
+   yyerror("macro name can only contain "
+   "alphanumerics and '_'");
+   free($1);
+   free($3);
+   YYERROR;
+   } while (*++s);
+
if (cmd_opts & BGPD_OPT_VERBOSE)
printf("%s = \"%s\"\n", $1, $3);
-   while (*s++) {
-   if (isspace((unsigned char)*s)) {
-   yyerror("macro name cannot contain "
-   "whitespace");
-   free($1);
-   free($3);
-   YYERROR;
-   }
-   }
if (symset($1, $3, 0) == -1)
fatal("cannot store variable");
free($1);
@@ -3169,10 +3180,46 @@ findeol(void)
 }
 
 int
+expand_macro(void)
+{
+   char buf[MACRO_NAME_LEN];
+   char*p, *val;
+   int  c;
+
+   p = buf;
+   while (1) {
+   if ((c = lgetc('$')) == EOF)
+   return (ERROR);
+   if (p + 1 >= buf + sizeof(buf) - 1) {
+   yyerror("macro name too long");
+   return (ERROR);
+   }
+   if (isalnum(c) || c == '_') {
+   *p++ = c;
+   continue;
+   }
+   *p = '\0';
+   lungetc(c);
+   break;
+   }
+   val = symget(buf);
+   if (val == NULL)
+   yyerror("macro '%s' not defined", buf);
+   p = val + strlen(val) - 1;
+   lungetc(DONE_EXPAND);
+   while (p >= val) {
+   lungetc((unsigned char)*p);
+   p--;
+   }
+   lungetc(START_EXPAND);
+   return (0);
+}
+
+int
 yylex(void)
 {
char buf[8096];
-   char*p, *val;
+   char*p;
int  quotec, next, c;
int  token;
 
@@ -3186,34 +3233,9 @@ top:
while ((c = lgetc(0)) != '\n' && c != EOF)
; /* nothing */
if (c == '$' && !expanding) {
-   while (1) {
-   if ((c = lgetc(0)) == EOF)
-   return (0);
-
-   if (p + 1 >= buf + sizeof(buf) - 1) {
-   yyerror("string too long");
-   return (findeol());
-   }
-   if (isalnum(c) || c == '_') {
-   *p++ = c;
-   continue

Re: ieee80211_stats userland vs. kernel

2022-03-08 Thread Claudio Jeker
On Tue, Mar 08, 2022 at 07:17:33PM +0100, Stefan Sperling wrote:
> On Tue, Mar 08, 2022 at 03:55:48PM +0100, Stefan Sperling wrote:
> > On Mon, Mar 07, 2022 at 03:04:06PM -0700, Theo de Raadt wrote:
> > > > For now, the structs are identical so the code copying data out is
> > > > kept simple.
> > > 
> > > I think this is unwise, and you should write the field-by-field copying
> > > function at the same time, otherwise this is just asking for trouble.
> > > You really cannot wait until an intentional change.
> > 
> > Sure, here it is.
> 
> On second thought, avoiding the malloc/free dance is better.
> The struct is still small enough to fit on the stack.
> 
> diff refs/heads/master refs/heads/statsreq
> blob - 85d795d745eb21fd218056c2f3faf7fbc2c7fe49
> blob + 62938001ed22fc133a0c98e27ef5690c978e21f3
> --- sys/net80211/ieee80211_ioctl.c
> +++ sys/net80211/ieee80211_ioctl.c
> @@ -55,6 +55,8 @@ void ieee80211_node2req(struct ieee80211com *,
>   const struct ieee80211_node *, struct ieee80211_nodereq *);
>  void  ieee80211_req2node(struct ieee80211com *,
>   const struct ieee80211_nodereq *, struct ieee80211_node *);
> +void ieee80211_stats2req(struct ieee80211_statsreq *,
> + struct ieee80211_stats *);
>  
>  void
>  ieee80211_node2req(struct ieee80211com *ic, const struct ieee80211_node *ni,
> @@ -180,6 +182,89 @@ ieee80211_req2node(struct ieee80211com *ic, const stru
>  }
>  
>  void
> +ieee80211_stats2req(struct ieee80211_statsreq *req,
> +struct ieee80211_stats *stats)
> +{
> + memset(req, 0, sizeof(*req));
> +
> + req->is_rx_badversion = stats->is_rx_badversion;
> + req->is_rx_tooshort = stats->is_rx_tooshort;
> + req->is_rx_wrongbss = stats->is_rx_wrongbss;
> + req->is_rx_dup = stats->is_rx_dup;
> + req->is_rx_wrongdir = stats->is_rx_wrongdir;
> + req->is_rx_mcastecho = stats->is_rx_mcastecho;
> + req->is_rx_notassoc = stats->is_rx_notassoc;
> + req->is_rx_nowep = stats->is_rx_nowep;
> + req->is_rx_unencrypted = stats->is_rx_unencrypted;
> + req->is_rx_wepfail = stats->is_rx_wepfail;
> + req->is_rx_decap = stats->is_rx_decap;
> + req->is_rx_mgtdiscard = stats->is_rx_mgtdiscard;
> + req->is_rx_ctl = stats->is_rx_ctl;
> + req->is_rx_rstoobig = stats->is_rx_rstoobig;
> + req->is_rx_elem_missing = stats->is_rx_elem_missing;
> + req->is_rx_elem_toobig = stats->is_rx_elem_toobig;
> + req->is_rx_elem_toosmall = stats->is_rx_elem_toosmall;
> + req->is_rx_badchan = stats->is_rx_badchan;
> + req->is_rx_chanmismatch = stats->is_rx_chanmismatch;
> + req->is_rx_nodealloc = stats->is_rx_nodealloc;
> + req->is_rx_ssidmismatch = stats->is_rx_ssidmismatch;
> + req->is_rx_auth_unsupported = stats->is_rx_auth_unsupported;
> + req->is_rx_auth_fail = stats->is_rx_auth_fail;
> + req->is_rx_assoc_bss = stats->is_rx_assoc_bss;
> + req->is_rx_assoc_notauth = stats->is_rx_assoc_notauth;
> + req->is_rx_assoc_capmismatch = stats->is_rx_assoc_capmismatch;
> + req->is_rx_assoc_norate = stats->is_rx_assoc_norate;
> + req->is_rx_deauth = stats->is_rx_deauth;
> + req->is_rx_disassoc = stats->is_rx_disassoc;
> + req->is_rx_badsubtype = stats->is_rx_badsubtype;
> + req->is_rx_nombuf = stats->is_rx_nombuf;
> + req->is_rx_decryptcrc = stats->is_rx_decryptcrc;
> + req->is_rx_ahdemo_mgt = stats->is_rx_ahdemo_mgt;
> + req->is_rx_bad_auth = stats->is_rx_bad_auth;
> + req->is_tx_nombuf = stats->is_tx_nombuf;
> + req->is_tx_nonode = stats->is_tx_nonode;
> + req->is_tx_unknownmgt = stats->is_tx_unknownmgt;
> + req->is_scan_active = stats->is_scan_active;
> + req->is_scan_passive = stats->is_scan_passive;
> + req->is_node_timeout = stats->is_node_timeout;
> + req->is_crypto_nomem = stats->is_crypto_nomem;
> + req->is_rx_assoc_badrsnie = stats->is_rx_assoc_badrsnie;
> + req->is_rx_unauth = stats->is_rx_unauth;
> + req->is_tx_noauth = stats->is_tx_noauth;
> + req->is_rx_eapol_key = stats->is_rx_eapol_key;
> + req->is_rx_eapol_replay = stats->is_rx_eapol_replay;
> + req->is_rx_eapol_badmic = stats->is_rx_eapol_badmic;
> + req->is_rx_remmicfail = stats->is_rx_remmicfail;
> + req->is_rx_locmicfail = stats->is_rx_locmicfail;
> + req->is_tkip_replays = stats->is_tkip_replays;
> + req->is_tkip_icv_errs = stats->is_tkip_icv_errs;
> + req->is_ccmp_replays = stats->is_ccmp_replays;
> + req->is_ccmp_dec_errs = stats->is_ccmp_dec_errs;
> + req->is_cmac_replays = stats->is_cmac_replays;
> + req->is_cmac_icv_errs = stats->is_cmac_icv_errs;
> + req->is_pbac_errs = stats->is_pbac_errs;
> + req->is_ht_nego_no_mandatory_mcs = stats->is_ht_nego_no_mandatory_mcs;
> + req->is_ht_nego_no_basic_mcs = stats->is_ht_nego_no_basic_mcs;
> + req->is_ht_nego_bad_crypto = stats->is_ht_nego_bad_crypto;
> + req->is_ht_prot_change = stats->is_ht_prot_change;
> + req->is_ht_rx_ba_agreements = stats->is_h

Re: rpki-client: fix wrong conditional

2022-03-10 Thread Claudio Jeker
On Thu, Mar 10, 2022 at 05:33:28PM +0100, Martin Vahlensieck wrote:
> Hi
> 
> This pulls up and adjusts the check if i exceeds the bounds of pfds.
> Before it was technically wrong, as i > NPFDS means that the last
> write (i == NPFDS) was already out of bounds.
 
I see no reason to pull up the check, but the if condition should indeed be
greater or equal. One could consider changing this into an assert(), but I
think I will stick with the errx().

> Martin
> 
> 
> Index: http.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/http.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 http.c
> --- http.c10 Feb 2022 11:10:40 -  1.53
> +++ http.c10 Mar 2022 16:28:48 -
> @@ -1820,6 +1820,10 @@ proc_http(char *bind_addr, int fd)
>   if (timeout == INFTIM || diff < timeout)
>   timeout = diff;
>   }
> +
> + if (i >= NPFDS)
> + errx(1, "too many connections");
> +
>   if (conn->state == STATE_WRITE_DATA)
>   pfds[i].fd = conn->req->outfd;
>   else
> @@ -1828,8 +1832,6 @@ proc_http(char *bind_addr, int fd)
>   pfds[i].events = conn->events;
>   conn->pfd = &pfds[i];
>   i++;
> - if (i > NPFDS)
> - errx(1, "too many connections");
>   }
>   LIST_FOREACH(conn, &idle, entry) {
>   if (conn->idle_time <= now)
> @@ -1840,12 +1842,14 @@ proc_http(char *bind_addr, int fd)
>   if (timeout == INFTIM || diff < timeout)
>   timeout = diff;
>   }
> +
> + if (i >= NPFDS)
> + errx(1, "too many connections");
> +
>   pfds[i].fd = conn->fd;
>   pfds[i].events = POLLIN;
>   conn->pfd = &pfds[i];
>   i++;
> - if (i > NPFDS)
> - errx(1, "too many connections");
>   }
>  
>   if (poll(pfds, i, timeout) == -1) {
> 

-- 
:wq Claudio



Re: rpki-client: fix wrong conditional

2022-03-10 Thread Claudio Jeker
On Thu, Mar 10, 2022 at 05:54:21PM +0100, Theo Buehler wrote:
> On Thu, Mar 10, 2022 at 05:51:46PM +0100, Claudio Jeker wrote:
> > On Thu, Mar 10, 2022 at 05:33:28PM +0100, Martin Vahlensieck wrote:
> > > Hi
> > > 
> > > This pulls up and adjusts the check if i exceeds the bounds of pfds.
> > > Before it was technically wrong, as i > NPFDS means that the last
> > > write (i == NPFDS) was already out of bounds.
> >  
> > I see no reason to pull up the check but the if condition should indeed be
> > greater or equal. One could consider to change this into an assert() but I
> > think I stick with the errx().
> 
> Agreed. ok for the diff that just changes the checks to >=

Actually I was wrong: the check needs to happen at the start of the loop,
not at the end, otherwise it triggers wrongly when the list has exactly the
number of elements needed to fill NPFDS.

for (first element; if not end; next element) {
if (i >= NPFDS)
errx();

/* do work */

i++;
/* here condition would trigger on last element */
}
 
> > 
> > > Martin
> > > 
> > > 
> > > Index: http.c
> > > ===
> > > RCS file: /cvs/src/usr.sbin/rpki-client/http.c,v
> > > retrieving revision 1.53
> > > diff -u -p -r1.53 http.c
> > > --- http.c10 Feb 2022 11:10:40 -  1.53
> > > +++ http.c10 Mar 2022 16:28:48 -
> > > @@ -1820,6 +1820,10 @@ proc_http(char *bind_addr, int fd)
> > >   if (timeout == INFTIM || diff < timeout)
> > >   timeout = diff;
> > >   }
> > > +
> > > + if (i >= NPFDS)
> > > + errx(1, "too many connections");
> > > +
> > >   if (conn->state == STATE_WRITE_DATA)
> > >   pfds[i].fd = conn->req->outfd;
> > >   else
> > > @@ -1828,8 +1832,6 @@ proc_http(char *bind_addr, int fd)
> > >   pfds[i].events = conn->events;
> > >   conn->pfd = &pfds[i];
> > >   i++;
> > > - if (i > NPFDS)
> > > - errx(1, "too many connections");
> > >   }
> > >   LIST_FOREACH(conn, &idle, entry) {
> > >   if (conn->idle_time <= now)
> > > @@ -1840,12 +1842,14 @@ proc_http(char *bind_addr, int fd)
> > >   if (timeout == INFTIM || diff < timeout)
> > >   timeout = diff;
> > >   }
> > > +
> > > + if (i >= NPFDS)
> > > + errx(1, "too many connections");
> > > +
> > >   pfds[i].fd = conn->fd;
> > >   pfds[i].events = POLLIN;
> > >   conn->pfd = &pfds[i];
> > >   i++;
> > > - if (i > NPFDS)
> > > - errx(1, "too many connections");
> > >   }
> > >  
> > >   if (poll(pfds, i, timeout) == -1) {
> > > 
> > 
> > -- 
> > :wq Claudio
> > 
> 

-- 
:wq Claudio



bgpd refactor prefix_adjout_update

2022-03-15 Thread Claudio Jeker
This diff just refactors the code by moving the alloc part up.
It makes the code a bit easier to read and more similar to the other
prefix_adjout functions. Also I plan to pass the struct prefix in
as an argument and do the prefix_adjout_get() in the callee.

-- 
:wq Claudio

Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.232
diff -u -p -r1.232 rde_rib.c
--- rde_rib.c   2 Mar 2022 16:51:43 -   1.232
+++ rde_rib.c   2 Mar 2022 17:39:12 -
@@ -1171,46 +1171,10 @@ prefix_adjout_update(struct rde_peer *pe
struct rde_community *comm;
struct prefix *p;
 
-   if ((p = prefix_adjout_get(peer, 0, prefix, prefixlen)) != NULL) {
-   if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
-   fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit",
-   __func__);
-
-   /* prefix is already in the Adj-RIB-Out */
-   if (p->flags & PREFIX_FLAG_WITHDRAW) {
-   RB_REMOVE(prefix_tree,
-   &peer->withdraws[prefix->aid], p);
-   peer->up_wcnt--;
-   }
-   if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) ==
-   0) {
-   if (prefix_nhflags(p) == state->nhflags &&
-   prefix_nexthop(p) == state->nexthop &&
-   communities_equal(&state->communities,
-   prefix_communities(p)) &&
-   path_compare(&state->aspath, prefix_aspath(p)) ==
-   0) {
-   /* nothing changed */
-   p->validation_state = vstate;
-   p->lastchange = getmonotime();
-   p->flags &= ~PREFIX_FLAG_STALE;
-   return;
-   }
-
-   if (p->flags & PREFIX_FLAG_UPDATE) {
-   RB_REMOVE(prefix_tree,
-   &peer->updates[prefix->aid], p);
-   peer->up_nlricnt--;
-   }
-   /* unlink prefix so it can be relinked below */
-   prefix_unlink(p);
-   peer->prefix_out_cnt--;
-   }
-   /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
-   p->flags &= ~PREFIX_FLAG_MASK;
-   } else {
+   if ((p = prefix_adjout_get(peer, 0, prefix, prefixlen)) == NULL) {
p = prefix_alloc();
-   p->flags |= PREFIX_FLAG_ADJOUT;
+   /* initally mark DEAD so code below is skipped */
+   p->flags |= PREFIX_FLAG_ADJOUT | PREFIX_FLAG_DEAD;
 
p->pt = pt_get(prefix, prefixlen);
if (p->pt == NULL)
@@ -1222,6 +1186,40 @@ prefix_adjout_update(struct rde_peer *pe
if (RB_INSERT(prefix_index, &peer->adj_rib_out, p) != NULL)
fatalx("%s: RB index invariant violated", __func__);
}
+
+   if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
+   fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
+   if ((p->flags & (PREFIX_FLAG_WITHDRAW | PREFIX_FLAG_DEAD)) == 0) {
+   if (prefix_nhflags(p) == state->nhflags &&
+   prefix_nexthop(p) == state->nexthop &&
+   communities_equal(&state->communities,
+   prefix_communities(p)) &&
+   path_compare(&state->aspath, prefix_aspath(p)) ==
+   0) {
+   /* nothing changed */
+   p->validation_state = vstate;
+   p->lastchange = getmonotime();
+   p->flags &= ~PREFIX_FLAG_STALE;
+   return;
+   }
+
+   /* if pending update unhook it before it is unlinked */
+   if (p->flags & PREFIX_FLAG_UPDATE) {
+   RB_REMOVE(prefix_tree, &peer->updates[prefix->aid], p);
+   peer->up_nlricnt--;
+   }
+
+   /* unlink prefix so it can be relinked below */
+   prefix_unlink(p);
+   peer->prefix_out_cnt--;
+   }
+   if (p->flags & PREFIX_FLAG_WITHDRAW) {
+   RB_REMOVE(prefix_tree, &peer->withdraws[prefix->aid], p);
+   peer->up_wcnt--;
+   }
+
+   /* nothing needs to be done for PREFIX_FLAG_DEAD and STALE */
+   p->flags &= ~PREFIX_FLAG_MASK;
 
if ((asp = path_lookup(&state->aspath)) == NULL) {
/* Path not available, create and link a new one. */



bgpd mark EoR prefix with a flag field

2022-03-15 Thread Claudio Jeker
Currently EoR markers use a full byte in struct prefix for what can be done
in a single bit. Use the last free bit of the flags field so that this byte
becomes available again. I already have a need for that byte, which is why
I came up with this change.
 
-- 
:wq Claudio

? obj
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.247
diff -u -p -r1.247 rde.h
--- rde.h   2 Mar 2022 16:51:43 -   1.247
+++ rde.h   15 Mar 2022 14:59:27 -
@@ -333,7 +333,7 @@ struct prefix {
uint32_t path_id_tx;
uint8_t  validation_state;
uint8_t  nhflags;
-   uint8_t  eor;
+   uint8_t  unused;
uint8_t  flags;
 #definePREFIX_FLAG_WITHDRAW0x01/* enqueued on withdraw queue */
 #definePREFIX_FLAG_UPDATE  0x02/* enqueued on update queue */
@@ -341,6 +341,7 @@ struct prefix {
 #definePREFIX_FLAG_STALE   0x08/* stale entry (graceful 
reload) */
 #definePREFIX_FLAG_MASK0x0f/* mask for the prefix types */
 #definePREFIX_FLAG_ADJOUT  0x10/* prefix is in the adj-out rib 
*/
+#definePREFIX_FLAG_EOR 0x20/* prefix is EoR */
 #definePREFIX_NEXTHOP_LINKED   0x40/* prefix is linked onto 
nexthop list */
 #definePREFIX_FLAG_LOCKED  0x80/* locked by rib walker */
 };
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.233
diff -u -p -r1.233 rde_rib.c
--- rde_rib.c   15 Mar 2022 14:39:34 -  1.233
+++ rde_rib.c   15 Mar 2022 14:59:28 -
@@ -875,10 +875,10 @@ prefix_index_cmp(struct prefix *a, struc
 static inline int
 prefix_cmp(struct prefix *a, struct prefix *b)
 {
-   if (a->eor != b->eor)
-   return a->eor - b->eor;
-   /* if EOR marker no need to check the rest also a->eor == b->eor */
-   if (a->eor)
+   if ((a->flags & PREFIX_FLAG_EOR) != (b->flags & PREFIX_FLAG_EOR))
+   return (a->flags & PREFIX_FLAG_EOR) ? 1 : -1;
+   /* if EOR marker no need to check the rest */
+   if (a->flags & PREFIX_FLAG_EOR)
return 0;
 
if (a->aspath != b->aspath)
@@ -1152,8 +1152,7 @@ prefix_add_eor(struct rde_peer *peer, ui
struct prefix *p;
 
p = prefix_alloc();
-   p->flags = PREFIX_FLAG_ADJOUT | PREFIX_FLAG_UPDATE;
-   p->eor = 1;
+   p->flags = PREFIX_FLAG_ADJOUT | PREFIX_FLAG_UPDATE | PREFIX_FLAG_EOR;
if (RB_INSERT(prefix_tree, &peer->updates[aid], p) != NULL)
/* no need to add if EoR marker already present */
prefix_free(p);
@@ -1290,7 +1289,7 @@ prefix_adjout_destroy(struct prefix *p)
if ((p->flags & PREFIX_FLAG_ADJOUT) == 0)
fatalx("%s: prefix without PREFIX_FLAG_ADJOUT hit", __func__);
 
-   if (p->eor) {
+   if (p->flags & PREFIX_FLAG_EOR) {
/* EOR marker is not linked in the index */
prefix_free(p);
return;
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.136
diff -u -p -r1.136 rde_update.c
--- rde_update.c2 Mar 2022 16:51:43 -   1.136
+++ rde_update.c15 Mar 2022 14:59:28 -
@@ -586,7 +586,7 @@ up_is_eor(struct rde_peer *peer, uint8_t
struct prefix *p;
 
p = RB_MIN(prefix_tree, &peer->updates[aid]);
-   if (p != NULL && p->eor) {
+   if (p != NULL && (p->flags & PREFIX_FLAG_EOR)) {
/*
 * Need to remove eor from update tree because
 * prefix_adjout_destroy() can't handle that.
@@ -635,7 +635,7 @@ up_dump_prefix(u_char *buf, int len, str
np->communities != p->communities ||
np->nexthop != p->nexthop ||
np->nhflags != p->nhflags ||
-   np->eor)
+   (np->flags & PREFIX_FLAG_EOR))
done = 1;
 
 



Re: pcb mutex userland

2022-03-17 Thread Claudio Jeker
On Thu, Mar 17, 2022 at 12:47:15AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> My previous attempt to add a mutex to in_pcb.h was reverted as it
> broke userland build.
> 
> Is the correct fix to include sys/mutex.h in every .c file that
> includes netinet/in_pcb.h ?  I made a release with it.
> Or should I include sys/mutex.h in netinet/in_pcb.h ?

I would add sys/mutex.h in netinet/in_pcb.h. We do the same in other
headers like sys/proc.h etc.
 
> ok?
> 
> bluhm
> 
> Index: lib/libkvm/kvm_file2.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/lib/libkvm/kvm_file2.c,v
> retrieving revision 1.57
> diff -u -p -r1.57 kvm_file2.c
> --- lib/libkvm/kvm_file2.c22 Feb 2022 17:35:01 -  1.57
> +++ lib/libkvm/kvm_file2.c16 Mar 2022 16:42:15 -
> @@ -74,6 +74,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> Index: sbin/sysctl/sysctl.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sbin/sysctl/sysctl.c,v
> retrieving revision 1.258
> diff -u -p -r1.258 sysctl.c
> --- sbin/sysctl/sysctl.c  12 Jul 2021 15:09:19 -  1.258
> +++ sbin/sysctl/sysctl.c  15 Mar 2022 09:18:31 -
> @@ -42,9 +42,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +
>  #include 
>  #include 
>  
> Index: usr.bin/netstat/inet.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
> retrieving revision 1.173
> diff -u -p -r1.173 inet.c
> --- usr.bin/netstat/inet.c5 Dec 2021 22:36:19 -   1.173
> +++ usr.bin/netstat/inet.c16 Mar 2022 16:44:32 -
> @@ -34,6 +34,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #define _KERNEL
> @@ -41,6 +42,7 @@
>  #undef _KERNEL
>  
>  #include 
> +
>  #include 
>  #include 
>  #include 
> Index: usr.bin/tcpbench/tcpbench.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/tcpbench/tcpbench.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 tcpbench.c
> --- usr.bin/tcpbench/tcpbench.c   12 Jul 2021 15:09:20 -  1.65
> +++ usr.bin/tcpbench/tcpbench.c   16 Mar 2022 16:44:55 -
> @@ -21,6 +21,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> Index: usr.sbin/trpt/trpt.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/trpt/trpt.c,v
> retrieving revision 1.39
> diff -u -p -r1.39 trpt.c
> --- usr.sbin/trpt/trpt.c  2 Dec 2019 21:47:54 -   1.39
> +++ usr.sbin/trpt/trpt.c  16 Mar 2022 16:45:23 -
> @@ -62,6 +62,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #define PRUREQUESTS
>  #include 
>  #define _KERNEL
> 

-- 
:wq Claudio



Re: pcb mutex userland

2022-03-17 Thread Claudio Jeker
On Thu, Mar 17, 2022 at 02:09:39PM +0100, Mark Kettenis wrote:
> > Date: Thu, 17 Mar 2022 13:24:24 +0100
> > From: Alexander Bluhm 
> > 
> > On Thu, Mar 17, 2022 at 08:24:10AM +0100, Claudio Jeker wrote:
> > > On Thu, Mar 17, 2022 at 12:47:15AM +0100, Alexander Bluhm wrote:
> > > > Hi,
> > > > 
> > > > My previous attempt to add a mutex to in_pcb.h was reverted as it
> > > > broke userland build.
> > > > 
> > > > Is the correct fix to include sys/mutex.h in every .c file that
> > > > includes netinet/in_pcb.h ?  I made a release with it.
> > > > Or should I include sys/mutex.h in netinet/in_pcb.h ?
> > > 
> > > I would add sys/mutex.h in netinet/in_pcb.h. We do the same in other
> > > headers like sys/proc.h etc.
> > 
> > This survived make release.  It is similar to what we do in sys/proc.h
> > as suggested by claudio@ and has more #ifdef _KERNEL to please
> > kettenis@.
> > 
> > ok?
> 
> Sorry, but I don't think it is.  The problem is that "struct mutex"
> changes depending on whether WITNESS is defined.  This means that if
> you include mutex in a data structure exported to userland and you're
> running a kernel with WITNESS enabled, the data structures don't match
> up.
> 
> The reverse is also possible (although much less likely).  Since
> WITNESS is in the global namespace, code might define it and therefore
> accidentally change the data structure and cause a mismatch when
> you're running on a normal kernel.
> 
> I fear the fundamental problem is that we should not expose data
> structures internal to the kernel to userland.  What I don't
> understand though is how that happens.  The sysctl code doesn't seem
> to export "struct inpcb" instances directly, but instead it exports
> selected members through "struct kinfo_file".  So why is "struct
> inpcb" exposed to userland at all?

The netstat -P code uses kvm, and so does -M when reading a core dump.
We probably have similar issues in other tools. At least the -M core and -N
system options are also present in other tools like ps(1), so this issue
is not new.

I am not sure how to fix this issue of struct sizes changing based on kernel
options. For netstat -P it would be possible to add an interface that gives
access to this info using sysctl and struct kinfo_file (but the struct
would have to be extended a fair amount), or maybe add a kinfo_pcb for this
which works like kinfo_file but for pcbs.
 
> > Index: sys/netinet/in_pcb.h
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> > retrieving revision 1.125
> > diff -u -p -r1.125 in_pcb.h
> > --- sys/netinet/in_pcb.h14 Mar 2022 22:38:43 -  1.125
> > +++ sys/netinet/in_pcb.h17 Mar 2022 00:44:54 -
> > @@ -65,6 +65,7 @@
> >  #define _NETINET_IN_PCB_H_
> >  
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > Index: sys/sys/mutex.h
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mutex.h,v
> > retrieving revision 1.18
> > diff -u -p -r1.18 mutex.h
> > --- sys/sys/mutex.h 23 Apr 2019 13:35:12 -  1.18
> > +++ sys/sys/mutex.h 17 Mar 2022 00:44:23 -
> > @@ -48,6 +48,8 @@ struct mutex {
> >  #endif
> >  };
> >  
> > +#ifdef _KERNEL
> > +
> >  /*
> >   * To prevent lock ordering problems with the kernel lock, we need to
> >   * make sure we block all interrupts that can grab the kernel lock.
> > @@ -148,7 +150,7 @@ void_mtx_init_flags(struct mutex *, int
> >  
> >  #endif /* WITNESS */
> >  
> > -#if defined(_KERNEL) && defined(DDB)
> > +#ifdef DDB
> >  
> >  struct db_mutex {
> > struct cpu_info *mtx_owner;
> > @@ -160,6 +162,8 @@ struct db_mutex {
> >  void   db_mtx_enter(struct db_mutex *);
> >  void   db_mtx_leave(struct db_mutex *);
> >  
> > -#endif /* _KERNEL && DDB */
> > +#endif /* DDB */
> > +
> > +#endif /* _KERNEL */
> >  
> >  #endif
> > 
> > 
> 

-- 
:wq Claudio



bgpd, rename flag field

2022-03-21 Thread Claudio Jeker
This diff just renames F_CTL_ACTIVE and F_PREF_ACTIVE to the more correct
F_CTL_BEST and F_PREF_BEST. The flags are used to mark the one best path.

ACTIVE is not the right term here since with ECMP and add-path more than
one route can be active. I will probably add more flags to mark ECMP
prefixes but more changes are needed for that.

-- 
:wq Claudio

Index: usr.sbin/bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.275
diff -u -p -r1.275 bgpctl.c
--- usr.sbin/bgpctl/bgpctl.c6 Feb 2022 09:52:32 -   1.275
+++ usr.sbin/bgpctl/bgpctl.c21 Mar 2022 09:48:13 -
@@ -697,7 +697,7 @@ fmt_flags(uint8_t flags, int sum)
*p++ = 'S';
if (flags & F_PREF_ELIGIBLE)
*p++ = '*';
-   if (flags & F_PREF_ACTIVE)
+   if (flags & F_PREF_BEST)
*p++ = '>';
*p = '\0';
snprintf(buf, sizeof(buf), "%-5s", flagstr);
@@ -711,7 +711,7 @@ fmt_flags(uint8_t flags, int sum)
strlcat(buf, ", stale", sizeof(buf));
if (flags & F_PREF_ELIGIBLE)
strlcat(buf, ", valid", sizeof(buf));
-   if (flags & F_PREF_ACTIVE)
+   if (flags & F_PREF_BEST)
strlcat(buf, ", best", sizeof(buf));
if (flags & F_PREF_ANNOUNCE)
strlcat(buf, ", announced", sizeof(buf));
Index: usr.sbin/bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.13
diff -u -p -r1.13 output_json.c
--- usr.sbin/bgpctl/output_json.c   6 Feb 2022 09:52:32 -   1.13
+++ usr.sbin/bgpctl/output_json.c   21 Mar 2022 09:48:13 -
@@ -875,7 +875,7 @@ json_rib(struct ctl_show_rib *r, u_char 
 
/* flags */
json_do_bool("valid", r->flags & F_PREF_ELIGIBLE);
-   if (r->flags & F_PREF_ACTIVE)
+   if (r->flags & F_PREF_BEST)
json_do_bool("best", 1);
if (r->flags & F_PREF_INTERNAL)
json_do_printf("source", "%s", "internal");
Index: usr.sbin/bgpctl/parser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/parser.c,v
retrieving revision 1.108
diff -u -p -r1.108 parser.c
--- usr.sbin/bgpctl/parser.c6 Feb 2022 09:52:32 -   1.108
+++ usr.sbin/bgpctl/parser.c21 Mar 2022 09:48:13 -
@@ -172,8 +172,8 @@ static const struct token t_show_rib[] =
{ KEYWORD,  "community",NONE,   t_show_community},
{ KEYWORD,  "ext-community", NONE,  t_show_extcommunity},
{ KEYWORD,  "large-community", NONE,t_show_largecommunity},
-   { FLAG, "best", F_CTL_ACTIVE,   t_show_rib},
-   { FLAG, "selected", F_CTL_ACTIVE,   t_show_rib},
+   { FLAG, "best", F_CTL_BEST, t_show_rib},
+   { FLAG, "selected", F_CTL_BEST, t_show_rib},
{ FLAG, "detail",   F_CTL_DETAIL,   t_show_rib},
{ FLAG, "error",F_CTL_INVALID,  t_show_rib},
{ FLAG, "ssv"   ,   F_CTL_SSV,  t_show_rib},
Index: usr.sbin/bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.421
diff -u -p -r1.421 bgpd.h
--- usr.sbin/bgpd/bgpd.h3 Mar 2022 11:19:41 -   1.421
+++ usr.sbin/bgpd/bgpd.h21 Mar 2022 09:48:39 -
@@ -89,7 +89,7 @@
 #defineF_CTL_DETAIL0x1000  /* only set on requests */
 #defineF_CTL_ADJ_IN0x2000  /* only set on requests */
 #defineF_CTL_ADJ_OUT   0x4000  /* only set on requests */
-#defineF_CTL_ACTIVE0x8000
+#defineF_CTL_BEST  0x8000
 #defineF_RTLABEL   0x1
 #defineF_CTL_SSV   0x2 /* only used by bgpctl */
 #defineF_CTL_INVALID   0x4 /* only set on requests */
@@ -790,7 +790,7 @@ struct ctl_neighbor {
 };
 
 #defineF_PREF_ELIGIBLE 0x01
-#defineF_PREF_ACTIVE   0x02
+#defineF_PREF_BEST 0x02
 #defineF_PREF_INTERNAL 0x04
 #defineF_PREF_ANNOUNCE 0x08
 #defineF_PREF_STALE0x10
Index: usr.sbin/bgpd/rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.540
diff -u -p -r1.540 rde.c
--- usr.sbin/bgpd/rde.c 3 Mar 2022 13:06:15 -   1.540
+++ usr.sbin/bgpd/rde.c 21 Mar 2022 09:49:08 -
@@ -2401,7 +2401,7 @@ rde_dump_rib_as(struct prefix *p, struct
rib.flags = 0;
re = prefix_re(p);
if (re != NULL && re->active == p)
-   rib.flags |= F_PREF_ACTIVE;
+

Re: have in_pcbselsrc copy the selected ip to the caller instead of a reference to it

2022-03-21 Thread Claudio Jeker
On Mon, Mar 21, 2022 at 02:17:21PM +1000, David Gwynne wrote:
> in_pcbselsrc has this:
> 
>   ifp = if_get(mopts->imo_ifidx);
>   if (ifp != NULL) {
>   if (ifp->if_rdomain == rtable_l2(rtableid))
>   IFP_TO_IA(ifp, ia);
>   if (ia == NULL) {
>   if_put(ifp);
>   return (EADDRNOTAVAIL);
>   }
> 
>   *insrc = ia->ia_addr.sin_addr;
>   if_put(ifp);
>   return (0);
>   }
> 
> which looks very much like it releases a reference to the interface
> holding the address it's passing back to the caller to use.

This seems indeed to be an issue.
 
> this diff has it copy the address to memory the caller provides instead.
> 
> ok?

I think it makes the code overall a bit simpler.
OK claudio@

In in_pcbselsrc() you could even eliminate laddr and just replace it with
inp->inp_laddr. Or assign to laddr instead of making it a pointer.
 
> Index: in_pcb.c
> ===
> RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.262
> diff -u -p -r1.262 in_pcb.c
> --- in_pcb.c  21 Mar 2022 03:51:09 -  1.262
> +++ in_pcb.c  21 Mar 2022 04:10:24 -
> @@ -476,7 +476,7 @@ in_pcbpickport(u_int16_t *lport, void *l
>  int
>  in_pcbconnect(struct inpcb *inp, struct mbuf *nam)
>  {
> - struct in_addr *ina = NULL;
> + struct in_addr ina;
>   struct sockaddr_in *sin;
>   int error;
>  
> @@ -495,7 +495,7 @@ in_pcbconnect(struct inpcb *inp, struct 
>   return (error);
>  
>   if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
> - *ina, inp->inp_lport, inp->inp_rtableid) != NULL)
> + ina, inp->inp_lport, inp->inp_rtableid) != NULL)
>   return (EADDRINUSE);
>  
>   KASSERT(inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport);
> @@ -506,13 +506,13 @@ in_pcbconnect(struct inpcb *inp, struct 
>   if (error)
>   return (error);
>   if (in_pcbhashlookup(inp->inp_table, sin->sin_addr,
> - sin->sin_port, *ina, inp->inp_lport,
> + sin->sin_port, ina, inp->inp_lport,
>   inp->inp_rtableid) != NULL) {
>   inp->inp_lport = 0;
>   return (EADDRINUSE);
>   }
>   }
> - inp->inp_laddr = *ina;
> + inp->inp_laddr = ina;
>   }
>   inp->inp_faddr = sin->sin_addr;
>   inp->inp_fport = sin->sin_port;
> @@ -870,7 +870,7 @@ in_pcbrtentry(struct inpcb *inp)
>   * an entry to the caller for later use.
>   */
>  int
> -in_pcbselsrc(struct in_addr **insrc, struct sockaddr_in *sin,
> +in_pcbselsrc(struct in_addr *insrc, struct sockaddr_in *sin,
>  struct inpcb *inp)
>  {
>   struct ip_moptions *mopts = inp->inp_moptions;
> @@ -886,9 +886,9 @@ in_pcbselsrc(struct in_addr **insrc, str
>* If the socket(if any) is already bound, use that bound address
>* unless it is INADDR_ANY or INADDR_BROADCAST.
>*/
> - if (laddr && laddr->s_addr != INADDR_ANY &&
> + if (laddr->s_addr != INADDR_ANY &&
>   laddr->s_addr != INADDR_BROADCAST) {
> - *insrc = laddr;
> + *insrc = *laddr;
>   return (0);
>   }
>  
> @@ -911,7 +911,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   return (EADDRNOTAVAIL);
>   }
>  
> - *insrc = &ia->ia_addr.sin_addr;
> + *insrc = ia->ia_addr.sin_addr;
>   if_put(ifp);
>   return (0);
>   }
> @@ -962,7 +962,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   struct ifaddr *ifa;
>   if ((ifa = ifa_ifwithaddr(ip4_source, rtableid)) !=
>   NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> - *insrc = &satosin(ip4_source)->sin_addr;
> + *insrc = satosin(ip4_source)->sin_addr;
>   return (0);
>   }
>   }
> @@ -971,7 +971,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   if (ia == NULL)
>   return (EADDRNOTAVAIL);
>  
> - *insrc = &ia->ia_addr.sin_addr;
> + *insrc = ia->ia_addr.sin_addr;
>   return (0);
>  }
>  
> Index: in_pcb.h
> ===
> RCS file: /cvs/src/sys/netinet/in_pcb.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 in_pcb.h
> --- in_pcb.h  14 Mar 2022 22:38:43 -  1.125
> +++ in_pcb.h  21 Mar 2022 04:10:25 -
> @@ -305,7 +305,7 @@ void   in_setpeeraddr(struct inpcb *, str
>  void  in_setsockaddr(struct i

bgpd reload when rib flags change

2022-03-21 Thread Claudio Jeker
During config reload the RIB may need to be resynced when the
'no evaluate' setting changes.

This changes the code to actually flush the Adj-RIB-Out of affected peers
and then adjust the RIB in a 2nd step. That way there is no need to use
rde_generate_updates() to remove the prefixes one by one in the NOFIB case.

Also fix the loop to reinsert all prefixes to remove the prefix from the
temporary list before calling prefix_evaluate(). Calling prefix_evaluate()
with p as the old prefix is just wrong. It is not on the rib_entry list at
that time.

This can be improved further but since this code path is almost never
needed (changing rib flags happens almost never) it is not urgent.
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.541
diff -u -p -r1.541 rde.c
--- rde.c   21 Mar 2022 10:15:34 -  1.541
+++ rde.c   21 Mar 2022 11:13:34 -
@@ -3469,7 +3469,33 @@ rde_reload_done(void)
rib_free(rib);
break;
case RECONF_RELOAD:
-   rib_update(rib);
+   if (rib_update(rib)) {
+   LIST_FOREACH(peer, &peerlist, peer_l) {
+   /* ignore peerself*/
+   if (peer->conf.id == 0)
+   continue;
+   /* skip peers using a different rib */
+   if (peer->loc_rib_id != rib->id)
+   continue;
+   /* peer rib is already being flushed */
+   if (peer->reconf_rib)
+   continue;
+
+   if (prefix_dump_new(peer, AID_UNSPEC,
+   RDE_RUNNER_ROUNDS, NULL,
+   rde_up_flush_upcall,
+   rde_softreconfig_in_done,
+   NULL) == -1)
+   fatal("%s: prefix_dump_new",
+   __func__);
+
+   log_peer_info(&peer->conf,
+   "flushing Adj-RIB-Out");
+   /* account for the running flush */
+   softreconfig++;
+   }
+   }
+
rib->state = RECONF_KEEP;
/* FALLTHROUGH */
case RECONF_KEEP:
@@ -3717,17 +3743,14 @@ rde_softreconfig_sync_reeval(struct rib_
if (rib->flags & F_RIB_NOEVALUATE) {
/*
 * evaluation process is turned off
-* so remove all prefixes from adj-rib-out
-* also unlink nexthop if it was linked
+* all dependent adj-rib-out were already flushed
+* unlink nexthop if it was linked
 */
LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
if (p->flags & PREFIX_NEXTHOP_LINKED)
nexthop_unlink(p);
}
-   if (re->active) {
-   rde_generate_updates(rib, NULL, re->active, 0);
-   re->active = NULL;
-   }
+   re->active = NULL;
return;
}
 
@@ -3736,11 +3759,18 @@ rde_softreconfig_sync_reeval(struct rib_
prefixes = re->prefix_h;
LIST_INIT(&re->prefix_h);
 
+   /*
+* TODO: this code works but is not optimal. prefix_evaluate()
+* does a lot of extra work in the worst case. Would be better
+* to resort the list once and then call rde_generate_updates()
+* and rde_send_kroute() once.
+*/
LIST_FOREACH_SAFE(p, &prefixes, entry.list.rib, next) {
/* need to re-link the nexthop if not already linked */
+   LIST_REMOVE(p, entry.list.rib);
if ((p->flags & PREFIX_NEXTHOP_LINKED) == 0)
nexthop_link(p);
-   prefix_evaluate(re, p, p);
+   prefix_evaluate(re, p, NULL);
}
 }
 
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.248
diff -u -p -r1.248 rde.h
--- rde.h   15 Mar 2022 16:50:29 -  1.248
+++ rde.h   21 Mar 2022 10:31:17 -
@@ -548,7 +548,7 @@ pt_unref(struct pt_entry *pt)
 extern uint16_trib_size;
 
 struct rib *rib_new(char *, u_int, uint16_t);
-voidrib_update(struct rib *);
+int rib_update(struct rib *);
 struct rib *rib

Re: bgpd reload when rib flags change

2022-03-21 Thread Claudio Jeker
On Mon, Mar 21, 2022 at 01:19:53PM +0100, Theo Buehler wrote:
> On Mon, Mar 21, 2022 at 12:24:33PM +0100, Claudio Jeker wrote:
> > During config reload the RIB may need to be resynced when the
> > 'no evaluate' setting changes.
> > 
> > This changes the code to actually flush the Adj-RIB-Out of affected peers
> > and then adjust the RIB in a 2nd step. That way there is no need to use
> > rde_generate_updates() to remove the prefixes one by one in the NOFIB case.
> > 
> > Also fix the loop to reinsert all prefixes to remove the prefix from the
> > temporary list before calling prefix_evaluate(). Calling prefix_evaluate()
> > with p as the old prefix is just wrong. It is not on the rib_entry list at
> > that time.
> 
> I'm ok with this.
> 
> I noticed that the bit changing prefix_evaluate(re, p, p) to
> prefix_evaluate(re, p, NULL) undoes part of rde.c r1.512 whose commit
> message said
> 
> Doing the LIST_REMOVE() outside of prefix_evalute() is no longer valid.

Yes, I think that was in this case a mistake. This is a rather special
case. The full list is first removed from the rib_entry by copying the
LIST_HEAD and LIST_INIT. So this is no longer a case of a LIST_REMOVE() of
a prefix that is part of evaluated prefixes.
 
> > This can be improved further but since this code path is almost never
> > needed (changing rib flags happens almost never) it is not urgent.
> > -- 
> > :wq Claudio
> > 
> > Index: rde.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.541
> > diff -u -p -r1.541 rde.c
> > --- rde.c   21 Mar 2022 10:15:34 -  1.541
> > +++ rde.c   21 Mar 2022 11:13:34 -
> > @@ -3469,7 +3469,33 @@ rde_reload_done(void)
> > rib_free(rib);
> > break;
> > case RECONF_RELOAD:
> > -   rib_update(rib);
> > +   if (rib_update(rib)) {
> > +   LIST_FOREACH(peer, &peerlist, peer_l) {
> > +   /* ignore peerself*/
> 
> Missing space after peerself

Hah, there is another copy of that further up. Fixed both. 

-- 
:wq Claudio



bgpd, remove active prefix cache in rib_element

2022-03-21 Thread Claudio Jeker
In struct rib_entry bgpd keeps the 'best' or active prefix cached.
Now to support more than one prefix per path (for ECMP and add-path)
I need the ability to access the previous element. The currently used
LIST macros do not support that. So I want to switch that to TAILQ but
the TAILQ head is 2 pointers not 1 and so I need to free a pointer from
struct rib_entry. Also this active cache makes less sense with multiple
paths.

Access to the active prefix is replaced with the better named
prefix_best() which returns the LIST_FIRST entry if that one is eligible.

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.542
diff -u -p -r1.542 rde.c
--- rde.c   21 Mar 2022 13:33:20 -  1.542
+++ rde.c   21 Mar 2022 16:02:16 -
@@ -2400,7 +2400,7 @@ rde_dump_rib_as(struct prefix *p, struct
rib.validation_state = p->validation_state;
rib.flags = 0;
re = prefix_re(p);
-   if (re != NULL && re->active == p)
+   if (re != NULL && prefix_best(re) == p)
rib.flags |= F_PREF_BEST;
if (!peer->conf.ebgp)
rib.flags |= F_PREF_INTERNAL;
@@ -2502,7 +2502,7 @@ rde_dump_filter(struct prefix *p, struct
re = prefix_re(p);
if (asp == NULL)/* skip pending withdraw in Adj-RIB-Out */
return;
-   if ((req->flags & F_CTL_BEST) && re != NULL && re->active != p)
+   if ((req->flags & F_CTL_BEST) && re != NULL && prefix_best(re) != p)
return;
if ((req->flags & F_CTL_INVALID) &&
(asp->flags & F_ATTR_PARSE_ERR) == 0)
@@ -3713,11 +3713,11 @@ rde_softreconfig_in(struct rib_entry *re
 static void
 rde_softreconfig_out(struct rib_entry *re, void *bula)
 {
-   struct prefix   *p = re->active;
+   struct prefix   *p;
struct rde_peer *peer;
uint8_t  aid = re->prefix->aid;
 
-   if (p == NULL)
+   if ((p = prefix_best(re)) == NULL)
/* no valid path for prefix */
return;
 
@@ -3750,12 +3750,10 @@ rde_softreconfig_sync_reeval(struct rib_
if (p->flags & PREFIX_NEXTHOP_LINKED)
nexthop_unlink(p);
}
-   re->active = NULL;
return;
}
 
/* evaluation process is turned on, so evaluate all prefixes again */
-   re->active = NULL;
prefixes = re->prefix_h;
LIST_INIT(&re->prefix_h);
 
@@ -3777,8 +3775,10 @@ rde_softreconfig_sync_reeval(struct rib_
 static void
 rde_softreconfig_sync_fib(struct rib_entry *re, void *bula)
 {
-   if (re->active)
-   rde_send_kroute(re_rib(re), re->active, NULL);
+   struct prefix *p;
+
+   if ((p = prefix_best(re)) != NULL)
+   rde_send_kroute(re_rib(re), p, NULL);
 }
 
 static void
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.249
diff -u -p -r1.249 rde.h
--- rde.h   21 Mar 2022 13:33:20 -  1.249
+++ rde.h   21 Mar 2022 16:07:56 -
@@ -43,7 +43,6 @@ RB_HEAD(rib_tree, rib_entry);
 struct rib_entry {
RB_ENTRY(rib_entry)  rib_e;
struct prefix_list   prefix_h;
-   struct prefix   *active;/* for fast access */
struct pt_entry *prefix;
uint16_t rib_id;
uint16_t lock;
@@ -499,8 +498,10 @@ communities_unref(struct rde_community *
 intcommunity_to_rd(struct community *, uint64_t *);
 
 /* rde_decide.c */
-intprefix_eligible(struct prefix *);
-void   prefix_evaluate(struct rib_entry *, struct prefix *, struct prefix *);
+int prefix_eligible(struct prefix *);
+struct prefix  *prefix_best(struct rib_entry *);
+voidprefix_evaluate(struct rib_entry *, struct prefix *,
+struct prefix *);
 
 /* rde_filter.c */
 void   rde_apply_set(struct filter_set_head *, struct rde_peer *,
Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.89
diff -u -p -r1.89 rde_decide.c
--- rde_decide.c3 Mar 2022 13:06:15 -   1.89
+++ rde_decide.c21 Mar 2022 16:03:05 -
@@ -443,6 +443,23 @@ prefix_eligible(struct prefix *p)
return 1;
 }
 
+struct prefix *
+prefix_best(struct rib_entry *re)
+{
+   struct prefix   *xp;
+   struct rib  *rib;
+
+   rib = re_rib(re);
+   if (rib->flags & F_RIB_NOEVALUATE)
+   /* decision process is turned off */
+   return NULL;
+
+xp = LIST_FIRST(&re->prefix_h);
+   if (xp != NULL && !prefix_eligible(xp))
+   xp = NULL;
+   return xp;
+}
+
 /*
  * Find the correct place to insert the prefix in the prefix list.
  * If

Re: bgpd, remove active prefix cache in rib_element

2022-03-21 Thread Claudio Jeker
On Mon, Mar 21, 2022 at 05:51:36PM +0100, Theo Buehler wrote:
> On Mon, Mar 21, 2022 at 05:16:53PM +0100, Claudio Jeker wrote:
> > In struct rib_entry bgpd keeps the 'best' or active prefix cached.
> > Now to support more than one one prefix per path (for ECMP and add-path)
> > I need the ability to access the previous element. The currently used
> > LIST macros do not support that. So I want to switch that to TAILQ but
> > the TAILQ head is 2 pointers not 1 and so I need to free a pointer from
> > struct rib_entry. Also this active cache makes less sense with multiple
> > paths.
> > 
> > Access to the active prefix is replaced with the better named
> > prefix_best() which returns the LIST_FIRST entry if that one is eligible.
> 
> Missed one bit in prefix_evaluate_all():
> 
> /usr/src/usr.sbin/bgpd/rde_rib.c:1526:17: error: no member named 'active' in 
> 'struct rib_entry'
> p == re->active)
>  ~~  ^

Arrg, this happens when you go too fast. I forgot to extract that bit from
another tree. That just needs to be replaced with a prefix_best(re) call.

> With that fixed, ok
> 
> tiny nit below:
> 
> > Index: rde_decide.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> > retrieving revision 1.89
> > diff -u -p -r1.89 rde_decide.c
> > --- rde_decide.c3 Mar 2022 13:06:15 -   1.89
> > +++ rde_decide.c21 Mar 2022 16:03:05 -
> > @@ -443,6 +443,23 @@ prefix_eligible(struct prefix *p)
> > return 1;
> >  }
> >  
> > +struct prefix *
> > +prefix_best(struct rib_entry *re)
> > +{
> > +   struct prefix   *xp;
> > +   struct rib  *rib;
> > +
> > +   rib = re_rib(re);
> > +   if (rib->flags & F_RIB_NOEVALUATE)
> > +   /* decision process is turned off */
> > +   return NULL;
> > +
> > +xp = LIST_FIRST(&re->prefix_h);
> 
> Use tab instead of 8 spaces for indent

Fixed.
 
> > +   if (xp != NULL && !prefix_eligible(xp))
> > +   xp = NULL;
> > +   return xp;
> > +}
> > +
> 

-- 
:wq Claudio



Re: CMSG_DATA(3): add an example for receiving multiple control messages

2022-03-22 Thread Claudio Jeker
On Tue, Mar 22, 2022 at 02:24:25PM +1000, David Gwynne wrote:
> i couldnt find any good examples of what to do when you wanted to
> receive multiple control messages from a single recvmsg call. the most
> interesting bit is how much space the buffer needs to be.
> 
> if i struggled maybe someone else will too?
> 
> Index: CMSG_DATA.3
> ===
> RCS file: /cvs/src/share/man/man3/CMSG_DATA.3,v
> retrieving revision 1.6
> diff -u -p -r1.6 CMSG_DATA.3
> --- CMSG_DATA.3   3 Apr 2017 19:40:43 -   1.6
> +++ CMSG_DATA.3   22 Mar 2022 04:23:50 -
> @@ -116,7 +116,8 @@ if (sendmsg(s, &msg, 0) == -1)
>   err(1, "sendmsg");
>  .Ed
>  .Pp
> -And an example that receives and decomposes the control message:
> +The following example receives and decomposes a control message
> +containing a file descriptor:
>  .Bd -literal -offset indent
>  struct msghdr msg;
>  struct cmsghdr   *cmsg;
> @@ -146,6 +147,62 @@ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg !=
>   cmsg->cmsg_type == SCM_RIGHTS) {
>   fd = *(int *)CMSG_DATA(cmsg);
>   /* Do something with the descriptor. */
> + }
> +}
> +.Ed
> +.Pp
> +The following example shows how to to receive multiple control
> +messages for a single datagram.
> +In this example a program is receiving an IPv4 UDP datagram
> +using a socket that has been configured to provide the local
> +(destination) IP address and port using
> +.Xr setsockopt 2
> +and the
> +.Dv IP_RECVDSTADDR
> +and
> +.Dv IP_RECVDSTADDR

I guess this should be IP_RECVDSTPORT

> +.Xr ip 4
> +options respectively.
> +.Bd -literal -offset indent
> +struct msghdr msg;
> +struct cmsghdr   *cmsg;
> +union {
> + struct cmsghdr   hdr;
> + unsigned charbuf[CMSG_SPACE(sizeof(struct in_addr)) +
> +  CMSG_SPACE(sizeof(in_port_t))];
> +} cmsgbuf;

Should we add a comment that this union is used for proper alignment of
the buffer?

> +struct sockaddr_in sin;
> +struct iovec io_vector[1];
> +
> +sin.sin_family = AF_INET;
> +
> +io_vector[0].iov_base = &ch;
> +io_vector[0].iov_len = 1;
> +
> +memset(&msg, 0, sizeof(msg));
> +msg.msg_control = &cmsgbuf.buf;
> +msg.msg_controllen = sizeof(cmsgbuf.buf);
> +msg.msg_iov = io_vector;
> +msg.msg_iovlen = 1;
> +
> +if (recvmsg(s, &msg, 0) == -1)
> + err(1, "recvmsg");
> +if ((msg.msg_flags & MSG_TRUNC) || (msg.msg_flags & MSG_CTRUNC))
> + errx(1, "control message truncated");
> +for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
> +cmsg = CMSG_NXTHDR(&msg, cmsg)) {
> + if (cmsg->cmsg_len == CMSG_LEN(sizeof(struct sockaddr_in)) &&
> + cmsg->cmsg_level == IPPROTO_IP &&
> + cmsg->cmsg_type == IP_RECVDSTADDR) {
> + sin.sin_addr = *(struct in_addr *)CMSG_DATA(cmsg);
> + continue;
> + }
> +
> + if (cmsg->cmsg_len == CMSG_LEN(sizeof(in_port_t)) &&
> + cmsg->cmsg_level == IPPROTO_IP &&
> + cmsg->cmsg_type == IP_RECVDSTPORT) {
> + sin.sin_port = *(in_port_t *)CMSG_DATA(cmsg);
> + continue;
>   }

Not sure but IIRC some code uses switch statements here:
if (cmsg->cmsg_level == IPPROTO_IP)
switch (cmsg->cmsg_type) {
case IP_RECVDSTADDR:
if (cmsg->cmsg_len ==
CMSG_LEN(sizeof(struct sockaddr_in))
sin.sin_addr =
*(struct in_addr *)CMSG_DATA(cmsg);
break;

>  }
>  .Ed
> 

The fd passing example in that manpage is missing a few extra checks.
Mainly the fact that more than one fd could be received on 64bit archs.
We should probably update that example using the code from imsg.c

-- 
:wq Claudio



bgpd replace rib_entry prefix list with tailq

2022-03-22 Thread Claudio Jeker
As mentioned I need a TAILQ for the list of prefixes that belong to a rib
entry. Mainly because I need TAILQ_PREV. This diff does this replacement.
I did not change the nexthop LIST of prefixes to a TAILQ. Maybe something
to consider but there is no real need for that.

This is mostly a mechanical change. The only thing that had to change is
rde_softreconfig_sync_reeval() where before the LIST_HEAD was copied which
is not possible with TAILQ (there is a pointer to the TAILQ_HEAD struct
from inside the TAILQ). Instead use TAILQ_CONCAT() to move the queue from
the rib_entry to the local tailq head. I checked the rest of the code and
did not find any other case where the rib_entry prefix_h was copied
around.

I also have a fix for the unittest regress ready for this change. I left
that one out since it is just a trivial mechanical change.
-- 
:wq Claudio

Index: mrt.c
===
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.106
diff -u -p -r1.106 mrt.c
--- mrt.c   6 Feb 2022 09:51:19 -   1.106
+++ mrt.c   21 Mar 2022 17:51:09 -
@@ -620,7 +620,7 @@ mrt_dump_entry_v2_rib(struct rib_entry *
*np = 0;
*app = 0;
 
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
struct nexthop  *nexthop;
struct bgpd_addr*nh;
struct ibuf *tbuf;
@@ -895,7 +895,7 @@ mrt_dump_upcall(struct rib_entry *re, vo
 * dumps the table so we do the same. If only the active route should
 * be dumped p should be set to p = pt->active.
 */
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
if (mrtbuf->type == MRT_TABLE_DUMP)
mrt_dump_entry(mrtbuf, p, mrtbuf->seqnum++,
prefix_peer(p));
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.543
diff -u -p -r1.543 rde.c
--- rde.c   21 Mar 2022 17:35:56 -  1.543
+++ rde.c   21 Mar 2022 17:41:29 -
@@ -2536,7 +2536,7 @@ rde_dump_upcall(struct rib_entry *re, vo
struct rde_dump_ctx *ctx = ptr;
struct prefix   *p;
 
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
rde_dump_filter(p, &ctx->req, 0);
 }
 
@@ -2557,14 +2557,14 @@ rde_dump_prefix_upcall(struct rib_entry 
return;
if (!prefix_compare(&ctx->req.prefix, &addr,
ctx->req.prefixlen))
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
rde_dump_filter(p, &ctx->req, 0);
} else {
if (ctx->req.prefixlen < pt->prefixlen)
return;
if (!prefix_compare(&addr, &ctx->req.prefix,
pt->prefixlen))
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib)
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib)
rde_dump_filter(p, &ctx->req, 0);
}
 }
@@ -3673,7 +3673,7 @@ rde_softreconfig_in(struct rib_entry *re
 
pt = re->prefix;
pt_getaddr(pt, &prefix);
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
asp = prefix_aspath(p);
peer = prefix_peer(p);
 
@@ -3736,7 +3736,7 @@ rde_softreconfig_out(struct rib_entry *r
 static void
 rde_softreconfig_sync_reeval(struct rib_entry *re, void *arg)
 {
-   struct prefix_list  prefixes;
+   struct prefix_queue prefixes = TAILQ_HEAD_INITIALIZER(prefixes);
struct prefix   *p, *next;
struct rib  *rib = arg;
 
@@ -3746,7 +3746,7 @@ rde_softreconfig_sync_reeval(struct rib_
 * all dependent adj-rib-out were already flushed
 * unlink nexthop if it was linked
 */
-   LIST_FOREACH(p, &re->prefix_h, entry.list.rib) {
+   TAILQ_FOREACH(p, &re->prefix_h, entry.list.rib) {
if (p->flags & PREFIX_NEXTHOP_LINKED)
nexthop_unlink(p);
}
@@ -3754,8 +3754,7 @@ rde_softreconfig_sync_reeval(struct rib_
}
 
/* evaluation process is turned on, so evaluate all prefixes again */
-   prefixes = re->prefix_h;
-   LIST_INIT(&re->prefix_h);
+   TAILQ_CONCAT(&prefixes, &re->prefix_h, entry.list.rib);
 
/*
 * TODO: this code works but is not optimal. prefix_evaluate()
@@ -3763,9 +3762,9 @@ rde_softreconfig_sync_reeval(struct rib_
 * to resort the list once and then cal

Re: bgpd replace rib_entry prefix list with tailq

2022-03-22 Thread Claudio Jeker
On Tue, Mar 22, 2022 at 11:40:12AM +0100, Theo Buehler wrote:
> On Tue, Mar 22, 2022 at 10:55:48AM +0100, Claudio Jeker wrote:
> > As mentioned I need a TAILQ for the list of prefixes that belong to a rib
> > entry. Mainly because I need TAILQ_PREV. This diff does this replacement.
> > I did not change the nexthop LIST of prefixes to a TAILQ. Maybe something
> > to consider but there is no real need for that.
> > 
> > This is mostly a mechanical change. The only thing that had to change is
> > rde_softreconfig_sync_reeval() where before the LIST_HEAD was copied which
> > is not possible with TAILQ (there is a pointer to the TAILQ_HEAD struct
> > from inside the TAILQ). Instead use TAILQ_CONCAT() to move the queue from
> > the rib_entry to the local tailq head. I checked the rest of the code and
> > did not find any other case where the rib_entry prefix_h was copied
> > around.
> > 
> > I also have a fix for the unittest regress ready for this change. I left
> > that one out since it is just a trivial mechanical change.
> 
> ok
> 
> One question on the changes in prefix_{insert,remove}():
> 
> > @@ -321,14 +321,8 @@ prefix_insert(struct prefix *new, struct
> >  * MED inversion, take out prefix and
> >  * put it onto redo queue.
> >  */
> > -   LIST_REMOVE(xp, entry.list.rib);
> > -   if (tailp == NULL)
> > -   LIST_INSERT_HEAD(&redo, xp,
> > -   entry.list.rib);
> > -   else
> > -   LIST_INSERT_AFTER(tailp, xp,
> > -   entry.list.rib);
> > -   tailp = xp;
> > +   TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
> > +   TAILQ_INSERT_TAIL(&redo, xp, entry.list.rib);
> 
> The above looks right: you keep appending at the end. In prefix_remove()
> you flipped the insertion to the start. Shouldn't the below be changed
> to use TAILQ_INSERT_TAIL() as well?

Yes, indeed. That should be a TAILQ_INSERT_TAIL().
Not sure if it matters that much but it is better to keep the order on the
redo queue to limit recursion when reinserting them at the end.
 
> > @@ -402,22 +396,16 @@ prefix_remove(struct prefix *old, struct
> >  * possible MED inversion, take out prefix and
> >  * put it onto redo queue.
> >  */
> > -   LIST_REMOVE(xp, entry.list.rib);
> > -   if (tailp == NULL)
> > -   LIST_INSERT_HEAD(&redo, xp,
> > -   entry.list.rib);
> > -   else
> > -   LIST_INSERT_AFTER(tailp, xp,
> > -   entry.list.rib);
> > -   tailp = xp;
> > +   TAILQ_REMOVE(&re->prefix_h, xp, entry.list.rib);
> > +   TAILQ_INSERT_HEAD(&redo, xp, entry.list.rib);
> 

-- 
:wq Claudio



Re: rip sbappendaddr() with inpcb table mutex

2022-03-22 Thread Claudio Jeker
On Tue, Mar 22, 2022 at 02:09:51PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> syzkaller and witness found the same bug I introduced in UDP also
> for Raw IP.  Fix it the same way for rip and rip6.
> 
> https://syzkaller.appspot.com/bug?extid=9bac6356a881dc644265
> https://syzkaller.appspot.com/bug?extid=5b2679ee9be0895d26f9
> 
> ok?

Absolutely not a fan of this "fix". It just moves the landmine that is
about to explode a bit further to the left for the next person to step on.
The moment someone tries to run these input handlers in parallel all of
this will blow up. It is a workaround for now but how will we get out of
this in the future when the code runs in parallel up to the socket
layer?
 
> bluhm
> 
> Index: netinet/raw_ip.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
> retrieving revision 1.125
> diff -u -p -r1.125 raw_ip.c
> --- netinet/raw_ip.c  21 Mar 2022 09:12:34 -  1.125
> +++ netinet/raw_ip.c  22 Mar 2022 12:59:05 -
> @@ -122,9 +122,9 @@ rip_input(struct mbuf **mp, int *offp, i
>  {
>   struct mbuf *m = *mp;
>   struct ip *ip = mtod(m, struct ip *);
> - struct inpcb *inp, *last = NULL;
> + struct inpcb *inp;
> + SIMPLEQ_HEAD(, inpcb) inpcblist;
>   struct in_addr *key;
> - struct mbuf *opts = NULL;
>   struct counters_ref ref;
>   uint64_t *counters;
>  
> @@ -150,7 +150,8 @@ rip_input(struct mbuf **mp, int *offp, i
>   }
>   }
>  #endif
> - NET_ASSERT_LOCKED();
> + NET_ASSERT_WLOCKED();
> + SIMPLEQ_INIT(&inpcblist);
>   mtx_enter(&rawcbtable.inpt_mtx);
>   TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
>   if (inp->inp_socket->so_state & SS_CANTRCVMORE)
> @@ -171,41 +172,16 @@ rip_input(struct mbuf **mp, int *offp, i
>   if (inp->inp_faddr.s_addr &&
>   inp->inp_faddr.s_addr != ip->ip_src.s_addr)
>   continue;
> - if (last) {
> - struct mbuf *n;
>  
> - if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
> - if (last->inp_flags & INP_CONTROLOPTS ||
> - last->inp_socket->so_options & SO_TIMESTAMP)
> - ip_savecontrol(last, &opts, ip, n);
> - if (sbappendaddr(last->inp_socket,
> - &last->inp_socket->so_rcv,
> - sintosa(&ripsrc), n, opts) == 0) {
> - /* should notify about lost packet */
> - m_freem(n);
> - m_freem(opts);
> - } else
> - sorwakeup(last->inp_socket);
> - opts = NULL;
> - }
> - }
> - last = inp;
> + in_pcbref(inp);
> + SIMPLEQ_INSERT_TAIL(&inpcblist, inp, inp_notify);
>   }
>   mtx_leave(&rawcbtable.inpt_mtx);
>  
> - if (last) {
> - if (last->inp_flags & INP_CONTROLOPTS ||
> - last->inp_socket->so_options & SO_TIMESTAMP)
> - ip_savecontrol(last, &opts, ip, m);
> - if (sbappendaddr(last->inp_socket, &last->inp_socket->so_rcv,
> - sintosa(&ripsrc), m, opts) == 0) {
> - m_freem(m);
> - m_freem(opts);
> - } else
> - sorwakeup(last->inp_socket);
> - } else {
> + if (SIMPLEQ_EMPTY(&inpcblist)) {
>   if (ip->ip_p != IPPROTO_ICMP)
> - icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 
> 0);
> + icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
> + 0, 0);
>   else
>   m_freem(m);
>  
> @@ -213,6 +189,30 @@ rip_input(struct mbuf **mp, int *offp, i
>   counters[ips_noproto]++;
>   counters[ips_delivered]--;
>   counters_leave(&ref, ipcounters);
> + }
> +
> + while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
> + struct mbuf *n, *opts = NULL;
> +
> + SIMPLEQ_REMOVE_HEAD(&inpcblist, inp_notify);
> + if (SIMPLEQ_EMPTY(&inpcblist))
> + n = m;
> + else
> + n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
> + if (n != NULL) {
> + if (inp->inp_flags & INP_CONTROLOPTS ||
> + inp->inp_socket->so_options & SO_TIMESTAMP)
> + ip_savecontrol(inp, &opts, ip, n);
> + if (sbappendaddr(inp->inp_socket,
> + &inp->inp_socket->so_rcv,
> + sintosa(&ripsrc), n, opts) == 0) {
> + /* should notify about l

Re: rip sbappendaddr() with inpcb table mutex

2022-03-22 Thread Claudio Jeker
On Tue, Mar 22, 2022 at 02:56:43PM +0100, Alexander Bluhm wrote:
> On Tue, Mar 22, 2022 at 02:25:08PM +0100, Claudio Jeker wrote:
> > On Tue, Mar 22, 2022 at 02:09:51PM +0100, Alexander Bluhm wrote:
> > > Hi,
> > > 
> > > syzkaller and witness found the same bug I introduced in UDP also
> > > for Raw IP.  Fix it the same way for rip and rip6.
> > > 
> > > https://syzkaller.appspot.com/bug?extid=9bac6356a881dc644265
> > > https://syzkaller.appspot.com/bug?extid=5b2679ee9be0895d26f9
> > > 
> > > ok?
> > 
> > > Absolutely not a fan of this "fix". It just moves the landmine that is
> > about to explode a bit further to the left for the next person to step on.
> > The moment someone tries to run these input handlers in parallel all of
> > this will blow up. It is a workaround for now but how will we get out of
> > this in the future when the code runs in parallel up to the socket
> > layer?
> 
> Moving the problem around is the only way to make any progress.
> 
> The bug with MP forwarding I try to solve is this one.
> https://marc.info/?l=openbsd-tech&m=163857624429253&w=2
> 
> After 4 months of ideas that were denied by different people, I
> came to this solution.  Put a mutex around PCB tables.  I think
> this is necessary anyway if we want to reach parallel protocol
> processing.  Unfortunately I missed 3 of 4 places where I hold the
> mutex too long.  I am trying to fix the last 2 of them.
> 
> I do not want to delay parallel forwarding until parallel protocol
> layer is finished.  Then neither will happen.  If someone is working
> on parallel protocols, this code will blow up due to NET_ASSERT_WLOCKED().
> It has to be fixed then.  My change is delaying work to make progress
> elsewhere.  We cannot solve everything in a big commit.
> 
> Do you have a better idea?
 
No but you push this layer into a specific direction and by that make it
harder to fix the PCB tables in a different way. I just see people
changing the NET_ASSERT_WLOCKED() without realizing the actual reason for
the exclusive netlock use.

Looking at the pcb hash problem, I have to wonder if this reinserting of
PCBs is actually resulting in a measurable performance difference. The
hash table should be large enough to keep the number of PCB per bucket low. 

One comment below.
-- 
:wq Claudio

> > > Index: netinet/raw_ip.c
> > > ===
> > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
> > > retrieving revision 1.125
> > > diff -u -p -r1.125 raw_ip.c
> > > --- netinet/raw_ip.c  21 Mar 2022 09:12:34 -  1.125
> > > +++ netinet/raw_ip.c  22 Mar 2022 12:59:05 -
> > > @@ -122,9 +122,9 @@ rip_input(struct mbuf **mp, int *offp, i
> > >  {
> > >   struct mbuf *m = *mp;
> > >   struct ip *ip = mtod(m, struct ip *);
> > > - struct inpcb *inp, *last = NULL;
> > > + struct inpcb *inp;
> > > + SIMPLEQ_HEAD(, inpcb) inpcblist;
> > >   struct in_addr *key;
> > > - struct mbuf *opts = NULL;
> > >   struct counters_ref ref;
> > >   uint64_t *counters;
> > >  
> > > @@ -150,7 +150,8 @@ rip_input(struct mbuf **mp, int *offp, i
> > >   }
> > >   }
> > >  #endif
> > > - NET_ASSERT_LOCKED();
> > > + NET_ASSERT_WLOCKED();
> > > + SIMPLEQ_INIT(&inpcblist);
> > >   mtx_enter(&rawcbtable.inpt_mtx);
> > >   TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
> > >   if (inp->inp_socket->so_state & SS_CANTRCVMORE)
> > > @@ -171,41 +172,16 @@ rip_input(struct mbuf **mp, int *offp, i
> > >   if (inp->inp_faddr.s_addr &&
> > >   inp->inp_faddr.s_addr != ip->ip_src.s_addr)
> > >   continue;
> > > - if (last) {
> > > - struct mbuf *n;
> > >  
> > > - if ((n = m_copym(m, 0, M_COPYALL, M_NOWAIT)) != NULL) {
> > > - if (last->inp_flags & INP_CONTROLOPTS ||
> > > - last->inp_socket->so_options & SO_TIMESTAMP)
> > > - ip_savecontrol(last, &opts, ip, n);
> > > - if (sbappendaddr(last->inp_socket,
> > > - &last->inp_socket->so_rcv,
> > > - sintosa(&ripsrc), n, opts) == 0) {
> > > - /* should notify about lost packet */
> >

Re: rip sbappendaddr() with inpcb table mutex

2022-03-22 Thread Claudio Jeker
On Tue, Mar 22, 2022 at 06:35:47PM +0100, Alexander Bluhm wrote:
> On Tue, Mar 22, 2022 at 04:42:45PM +0100, Claudio Jeker wrote:
> > No but you push this layer into a specific direction and by that make it
> > harder to fix the PCB tables in a different way. I just see people
> > changing the NET_ASSERT_WLOCKED() without realizing the actual reason for
> > the exclusive netlock use.
> 
> Of course MP for PCB could be implemented differently.  SRP, SMR,
> whatever.  But mutex is easy and we should start with that.  As we
> currently run only on one CPU, it does not matter.

Mutexes are easy but they also tend to cause deadlocks and lock order
issues when used in more complex situations. It makes sense to use them for
PCBs but maybe something else is needed for the PCB lookup tables. The
locking dance done in FreeBSD in that area of code is rather complicated.
 
> There is much more to be done like ref counting and protecting the
> PCB fields.  But I want to go in small steps.  This NET_ASSERT_WLOCKED()
> beside SIMPLEQ_INIT() makes it quite obvious where the next unlocking
> problem is.  Look in netinet/ip_ipsp.c tdb_walk(), there is another
> one.  When they will be only left in the slow path a lot is gained.
> And if not, we have to fix them step by step.

Agreed. There is a lot of work to be done and not enough hands to actually
work on them.
 
> > Looking at the pcb hash problem, I have to wonder if this reinserting of
> > PCBs is actually resulting in a measurable performance difference. The
> > hash table should be large enough to keep the number of PCB per bucket low. 
> 
> The reinsertion is done for PCB notify, UDP multicast, Raw IPv4 and
> IPv6.  I don't have benchmarks for these cases and I doubt that
> others will feel much difference there.
> 
> There is one thing that might make things slower.  The in_pcbref(inp)
> in_pcbunref(inp) is not strictly necessary, we have exclusive net
> lock.  But I put it there so we will not forget it when unlocking.
> Maybe it costs a bit of performance, but who cares about multicast
> and rip.

Oh, I was talking about the mail you referenced where in_pcbhashlookup()
does this:
if (inp != LIST_FIRST(head)) {
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}

I highly doubt that this improves throughput in most cases (even with e.g.
100k active connections).
 
> I have measured the PCB mutex diff.  But as usual benchmarks have
> to be explained.
> 
> http://bluhm.genua.de/perform/results/2022-03-10T17%3A19%3A00Z/perform.html
> 
> Left column is baseline, middle column is a mistake, where I only
> applied the IPv4 part.  Right column is the full PCB diff, but
> without UDP Multicast and Raw IP queuing, which I missed before.
> 
> In TCP Performance graph (IPv4 only), the right column looks slow.
> But the code difference between middle and right only affects IPv6.
> How can that be?  The answer is in this row:
> 
> kernel name list  +53 -52 +40054 -40054
> 
> Mapping the kernel object files to different pages affects throughput
> a lot, more than most diffs.  So I sort and align them and compare
> the nm /bsd output.  When you click on it you see how 4 symbol
> addresses move around.
> 
> So the more or less correct numbers are in the middle column but
> only for IPv4.
> 
> Look at the kstack output of one TCP benchmark.
> 
> http://bluhm.genua.de/perform/results/2022-03-10T17%3A19%3A00Z/patch-sys-pcbtable-mtx.0/btrace/iperf3_-c10.3.45.35_-w1m_-t10_-R-btrace-kstack.0.svg
> 
> Search for mtx in the right top field.  Then you see mutex contention.
> They are not in PCB lookup as the pf state links to the socket.
> 
> When receiving short UDP packets you can see the affected code:
> 
> http://bluhm.genua.de/perform/results/2022-03-10T17%3A19%3A00Z/patch-sys-pcbtable-mtx.0/btrace/udpbench_-l36_-t10_-r_ot15_recv_10.3.45.34-btrace-kstack.0.svg
> 
> We are 4.3% in PCB lookup.  And in that part you find 10% in mutex.
> 
> Compare it to the original code, this mutex is not there:
> 
> http://bluhm.genua.de/perform/results/2022-03-10T17%3A19%3A00Z/2022-03-10T00%3A00%3A00Z/btrace/udpbench_-l36_-t10_-r_ot15_recv_10.3.45.34-btrace-kstack.0.svg
> 
> But UDP throughput numbers do not change.
 
I do not expect this diff to make a big difference for performance and
your analysis shows this. To get forward we need to accept that some
benchmarks will suffer before they get better again. A lot of code needs
to be changed and not all of it can happen in one go.

> > One comment below.
> > At least the one bit that can fail can be moved outside of this loop:
> 
> This is a very good idea

Re: Security support status of xnf(4) and xbf(4)

2022-03-29 Thread Claudio Jeker
On Mon, Mar 28, 2022 at 04:38:33PM -0400, Demi Marie Obenour wrote:
> On 3/28/22 10:39, Mark Kettenis wrote:
> >> Date: Mon, 28 Mar 2022 09:51:22 -0400
> >> From: Demi Marie Obenour 
> >>
> >> On 3/27/22 21:45, Damien Miller wrote:
> >>> On Fri, 25 Mar 2022, Demi Marie Obenour wrote:
> >>>
>  Linux’s netfront and blkfront drivers recently had a security
>  vulnerability (XSA-396) that allowed a malicious backend to potentially
>  compromise them.  In follow-up audits, I found that OpenBSD’s xnf(4)
>  currently trusts the backend domain.  I reported this privately to Theo
>  de Raadt, who indicated that OpenBSD does not consider this to be a
>  security concern.
> 
>  This is obviously a valid position for the OpenBSD project to take, but
>  it is surprising to some (such as myself) from the broader Xen
>  ecosystem.  Standard practice in the Xen world is that bugs in frontends
>  that allow a malicious backend to cause mischief *are* considered
>  security bugs unless there is explicit documentation to the contrary.
>  As such, I believe this deserves to be noted in xnf(4) and xbf(4)’s man
>  pages.  If the OpenBSD project agrees, I am willing to write a patch,
>  but I have no experience with mandoc so it might take a few tries.
> >>>
> >>> Hang on, what is a "malicious backend" in this context? Is it something
> >>> other than the Xen Hypervisor? If not, then it seems not to be a useful
> >>> attack model, as the hypervisor typically has near-complete access to
> >>> guests' memory and CPU state.
> >>
> >> The backend can run in any Xen VM.  It often runs in dom0, but it
> >> is not required to, and in Qubes OS the network backend never runs
> >> in dom0.  Unless it runs in dom0, it has no access to frontend memory,
> >> except for memory the frontend has explicitly given it access to via
> >> grant tables.
> > 
> > So this is somewhat similar to the situation on sun4v (Sun's
> > virtualization of the SPARC architecture).  When writing the vnet(4)
> > and vdsk(4) drivers for OpenBSD, I did consider the implications of
> > those drivers talking to a "malicious" domain.  The SPARC hypervisor
> > implements a concept similar to grant tables.  It is fairly obvious
> > that any memory you grant access to should be considered insecure.
> > This means that you either have to make a copy of the data or revoke
> > access to the shared memory through some sort of Hypervisor call that
> > implements a synchronization point of some sorts.  Otherwise you end
> > up with TOCTOU issues all over the place.  But this obviously has
> > significant performance consequences.  For vnet(4) I decided that an
> > extra copy was worth doing and the only reasonable way of doing things
> > given how OpenBSD's mbuf layer works.  But for vdsk(4) I decided to
> > trust the other domain as there is no way to prevent it from feeding
> > you compromised data.  Full disk encryption doesn't really solve the
> > problem unless you have a way to securely verify the bootloader.
> 
> In Qubes OS, xbf(4) devices are configurable.  While all of them are
> provided by dom0 (which is trusted) by default, it is possible to
> attach devices that are *not* provided by dom0, and these devices
> should not be trusted.
> 
> > Personally I think it might be beneficial for us to turn xnf(4) into
> > what we colloquially call a "bcopy" network driver.  But folks who
> > actually use xen may find the performance impact of doing this
> > unacceptable and decide to trust the backend instead.
> 
> You actually don’t have to do that.  The Xen network protocol
> requires the backend to drop access to the buffer before giving it
> to the frontend, so the frontend only needs to ensure that it cannot
> regain access.  This will fail if the backend still has access, but
> that is a bug in the backend, in which case you should shut down the
> interface.  So there should not be any significant performance impact.
> 
> If you are curious about how Linux does this, you can look at
> drivers/xen/grant-table.c, drivers/net/xen-netfront.c, and
> drivers/block/xen-blkfront.c from the Linux source.  They are
> dual licensed GPL/MIT so there should not be licensing issues there.
> Be sure to use a version at or after “xen/netfront: react properly to
> failing gnttab_end_foreign_access_ref()” and the other XSA-396 patches.

So how does xen manage to limit access to less than a page size?
The hardware on x86 does not give you byte precise mappings for granting
memory.
An mbuf is 256 bytes and of those 256 bytes less than that is used for
data. Still for dma the full 4k page needs to be granted to the host.
The only way this can be done is by memcpy all data into individual pages.
The same is true for the most common mbuf cluster size of 2k.
So yes, this will be a bcopy ethernet driver and by that will be on the
same level of crappiness as bce(4) and old old old realtek.

If you can't trust the host don't run your vm on that 

refactor bgpd up_generate_updates()

2022-03-30 Thread Claudio Jeker
Change the code to use less goto and instead use a while loop.
I think the result is easier to understand.

OK?
-- 
:wq Claudio

Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.138
diff -u -p -r1.138 rde_update.c
--- rde_update.c22 Mar 2022 10:53:08 -  1.138
+++ rde_update.c30 Mar 2022 09:42:37 -
@@ -117,12 +117,7 @@ up_generate_updates(struct filter_head *
prefixlen = new->pt->prefixlen;
}
 
-again:
-   if (new == NULL) {
-   /* withdraw prefix */
-   if ((p = prefix_adjout_get(peer, 0, &addr, prefixlen)) != NULL)
-   prefix_adjout_withdraw(p);
-   } else {
+   while (new != NULL) {
need_withdraw = 0;
/*
 * up_test_update() needs to run before the output filters
@@ -144,7 +139,7 @@ again:
if (need_withdraw &&
!(peer->flags & PEERFLAG_EVALUATE_ALL)) {
new = NULL;
-   goto again;
+   continue;
}
 
rde_filterstate_prep(&state, prefix_aspath(new),
@@ -159,14 +154,17 @@ again:
new = NULL;
if (new != NULL && !prefix_eligible(new))
new = NULL;
-   goto again;
+   continue;
}
 
+   /* check if this was actually a withdraw */
if (need_withdraw) {
new = NULL;
-   goto again;
+   continue;
}
 
+   /* from here on we know this is an update */
+
up_prep_adjout(peer, &state, addr.aid);
prefix_adjout_update(peer, &state, &addr,
new->pt->prefixlen, prefix_vstate(new));
@@ -181,7 +179,13 @@ again:
rde_update_err(peer, ERR_CEASE,
ERR_CEASE_MAX_SENT_PREFIX, NULL, 0);
}
+
+   return;
}
+
+   /* withdraw prefix */
+   if ((p = prefix_adjout_get(peer, 0, &addr, prefixlen)) != NULL)
+   prefix_adjout_withdraw(p);
 }
 
 struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);



Re: refactor bgpd up_generate_updates()

2022-03-30 Thread Claudio Jeker
On Wed, Mar 30, 2022 at 03:10:58PM +0200, Theo Buehler wrote:
> On Wed, Mar 30, 2022 at 02:38:54PM +0200, Claudio Jeker wrote:
> > Change the code to use less goto and instead use a while loop.
> > I think the result is easier to understand.
> 
> Yes this is clearer and preserves the current logic, so I'm ok with it.
> 
> Here's an alternative approach: unless I'm missing something, the only
> case that actually redoes the while loop and cares about new is if the
> TAILQ_NEXT() returns an eligible prefix, so I think the below diff is
> equivalent to yours (regress is still happy). I also removed various
> new = NULL since nothing looks at new afterward.

Agreed, this is even better. OK claudio@
 
> Index: rde_update.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> retrieving revision 1.138
> diff -u -p -r1.138 rde_update.c
> --- rde_update.c  22 Mar 2022 10:53:08 -  1.138
> +++ rde_update.c  30 Mar 2022 12:54:02 -
> @@ -117,12 +117,7 @@ up_generate_updates(struct filter_head *
>   prefixlen = new->pt->prefixlen;
>   }
>  
> -again:
> - if (new == NULL) {
> - /* withdraw prefix */
> - if ((p = prefix_adjout_get(peer, 0, &addr, prefixlen)) != NULL)
> - prefix_adjout_withdraw(p);
> - } else {
> + while (new != NULL) {
>   need_withdraw = 0;
>   /*
>* up_test_update() needs to run before the output filters
> @@ -142,10 +137,8 @@ again:
>* skip the filters.
>*/
>   if (need_withdraw &&
> - !(peer->flags & PEERFLAG_EVALUATE_ALL)) {
> - new = NULL;
> - goto again;
> - }
> + !(peer->flags & PEERFLAG_EVALUATE_ALL))
> + break;
>  
>   rde_filterstate_prep(&state, prefix_aspath(new),
>   prefix_communities(new), prefix_nexthop(new),
> @@ -153,19 +146,19 @@ again:
>   if (rde_filter(rules, peer, prefix_peer(new), &addr,
>   prefixlen, prefix_vstate(new), &state) == ACTION_DENY) {
>   rde_filterstate_clean(&state);
> - if (peer->flags & PEERFLAG_EVALUATE_ALL)
> + if (peer->flags & PEERFLAG_EVALUATE_ALL) {
>   new = TAILQ_NEXT(new, entry.list.rib);
> - else
> - new = NULL;
> - if (new != NULL && !prefix_eligible(new))
> - new = NULL;
> - goto again;
> + if (new != NULL && prefix_eligible(new))
> + continue;
> + }
> + break;
>   }
>  
> - if (need_withdraw) {
> - new = NULL;
> - goto again;
> - }
> + /* check if this was actually a withdraw */
> + if (need_withdraw)
> + break;
> +
> + /* from here on we know this is an update */
>  
>   up_prep_adjout(peer, &state, addr.aid);
>   prefix_adjout_update(peer, &state, &addr,
> @@ -181,7 +174,13 @@ again:
>   rde_update_err(peer, ERR_CEASE,
>   ERR_CEASE_MAX_SENT_PREFIX, NULL, 0);
>   }
> +
> + return;
>   }
> +
> + /* withdraw prefix */
> + if ((p = prefix_adjout_get(peer, 0, &addr, prefixlen)) != NULL)
> + prefix_adjout_withdraw(p);
>  }
>  
>  struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
> 

-- 
:wq Claudio



rpki-client cert.c refactor

2022-04-01 Thread Claudio Jeker
I would like to get rid of the ta flag on cert_parse_inner() and only do
the basic cert parse bits there. Then cert_parse() and ta_parse() do the
other bits.

This moves the easy checks to the right place.
-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.56
diff -u -p -r1.56 cert.c
--- cert.c  4 Feb 2022 16:50:49 -   1.56
+++ cert.c  1 Apr 2022 12:20:31 -
@@ -1171,11 +1171,6 @@ cert_parse_inner(const char *fn, const u
   p.fn);
goto out;
}
-   if (ta) {
-   warnx("%s: BGPsec cert can not be a trust anchor",
-  p.fn);
-   goto out;
-   }
break;
default:
warnx("%s: x509_get_purpose failed in %s", p.fn, __func__);
@@ -1187,38 +1182,6 @@ cert_parse_inner(const char *fn, const u
goto out;
}
 
-   if (ta && p.res->aki != NULL && strcmp(p.res->aki, p.res->ski)) {
-   warnx("%s: RFC 6487 section 8.4.2: "
-   "trust anchor AKI, if specified, must match SKI", p.fn);
-   goto out;
-   }
-
-   if (!ta && p.res->aki == NULL) {
-   warnx("%s: RFC 6487 section 8.4.2: "
-   "non-trust anchor missing AKI", p.fn);
-   goto out;
-   } else if (!ta && strcmp(p.res->aki, p.res->ski) == 0) {
-   warnx("%s: RFC 6487 section 8.4.2: "
-   "non-trust anchor AKI may not match SKI", p.fn);
-   goto out;
-   }
-
-   if (!ta && p.res->aia == NULL) {
-   warnx("%s: RFC 6487 section 8.4.7: "
-   "non-trust anchor missing AIA", p.fn);
-   goto out;
-   } else if (ta && p.res->aia != NULL) {
-   warnx("%s: RFC 6487 section 8.4.7: "
-   "trust anchor must not have AIA", p.fn);
-   goto out;
-   }
-
-   if (ta && p.res->crl != NULL) {
-   warnx("%s: RFC 6487 section 8.4.2: "
-   "trust anchor may not specify CRL resource", p.fn);
-   goto out;
-   }
-
p.res->x509 = x;
 
rc = 1;
@@ -1233,7 +1196,31 @@ out:
 struct cert *
 cert_parse(const char *fn, const unsigned char *der, size_t len)
 {
-   return cert_parse_inner(fn, der, len, 0);
+   struct cert *p;
+
+   if ((p = cert_parse_inner(fn, der, len, 0)) == NULL)
+   return NULL;
+
+   if (p->aki == NULL) {
+   warnx("%s: RFC 6487 section 8.4.2: "
+   "non-trust anchor missing AKI", fn);
+   goto badcert;
+   }
+   if (strcmp(p->aki, p->ski) == 0) {
+   warnx("%s: RFC 6487 section 8.4.2: "
+   "non-trust anchor AKI may not match SKI", fn);
+   goto badcert;
+   }
+   if (p->aia == NULL) {
+   warnx("%s: RFC 6487 section 8.4.7: "
+   "non-trust anchor missing AIA", fn);
+   goto badcert;
+   }
+   return p;
+
+badcert:
+   cert_free(p);
+   return NULL;
 }
 
 struct cert *
@@ -1243,7 +1230,6 @@ ta_parse(const char *fn, const unsigned 
ASN1_TIME   *notBefore, *notAfter;
EVP_PKEY*pk = NULL, *opk = NULL;
struct cert *p;
-   int  rc = 0;
 
if ((p = cert_parse_inner(fn, der, len, 1)) == NULL)
return NULL;
@@ -1279,17 +1265,33 @@ ta_parse(const char *fn, const unsigned 
warnx("%s: certificate has expired", fn);
goto badcert;
}
+   if (p->aki != NULL && strcmp(p->aki, p->ski)) {
+   warnx("%s: RFC 6487 section 8.4.2: "
+   "trust anchor AKI, if specified, must match SKI", fn);
+   goto badcert;
+   }
+   if (p->aia != NULL) {
+   warnx("%s: RFC 6487 section 8.4.7: "
+   "trust anchor must not have AIA", fn);
+   goto badcert;
+   }
+   if (p->crl != NULL) {
+   warnx("%s: RFC 6487 section 8.4.2: "
+   "trust anchor may not specify CRL resource", fn);
+   goto badcert;
+   }
+   if (p->purpose == CERT_PURPOSE_BGPSEC_ROUTER) {
+   warnx("%s: BGPsec cert can not be a trust anchor", fn);
+   goto badcert;
+   }
 
-   rc = 1;
+   EVP_PKEY_free(pk);
+   return p;
 
 badcert:
EVP_PKEY_free(pk);
-   if (rc == 0) {
-   cert_free(p);
-   p = NULL;
-   }
-
-   return p;
+   cert_free(p);
+   return NULL;
 }
 
 /*



rpki-client adjust x509_get functions

2022-04-01 Thread Claudio Jeker
cert_parse_inner() now only uses the ta flag to change behaviour of
loading the various x509 extensions (AKI, SKI, AIA and CRL DP).

This diff changes these functions to work always. Make AKI, AIA and CRL DP
optional and have the code calling those functions check if they must have
the extension. I modelled the functions after x509_get_expire() so they
return 0 on failure and 1 on success.

Adjust the code to work with these new functions. Most checks for the
optional attributes are already present.
-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.58
diff -u -p -r1.58 cert.c
--- cert.c  1 Apr 2022 13:27:38 -   1.58
+++ cert.c  1 Apr 2022 14:46:25 -
@@ -1058,7 +1058,7 @@ certificate_policies(struct parse *p, X5
  * is also dereferenced.
  */
 static struct cert *
-cert_parse_inner(const char *fn, const unsigned char *der, size_t len, int ta)
+cert_parse_inner(const char *fn, const unsigned char *der, size_t len)
 {
int  rc = 0, extsz, c;
int  sia_present = 0;
@@ -1132,12 +1132,14 @@ cert_parse_inner(const char *fn, const u
goto out;
}
 
-   p.res->aki = x509_get_aki(x, ta, p.fn);
-   p.res->ski = x509_get_ski(x, p.fn);
-   if (!ta) {
-   p.res->aia = x509_get_aia(x, p.fn);
-   p.res->crl = x509_get_crl(x, p.fn);
-   }
+   if (!x509_get_aki(x, p.fn, &p.res->aki))
+   goto out;
+   if (!x509_get_ski(x, p.fn, &p.res->ski))
+   goto out;
+   if (!x509_get_aia(x, p.fn, &p.res->aia))
+   goto out;
+   if (!x509_get_crl(x, p.fn, &p.res->crl))
+   goto out;
if (!x509_get_expire(x, p.fn, &p.res->expires))
goto out;
p.res->purpose = x509_get_purpose(x, p.fn);
@@ -1198,7 +1200,7 @@ cert_parse(const char *fn, const unsigne
 {
struct cert *p;
 
-   if ((p = cert_parse_inner(fn, der, len, 0)) == NULL)
+   if ((p = cert_parse_inner(fn, der, len)) == NULL)
return NULL;
 
if (p->aki == NULL) {
@@ -1212,8 +1214,12 @@ cert_parse(const char *fn, const unsigne
goto badcert;
}
if (p->aia == NULL) {
-   warnx("%s: RFC 6487 section 8.4.7: "
-   "non-trust anchor missing AIA", fn);
+   warnx("%s: RFC 6487 section 8.4.7: AIA: extension missing", fn);
+   goto badcert;
+   }
+   if (p->crl == NULL) {
+   warnx("%s: RFC 6487 section 4.8.6: CRL: "
+   "no CRL distribution point extension", fn);
goto badcert;
}
return p;
@@ -1231,7 +1237,7 @@ ta_parse(const char *fn, const unsigned 
EVP_PKEY*pk = NULL, *opk = NULL;
struct cert *p;
 
-   if ((p = cert_parse_inner(fn, der, len, 1)) == NULL)
+   if ((p = cert_parse_inner(fn, der, len)) == NULL)
return NULL;
 
/* first check pubkey against the one from the TAL */
Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.122
diff -u -p -r1.122 extern.h
--- extern.h31 Mar 2022 12:00:00 -  1.122
+++ extern.h1 Apr 2022 13:46:32 -
@@ -578,11 +578,11 @@ struct ibuf   *io_buf_recvfd(int, struct i
 /* X509 helpers. */
 
 voidx509_init_oid(void);
-char   *x509_get_aia(X509 *, const char *);
-char   *x509_get_aki(X509 *, int, const char *);
-char   *x509_get_ski(X509 *, const char *);
+int x509_get_aia(X509 *, const char *, char **);
+int x509_get_aki(X509 *, const char *, char **);
+int x509_get_ski(X509 *, const char *, char **);
 int x509_get_expire(X509 *, const char *, time_t *);
-char   *x509_get_crl(X509 *, const char *);
+int x509_get_crl(X509 *, const char *, char **);
 char   *x509_crl_get_aki(X509_CRL *, const char *);
 char   *x509_get_pubkey(X509 *, const char *);
 enum cert_purpose   x509_get_purpose(X509 *, const char *);
Index: gbr.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/gbr.c,v
retrieving revision 1.14
diff -u -p -r1.14 gbr.c
--- gbr.c   18 Jan 2022 16:24:55 -  1.14
+++ gbr.c   1 Apr 2022 14:53:47 -
@@ -63,19 +63,24 @@ gbr_parse(X509 **x509, const char *fn, c
err(1, NULL);
free(cms);
 
-   p.res->aia = x509_get_aia(*x509, fn);
-   p.res->aki = x509_get_aki(*x509, 0, fn);
-   p.res->ski = x509_get_ski(*x509, fn);
+   if (!x509_get_aia(*x509, fn, &p.res->aia))
+   goto out;
+   if (!x509_get_aki(*x509, fn, &p.res->aki))
+   goto out;
+   if (!x509_get_ski(*x509, fn, &p.res->ski))
+  

Re: rpki-client adjust x509_get functions

2022-04-01 Thread Claudio Jeker
On Fri, Apr 01, 2022 at 06:31:43PM +0200, Theo Buehler wrote:
> On Fri, Apr 01, 2022 at 05:01:00PM +0200, Claudio Jeker wrote:
> > cert_parse_inner() now only uses the ta flag to change behaviour of
> > loading the various x509 extensions (AKI, SKI, AIA and CRL DP).
> > 
> > This diff changes these functions to work always. Make AKI, AIA and CRL DP
> > optional and have the code calling those functions check if they must have
> > the extension. I modelled the functions after x509_get_expire() so they
> > return 0 on failure and 1 on success.
> 
> This looks pretty good.
> 
> There is a certain asymmetry with the SKI handling that confused me a
> bit. At first, the strcmp(p->aki, p->ski) in ta_parse() and cert_parse()
> looked like potential NULL derefs since it looks as if p->ski isn't
> checked.
> 
> The check for p.res->ski != NULL at the end of cert_parse_inner() is no
> longer necessary since x509_get_ski() would have failed.  Strictly
> speaking, the ...->ski == NULL tests in {gbr,mft,roa}_parse() are also
> no longer needed.
> 
> I would probably prefer to make x509_get_ski() behave the same way as
> the AKI, AIA, CRL getters and add a p->ski != NULL check to ta_parse()
> and cert_parse().

I think you're right, it may be better to behave the same for all of these
extensions.
 
> > if (p->aia == NULL) {
> > -   warnx("%s: RFC 6487 section 8.4.7: "
> > -   "non-trust anchor missing AIA", fn);
> > +   warnx("%s: RFC 6487 section 8.4.7: AIA: extension missing", fn);
> 
> The warnings in this function aren't super consistent. We can clean this
> up in a later pass.

I think this is a general concern for all of rpki-client. I switched the
text because it is shorter :) Also it was the warning that would have been
printed first.

> > @@ -360,7 +360,11 @@ proc_parser_mft_pre(char *file, const un
> >  
> > a = valid_ski_aki(file, &auths, mft->ski, mft->aki);
> > /* load CRL by hand, since it is referenced by the MFT itself */
> > -   c = x509_get_crl(x509, file);
> > +   if (!x509_get_crl(x509, file, &c) || c == NULL) {
> 
> The c == NULL case now fails silently. Previously this would have
> warned and crashed so it doesn't seem to occur in practice.

It could be triggered by a bad MFT cert. I added an error here.
 
> > +   mft_free(mft);
> > +   X509_free(x509);
> > +   return NULL;
> > +   }
> > crlfile = strrchr(c, '/');
> > if (crlfile != NULL)
> > crlfile++;
> > @@ -1078,7 +1082,7 @@ proc_parser_file(char *file, unsigned ch
> > struct crl *c;
> > char *crl_uri;
> >  
> > -   crl_uri = x509_get_crl(x509, file);
> > +   x509_get_crl(x509, file, &crl_uri);

Here luckily it does not matter, the code handles NULL just fine and fails
on the verify because of the missing cert.

> > parse_load_crl(crl_uri);
> > free(crl_uri);
> > if (auth_find(&auths, aki) == NULL)

> > -   aia = strndup(
> > +   *aia = strndup(
> > ASN1_STRING_get0_data(ad->location->d.uniformResourceIdentifier),
> > ASN1_STRING_length(ad->location->d.uniformResourceIdentifier));
> 
> Unrelated to your diff: I think we may want to ensure that the URI doesn't
> contain embedded NULs before calling strndup on it.

Maybe we need a function that does all this so it can be used in a few
additional places. I would suggest to tackle this as a separate diff.
 
-- 
:wq Claudio



Re: rpki-client adjust x509_get functions

2022-04-01 Thread Claudio Jeker
On Fri, Apr 01, 2022 at 06:52:48PM +0200, Claudio Jeker wrote:
> On Fri, Apr 01, 2022 at 06:31:43PM +0200, Theo Buehler wrote:
> > On Fri, Apr 01, 2022 at 05:01:00PM +0200, Claudio Jeker wrote:
> > > cert_parse_inner() now only uses the ta flag to change behaviour of
> > > loading the various x509 extensions (AKI, SKI, AIA and CRL DP).
> > > 
> > > This diff changes these functions to work always. Make AKI, AIA and CRL DP
> > > optional and have the code calling those functions check if they must have
> > > the extension. I modelled the functions after x509_get_expire() so they
> > > return 0 on failure and 1 on success.
> > 
> > This looks pretty good.
> > 
> > There is a certain asymmetry with the SKI handling that confused me a
> > bit. At first, the strcmp(p->aki, p->ski) in ta_parse() and cert_parse()
> > looked like potential NULL derefs since it looks as if p->ski isn't
> > checked.
> > 
> > The check for p.res->ski != NULL at the end of cert_parse_inner() is no
> > longer necessary since x509_get_ski() would have failed.  Strictly
> > speaking, the ...->ski == NULL tests in {gbr,mft,roa}_parse() are also
> > no longer needed.
> > 
> > I would probably prefer to make x509_get_ski() behave the same way as
> > the AKI, AIA, CRL getters and add a p->ski != NULL check to ta_parse()
> > and cert_parse().
> 
> I think you're right, it may be better to behave the same for all of these
> extensions.
>  
> > >   if (p->aia == NULL) {
> > > - warnx("%s: RFC 6487 section 8.4.7: "
> > > - "non-trust anchor missing AIA", fn);
> > > + warnx("%s: RFC 6487 section 8.4.7: AIA: extension missing", fn);
> > 
> > The warnings in this function aren't super consistent. We can clean this
> > up in a later pass.
> 
> I think this is a general concern for all of rpki-client. I switched the
> text because it is shorter :) Also it was the warning that would have been
> printed first.
> 
> > > @@ -360,7 +360,11 @@ proc_parser_mft_pre(char *file, const un
> > >  
> > >   a = valid_ski_aki(file, &auths, mft->ski, mft->aki);
> > >   /* load CRL by hand, since it is referenced by the MFT itself */
> > > - c = x509_get_crl(x509, file);
> > > + if (!x509_get_crl(x509, file, &c) || c == NULL) {
> > 
> > The c == NULL case now fails silently. Previously this would have
> > warned and crashed so it doesn't seem to occur in practice.
> 
> It could be triggered by a bad MFT cert. I added an error here.
>  
> > > + mft_free(mft);
> > > + X509_free(x509);
> > > + return NULL;
> > > + }
> > >   crlfile = strrchr(c, '/');
> > >   if (crlfile != NULL)
> > >   crlfile++;
> > > @@ -1078,7 +1082,7 @@ proc_parser_file(char *file, unsigned ch
> > >   struct crl *c;
> > >   char *crl_uri;
> > >  
> > > - crl_uri = x509_get_crl(x509, file);
> > > + x509_get_crl(x509, file, &crl_uri);
> 
> Here luckily it does not matter, the code handles NULL just fine and fails
> on the verify because of the missing cert.
> 
> > >   parse_load_crl(crl_uri);
> > >   free(crl_uri);
> > >   if (auth_find(&auths, aki) == NULL)
> 
> > > - aia = strndup(
> > > + *aia = strndup(
> > >   ASN1_STRING_get0_data(ad->location->d.uniformResourceIdentifier),
> > >   ASN1_STRING_length(ad->location->d.uniformResourceIdentifier));
> > 
> > Unrelated to your diff: I think we may want to ensure that the URI doesn't
> > contain embedded NULs before calling strndup on it.
> 
> Maybe we need a function that does all this so it can be used in a few
> additional places. I would suggest to tackle this as a separate diff.
>  

And here is the updated diff

-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.58
diff -u -p -r1.58 cert.c
--- cert.c  1 Apr 2022 13:27:38 -   1.58
+++ cert.c  1 Apr 2022 16:40:17 -
@@ -1052,13 +1052,10 @@ certificate_policies(struct parse *p, X5
 /*
  * Parse and partially validate an RPKI X509 certificate (either a trust
  * anchor or a certificate) as defined in RFC 6487.
- * If "ta" is set, this is a trust anchor and must be self-signed.
- * Returns the parse results or NULL on failure ("xp" will be NULL

rpki-client cache cleanup change

2022-04-04 Thread Claudio Jeker
This diff alters the way rpki-client cleans up the cache directory.
While with rsync any file can be removed and on the next run it will be
fetched again, RRDP has no such logic. It is a very fragile protocol and
only works if files are not removed by something else.

Until now files are just unlinked from the cache if they are no longer
used but then RRDP gets very confused and the cache slowly gets out of
sync. It requires a RRDP delta to alter a missing file to trigger a full
resync and that can take some time.

To fix this I changed the cleanup process to only remove rsync backed
files. The RRDP backed files are now moved back into the .rrdp folder and
hopefully the next delta sync will delete them. With this change the cache
should no longer get out of sync.

-- 
:wq Claudio

Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.124
diff -u -p -r1.124 extern.h
--- extern.h2 Apr 2022 12:17:53 -   1.124
+++ extern.h4 Apr 2022 09:20:41 -
@@ -528,7 +528,7 @@ struct repo *ta_lookup(int, struct tal *
 struct repo*repo_lookup(int, const char *, const char *);
 struct repo*repo_byid(unsigned int);
 int repo_queued(struct repo *, struct entity *);
-voidrepo_cleanup(struct filepath_tree *);
+voidrepo_cleanup(struct filepath_tree *, int);
 voidrepo_free(void);
 
 voidrsync_finish(unsigned int, int);
@@ -628,6 +628,7 @@ voidlogx(const char *fmt, ...)
 time_t getmonotime(void);
 
 intmkpath(const char *);
+intmkpathat(int, const char *);
 
 #define RPKI_PATH_OUT_DIR  "/var/db/rpki-client"
 #define RPKI_PATH_BASE_DIR "/var/cache/rpki-client"
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.190
diff -u -p -r1.190 main.c
--- main.c  2 Apr 2022 12:17:53 -   1.190
+++ main.c  4 Apr 2022 09:20:41 -
@@ -1234,7 +1234,8 @@ main(int argc, char *argv[])
 
logx("all files parsed: generating output");
 
-   repo_cleanup(&fpt);
+   if (!noop)
+   repo_cleanup(&fpt, cachefd);
 
gettimeofday(&now_time, NULL);
timersub(&now_time, &start_time, &stats.elapsed_time);
Index: mkdir.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/mkdir.c,v
retrieving revision 1.7
diff -u -p -r1.7 mkdir.c
--- mkdir.c 6 May 2021 17:25:45 -   1.7
+++ mkdir.c 4 Apr 2022 09:20:57 -
@@ -32,6 +32,7 @@
 #include 
 
 #include 
+#include 
 #include 
 #include 
 
@@ -39,10 +40,11 @@
 
 /*
  * mkpath -- create directories.
+ * fd  - file descriptor to base dir or AT_FDCWD
  * dir - path to create directories for
  */
 int
-mkpath(const char *dir)
+mkpathat(int fd, const char *dir)
 {
char *path, *slash;
int done;
@@ -59,7 +61,7 @@ mkpath(const char *dir)
done = (*slash == '\0');
*slash = '\0';
 
-   if (mkdir(path, 0755) == -1 && errno != EEXIST) {
+   if (mkdirat(fd, path, 0755) == -1 && errno != EEXIST) {
free(path);
return -1;
}
@@ -72,4 +74,10 @@ mkpath(const char *dir)
 
free(path);
return 0;
+}
+
+int
+mkpath(const char *dir)
+{
+   return mkpathat(AT_FDCWD, dir);
 }
Index: repo.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
retrieving revision 1.31
diff -u -p -r1.31 repo.c
--- repo.c  14 Feb 2022 14:47:49 -  1.31
+++ repo.c  1 Apr 2022 10:31:03 -
@@ -241,7 +241,7 @@ repo_dir(const char *uri, const char *di
  * This functions alters the path temporarily.
  */
 static int
-repo_mkpath(char *file)
+repo_mkpath(int fd, char *file)
 {
char *slash;
 
@@ -249,7 +249,7 @@ repo_mkpath(char *file)
slash = strrchr(file, '/');
assert(slash != NULL);
*slash = '\0';
-   if (mkpath(file) == -1) {
+   if (mkpathat(fd, file) == -1) {
warn("mkpath %s", file);
return -1;
}
@@ -838,7 +838,7 @@ rrdp_handle_file(unsigned int id, enum p
if ((fn = rrdp_filename(rr, uri, 0)) == NULL)
return 0;
 
-   if (repo_mkpath(fn) == -1)
+   if (repo_mkpath(AT_FDCWD, fn) == -1)
goto fail;
 
fd = open(fn, O_WRONLY|O_CREAT|O_TRUNC, 0644);
@@ -1121,6 +1121,21 @@ repo_byid(unsigned int id)
 }
 
 /*
+ * Find repository by base path.
+ */
+static struct repo *
+repo_bypath(const char *path)
+{
+   struct repo *rp;
+
+   SLIST_FOREACH(rp, &repos, entry) {
+   if (strcmp(rp->basedir, path) == 0)
+   return rp;
+   }
+   return NULL;
+}
+
+/*
  * Ret

Re: rpki-client: remove a stale FIXME

2022-04-04 Thread Claudio Jeker
On Mon, Apr 04, 2022 at 01:33:18PM +0200, Theo Buehler wrote:
> We fixed this back in January when we added rtype_from_mftfile().
> 
> Index: main.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> retrieving revision 1.190
> diff -u -p -r1.190 main.c
> --- main.c2 Apr 2022 12:17:53 -   1.190
> +++ main.c4 Apr 2022 11:21:44 -
> @@ -348,9 +348,6 @@ queue_add_from_mft(const char *path, con
>   * Loops over queue_add_from_mft() for all files.
>   * The order here is important: we want to parse the revocation
>   * list *before* we parse anything else.
> - * FIXME: set the type of file in the mftfile so that we don't need to
> - * keep doing the check (this should be done in the parser, where we
> - * check the suffix anyway).
>   */
>  static void
>  queue_add_from_mft_set(const struct mft *mft, const char *name, struct repo 
> *rp)
> 

OK claudio
-- 
:wq Claudio



rpki-client simplify rsync.c

2022-04-04 Thread Claudio Jeker
Kill a FIXME and simplify the logic around the process list by using a
static ids array on the stack.

Tested with and without -R.
-- 
:wq Claudio

Index: rsync.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/rsync.c,v
retrieving revision 1.33
diff -u -p -r1.33 rsync.c
--- rsync.c 31 Mar 2022 12:00:00 -  1.33
+++ rsync.c 4 Apr 2022 12:43:02 -
@@ -137,20 +137,17 @@ proc_child(int signal)
  * does so.
  * It then responds with the identifier of the repo that it updated.
  * It only exits cleanly when fd is closed.
- * FIXME: limit the number of simultaneous process.
- * Currently, an attacker can trivially specify thousands of different
- * repositories and saturate our system.
  */
 void
 proc_rsync(char *prog, char *bind_addr, int fd)
 {
-   size_t   i, idsz = 0, nprocs = 0;
+   size_t   i, nprocs = 0;
int  rc = 0;
struct pollfdpfd;
struct msgbufmsgq;
struct ibuf *b, *inbuf = NULL;
sigset_t mask, oldmask;
-   struct rsyncproc*ids = NULL;
+   struct rsyncproc ids[MAX_RSYNC_PROCESSES] = { 0 };
 
pfd.fd = fd;
 
@@ -231,10 +228,10 @@ proc_rsync(char *prog, char *bind_addr, 
while ((pid = waitpid(WAIT_ANY, &st, WNOHANG)) > 0) {
int ok = 1;
 
-   for (i = 0; i < idsz; i++)
+   for (i = 0; i < MAX_RSYNC_PROCESSES; i++)
if (ids[i].pid == pid)
break;
-   if (i >= idsz)
+   if (i >= MAX_RSYNC_PROCESSES)
errx(1, "waitpid: %d unexpected", pid);
 
if (!WIFEXITED(st)) {
@@ -279,6 +276,8 @@ proc_rsync(char *prog, char *bind_addr, 
 
if (!(pfd.revents & POLLIN))
continue;
+   if (nprocs >= MAX_RSYNC_PROCESSES)
+   continue;
 
b = io_buf_read(fd, &inbuf);
if (b == NULL)
@@ -339,16 +338,10 @@ proc_rsync(char *prog, char *bind_addr, 
 
/* Augment the list of running processes. */
 
-   for (i = 0; i < idsz; i++)
+   for (i = 0; i < MAX_RSYNC_PROCESSES; i++)
if (ids[i].pid == 0)
break;
-   if (i == idsz) {
-   ids = reallocarray(ids, idsz + 1, sizeof(*ids));
-   if (ids == NULL)
-   err(1, NULL);
-   idsz++;
-   }
-
+   assert(i < MAX_RSYNC_PROCESSES);
ids[i].id = id;
ids[i].pid = pid;
ids[i].uri = uri;
@@ -361,13 +354,12 @@ proc_rsync(char *prog, char *bind_addr, 
}
 
/* No need for these to be hanging around. */
-   for (i = 0; i < idsz; i++)
-   if (ids[i].pid > 0) {
+   for (i = 0; i < MAX_RSYNC_PROCESSES; i++)
+   if (ids[i].pid != 0) {
kill(ids[i].pid, SIGTERM);
free(ids[i].uri);
}
 
msgbuf_clear(&msgq);
-   free(ids);
exit(rc);
 }



rpki-client remove another outdated comment

2022-04-04 Thread Claudio Jeker
This was fixed in January. Now RRDP issues an RRDP_CLEAR to the parent
which in turn removes all files from the .rrdp cache dir.

-- 
:wq Claudio

Index: rrdp.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/rrdp.c,v
retrieving revision 1.22
diff -u -p -r1.22 rrdp.c
--- rrdp.c  10 Feb 2022 11:11:22 -  1.22
+++ rrdp.c  4 Apr 2022 13:16:56 -
@@ -256,7 +256,6 @@ rrdp_failed(struct rrdp *s)
/* reset file state before retrying */
s->file_failed = 0;
 
-   /* XXX MUST do some cleanup in the repo here */
if (s->task == DELTA) {
/* fallback to a snapshot as per RFC8182 */
free_delta_xml(s->dxml);



Re: rpki-client: two missing checks for the SIA extension

2022-04-04 Thread Claudio Jeker
On Mon, Apr 04, 2022 at 08:44:43PM +0200, Theo Buehler wrote:
> p->res->mft and p->res->repo are populated in sbgp_sia_resource_entry().
> Nothing guarantees that the resources are present. With our current
> strstr() implementation we would let a cert with a missing mft through
> while we would crash on a missing repo.
> 
> Also, we don't check that the SIA extension isn't critical.
> 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 cert.c
> --- cert.c4 Apr 2022 13:15:11 -   1.61
> +++ cert.c4 Apr 2022 18:12:23 -
> @@ -305,6 +305,12 @@ sbgp_sia_resource(struct parse *p, const
>   goto out;
>   }
>  
> + if (p->res->mft == NULL || p->res->repo == NULL) {
> + warnx("%s: RFC 6487 section 4.8.8: SIA missing caRepository "
> + "or rpkiManifest", p->fn);
> + goto out;
> + }
> +
>   if (strstr(p->res->mft, p->res->repo) != p->res->mft) {
>   warnx("%s: RFC 6487 section 4.8.8: SIA: "
>   "conflicting URIs for caRepository and rpkiManifest",
> @@ -329,6 +335,12 @@ sbgp_sia(struct parse *p, X509_EXTENSION
>   ASN1_SEQUENCE_ANY   *seq = NULL;
>   const ASN1_TYPE *t;
>   int  dsz, rc = 0;
> +
> + if (X509_EXTENSION_get_critical(ext)) {
> + warnx("%s: RFC 6487 section 4.8.8: SIA: "
> + "extension not non-critical", p->fn);
> + goto out;
> + }
>  
>   if ((dsz = i2d_X509_EXTENSION(ext, &sv)) < 0) {
>   cryptowarnx("%s: RFC 6487 section 4.8.8: SIA: "
> 

OK claudio@

-- 
:wq Claudio



Re: rpki-client: simplify SIA parsing

2022-04-10 Thread Claudio Jeker
On Tue, Apr 05, 2022 at 06:33:35PM +0200, Theo Buehler wrote:
> Instead of manually unpacking the SIA extension with super low-level
> ASN.1 fiddling, we can let the templated ASN.1 in libcrypto do this work
> for us, which makes the code quite a bit simpler. This resolves one
> FIXME and removes one use of the magic ASN1_frame().
> 
> I kept the current split of the functions to avoid noise and make the
> diff easier to review. I'm going to undo this split in a follow-up.
> 
> The relevant structs and typedefs are in x509/x509v3.h and the ASN.1
> templates are in x509/x509_info.c if you want to take a look at that.
> 
> The main point is the X509V3_EXT_d2i() in sbgp_sia(), which deserializes
> the extension based on its OID, which was checked in cert_parse_pre().
> We (ab)use the AUTHORITY_INFO_ACCESS type since there is unfortunately
> no SUBJECT_INFO_ACCESS type (the extensions have the same syntax, see
> RFC 5280, 4.2.2). This is documented in AUTHORITY_INFO_ACCESS_new(3).
> A typedef might be appropriate here.
> 
> AUTHORITY_INFO_ACCESS is a STACK_OF(ACCESS_DESCRIPTION), the unpacking
> of which is done in sbgp_sia_resource().

I'm not too worried about this, an option could be to use
STACK_OF(ACCESS_DESCRIPTION) directly but I'm totally fine with this.
 
> ACCESS_DESCRIPTION has an oid (ASN1_OBJECT) called method and a
> GENERAL_NAME called location. All three resources we currently care
> about are of the URI type, the new GEN_URI check makes sure of that
> (the discarded ptag argument of ASN1_frame() should have been checked
> to be GEN_URI).
> 
> Overall, I think the resulting code is a lot easier to follow and more
> correct.

This is a lot cleaner and indeed an improvement. I think some of the rc
handling can also be simplified. The code in sbgp_sia_resource_entry()
and sbgp_sia_resource() no longer require cleanup on error so we can just
return 0 instead of goto out. It is OK to do this cleanup in a 2nd step
(which you probably already planned).

OK claudio@
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.62
> diff -u -p -r1.62 cert.c
> --- cert.c5 Apr 2022 03:56:20 -   1.62
> +++ cert.c5 Apr 2022 04:54:18 -
> @@ -214,63 +214,45 @@ sbgp_sia_resource_carepo(struct parse *p
>   * Returns zero on failure, non-zero on success.
>   */
>  static int
> -sbgp_sia_resource_entry(struct parse *p,
> - const unsigned char *d, size_t dsz)
> +sbgp_sia_resource_entry(struct parse *p, ACCESS_DESCRIPTION *ad)
>  {
> - ASN1_SEQUENCE_ANY   *seq;
>   ASN1_OBJECT *oid;
> - const ASN1_TYPE *t;
> - int  rc = 0, ptag;
> - long plen;
> + ASN1_IA5STRING  *uri = NULL;
> + int  rc = 0;
>  
> - if ((seq = d2i_ASN1_SEQUENCE_ANY(NULL, &d, dsz)) == NULL) {
> - cryptowarnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "failed ASN.1 sequence parse", p->fn);
> - goto out;
> - }
> - if (sk_ASN1_TYPE_num(seq) != 2) {
> - warnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "want 2 elements, have %d",
> - p->fn, sk_ASN1_TYPE_num(seq));
> - goto out;
> - }
> + if (ad->location->type == GEN_URI)
> + uri = ad->location->d.uniformResourceIdentifier;
>  
> - /* Composed of an OID and its continuation. */
> + oid = ad->method;
>  
> - t = sk_ASN1_TYPE_value(seq, 0);
> - if (t->type != V_ASN1_OBJECT) {
> - warnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "want ASN.1 object, have %s (NID %d)",
> - p->fn, ASN1_tag2str(t->type), t->type);
> - goto out;
> - }
> - oid = t->value.object;
> -
> - t = sk_ASN1_TYPE_value(seq, 1);
> - if (t->type != V_ASN1_OTHER) {
> - warnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "want ASN.1 external, have %s (NID %d)",
> - p->fn, ASN1_tag2str(t->type), t->type);
> - goto out;
> + if (OBJ_cmp(oid, carepo_oid) == 0) {
> + if (uri == NULL) {
> + warnx("%s: RFC 6487: 4.8.8.1 caRepository without URI",
> + p->fn);
> + goto out;
> + }
> + if (!sbgp_sia_resource_carepo(p, uri->data, uri->length))
> + goto out;
> + } else if (OBJ_cmp(oid, manifest_oid) == 0) {
> + if (uri == NULL) {
> + warnx("%s: RFC 6487: 4.8.8 SIA manifest without URI",
> + p->fn);
> + goto out;
> + }
> + if (!sbgp_sia_resource_mft(p, uri->data, uri->length))
> + goto out;
> + } else if (OBJ_cmp(oid, notify_oid) == 0) {
> + if (uri == NULL) {
> + warnx("%s: RFC 6487: 4

Re: have in_pcbselsrc copy the selected ip to the caller instead of a reference to it

2022-04-10 Thread Claudio Jeker
On Mon, Mar 21, 2022 at 02:17:21PM +1000, David Gwynne wrote:
> in_pcbselsrc has this:
> 
>   ifp = if_get(mopts->imo_ifidx);
>   if (ifp != NULL) {
>   if (ifp->if_rdomain == rtable_l2(rtableid))
>   IFP_TO_IA(ifp, ia);
>   if (ia == NULL) {
>   if_put(ifp);
>   return (EADDRNOTAVAIL);
>   }
> 
>   *insrc = ia->ia_addr.sin_addr;
>   if_put(ifp);
>   return (0);
>   }
> 
> which looks very much like it releases a reference to the interface
> holding the address it's passing back to the caller to use.
> 
> this diff has it copy the address to memory the caller provides instead.
> 
> ok?

I think this is a good time to commit this. OK claudio@
 
> Index: in_pcb.c
> ===
> RCS file: /cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.262
> diff -u -p -r1.262 in_pcb.c
> --- in_pcb.c  21 Mar 2022 03:51:09 -  1.262
> +++ in_pcb.c  21 Mar 2022 04:10:24 -
> @@ -476,7 +476,7 @@ in_pcbpickport(u_int16_t *lport, void *l
>  int
>  in_pcbconnect(struct inpcb *inp, struct mbuf *nam)
>  {
> - struct in_addr *ina = NULL;
> + struct in_addr ina;
>   struct sockaddr_in *sin;
>   int error;
>  
> @@ -495,7 +495,7 @@ in_pcbconnect(struct inpcb *inp, struct 
>   return (error);
>  
>   if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
> - *ina, inp->inp_lport, inp->inp_rtableid) != NULL)
> + ina, inp->inp_lport, inp->inp_rtableid) != NULL)
>   return (EADDRINUSE);
>  
>   KASSERT(inp->inp_laddr.s_addr == INADDR_ANY || inp->inp_lport);
> @@ -506,13 +506,13 @@ in_pcbconnect(struct inpcb *inp, struct 
>   if (error)
>   return (error);
>   if (in_pcbhashlookup(inp->inp_table, sin->sin_addr,
> - sin->sin_port, *ina, inp->inp_lport,
> + sin->sin_port, ina, inp->inp_lport,
>   inp->inp_rtableid) != NULL) {
>   inp->inp_lport = 0;
>   return (EADDRINUSE);
>   }
>   }
> - inp->inp_laddr = *ina;
> + inp->inp_laddr = ina;
>   }
>   inp->inp_faddr = sin->sin_addr;
>   inp->inp_fport = sin->sin_port;
> @@ -870,7 +870,7 @@ in_pcbrtentry(struct inpcb *inp)
>   * an entry to the caller for later use.
>   */
>  int
> -in_pcbselsrc(struct in_addr **insrc, struct sockaddr_in *sin,
> +in_pcbselsrc(struct in_addr *insrc, struct sockaddr_in *sin,
>  struct inpcb *inp)
>  {
>   struct ip_moptions *mopts = inp->inp_moptions;
> @@ -886,9 +886,9 @@ in_pcbselsrc(struct in_addr **insrc, str
>* If the socket(if any) is already bound, use that bound address
>* unless it is INADDR_ANY or INADDR_BROADCAST.
>*/
> - if (laddr && laddr->s_addr != INADDR_ANY &&
> + if (laddr->s_addr != INADDR_ANY &&
>   laddr->s_addr != INADDR_BROADCAST) {
> - *insrc = laddr;
> + *insrc = *laddr;
>   return (0);
>   }
>  
> @@ -911,7 +911,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   return (EADDRNOTAVAIL);
>   }
>  
> - *insrc = &ia->ia_addr.sin_addr;
> + *insrc = ia->ia_addr.sin_addr;
>   if_put(ifp);
>   return (0);
>   }
> @@ -962,7 +962,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   struct ifaddr *ifa;
>   if ((ifa = ifa_ifwithaddr(ip4_source, rtableid)) !=
>   NULL && ISSET(ifa->ifa_ifp->if_flags, IFF_UP)) {
> - *insrc = &satosin(ip4_source)->sin_addr;
> + *insrc = satosin(ip4_source)->sin_addr;
>   return (0);
>   }
>   }
> @@ -971,7 +971,7 @@ in_pcbselsrc(struct in_addr **insrc, str
>   if (ia == NULL)
>   return (EADDRNOTAVAIL);
>  
> - *insrc = &ia->ia_addr.sin_addr;
> + *insrc = ia->ia_addr.sin_addr;
>   return (0);
>  }
>  
> Index: in_pcb.h
> ===
> RCS file: /cvs/src/sys/netinet/in_pcb.h,v
> retrieving revision 1.125
> diff -u -p -r1.125 in_pcb.h
> --- in_pcb.h  14 Mar 2022 22:38:43 -  1.125
> +++ in_pcb.h  21 Mar 2022 04:10:25 -
> @@ -305,7 +305,7 @@ void   in_setpeeraddr(struct inpcb *, str
>  void  in_setsockaddr(struct inpcb *, struct mbuf *);
>  int   in_baddynamic(u_int16_t, u_int16_t);
>  int   in_rootonly(u_int16_t, u_int16_t);
> -int   in_pcbselsrc(struct in_addr **, struct sockaddr_in *, str

Re: rpki-client: simplify SIA parsing

2022-04-11 Thread Claudio Jeker
On Mon, Apr 11, 2022 at 09:41:05AM +0200, Theo Buehler wrote:
> On Sun, Apr 10, 2022 at 12:40:08PM +0200, Claudio Jeker wrote:
> > This is a lot cleaner and indeed an improvement. I think some of the rc
> > handling can also be simplified. The code in sbgp_sia_resource_entry()
> > and sbgp_sia_resource() no longer require cleanup on error so we can just
> > return 0 instead of goto out. It is OK to do this cleanup in a 2nd step
> > (which you probably already planned).
> 
> Yes, I kept the rc dance since it avoids noise in the following steps.
> The ones without cleanup will go away eventually.
> 
> Here's the next step: merge sbgp_sia() and sbgp_sia_resource().  Now
> that both are short and easy, there's no need for a split.
> 
> Also: move the .mft extension check out of sbgp_sia_resource_mft() and
> use rtype_from_file_extension() instead. The next step will dedup
> sbgp_sia_resource_*().
> 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 cert.c
> --- cert.c11 Apr 2022 06:47:38 -  1.63
> +++ cert.c11 Apr 2022 07:11:27 -
> @@ -162,18 +162,12 @@ sbgp_sia_resource_mft(struct parse *p, c
>   return 0;
>   }
>  
> - /* Make sure it's an MFT rsync address. */
> + /* Make sure it's an rsync address. */
>   if (!valid_uri(d, dsz, "rsync://")) {
>   warnx("%s: RFC 6487 section 4.8.8: bad MFT location", p->fn);
>   return 0;
>   }
>  
> - if (dsz < 4 || strcasecmp(d + dsz - 4, ".mft") != 0) {
> - warnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "not an MFT file", p->fn);
> - return 0;
> - }
> -
>   if ((p->res->mft = strndup(d, dsz)) == NULL)
>   err(1, NULL);
>  
> @@ -257,15 +251,28 @@ sbgp_sia_resource_entry(struct parse *p,
>  }
>  
>  /*
> - * Multiple locations as defined in RFC 6487, 4.8.8.1.
> + * Parse "Subject Information Access" extension, RFC 6487 4.8.8.
>   * Returns zero on failure, non-zero on success.
>   */
>  static int
> -sbgp_sia_resource(struct parse *p, AUTHORITY_INFO_ACCESS *sia)
> +sbgp_sia(struct parse *p, X509_EXTENSION *ext)
>  {
> + AUTHORITY_INFO_ACCESS   *sia = NULL;
>   ACCESS_DESCRIPTION  *ad;
>   int  i, rc = 0;
>  
> + if (X509_EXTENSION_get_critical(ext)) {
> + warnx("%s: RFC 6487 section 4.8.8: SIA: "
> + "extension not non-critical", p->fn);
> + goto out;
> + }
> +
> + if ((sia = X509V3_EXT_d2i(ext)) == NULL) {
> + cryptowarnx("%s: RFC 6487 section 4.8.8: SIA: "
> + "failed extension parse", p->fn);
> + goto out;
> + }
> +
>   for (i = 0; i < sk_ACCESS_DESCRIPTION_num(sia); i++) {
>   ad = sk_ACCESS_DESCRIPTION_value(sia, i);
>   if (!sbgp_sia_resource_entry(p, ad))
> @@ -285,34 +292,11 @@ sbgp_sia_resource(struct parse *p, AUTHO
>   goto out;
>   }
>  
> - rc = 1;
> - out:
> - return rc;
> -}
> -
> -/*
> - * Parse "Subject Information Access" extension, RFC 6487 4.8.8.
> - * Returns zero on failure, non-zero on success.
> - */
> -static int
> -sbgp_sia(struct parse *p, X509_EXTENSION *ext)
> -{
> - AUTHORITY_INFO_ACCESS   *sia = NULL;
> - int  rc = 0;
> -
> - if (X509_EXTENSION_get_critical(ext)) {
> + if (rtype_from_file_extension(p->res->mft) != RTYPE_MFT) {
>   warnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "extension not non-critical", p->fn);
> - goto out;
> - }
> -
> - if ((sia = X509V3_EXT_d2i(ext)) == NULL) {
> - cryptowarnx("%s: RFC 6487 section 4.8.8: SIA: "
> - "failed extension parse", p->fn);
> + "not an MFT file", p->fn);
>   goto out;
>   }
> - if (!sbgp_sia_resource(p, sia))
> - goto out;
>  
>   rc = 1;
>   out:
> 

OK claudio@

-- 
:wq Claudio



<    7   8   9   10   11   12   13   14   15   16   >