bgpd refactor timer code

2020-12-09 Thread Claudio Jeker
This diff makes the timer code independent from struct peer. This way
it can be used in different places without too many issues.

OK?
-- 
:wq Claudio

Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.101
diff -u -p -r1.101 control.c
--- control.c   5 Nov 2020 11:28:11 -   1.101
+++ control.c   9 Dec 2020 11:57:05 -
@@ -333,7 +333,8 @@ control_dispatch_msg(struct pollfd *pfd,
IMSG_CTL_SHOW_NEIGHBOR,
0, 0, -1, p, sizeof(*p));
for (i = 1; i < Timer_Max; i++) {
-   if (!timer_running(p, i, &d))
+   if (!timer_running(&p->timers,
+   i, &d))
continue;
ct.type = i;
ct.val = d;
@@ -403,7 +404,8 @@ control_dispatch_msg(struct pollfd *pfd,
if (!p->conf.down) {
session_stop(p,
ERR_CEASE_ADMIN_RESET);
-   timer_set(p, Timer_IdleHold,
+   timer_set(&p->timers,
+   Timer_IdleHold,
SESSION_CLEAR_DELAY);
} else {
session_stop(p,
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.405
diff -u -p -r1.405 session.c
--- session.c   5 Nov 2020 14:44:59 -   1.405
+++ session.c   9 Dec 2020 11:56:19 -
@@ -263,7 +263,8 @@ session_main(int debug, int verbose)
if (p->reconf_action == RECONF_REINIT) {
session_stop(p, ERR_CEASE_ADMIN_RESET);
if (!p->conf.down)
-   timer_set(p, Timer_IdleHold, 0);
+   timer_set(&p->timers,
+   Timer_IdleHold, 0);
}
 
/* deletion due? */
@@ -272,7 +273,7 @@ session_main(int debug, int verbose)
session_demote(p, -1);
p->conf.demote_group[0] = 0;
session_stop(p, ERR_CEASE_PEER_UNCONF);
-   timer_remove_all(p);
+   timer_remove_all(&p->timers);
tcp_md5_del_listener(conf, p);
log_peer_warnx(&p->conf, "removed");
RB_REMOVE(peer_head, &conf->peers, p);
@@ -366,10 +367,10 @@ session_main(int debug, int verbose)
now = getmonotime();
RB_FOREACH(p, peer_head, &conf->peers) {
time_t  nextaction;
-   struct peer_timer *pt;
+   struct timer *pt;
 
/* check timers */
-   if ((pt = timer_nextisdue(p, now)) != NULL) {
+   if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
switch (pt->type) {
case Timer_Hold:
bgp_fsm(p, EVNT_TIMER_HOLDTIME);
@@ -387,24 +388,27 @@ session_main(int debug, int verbose)
p->IdleHoldTime =
INTERVAL_IDLE_HOLD_INITIAL;
p->errcnt = 0;
-   timer_stop(p, Timer_IdleHoldReset);
+   timer_stop(&p->timers,
+   Timer_IdleHoldReset);
break;
case Timer_CarpUndemote:
-   timer_stop(p, Timer_CarpUndemote);
+   timer_stop(&p->timers,
+   Timer_CarpUndemote);
if (p->demoted &&
p->state == STATE_ESTABLISHED)
session_demote(p, -1);
break;
case Timer_RestartTimeout:
-

Re: ipmi(4): ipmi_poll_thread(): tsleep(9) -> tsleep_nsec(9)

2020-12-10 Thread Claudio Jeker
On Mon, Dec 07, 2020 at 10:54:26PM -0600, Scott Cheloha wrote:
> On Wed, Dec 02, 2020 at 11:43:32PM +0100, Mark Kettenis wrote:
> > > From: "Constantine A. Murenin" 
> > > Date: Wed, 2 Dec 2020 14:04:52 -0800
> > > 
> > > Not sure if you've seen it, but ipmi(4) has been disabled for over 12
> > > years, because it's broken on some machines, so, this code is not
> > > necessarily guaranteed to be correct as-is.
> > > 
> > > http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/arch/i386/conf/GENERIC#rev1.632
> > > http://cvsweb.openbsd.org/cgi-bin/cvsweb/src/sys/arch/amd64/conf/GENERIC#rev1.238
> > 
> > The driver is actually enabled on arm64.  And I'll probably enable it
> > on powerpc64 at some point.
> 
> kettenis@/sthen@:
> 
> In that case, could one or both of you test this diff?
> 
> I doubt anyone remembers why we spin until tsleep(9) returns
> EWOULDBLOCK.  If we can confirm that the driver still works with a 1ms
> block in this spot then that's good enough for me.
> 
> So, does this still work?
> 
> Index: ipmi.c
> ===
> RCS file: /cvs/src/sys/dev/ipmi.c,v
> retrieving revision 1.112
> diff -u -p -r1.112 ipmi.c
> --- ipmi.c29 Mar 2020 09:31:10 -  1.112
> +++ ipmi.c2 Dec 2020 20:31:57 -
> @@ -1497,7 +1497,8 @@ ipmi_poll_thread(void *arg)
>   printf("%s: no SDRs IPMI disabled\n", DEVNAME(sc));
>   goto done;
>   }
> - while (tsleep(sc, PWAIT, "ipmirun", 1) != EWOULDBLOCK)
> + while (tsleep_nsec(sc, PWAIT, "ipmirun",
> + MSEC_TO_NSEC(1)) != EWOULDBLOCK)
>   continue;
>   }
>  

This idiom of a quick sleep is a bit strange and I would prefer if this is
rewritten to be a simple tsleep_nsec call without the while loop.
Since there is no corresponding wakeup call, this tsleep can only return
EWOULDBLOCK; there is no way to return any other value (PCATCH is not set
and nothing will do a wakeup).

So this could be simply written as:
tsleep_nsec(sc, PWAIT, "ipmirun", MSEC_TO_NSEC(1));

This whole poll thread is just way more complicated than it needs to be.
Neither current_sensor nor thread->running are needed. I'm not even sure
if the tsleep itself is needed in that discovery loop. get_sdr() calls
ipmi_cmd() which does another tsleep to wait for the command.

This driver seems to just use all the concepts without much thought. I bet
ipmi_cmd() calls can race against each other.
-- 
:wq Claudio



Re: bgpd show status of set tables

2020-12-10 Thread Claudio Jeker
On Thu, Dec 03, 2020 at 10:59:29PM +0100, Claudio Jeker wrote:
> The use of roa-set, prefix-set and as-set is fairly common in bgpd.
> Still sometimes it is not exactly clear how old the data in those tables
> is. This diff is a first step at improving inspection by adding
>   bgpctl show sets
> 
> Sample output is:
> Type   Name #IPv4   #Ipv6 #ASnum Last Change
> ROARPKI ROA158810   26257  -00:00:07
> ASNUM  asns_AS15600 -   - 2601:19:10
> PREFIX p4_AS21040   8   0  -01:19:10
> 
> I just did a bgpctl reload with a new roa table (generated by rpki-client)
> but the as-set and prefix-set did not change during this reload.
> The output also includes the number of entries in the tables but in the
> case of roa-set the number of unique prefixes is counted. So the number is
> a bit under the count from rpki-client because e.g.
> 1.32.219.0/24 source-as 4842
> 1.32.219.0/24 source-as 138570
> are counted as 1 right now (instead of 2 prefixes).
> 
> More statistics can be added if their calculation is easy.

Am I the only one interested in knowing how old my set data is?

-- 
:wq Claudio

> PS: apply diff in /usr/src/usr.sbin

Index: bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.405
diff -u -p -r1.405 bgpd.h
--- bgpd/bgpd.h 5 Nov 2020 11:52:59 -   1.405
+++ bgpd/bgpd.h 3 Dec 2020 20:15:30 -
@@ -254,12 +254,15 @@ struct trie_head {
struct tentry_v6*root_v6;
int  match_default_v4;
int  match_default_v6;
+   size_t   v4_cnt;
+   size_t   v6_cnt;
 };
 
 struct rde_prefixset {
charname[SET_NAME_LEN];
struct trie_headth;
SIMPLEQ_ENTRY(rde_prefixset)entry;
+   time_t  lastchange;
int dirty;
 };
 SIMPLEQ_HEAD(rde_prefixset_head, rde_prefixset);
@@ -465,6 +468,7 @@ enum imsg_type {
IMSG_CTL_SHOW_TIMER,
IMSG_CTL_LOG_VERBOSE,
IMSG_CTL_SHOW_FIB_TABLES,
+   IMSG_CTL_SHOW_SET,
IMSG_CTL_TERMINATE,
IMSG_NETWORK_ADD,
IMSG_NETWORK_ASPATH,
@@ -696,6 +700,20 @@ struct ctl_show_nexthop {
u_int8_tkrvalid;
 };
 
+struct ctl_show_set {
+   charname[SET_NAME_LEN];
+   time_t  lastchange;
+   size_t  v4_cnt;
+   size_t  v6_cnt;
+   size_t  as_cnt;
+   enum {
+   ASNUM_SET,
+   PREFIX_SET,
+   ORIGIN_SET,
+   ROA_SET,
+   }   type;
+};
+
 struct ctl_neighbor {
struct bgpd_addraddr;
chardescr[PEER_DESCR_LEN];
@@ -1049,6 +1067,7 @@ struct as_set {
char name[SET_NAME_LEN];
SIMPLEQ_ENTRY(as_set)entry;
struct set_table*set;
+   time_t   lastchange;
int  dirty;
 };
 
@@ -1283,6 +1302,7 @@ void   set_prep(struct set_table *);
 void   *set_match(const struct set_table *, u_int32_t);
 int set_equal(const struct set_table *,
const struct set_table *);
+size_t  set_nmemb(const struct set_table *);
 
 /* rde_trie.c */
 inttrie_add(struct trie_head *, struct bgpd_addr *, u_int8_t, u_int8_t,
Index: bgpd/control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.101
diff -u -p -r1.101 control.c
--- bgpd/control.c  5 Nov 2020 11:28:11 -   1.101
+++ bgpd/control.c  3 Dec 2020 17:07:58 -
@@ -280,6 +280,7 @@ control_dispatch_msg(struct pollfd *pfd,
case IMSG_CTL_SHOW_NETWORK:
case IMSG_CTL_SHOW_RIB:
case IMSG_CTL_SHOW_RIB_PREFIX:
+   case IMSG_CTL_SHOW_SET:
break;
default:
/* clear imsg type to prevent processing */
@@ -496,6 +497,7 @@ control_dispatch_msg(struct pollfd *pfd,
c->terminate = 1;
/* FALLTHROUGH */
case IMSG_CTL_SHOW_RIB_MEM:
+   case IMSG_CTL_SHOW_SET:
c->ibuf.pid = imsg.hdr.pid;
imsg_ctl_rde(imsg.hdr.type, imsg.hdr.pid,
  

Re: ipmi(4): ipmi_poll_thread(): tsleep(9) -> tsleep_nsec(9)

2020-12-11 Thread Claudio Jeker
On Thu, Dec 10, 2020 at 10:07:29PM -0600, Scott Cheloha wrote:
> On Thu, Dec 10, 2020 at 10:00:46AM +0100, Claudio Jeker wrote:
> > On Mon, Dec 07, 2020 at 10:54:26PM -0600, Scott Cheloha wrote:
> > > Index: ipmi.c
> > > ===
> > > RCS file: /cvs/src/sys/dev/ipmi.c,v
> > > retrieving revision 1.112
> > > diff -u -p -r1.112 ipmi.c
> > > --- ipmi.c29 Mar 2020 09:31:10 -  1.112
> > > +++ ipmi.c2 Dec 2020 20:31:57 -
> > > @@ -1497,7 +1497,8 @@ ipmi_poll_thread(void *arg)
> > >   printf("%s: no SDRs IPMI disabled\n", DEVNAME(sc));
> > >   goto done;
> > >   }
> > > - while (tsleep(sc, PWAIT, "ipmirun", 1) != EWOULDBLOCK)
> > > + while (tsleep_nsec(sc, PWAIT, "ipmirun",
> > > + MSEC_TO_NSEC(1)) != EWOULDBLOCK)
> > >   continue;
> > >   }
> > >  
> > 
> > This idiom of a quick sleep is a bit strange and I would prefer if this is
> > rewritten to be a simple tsleep_nsec call without the while loop.
> > Since there is no corresponding wakeup call this tsleep can only return
> > EWOULDBLOCK there is no way to return any other value (PCATCH is not set
> > and nothing will do a wakeup).
> > 
> > So this could be simply written as:
> > tsleep_nsec(sc, PWAIT, "ipmirun", MSEC_TO_NSEC(1));
> > 
> > This whole poll thread is just way more complicated than it needs to be.
> > Neither current_sensor nor thread->running are needed. I'm not even sure
> > if the tsleep itself is needed in that discovery loop. get_sdr() calls
> > ipmi_cmd() which does another tsleep to wait for the command.
> > 
> > This driver seems to just use all the concepts without much thought. I bet
> > ipmi_cmd() calls can race against each other.
> 
> One thing at a time.
> 
> First, remove the loop.  It is unnecessary, as there is no other
> thread calling wakeup(9), i.e. tsleep_nsec(9) will always return
> EWOULDBLOCK here.
> 
> ok?
> 
> Index: ipmi.c
> ===
> RCS file: /cvs/src/sys/dev/ipmi.c,v
> retrieving revision 1.113
> diff -u -p -r1.113 ipmi.c
> --- ipmi.c11 Dec 2020 04:00:33 -  1.113
> +++ ipmi.c11 Dec 2020 04:05:31 -
> @@ -1497,9 +1497,7 @@ ipmi_poll_thread(void *arg)
>   printf("%s: no SDRs IPMI disabled\n", DEVNAME(sc));
>   goto done;
>   }
> - while (tsleep_nsec(sc, PWAIT, "ipmirun",
> - MSEC_TO_NSEC(1)) != EWOULDBLOCK)
> - continue;
> + tsleep_nsec(sc, PWAIT, "ipmirun", MSEC_TO_NSEC(1));
>   }
>  
>   /* initialize sensor list for thread */

OK claudio@
-- 
:wq Claudio



Re: bpf(4): BIOCGRTIMEOUT, BIOCSRTIMEOUT: protect with bd_mtx

2020-12-11 Thread Claudio Jeker
On Thu, Dec 10, 2020 at 11:26:16AM -0600, Scott Cheloha wrote:
> Hi,
> 
> Before converting bpf(4) from using ticks to using real units of time
> we need to serialize BIOCGRTIMEOUT and BIOCSRTIMEOUT.  Neither
> operation is atomic so we need to use the per-descriptor mutex when
> reading or writing the bd_rtout member.
> 
> While here we can start annotating the locking for struct members in
> bpfdesc.h, too.
> 
> ok?
> Index: bpf.c
> ===
> RCS file: /cvs/src/sys/net/bpf.c,v
> retrieving revision 1.193
> diff -u -p -r1.193 bpf.c
> --- bpf.c 4 Nov 2020 04:40:13 -   1.193
> +++ bpf.c 10 Dec 2020 17:24:43 -
> @@ -873,9 +873,11 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
>   break;
>   }
>   rtout += tv->tv_usec / tick;
> + mtx_enter(&d->bd_mtx);
>   d->bd_rtout = rtout;
>   if (d->bd_rtout == 0 && tv->tv_usec != 0)
>   d->bd_rtout = 1;

This code could be refactored to write bd_rtout only once.
if (rtout == 0 && tv->tv_usec != 0)
rtout = 1;
d->bd_rtout = rtout;
or using WRITE_ONCE
WRITE_ONCE(d->bd_rtout, rtout);

This way the mutex would no longer be required since this is an atomic
update.

> + mtx_leave(&d->bd_mtx);
>   break;
>   }
>  
> @@ -886,8 +888,10 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
>   {
>   struct timeval *tv = (struct timeval *)addr;
>  
> + mtx_enter(&d->bd_mtx);
>   tv->tv_sec = d->bd_rtout / hz;
>   tv->tv_usec = (d->bd_rtout % hz) * tick;

Same here using a local rtout variable:
rtout = READ_ONCE(d->bd_rtout);
tv->tv_sec = rtout / hz;
tv->tv_usec = (rtout % hz) * tick;

Again this would be an atomic operation and the lock would not be needed
anymore.

I guess moving forward bd_rtout will be changed since it is tick based so
maybe then the update can no longer be made in an atomic fashion.

> + mtx_leave(&d->bd_mtx);
>   break;
>   }
>  
> Index: bpfdesc.h
> ===
> RCS file: /cvs/src/sys/net/bpfdesc.h,v
> retrieving revision 1.41
> diff -u -p -r1.41 bpfdesc.h
> --- bpfdesc.h 13 May 2020 21:34:37 -  1.41
> +++ bpfdesc.h 10 Dec 2020 17:24:43 -
> @@ -42,6 +42,13 @@
>  
>  #ifdef _KERNEL
>  
> +/*
> + * Locks used to protect struct members in this file:
> + *
> + *   I   immutable after initialization
> + *   m   the per-descriptor mutex (bpf_d.bd_mtx)
> + */
> +
>  struct bpf_program_smr {
>   struct bpf_program  bps_bf;
>   struct smr_entrybps_smr;
> @@ -72,7 +79,7 @@ struct bpf_d {
>   int bd_in_uiomove;  /* for debugging purpose */
>  
>   struct bpf_if  *bd_bif; /* interface descriptor */
> - u_long  bd_rtout;   /* Read timeout in 'ticks' */
> + u_long  bd_rtout;   /* [m] Read timeout in 'ticks' */
>   u_long  bd_rdStart; /* when the read started */
>   int bd_rnonblock;   /* true if nonblocking reads are set */
>   struct bpf_program_smr
> 

-- 
:wq Claudio



Re: openrsync: fix poll_timeout in server mode

2020-12-12 Thread Claudio Jeker
On Sat, Dec 12, 2020 at 07:07:20AM -0500, Daniel Moch wrote:
> A recent change to openrsync added the --timeout opt.  There's code to
> handle the (default) case of --timeout=0, which sets the poll_timeout
> to -1 (INFTIM).  Unfortunately that code doesn't run in the server
> process, meaning all of the relevant calls to poll(2) return
> immediately and the process fails.
> 
> The following patch addresses the issue by moving the code that
> handles --timeout=0 up to run before the rsync_server call.
> 
> Index: main.c
> ===
> RCS file: /cvs/src/usr.bin/rsync/main.c,v
> retrieving revision 1.50
> diff -r1.50 main.c
> 411a412,417
> > /* by default and for --timeout=0 disable poll_timeout */
> > if (poll_timeout == 0)
> > poll_timeout = -1;
> > else
> > poll_timeout *= 1000;
> > 
> 420,425d425
> < 
> < /* by default and for --timeout=0 disable poll_timeout */
> < if (poll_timeout == 0)
> < poll_timeout = -1;
> < else
> < poll_timeout *= 1000;
> 

Here is the unified diff which moves the poll_timeout initialisation before
the rsync_server() call.

-- 
:wq Claudio

Index: main.c
===
RCS file: /cvs/src/usr.bin/rsync/main.c,v
retrieving revision 1.50
diff -u -p -r1.50 main.c
--- main.c  24 Nov 2020 16:54:44 -  1.50
+++ main.c  12 Dec 2020 12:33:07 -
@@ -409,6 +409,12 @@ main(int argc, char *argv[])
if (opts.port == NULL)
opts.port = "rsync";
 
+   /* by default and for --timeout=0 disable poll_timeout */
+   if (poll_timeout == 0)
+   poll_timeout = -1;
+   else
+   poll_timeout *= 1000;
+
/*
 * This is what happens when we're started with the "hidden"
 * --server option, which is invoked for the rsync on the remote
@@ -417,12 +423,6 @@ main(int argc, char *argv[])
 
if (opts.server)
exit(rsync_server(&opts, (size_t)argc, argv));
-
-   /* by default and for --timeout=0 disable poll_timeout */
-   if (poll_timeout == 0)
-   poll_timeout = -1;
-   else
-   poll_timeout *= 1000;
 
/*
 * Now we know that we're the client on the local machine



bgpd send side hold timer

2020-12-14 Thread Claudio Jeker
The BGP protocol has a keepalive packet which resets the hold timer when a
packet is received. The problem is this covers only one side of the
transmission. It seems that some BGP implementations fail to process
messages in some cases but still send out KEEPALIVE packets. So bgpd
thinks everything is fine even though no updates were processed by the
other side (including our KEEPALIVE packets). The session is stuck in
limbo and with it some prefixes and routes.

Because of this I think it makes sense to add a send hold timer that is
reset whenever a write call to the socket is made. If a socket does not
become writable for holdtime seconds (90s by default) then the session is
reset similar to the hold timer expiring because no data was received.

This send holdtimer is not part of the BGP spec right now but looking at
discussions on the IDR mailing list I assume something like this may be
added at one point.

I would like to know what other people think and would especially like to
know if this diff causes session resets that should not happen.

Cheers
-- 
:wq Claudio


Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.405
diff -u -p -r1.405 bgpd.h
--- bgpd.h  5 Nov 2020 11:52:59 -   1.405
+++ bgpd.h  13 Dec 2020 14:56:47 -
@@ -1467,6 +1467,7 @@ static const char * const timernames[] =
"ConnectRetryTimer",
"KeepaliveTimer",
"HoldTimer",
+   "SendHoldTimer",
"IdleHoldTimer",
"IdleHoldResetTimer",
"CarpUndemoteTimer",
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.406
diff -u -p -r1.406 session.c
--- session.c   11 Dec 2020 12:00:01 -  1.406
+++ session.c   13 Dec 2020 14:52:42 -
@@ -373,6 +373,7 @@ session_main(int debug, int verbose)
if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
switch (pt->type) {
case Timer_Hold:
+   case Timer_HoldSend:
bgp_fsm(p, EVNT_TIMER_HOLDTIME);
break;
case Timer_ConnectRetry:
@@ -597,6 +598,7 @@ bgp_fsm(struct peer *peer, enum session_
switch (event) {
case EVNT_START:
timer_stop(&peer->timers, Timer_Hold);
+   timer_stop(&peer->timers, Timer_HoldSend);
timer_stop(&peer->timers, Timer_Keepalive);
timer_stop(&peer->timers, Timer_IdleHold);
 
@@ -875,6 +877,7 @@ change_state(struct peer *peer, enum ses
timer_stop(&peer->timers, Timer_ConnectRetry);
timer_stop(&peer->timers, Timer_Keepalive);
timer_stop(&peer->timers, Timer_Hold);
+   timer_stop(&peer->timers, Timer_HoldSend);
timer_stop(&peer->timers, Timer_IdleHold);
timer_stop(&peer->timers, Timer_IdleHoldReset);
session_close_connection(peer);
@@ -923,6 +926,7 @@ change_state(struct peer *peer, enum ses
timer_stop(&peer->timers, Timer_ConnectRetry);
timer_stop(&peer->timers, Timer_Keepalive);
timer_stop(&peer->timers, Timer_Hold);
+   timer_stop(&peer->timers, Timer_HoldSend);
timer_stop(&peer->timers, Timer_IdleHold);
timer_stop(&peer->timers, Timer_IdleHoldReset);
session_close_connection(peer);
@@ -1780,6 +1784,8 @@ session_dispatch_msg(struct pollfd *pfd,
return (1);
}
p->stats.last_write = getmonotime();
+   if (p->holdtime > 0)
+   timer_set(&p->timers, Timer_HoldSend, p->holdtime);
if (p->throttled && p->wbuf.queued < SESS_MSG_LOW_MARK) {
if (imsg_rde(IMSG_XON, p->conf.id, NULL, 0) == -1)
log_peer_warn(&p->conf, "imsg_compose XON");
Index: session.h
===
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.148
diff -u -p -r1.148 session.h
--- session.h   11 Dec 2020 12:00:01 -  1.148
+++ session.h   13 Dec 2020 14:52:17 -
@@ -180,6 +180,7 @@ enum Timer {
Timer_ConnectRetry,
Timer_Keepalive,
Timer_Hold,
+   Timer_HoldSend,
Timer_IdleHold,
Timer_IdleHoldReset,
Timer_CarpUndemote,



Re: sdmmc(4): sdmmc_io_function_enable(): don't sleep on lbolt

2020-12-15 Thread Claudio Jeker
On Fri, Dec 11, 2020 at 07:07:56PM -0600, Scott Cheloha wrote:
> Hi,
> 
> I'd like to remove lbolt from the kernel.  I think having it in the
> kernel complicates otherwise simple code.
> 
> We can start with sdmmc(4).
> 
> The goal in sdmmc_io_function_enable() is calling sdmmc_io_function_ready()
> up to six times and sleep 1 second between each attempt.  Here's rewritten
> code that does this without lbolt.
> 
> ok?
> 
> Index: sdmmc_io.c
> ===
> RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_io.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 sdmmc_io.c
> --- sdmmc_io.c31 Dec 2019 10:05:33 -  1.41
> +++ sdmmc_io.c12 Dec 2020 01:04:59 -
> @@ -231,8 +231,8 @@ sdmmc_io_function_enable(struct sdmmc_fu
>  {
>   struct sdmmc_softc *sc = sf->sc;
>   struct sdmmc_function *sf0 = sc->sc_fn0;
> + int chan, retry = 5;
>   u_int8_t rv;
> - int retry = 5;
>  
>   rw_assert_wrlock(&sc->sc_lock);
>  
> @@ -244,7 +244,7 @@ sdmmc_io_function_enable(struct sdmmc_fu
>   sdmmc_io_write_1(sf0, SD_IO_CCCR_FN_ENABLE, rv);
>  
>   while (!sdmmc_io_function_ready(sf) && retry-- > 0)
> - tsleep_nsec(&lbolt, PPAUSE, "pause", INFSLP);
> + tsleep_nsec(&chan, PPAUSE, "pause", SEC_TO_NSEC(1));
>   return (retry >= 0) ? 0 : ETIMEDOUT;
>  }
>  

Why not use &retry as wait channel instead of adding a new variable
chan? Result is the same. Would it make sense to allow NULL as a wait
channel to make the tsleep not wakeable? At least that could be used in a
few places where timeouts are implemented with tsleep and would make the
intent more obvious.

-- 
:wq Claudio



Re: bgpd send side hold timer

2020-12-15 Thread Claudio Jeker
On Mon, Dec 14, 2020 at 06:22:09PM +, Job Snijders wrote:
> Hi all,
> 
> This patch appears to be a very elegant solution to a thorny subtle
> problem: what to do when a peer is not accepting new routing information
> from you?

One thing I'm unsure about is the value of the SendHold timer. I reused
the hold timer value with the assumption that for dead connections the
regular hold timer expires before the SendHold timer (the send buffer
needs to be full before send starts blocking).

People should look out for cases where the SendHold timer triggered before
either a NOTIFICATION from the peer arrived or where the SendHold timer
triggered before the HoldTimer. Now that may be tricky since both SendHold
and Hold timer trigger the same EVNT_TIMER_HOLDTIME event so they can not
be distinguished easily.

I think that the SendHold timer will almost never trigger and if it does
only for the case where a session only works in one direction.
 
> I've seen in the wild that some crashed BGP implementations continue to
> be able to generate KEEPALIVE messages, and are able to TCP ACK
> keepalives you are sending, but won't actually accept anything you send
> to such a problematic peer. A red flag is when the peer keeps telling
> you not to send it anything by signalling a TCP Receive Window of 0.
> 
> The consequences are quite dire: in situations like these you know for a
> fact that the peer is operating based on stale routing information (you
> have proof they are not accepting your KEEPALIVEs and more importantly
> UPDATEs, as those are all stuck in your OutQ).
> 
> If a peer is not progressing new routing information coming from you,
> how can we trust it was processing any updates coming from other
> neighbors? Are the routes the peer told us in the past (what we have in
> Adj-RIB-In) still valid? Seems unlikely to me... it seems safer to
> destroy the BGP-4 session, log an error, and generate WITHDRAW messages
> for all routes pointing towards the broken peer so the network can
> converge to healthier paths.
> 
> IDR discussions here
> 
> https://mailarchive.ietf.org/arch/msg/idr/L9nWFBpW0Tci0c9DGfMoqC1j_sA/
> 
> OK job@
> 
> Kind regards,
> 
> Job
> 
> On Mon, Dec 14, 2020 at 06:45:47PM +0100, Claudio Jeker wrote:
> > The BGP protocol has a keepalive packet which resets the hold timer when a
> > packet is received. The problem is this covers only one side of the
> > transmission. It seems that some BGP implementations fail to process
> > messages in some cases but still send out KEEPALIVE packets. So bgpd
> > thinks everything is fine even though no updates were processed by the
> > other side (including our KEEPALIVE packets). The session is stuck in
> > limbo and with it some prefixes and routes.
> > 
> > Because of this I think it makes sense to add a send hold timer that is
> > reset whenever a write call to the socket is made. If a socket does not
> > become writable for holdtime seconds (90s by default) then the session is
> > reset similar to the hold timer expiring because no data was received.
> > 
> > This send holdtimer is not part of the BGP spec right now but looking at
> > discussions on the IDR mailing list I assume something like this may be
> > added at one point.
> > 
> > I would like to know what other people think and would especially like to
> > know if this diff causes session resets that should not happen.
> > 
> > Cheers
> > -- 
> > :wq Claudio
> > 
> > 
> > Index: bgpd.h
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
> > retrieving revision 1.405
> > diff -u -p -r1.405 bgpd.h
> > --- bgpd.h  5 Nov 2020 11:52:59 -   1.405
> > +++ bgpd.h  13 Dec 2020 14:56:47 -
> > @@ -1467,6 +1467,7 @@ static const char * const timernames[] =
> > "ConnectRetryTimer",
> > "KeepaliveTimer",
> > "HoldTimer",
> > +   "SendHoldTimer",
> > "IdleHoldTimer",
> > "IdleHoldResetTimer",
> > "CarpUndemoteTimer",
> > Index: session.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> > retrieving revision 1.406
> > diff -u -p -r1.406 session.c
> > --- session.c   11 Dec 2020 12:00:01 -  1.406
> > +++ session.c   13 Dec 2020 14:52:42 -
> > @@ -373,6 +373,7 @@ session_main(int debug, int verbose)
> > if ((pt = timer_nextisdue(&p->timers, now)) != NULL) {
> >  

Re: netstat - proto ip record

2020-12-16 Thread Claudio Jeker
On Wed, Dec 16, 2020 at 03:54:04PM +, Stuart Henderson wrote:
> On 2020/12/16 16:43, Salvatore Cuzzilla wrote:
> > Hi folks,
> > 
> > is there any process associated with this netstat record?
> > btw, what's the meaning of the state field with value '17'?
> > 
> > ToTo@obsd ~ $ doas netstat -an -f inet
> > Active Internet connections (including servers)
> > Proto   Recv-Q Send-Q  Local Address  Foreign Address(state)
> > ip   0  0  *.**.*17
> 
> Are kernel and userland in sync?

This is a SOCK_RAW socket using protocol 17 (UDP). AFAIK this is dhclient.
You can see this also with fstat.
root dhclient   750245* internet dgram udp *:0

-- 
:wq Claudio



Re: regress print target name

2020-12-16 Thread Claudio Jeker
On Wed, Dec 16, 2020 at 05:01:18PM +0100, Theo Buehler wrote:
> On Wed, Dec 16, 2020 at 04:42:59PM +0100, Alexander Bluhm wrote:
> > When debugging tests, it is useful to see the target name and which
> > output belongs to it.  A lot of my tests have echo lines, but I
> > think this is better done in the framework.  Then all tests behave
> > similar.  I would remove the echos from the Makefiles afterwards.
> 
> Agreed. While it's not exactly perfect for libssl regress tests due to
> some suboptimal target names, I think it's an improvement (provided you
> remove superfluous echos, as intended).
> 
> ok tb

I agree. OK claudio@ 

-- 
:wq Claudio



Re: sdmmc(4): sdmmc_io_function_enable(): don't sleep on lbolt

2020-12-16 Thread Claudio Jeker
On Wed, Dec 16, 2020 at 04:50:42PM -0300, Martin Pieuchot wrote:
> On 16/12/20(Wed) 12:50, Scott Cheloha wrote:
> > On Tue, Dec 15, 2020 at 01:47:24PM +0100, Mark Kettenis wrote:
> > > > Date: Tue, 15 Dec 2020 13:32:22 +0100
> > > > From: Claudio Jeker 
> > > > 
> > > > On Fri, Dec 11, 2020 at 07:07:56PM -0600, Scott Cheloha wrote:
> > > > > Hi,
> > > > > 
> > > > > I'd like to remove lbolt from the kernel.  I think having it in the
> > > > > kernel complicates otherwise simple code.
> > > > > 
> > > > > We can start with sdmmc(4).
> > > > > 
> > > > > The goal in sdmmc_io_function_enable() is calling 
> > > > > sdmmc_io_function_ready()
> > > > > up to six times and sleep 1 second between each attempt.  Here's 
> > > > > rewritten
> > > > > code that does with without lbolt.
> > > > > 
> > > > > ok?
> > > > > 
> > > > > Index: sdmmc_io.c
> > > > > ===
> > > > > RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_io.c,v
> > > > > retrieving revision 1.41
> > > > > diff -u -p -r1.41 sdmmc_io.c
> > > > > --- sdmmc_io.c31 Dec 2019 10:05:33 -  1.41
> > > > > +++ sdmmc_io.c12 Dec 2020 01:04:59 -
> > > > > @@ -231,8 +231,8 @@ sdmmc_io_function_enable(struct sdmmc_fu
> > > > >  {
> > > > >   struct sdmmc_softc *sc = sf->sc;
> > > > >   struct sdmmc_function *sf0 = sc->sc_fn0;
> > > > > + int chan, retry = 5;
> > > > >   u_int8_t rv;
> > > > > - int retry = 5;
> > > > >  
> > > > >   rw_assert_wrlock(&sc->sc_lock);
> > > > >  
> > > > > @@ -244,7 +244,7 @@ sdmmc_io_function_enable(struct sdmmc_fu
> > > > >   sdmmc_io_write_1(sf0, SD_IO_CCCR_FN_ENABLE, rv);
> > > > >  
> > > > >   while (!sdmmc_io_function_ready(sf) && retry-- > 0)
> > > > > - tsleep_nsec(&lbolt, PPAUSE, "pause", INFSLP);
> > > > > + tsleep_nsec(&chan, PPAUSE, "pause", SEC_TO_NSEC(1));
> > > > >   return (retry >= 0) ? 0 : ETIMEDOUT;
> > > > >  }
> > > > >  
> > > > 
> > > > Why not use &retry as wait channel instead of adding a new variable
> > > > chan? Result is the same. Would it make sense to allow NULL as wait
> > > > channel to make the tsleep not wakeable. At least that could be used in 
> > > > a
> > > > few places where timeouts are implemented with tsleep and would make the
> > > > intent more obvious.
> > > 
> > > Or have an appropriately named global variable?  Something like "int 
> > > nowake"?
> > 
> > Something like the attached patch?
> > 
> > I think the idea of a "dead channel" communicates the intent.  Nobody
> > broadcasts wakeups on the dead channel.  If you don't want to receive
> > wakeup broadcasts you sleep on the dead channel.  Hence, "deadchan".
> 
> Why did we choose to use a variable over NULL?  Any technical reason?

The sleep subsystem requires a non-NULL value for ident. Changing this
seems not trivial.
 
> I'm wondering if the locality of the variable might not matter in a
> distant future.  Did you dig a bit deeper about the FreeBSD solution?
> Why did they choose a per-CPU value?

Currently all sleep channels are hashed into IIRC 128 buckets. If all
timeouts use the same sleep channel then this queue may get overcrowded.
I guess only instrumentation and measurements will tell us how bad the
sleep queue is hashed.

> > Index: kern/kern_synch.c
> > ===
> > RCS file: /cvs/src/sys/kern/kern_synch.c,v
> > retrieving revision 1.172
> > diff -u -p -r1.172 kern_synch.c
> > --- kern/kern_synch.c   7 Dec 2020 16:55:29 -   1.172
> > +++ kern/kern_synch.c   16 Dec 2020 18:50:12 -
> > @@ -87,6 +87,12 @@ sleep_queue_init(void)
> > TAILQ_INIT(&slpque[i]);
> >  }
> >  
> > +/*
> > + * Threads that do not want to receive wakeup(9) broadcasts should
> > + * sleep on deadchan.
> > + */
> > +static int __deadchan;
> > +int *deadchan = &__deadchan;
> >  
>

Re: bgpd send side hold timer

2020-12-17 Thread Claudio Jeker
On Wed, Dec 16, 2020 at 10:41:42PM +, Job Snijders wrote:
> On Tue, Dec 15, 2020 at 05:02:19PM +0100, Claudio Jeker wrote:
> > On Mon, Dec 14, 2020 at 06:22:09PM +, Job Snijders wrote:
> > > This patch appears to be a very elegant solution to a thorny subtle
> > > problem: what to do when a peer is not accepting new routing
> > > information from you?
> > 
> > One thing I'm unsure about is the value of the SendHold timer. I reused
> > the hold timer value with the assumption that for dead connections the
> > regular hold timer expires before the SendHold timer (the send buffer
> > needs to be full before send starts blocking).
> 
> Let's be conservative while being progressive! :-)
> 
> If the 'Send Hold Timer' value is moved from 'infinite' to 90 seconds
> ("The suggested default value for the HoldTime", RFC 4271), I think
> we'll be able to see benefits.
> 
> > People should look out for cases where the SendHold timer triggered before
> > either a NOTIFICATION from the peer arrived or where the SendHold timer
> > triggered before the HoldTimer. Now that may be tricky since both SendHold
> > and Hold timer trigger the same EVNT_TIMER_HOLDTIME event so they can not
> > be distinguished easily.
> 
> Separation of the cases might be helpful.
> 
> > I think that the SendHold timer will almost never trigger and if it does
> > only for the case where a session only works in one direction.
> 
> If it is rare, maybe it should be logged as a unique message:
> 
> "SendHoldTimer Expired".
> 

This diff does both suggestions. Adds a new event and uses the default
hold time of 90 sec for the send timeout (unless holdtime is larger than
90 sec in which case holdtime is used).

This will show up in the logs like this:
neighbor (badjojo): sending notification: HoldTimer expired
neighbor (badjojo): state change Established -> Idle, reason: SendHoldTimer 
expired

In bgpctl show nei badjojo it will show this:
  Last error sent: HoldTimer expired

Since there is no NOTIFICATION error code for SendHoldTimer expired this
is about the best we can do for now.
-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.405
diff -u -p -r1.405 bgpd.h
--- bgpd.h  5 Nov 2020 11:52:59 -   1.405
+++ bgpd.h  17 Dec 2020 12:01:38 -
@@ -1377,6 +1377,7 @@ static const char * const eventnames[] =
"ConnectRetryTimer expired",
"HoldTimer expired",
"KeepaliveTimer expired",
+   "SendHoldTimer expired",
"OPEN message received",
"KEEPALIVE message received",
"UPDATE message received",
@@ -1467,6 +1468,7 @@ static const char * const timernames[] =
"ConnectRetryTimer",
"KeepaliveTimer",
"HoldTimer",
+   "SendHoldTimer",
"IdleHoldTimer",
"IdleHoldResetTimer",
"CarpUndemoteTimer",
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.406
diff -u -p -r1.406 session.c
--- session.c   11 Dec 2020 12:00:01 -  1.406
+++ session.c   17 Dec 2020 12:11:35 -
@@ -375,6 +375,9 @@ session_main(int debug, int verbose)
case Timer_Hold:
bgp_fsm(p, EVNT_TIMER_HOLDTIME);
break;
+   case Timer_SendHold:
+   bgp_fsm(p, EVNT_TIMER_SENDHOLD);
+   break;
case Timer_ConnectRetry:
bgp_fsm(p, EVNT_TIMER_CONNRETRY);
break;
@@ -597,6 +600,7 @@ bgp_fsm(struct peer *peer, enum session_
switch (event) {
case EVNT_START:
timer_stop(&peer->timers, Timer_Hold);
+   timer_stop(&peer->timers, Timer_SendHold);
timer_stop(&peer->timers, Timer_Keepalive);
timer_stop(&peer->timers, Timer_IdleHold);
 
@@ -709,6 +713,7 @@ bgp_fsm(struct peer *peer, enum session_
change_state(peer, STATE_IDLE, event);
break;
case EVNT_TIMER_HOLDTIME:
+   case EVNT_TIMER_SENDHOLD:
session_notification(peer, ERR_HOLDTIMEREXPIRED,
0, NULL, 0);
change_state(peer, STATE_IDLE, event);
@@ -749,6 +754,

bgpd: getifaddrs ifa_addr NULL check

2020-12-17 Thread Claudio Jeker
getifaddrs can return a struct ifaddrs entry with a NULL ifa_addr.
I think an unnumbered point-to-point interface can trigger this.
So better check for it before accessing anything in ifa_addr.

-- 
:wq Claudio

Index: config.c
===
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrieving revision 1.95
diff -u -p -r1.95 config.c
--- config.c14 Feb 2020 13:54:31 -  1.95
+++ config.c4 Dec 2020 11:46:33 -
@@ -339,7 +339,8 @@ get_bgpid(void)
fatal("getifaddrs");
 
for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
-   if (ifa->ifa_addr->sa_family != AF_INET)
+   if (ifa->ifa_addr == NULL ||
+   ifa->ifa_addr->sa_family != AF_INET)
continue;
cur = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
if ((cur & localnet) == localnet)   /* skip 127/8 */
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.406
diff -u -p -r1.406 session.c
--- session.c   11 Dec 2020 12:00:01 -  1.406
+++ session.c   17 Dec 2020 12:18:54 -
@@ -1223,7 +1223,8 @@ get_alternate_addr(struct sockaddr *sa, 
fatal("getifaddrs");
 
for (match = ifap; match != NULL; match = match->ifa_next)
-   if (sa_cmp(sa, match->ifa_addr) == 0)
+   if (match->ifa_addr != NULL &&
+   sa_cmp(sa, match->ifa_addr) == 0)
break;
 
if (match == NULL) {
@@ -1234,7 +1235,8 @@ get_alternate_addr(struct sockaddr *sa, 
switch (sa->sa_family) {
case AF_INET6:
for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
-   if (ifa->ifa_addr->sa_family == AF_INET &&
+   if (ifa->ifa_addr != NULL &&
+   ifa->ifa_addr->sa_family == AF_INET &&
strcmp(ifa->ifa_name, match->ifa_name) == 0) {
sa2addr(ifa->ifa_addr, alt, NULL);
break;
@@ -1243,10 +1245,12 @@ get_alternate_addr(struct sockaddr *sa, 
break;
case AF_INET:
for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
-   struct sockaddr_in6 *s =
-   (struct sockaddr_in6 *)ifa->ifa_addr;
-   if (ifa->ifa_addr->sa_family == AF_INET6 &&
+   if (ifa->ifa_addr != NULL &&
+   ifa->ifa_addr->sa_family == AF_INET6 &&
strcmp(ifa->ifa_name, match->ifa_name) == 0) {
+   struct sockaddr_in6 *s =
+   (struct sockaddr_in6 *)ifa->ifa_addr;
+
/* only accept global scope addresses */
if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))



bgpd refactor roa-set internals

2020-12-18 Thread Claudio Jeker
In preparation for RTR support this diff changes the internal
representation of roa-set to a simple RB tree based on struct roa.
The big difference is that overlapping roas, e.g.
10/8 source-as 3
10/8 maxlen 24 source-as 3
are now merged in the RDE and so bgpd -nv will show both entries instead
of only the second one.

On my testbox there is no difference in OVS state between a -current bgpd
and the one with this diff applied. More testing welcome.
-- 
:wq Claudio

Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.230
diff -u -p -r1.230 bgpd.c
--- bgpd.c  5 Nov 2020 11:52:59 -   1.230
+++ bgpd.c  18 Dec 2020 10:27:44 -
@@ -502,6 +502,7 @@ send_config(struct bgpd_config *conf)
struct as_set   *aset;
struct prefixset*ps;
struct prefixset_item   *psi, *npsi;
+   struct roa  *roa, *nroa;
 
reconfpending = 2;  /* one per child */
 
@@ -567,7 +568,6 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
0, 0, -1, psi, sizeof(*psi)) == -1)
return (-1);
-   set_free(psi->set);
free(psi);
}
free(ps);
@@ -579,23 +579,12 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
ps->name, sizeof(ps->name)) == -1)
return (-1);
-   RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
-   struct roa_set *rs;
-   size_t i, l, n;
-   RB_REMOVE(prefixset_tree, &ps->psitems, psi);
-   rs = set_get(psi->set, &n);
-   for (i = 0; i < n; i += l) {
-   l = (n - i > 1024 ? 1024 : n - i);
-   if (imsg_compose(ibuf_rde,
-   IMSG_RECONF_ROA_SET_ITEMS,
-   0, 0, -1, rs + i, l * sizeof(*rs)) == -1)
-   return -1;
-   }
-   if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
-   0, 0, -1, psi, sizeof(*psi)) == -1)
+   RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
+   RB_REMOVE(roa_tree, &conf->roa, roa);
+   if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
+   -1, roa, sizeof(*roa)) == -1)
return (-1);
-   set_free(psi->set);
-   free(psi);
+   free(roa);
}
free(ps);
}
@@ -604,23 +593,12 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_SET, 0, 0, -1,
NULL, 0) == -1)
return (-1);
-   RB_FOREACH_SAFE(psi, prefixset_tree, &conf->roa, npsi) {
-   struct roa_set *rs;
-   size_t i, l, n;
-   RB_REMOVE(prefixset_tree, &conf->roa, psi);
-   rs = set_get(psi->set, &n);
-   for (i = 0; i < n; i += l) {
-   l = (n - i > 1024 ? 1024 : n - i);
-   if (imsg_compose(ibuf_rde,
-   IMSG_RECONF_ROA_SET_ITEMS,
-   0, 0, -1, rs + i, l * sizeof(*rs)) == -1)
-   return -1;
-   }
-   if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
-   0, 0, -1, psi, sizeof(*psi)) == -1)
+   RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
+   RB_REMOVE(roa_tree, &conf->roa, roa);
+   if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
+   -1, roa, sizeof(*roa)) == -1)
return (-1);
-   set_free(psi->set);
-   free(psi);
+   free(roa);
}
}
 
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.405
diff -u -p -r1.405 bgpd.h
--- bgpd.h  5 Nov 2020 11:52:59 -   1.405
+++ bgpd.h  18 Dec 2020 10:27:21 -
@@ -264,6 +264,21 @@ struct rde_prefixset {
 };
 SIMPLEQ_HEAD(rde_prefixset_head, rde_prefixset);
 
+struct roa {
+   RB_ENTRY(roa)   entry;
+   uint8_t aid;
+   uint8_t prefixlen;
+   uint8_t maxlen;
+   uint8_t pad;
+   uint32_tasnum;
+  

Re: rpki-client refactor some path building

2020-12-18 Thread Claudio Jeker
On Thu, Dec 03, 2020 at 02:33:03PM +0100, Claudio Jeker wrote:
> Use asprintf with %.*s to construct the path based on the mft file
> location and the filename of the referenced file.
> 
> Since the * field in printf(3) is expecting an int type, typecast the
> ptrdiff_t to an int. Add an assert check to make sure there is no
> overflow. Also do the same overflow check in mft.c where the same idiom is
> used.
> 
> Maybe some PATH_MAX checks should be placed in the mft parser.

Ping

-- 
:wq Claudio

Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.85
diff -u -p -r1.85 main.c
--- main.c  2 Dec 2020 15:31:15 -   1.85
+++ main.c  3 Dec 2020 12:25:24 -
@@ -451,23 +451,16 @@ static void
 queue_add_from_mft(int fd, struct entityq *q, const char *mft,
 const struct mftfile *file, enum rtype type, size_t *eid)
 {
-   size_t   sz;
char*cp, *nfile;
 
/* Construct local path from filename. */
-
-   sz = strlen(file->file) + strlen(mft);
-   if ((nfile = calloc(sz + 1, 1)) == NULL)
-   err(1, "calloc");
-
/* We know this is host/module/... */
 
-   strlcpy(nfile, mft, sz + 1);
-   cp = strrchr(nfile, '/');
+   cp = strrchr(mft, '/');
assert(cp != NULL);
-   cp++;
-   *cp = '\0';
-   strlcat(nfile, file->file, sz + 1);
+   assert(cp - mft < INT_MAX);
+   if (asprintf(&nfile, "%.*s/%s", (int)(cp - mft), mft, file->file) == -1)
+   err(1, "asprintf");
 
/*
 * Since we're from the same directory as the MFT file, we know
Index: mft.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/mft.c,v
retrieving revision 1.19
diff -u -p -r1.19 mft.c
--- mft.c   6 Nov 2020 04:22:18 -   1.19
+++ mft.c   3 Dec 2020 12:37:15 -
@@ -17,6 +17,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -457,6 +458,7 @@ mft_validfilehash(const char *fn, const 
/* Check hash of file now, but first build path for it */
cp = strrchr(fn, '/');
assert(cp != NULL);
+   assert(cp - fn < INT_MAX);
if (asprintf(&path, "%.*s/%s", (int)(cp - fn), fn, m->file) == -1)
err(1, "asprintf");



Re: rpki-client unmarshal empty strings as NULL

2020-12-18 Thread Claudio Jeker
On Wed, Dec 02, 2020 at 05:06:28PM +0100, Claudio Jeker wrote:
> rpki-client passes both empty strings and NULL strings as zero length
> objects. The unmarshal code then allocates memory in any case and so a
> NULL string is unmarshalled as empty string. This is not great, currently
> there are no empty strings but a fair amount of NULL strings.
> This diff changes the behaviour and now NULL is passed as NULL.
> This should simplify passing optional strings (e.g. in the entity queue
> code).

I will commit this later today. It will help with some further cleanup of
the codebase.

-- 
:wq Claudio

? obj
Index: io.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/io.c,v
retrieving revision 1.10
diff -u -p -r1.10 io.c
--- io.c2 Dec 2020 15:31:15 -   1.10
+++ io.c2 Dec 2020 15:54:38 -
@@ -153,7 +153,7 @@ io_buf_read_alloc(int fd, void **res, si
 }
 
 /*
- * Read a string (which may just be \0 and zero-length), allocating
+ * Read a string (returns NULL for zero-length strings), allocating
  * space for it.
  */
 void
@@ -162,6 +162,10 @@ io_str_read(int fd, char **res)
size_t   sz;
 
io_simple_read(fd, &sz, sizeof(size_t));
+   if (sz == 0) {
+   *res = NULL;
+   return;
+   }
if ((*res = calloc(sz + 1, 1)) == NULL)
err(1, NULL);
io_simple_read(fd, *res, sz);



Re: rpki-client unmarshal empty strings as NULL

2020-12-18 Thread Claudio Jeker
On Fri, Dec 18, 2020 at 01:46:49PM +0100, Theo Buehler wrote:
> On Fri, Dec 18, 2020 at 11:43:40AM +0100, Claudio Jeker wrote:
> > On Wed, Dec 02, 2020 at 05:06:28PM +0100, Claudio Jeker wrote:
> > > rpki-client passes both empty strings and NULL strings as zero length
> > > objects. The unmarshal code then allocates memory in any case and so a
> > > NULL string is unmarshalled as empty string. This is not great, currently
> > > there are no empty strings but a fair amount of NULL strings.
> > > This diff changes the behaviour and now NULL is passed as NULL.
> > > This should simplify passing optional strings (e.g. in the entity queue
> > > code).
> > 
> > I will commit this later today. It will help with some further cleanup of
> > the codebase.
> 
> I'm a bit torn about this. Some of the callers clearly do not expect
> having to deal with NULL.
> 
> I see some *printf("%s", NULL) (for example in proc_rsync()) that should
> never happen but can now happen with this change. I'm unsure that there
> are no NULL derefs that this introduces. I'm fine with this going in if
> you intend to address this as part of the further cleanup work you
> envision.
> 

In most cases the code expects a non-empty string. For example in the
rsync.c case neither host nor mod are allowed to be NULL or "".
I guess adding an assert(host && mod) would be enough there.
I actually prefer the code to blow up since as mentioned
empty strings are almost always wrong (e.g. empty filenames or hashes).
Indeed in all those cases a check for NULL should be added in the
unmarshal function.

-- 
:wq Claudio



Re: rpki-client unmarshal empty strings as NULL

2020-12-18 Thread Claudio Jeker
On Fri, Dec 18, 2020 at 05:50:27PM +0100, Theo Buehler wrote:
> On Fri, Dec 18, 2020 at 05:45:01PM +0100, Claudio Jeker wrote:
> > On Fri, Dec 18, 2020 at 01:46:49PM +0100, Theo Buehler wrote:
> > > On Fri, Dec 18, 2020 at 11:43:40AM +0100, Claudio Jeker wrote:
> > > > On Wed, Dec 02, 2020 at 05:06:28PM +0100, Claudio Jeker wrote:
> > > > > rpki-client passes both empty strings and NULL strings as zero length
> > > > > objects. The unmarshal code then allocates memory in any case and so a
> > > > > NULL string is unmarshalled as empty string. This is not great, 
> > > > > currently
> > > > > there are no empty strings but a fair amount of NULL strings.
> > > > > This diff changes the behaviour and now NULL is passed as NULL.
> > > > > This should simplify passing optional strings (e.g. in the entity 
> > > > > queue
> > > > > code).
> > > > 
> > > > I will commit this later today. It will help with some further cleanup 
> > > > of
> > > > the codebase.
> > > 
> > > I'm a bit torn about this. Some of the callers clearly do not expect
> > > having to deal with NULL.
> > > 
> > > I see some *printf("%s", NULL) (for example in proc_rsync()) that should
> > > never happen but can now happen with this change. I'm unsure that there
> > > are no NULL derefs that this introduces. I'm fine with this going in if
> > > you intend to address this as part of the further cleanup work you
> > > envision.
> > > 
> > 
> > In most cases the code expects a non-empty string. For example in the
> > rsync.c case neither host nor mod are allowed to be NULL or "".
> 
> Yes.
> 
> > I guess adding an assert(host && mod) would be enough there.
> 
> Right.
> 
> > I actually prefer the code to blow up since as mentioned
> > empty strings are almost always wrong (e.g. empty filenames or hashes).
> > Indeed in all those cases a check for NULL should be added in the
> > unmarshal function.
> 
> Go ahead. It certainly doesn't make things worse or (more) incorrect.
> 
> ok tb
> 

This is the next step. I added asserts for strings that must be set and
removed some of the complications around optional strings. Especially cert.c
and some of the entityq code benefits from this.

-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.20
diff -u -p -r1.20 cert.c
--- cert.c  7 Dec 2020 13:23:01 -   1.20
+++ cert.c  18 Dec 2020 17:09:45 -
@@ -1262,7 +1262,6 @@ void
 cert_buffer(char **b, size_t *bsz, size_t *bmax, const struct cert *p)
 {
size_t   i;
-   int  has_crl, has_aki;
 
io_simple_buffer(b, bsz, bmax, &p->valid, sizeof(int));
io_simple_buffer(b, bsz, bmax, &p->ipsz, sizeof(size_t));
@@ -1275,15 +1274,8 @@ cert_buffer(char **b, size_t *bsz, size_
 
io_str_buffer(b, bsz, bmax, p->mft);
io_str_buffer(b, bsz, bmax, p->notify);
-
-   has_crl = (p->crl != NULL);
-   io_simple_buffer(b, bsz, bmax, &has_crl, sizeof(int));
-   if (has_crl)
-   io_str_buffer(b, bsz, bmax, p->crl);
-   has_aki = (p->aki != NULL);
-   io_simple_buffer(b, bsz, bmax, &has_aki, sizeof(int));
-   if (has_aki)
-   io_str_buffer(b, bsz, bmax, p->aki);
+   io_str_buffer(b, bsz, bmax, p->crl);
+   io_str_buffer(b, bsz, bmax, p->aki);
io_str_buffer(b, bsz, bmax, p->ski);
 }
 
@@ -1327,7 +1319,6 @@ cert_read(int fd)
 {
struct cert *p;
size_t   i;
-   int  has_crl, has_aki;
 
if ((p = calloc(1, sizeof(struct cert))) == NULL)
err(1, NULL);
@@ -1348,14 +1339,12 @@ cert_read(int fd)
cert_as_read(fd, &p->as[i]);
 
io_str_read(fd, &p->mft);
+   assert(p->mft);
io_str_read(fd, &p->notify);
-   io_simple_read(fd, &has_crl, sizeof(int));
-   if (has_crl)
-   io_str_read(fd, &p->crl);
-   io_simple_read(fd, &has_aki, sizeof(int));
-   if (has_aki)
-   io_str_read(fd, &p->aki);
+   io_str_read(fd, &p->crl);
+   io_str_read(fd, &p->aki);
io_str_read(fd, &p->ski);
+   assert(p->ski);
 
return p;
 }
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.86
diff -u -p -r1.86 main.c
--- main.c  9 Dec 2020 11:29:04 -   1.86
+++ main

Re: IPsec PMTU and reject route

2020-12-20 Thread Claudio Jeker
On Sun, Dec 20, 2020 at 01:01:58AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> In revision 1.87 of ip_icmp.c claudio@ added ignoring reject routes
> to icmp_mtudisc_clone().  Otherwise TCP would clone these routes
> for PMTU discovery.  They will not work, even after dynamic routing
> has found a better route than the reject route.
> 
> With IPsec the use case is different.  First you need a route, but
> then the flow handles the packet without routing.  Usually this
> route should be a reject route to avoid sending unencrypted traffic
> if the flow is missing.  But IPsec needs this route for PMTU
> discovery, which currently does not work.
> 
> So accept reject and blackhole routes for IPsec PMTU discovery.
> 
> ok?

This makes sense and is fine. OK claudio@
 
> bluhm
> 
> Index: netinet/ip_icmp.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
> retrieving revision 1.183
> diff -u -p -r1.183 ip_icmp.c
> --- netinet/ip_icmp.c 22 Aug 2020 17:55:54 -  1.183
> +++ netinet/ip_icmp.c 18 Dec 2020 16:59:25 -
> @@ -928,7 +928,7 @@ icmp_sysctl_icmpstat(void *oldp, size_t 
>  }
>  
>  struct rtentry *
> -icmp_mtudisc_clone(struct in_addr dst, u_int rtableid)
> +icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec)
>  {
>   struct sockaddr_in sin;
>   struct rtentry *rt;
> @@ -942,7 +942,10 @@ icmp_mtudisc_clone(struct in_addr dst, u
>   rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
>  
>   /* Check if the route is actually usable */
> - if (!rtisvalid(rt) || (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
> + if (!rtisvalid(rt))
> + goto bad;
> + /* IPsec needs the route only for PMTU, it can use reject for that */
> + if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
>   goto bad;
>  
>   /*
> @@ -1000,7 +1003,7 @@ icmp_mtudisc(struct icmp *icp, u_int rta
>   struct ifnet *ifp;
>   u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
>  
> - rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid);
> + rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
>   if (rt == NULL)
>   return;
>  
> Index: netinet/ip_icmp.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.h,v
> retrieving revision 1.31
> diff -u -p -r1.31 ip_icmp.h
> --- netinet/ip_icmp.h 5 Nov 2018 21:50:39 -   1.31
> +++ netinet/ip_icmp.h 18 Dec 2020 16:59:25 -
> @@ -239,7 +239,7 @@ int   icmp_reflect(struct mbuf *, struct m
>  void icmp_send(struct mbuf *, struct mbuf *);
>  int  icmp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
>  struct rtentry *
> - icmp_mtudisc_clone(struct in_addr, u_int);
> + icmp_mtudisc_clone(struct in_addr, u_int, int);
>  void icmp_mtudisc(struct icmp *, u_int);
>  int  icmp_do_exthdr(struct mbuf *, u_int16_t, u_int8_t, void *, size_t);
>  #endif /* _KERNEL */
> Index: netinet/ip_output.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
> retrieving revision 1.357
> diff -u -p -r1.357 ip_output.c
> --- netinet/ip_output.c   24 Jun 2020 22:03:43 -  1.357
> +++ netinet/ip_output.c   18 Dec 2020 16:59:25 -
> @@ -605,7 +605,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
>   rt = NULL;
>   else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
>   rt = icmp_mtudisc_clone(ip->ip_dst,
> - m->m_pkthdr.ph_rtableid);
> + m->m_pkthdr.ph_rtableid, 1);
>   rt_mtucloned = 1;
>   }
>   DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__,
> Index: netinet/tcp_timer.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 tcp_timer.c
> --- netinet/tcp_timer.c   11 Jun 2018 07:40:26 -  1.67
> +++ netinet/tcp_timer.c   18 Dec 2020 16:59:25 -
> @@ -292,7 +292,7 @@ tcp_timer_rexmt(void *arg)
>  #endif
>   case PF_INET:
>   rt = icmp_mtudisc_clone(inp->inp_faddr,
> - inp->inp_rtableid);
> + inp->inp_rtableid, 0);
>   break;
>   }
>   if (rt != NULL) {
> 

-- 
:wq Claudio



Re: kdump: show scope for v6 addresses if set

2020-12-20 Thread Claudio Jeker
On Sun, Dec 20, 2020 at 01:39:57PM +0100, Otto Moerbeek wrote:
> Hi,
> 
> scope is there, just not shown. While there, use proper constants for
> two sizes.
> 
>   -Otto
> 
> 
> Index: ktrstruct.c
> ===
> RCS file: /cvs/src/usr.bin/kdump/ktrstruct.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 ktrstruct.c
> --- ktrstruct.c   17 Nov 2018 20:46:12 -  1.28
> +++ ktrstruct.c   20 Dec 2020 12:34:34 -
> @@ -90,7 +90,7 @@ ktrsockaddr(struct sockaddr *sa)
>   switch(sa->sa_family) {
>   case AF_INET: {
>   struct sockaddr_in  *sa_in;
> - char addr[64];
> + char addr[INET_ADDRSTRLEN];
>  
>   sa_in = (struct sockaddr_in *)sa;
>   check_sockaddr_len(in);
> @@ -100,12 +100,15 @@ ktrsockaddr(struct sockaddr *sa)
>   }
>   case AF_INET6: {
>   struct sockaddr_in6 *sa_in6;
> - char addr[64];
> + char addr[INET6_ADDRSTRLEN], scope[12] = { 0 };
>  
>   sa_in6 = (struct sockaddr_in6 *)sa;
>   check_sockaddr_len(in6);
>   inet_ntop(AF_INET6, &sa_in6->sin6_addr, addr, sizeof addr);
> - printf("[%s]:%u", addr, htons(sa_in6->sin6_port));
> + if (sa_in6->sin6_scope_id)
> + snprintf(scope, sizeof(scope), "%%%u",
> + sa_in6->sin6_scope_id);

Would it make sense to use if_indextoname() here to translate the scope id
into an interface name? The snprintf would still be needed for the case
where NULL is returned by if_indextoname().

> + printf("[%s%s]:%u", addr, scope, htons(sa_in6->sin6_port));
>   break;
>   }
>   case AF_UNIX: {
> 

-- 
:wq Claudio



Re: IPv6 pf_test EACCES

2020-12-21 Thread Claudio Jeker
On Mon, Dec 21, 2020 at 11:34:04PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> A while ago we decided to pass EACCES to userland if pf blocks a
> packet.  IPv6 still has the old EHOSTUNREACH code.
> 
> Use the same errno for dropped IPv6 packets as in IPv4.
> 
> ok?

Seems reasonable. OK claudio@

> Index: netinet6/ip6_output.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
> retrieving revision 1.247
> diff -u -p -r1.247 ip6_output.c
> --- netinet6/ip6_output.c 17 Jul 2020 15:21:36 -  1.247
> +++ netinet6/ip6_output.c 21 Dec 2020 22:27:24 -
> @@ -616,7 +616,7 @@ reroute:
>  
>  #if NPF > 0
>   if (pf_test(AF_INET6, PF_OUT, ifp, &m) != PF_PASS) {
> - error = EHOSTUNREACH;
> + error = EACCES;
>   m_freem(m);
>   goto done;
>   }
> @@ -2773,7 +2773,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
>   if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
>   pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, &m) != PF_PASS) {
>   m_freem(m);
> - return EHOSTUNREACH;
> + return EACCES;
>   }
>   if (m == NULL)
>   return 0;
> 

-- 
:wq Claudio



bgpd: adjust loopback filter for network statements

2020-12-23 Thread Claudio Jeker
In bgpd statements like
network inet static
or
network rtlabel "exportme"
will skip routes that use 127.0.0.1 as nexthop. This makes sense for
network connected and network static but for rtlabel and even priority
based selection this makes less sense.

Especially using rtlabel to export routes should give the admin also the
option to export reject or blackhole routes (which have their nexthop set
to 127.0.0.1).

This diff does this change but still skips networks like 224/4 for network
inet static. I think this is a decent compromise.
-- 
:wq Claudio

Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.239
diff -u -p -r1.239 kroute.c
--- kroute.c1 Oct 2019 08:57:48 -   1.239
+++ kroute.c4 Dec 2020 11:31:09 -
@@ -110,7 +110,7 @@ int kr6_delete(struct ktable *, struct k
 intkrVPN4_delete(struct ktable *, struct kroute_full *, u_int8_t);
 intkrVPN6_delete(struct ktable *, struct kroute_full *, u_int8_t);
 void   kr_net_delete(struct network *);
-intkr_net_match(struct ktable *, struct network_config *, u_int16_t);
+intkr_net_match(struct ktable *, struct network_config *, u_int16_t, int);
 struct network *kr_net_find(struct ktable *, struct network *);
 void   kr_net_clear(struct ktable *);
 void   kr_redistribute(int, struct ktable *, struct kroute *);
@@ -1318,7 +1318,8 @@ kr_net_redist_del(struct ktable *kt, str
 }
 
 int
-kr_net_match(struct ktable *kt, struct network_config *net, u_int16_t flags)
+kr_net_match(struct ktable *kt, struct network_config *net, u_int16_t flags,
+int loopback)
 {
struct network  *xn;
 
@@ -1330,10 +1331,16 @@ kr_net_match(struct ktable *kt, struct n
/* static match already redistributed */
continue;
case NETWORK_STATIC:
+   /* Skip networks with nexthop on loopback. */
+   if (loopback)
+   continue;
if (flags & F_STATIC)
break;
continue;
case NETWORK_CONNECTED:
+   /* Skip networks with nexthop on loopback. */
+   if (loopback)
+   continue;
if (flags & F_CONNECTED)
break;
continue;
@@ -1419,6 +1426,7 @@ kr_redistribute(int type, struct ktable 
 {
struct network_confignet;
u_int32_ta;
+   int  loflag = 0;
 
bzero(&net, sizeof(net));
net.prefix.aid = AID_INET;
@@ -1449,9 +1457,9 @@ kr_redistribute(int type, struct ktable 
(a >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)
return;
 
-   /* Consider networks with nexthop loopback as not redistributable. */
+   /* Check if the nexthop is the loopback addr. */
if (kr->nexthop.s_addr == htonl(INADDR_LOOPBACK))
-   return;
+   loflag = 1;
 
/*
 * never allow 0.0.0.0/0 the default route can only be redistributed
@@ -1460,7 +1468,7 @@ kr_redistribute(int type, struct ktable 
if (kr->prefix.s_addr == INADDR_ANY && kr->prefixlen == 0)
return;
 
-   if (kr_net_match(kt, &net, kr->flags) == 0)
+   if (kr_net_match(kt, &net, kr->flags, loflag) == 0)
/* no longer matches, if still present remove it */
kr_net_redist_del(kt, &net, 1);
 }
@@ -1468,7 +1476,8 @@ kr_redistribute(int type, struct ktable 
 void
 kr_redistribute6(int type, struct ktable *kt, struct kroute6 *kr6)
 {
-   struct network_confignet;
+   struct network_config   net;
+   int loflag = 0;
 
bzero(&net, sizeof(net));
net.prefix.aid = AID_INET6;
@@ -1503,11 +1512,9 @@ kr_redistribute6(int type, struct ktable
IN6_IS_ADDR_V4COMPAT(&kr6->prefix))
return;
 
-   /*
-* Consider networks with nexthop loopback as not redistributable.
-*/
+   /* Check if the nexthop is the loopback addr. */
if (IN6_IS_ADDR_LOOPBACK(&kr6->nexthop))
-   return;
+   loflag = 1;
 
/*
 * never allow ::/0 the default route can only be redistributed
@@ -1517,7 +1524,7 @@ kr_redistribute6(int type, struct ktable
memcmp(&kr6->prefix, &in6addr_any, sizeof(struct in6_addr)) == 0)
return;
 
-   if (kr_net_match(kt, &net, kr6->flags) == 0)
+   if (kr_net_match(kt, &net, kr6->flags, loflag) == 0)
/* no longer matches, if still present remove it */
kr_net_redist_del(kt, &net, 1);
 }



Re: netstat - proto ip record

2020-12-23 Thread Claudio Jeker
On Wed, Dec 23, 2020 at 04:13:04PM +0100, Alexander Bluhm wrote:
> On Wed, Dec 16, 2020 at 05:24:50PM +0100, Claudio Jeker wrote:
> > On Wed, Dec 16, 2020 at 03:54:04PM +, Stuart Henderson wrote:
> > > On 2020/12/16 16:43, Salvatore Cuzzilla wrote:
> > > > Hi folks,
> > > > 
> > > > is there any process associated with this netstat record?
> > > > btw, what's the meaning of the state field with value '17'?
> > > > 
> > > > ToTo@obsd ~ $ doas netstat -an -f inet
> > > > Active Internet connections (including servers)
> > > > Proto   Recv-Q Send-Q  Local Address  Foreign Address
> > > > (state)
> > > > ip   0  0  *.**.*17
> > > 
> > > Are kernel and userland in sync?
> > 
> > This is a SOCK_RAW socket using protocol 17 (UDP). AFAIK this is dhclient.
> > You can see this also with fstat.
> > root dhclient   750245* internet dgram udp *:0
> 
> Should we print a specific headline in netstat to avoid such questions?
> 
> Proto   Recv-Q Send-Q  Local Address  Foreign AddressIP-Proto
> ip   0  0  *.**.*17
> 
> Proto   Recv-Q Send-Q  Local Address  Foreign AddressTCP-State
> tcp  0  0  192.168.2.138.3513052.10.128.80.443   
> ESTABLISHED
> 
> Proto   Recv-Q Send-Q  Local Address  Foreign Address   
> udp  0  0  192.168.2.138.31411162.159.200.1.123 
> 
> ok?

I like it. OK claudio@

> Index: inet.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
> retrieving revision 1.168
> diff -u -p -r1.168 inet.c
> --- inet.c15 Jan 2020 14:02:37 -  1.168
> +++ inet.c23 Dec 2020 15:08:45 -
> @@ -327,9 +327,10 @@ netdomainpr(struct kinfo_file *kf, int p
>   if (Bflag && istcp)
>   printf("%-6.6s %-6.6s %-6.6s ",
>   "Recv-W", "Send-W", "Cgst-W");
> - printf(" %-*.*s %-*.*s %s\n",
> + printf(" %-*.*s %-*.*s%s\n",
>   addrlen, addrlen, "Local Address",
> - addrlen, addrlen, "Foreign Address", "(state)");
> + addrlen, addrlen, "Foreign Address",
> + istcp ? " TCP-State" : type == SOCK_RAW ? " IP-Proto" : "");
>   }
>  
>   if (Aflag)

-- 
:wq Claudio



Re: bgpd refactor roa-set internals

2020-12-29 Thread Claudio Jeker
On Fri, Dec 18, 2020 at 11:36:33AM +0100, Claudio Jeker wrote:
> In preparation for RTR support this diff changes the internal
> representation of roa-set to a simple RB tree based on struct roa.
> The big difference is that overlapping roas, e.g.
> 10/8 source-as 3
> 10/8 maxlen 24 source-as 3
> are now merged in the RDE and so bgpd -nv will show both entries instead
> of only the second one.
> 
> On my testbox there is no difference in OVS state between a -current bgpd
> and the one with this diff applied. More testing welcome.

Ping. I would like to get this in so I can reduce the size of my RTR work
in progress diff.

-- 
:wq Claudio

Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.230
diff -u -p -r1.230 bgpd.c
--- bgpd.c  5 Nov 2020 11:52:59 -   1.230
+++ bgpd.c  18 Dec 2020 10:27:44 -
@@ -502,6 +502,7 @@ send_config(struct bgpd_config *conf)
struct as_set   *aset;
struct prefixset*ps;
struct prefixset_item   *psi, *npsi;
+   struct roa  *roa, *nroa;
 
reconfpending = 2;  /* one per child */
 
@@ -567,7 +568,6 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
0, 0, -1, psi, sizeof(*psi)) == -1)
return (-1);
-   set_free(psi->set);
free(psi);
}
free(ps);
@@ -579,23 +579,12 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_ORIGIN_SET, 0, 0, -1,
ps->name, sizeof(ps->name)) == -1)
return (-1);
-   RB_FOREACH_SAFE(psi, prefixset_tree, &ps->psitems, npsi) {
-   struct roa_set *rs;
-   size_t i, l, n;
-   RB_REMOVE(prefixset_tree, &ps->psitems, psi);
-   rs = set_get(psi->set, &n);
-   for (i = 0; i < n; i += l) {
-   l = (n - i > 1024 ? 1024 : n - i);
-   if (imsg_compose(ibuf_rde,
-   IMSG_RECONF_ROA_SET_ITEMS,
-   0, 0, -1, rs + i, l * sizeof(*rs)) == -1)
-   return -1;
-   }
-   if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
-   0, 0, -1, psi, sizeof(*psi)) == -1)
+   RB_FOREACH_SAFE(roa, roa_tree, &ps->roaitems, nroa) {
+   RB_REMOVE(roa_tree, &conf->roa, roa);
+   if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
+   -1, roa, sizeof(*roa)) == -1)
return (-1);
-   set_free(psi->set);
-   free(psi);
+   free(roa);
}
free(ps);
}
@@ -604,23 +593,12 @@ send_config(struct bgpd_config *conf)
if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_SET, 0, 0, -1,
NULL, 0) == -1)
return (-1);
-   RB_FOREACH_SAFE(psi, prefixset_tree, &conf->roa, npsi) {
-   struct roa_set *rs;
-   size_t i, l, n;
-   RB_REMOVE(prefixset_tree, &conf->roa, psi);
-   rs = set_get(psi->set, &n);
-   for (i = 0; i < n; i += l) {
-   l = (n - i > 1024 ? 1024 : n - i);
-   if (imsg_compose(ibuf_rde,
-   IMSG_RECONF_ROA_SET_ITEMS,
-   0, 0, -1, rs + i, l * sizeof(*rs)) == -1)
-   return -1;
-   }
-   if (imsg_compose(ibuf_rde, IMSG_RECONF_PREFIX_SET_ITEM,
-   0, 0, -1, psi, sizeof(*psi)) == -1)
+   RB_FOREACH_SAFE(roa, roa_tree, &conf->roa, nroa) {
+   RB_REMOVE(roa_tree, &conf->roa, roa);
+   if (imsg_compose(ibuf_rde, IMSG_RECONF_ROA_ITEM, 0, 0,
+   -1, roa, sizeof(*roa)) == -1)
return (-1);
-   set_free(psi->set);
-   free(psi);
+   free(roa);
}
}
 
Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.405
diff -u -p -r1.405 bgpd.h
--- bgpd.h  5 Nov 2020 11:52:59 -   1.405
+++ bgpd.h  18 Dec 2020

Re: bgpd: getifaddrs ifa_addr NULL check

2020-12-29 Thread Claudio Jeker
On Thu, Dec 17, 2020 at 01:26:09PM +0100, Claudio Jeker wrote:
> getifaddrs can return a struct ifaddrs entry with a NULL ifa_addr.
> I think an unnumbered point-to-point interface can trigger this.
> So better check for it before accessing anything in ifa_addr.

I will commit this later today. The change is obvious and simple.
 
-- 
:wq Claudio

Index: config.c
===
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrieving revision 1.95
diff -u -p -r1.95 config.c
--- config.c14 Feb 2020 13:54:31 -  1.95
+++ config.c4 Dec 2020 11:46:33 -
@@ -339,7 +339,8 @@ get_bgpid(void)
fatal("getifaddrs");
 
for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
-   if (ifa->ifa_addr->sa_family != AF_INET)
+   if (ifa->ifa_addr == NULL ||
+   ifa->ifa_addr->sa_family != AF_INET)
continue;
cur = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
if ((cur & localnet) == localnet)   /* skip 127/8 */
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.406
diff -u -p -r1.406 session.c
--- session.c   11 Dec 2020 12:00:01 -  1.406
+++ session.c   17 Dec 2020 12:18:54 -
@@ -1223,7 +1223,8 @@ get_alternate_addr(struct sockaddr *sa, 
fatal("getifaddrs");
 
for (match = ifap; match != NULL; match = match->ifa_next)
-   if (sa_cmp(sa, match->ifa_addr) == 0)
+   if (match->ifa_addr != NULL &&
+   sa_cmp(sa, match->ifa_addr) == 0)
break;
 
if (match == NULL) {
@@ -1234,7 +1235,8 @@ get_alternate_addr(struct sockaddr *sa, 
switch (sa->sa_family) {
case AF_INET6:
for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
-   if (ifa->ifa_addr->sa_family == AF_INET &&
+   if (ifa->ifa_addr != NULL &&
+   ifa->ifa_addr->sa_family == AF_INET &&
strcmp(ifa->ifa_name, match->ifa_name) == 0) {
sa2addr(ifa->ifa_addr, alt, NULL);
break;
@@ -1243,10 +1245,12 @@ get_alternate_addr(struct sockaddr *sa, 
break;
case AF_INET:
for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
-   struct sockaddr_in6 *s =
-   (struct sockaddr_in6 *)ifa->ifa_addr;
-   if (ifa->ifa_addr->sa_family == AF_INET6 &&
+   if (ifa->ifa_addr != NULL &&
+   ifa->ifa_addr->sa_family == AF_INET6 &&
strcmp(ifa->ifa_name, match->ifa_name) == 0) {
+   struct sockaddr_in6 *s =
+   (struct sockaddr_in6 *)ifa->ifa_addr;
+
/* only accept global scope addresses */
if (IN6_IS_ADDR_LINKLOCAL(&s->sin6_addr) ||
IN6_IS_ADDR_SITELOCAL(&s->sin6_addr))



Re: bgpd show status of set tables

2020-12-29 Thread Claudio Jeker
On Thu, Dec 10, 2020 at 02:42:49PM +0100, Claudio Jeker wrote:
> On Thu, Dec 03, 2020 at 10:59:29PM +0100, Claudio Jeker wrote:
> > The use of roa-set, prefix-set and as-set is fairly common in bgpd.
> > Still sometimes it is not exactly clear how old the data in those tables
> > is. This diff is a first step at improving inspection by adding
> > bgpctl show sets
> > 
> > Sample output is:
> > Type   Name         #IPv4   #Ipv6 #ASnum Last Change
> > ROA    RPKI ROA     158810  26257      - 00:00:07
> > ASNUM  asns_AS15600      -      -     26 01:19:10
> > PREFIX p4_AS21040        8      0      - 01:19:10
> > 
> > I just did a bgpctl reload with a new roa table (generated by rpki-client)
> > but the as-set and prefix-set did not change during this reload.
> > The output also includes the number of entries in the tables but in the
> > case of roa-set the number of unique prefixes is counted. So the number is
> > a bit under the count from rpki-client because e.g.
> > 1.32.219.0/24 source-as 4842
> > 1.32.219.0/24 source-as 138570
> > are counted as 1 right now (instead of 2 prefixes).
> > 
> > More statistics can be added if their calculation is easy.
> 
> Am I the only one interested in knowing how old my set data is?

Any feedback on this? This should help people to detect issues where
rpki-client or bgpq3 fail to produce updates.

-- 
:wq Claudio

PS: apply diff in /usr/src/usr.sbin

Index: bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.263
diff -u -p -r1.263 bgpctl.c
--- bgpctl/bgpctl.c 10 May 2020 13:38:46 -  1.263
+++ bgpctl/bgpctl.c 3 Dec 2020 20:17:14 -
@@ -213,6 +213,9 @@ main(int argc, char *argv[])
case SHOW_INTERFACE:
imsg_compose(ibuf, IMSG_CTL_SHOW_INTERFACE, 0, 0, -1, NULL, 0);
break;
+   case SHOW_SET:
+   imsg_compose(ibuf, IMSG_CTL_SHOW_SET, 0, 0, -1, NULL, 0);
+   break;
case SHOW_NEIGHBOR:
case SHOW_NEIGHBOR_TIMERS:
case SHOW_NEIGHBOR_TERSE:
@@ -393,6 +396,7 @@ show(struct imsg *imsg, struct parse_res
struct ctl_timer*t;
struct ctl_show_interface   *iface;
struct ctl_show_nexthop *nh;
+   struct ctl_show_set *set;
struct kroute_full  *kf;
struct ktable   *kt;
struct ctl_show_rib  rib;
@@ -466,6 +470,10 @@ show(struct imsg *imsg, struct parse_res
memcpy(&hash, imsg->data, sizeof(hash));
output->rib_hash(&hash);
break;
+   case IMSG_CTL_SHOW_SET:
+   set = imsg->data;
+   output->set(set);
+   break;
case IMSG_CTL_RESULT:
if (imsg->hdr.len != IMSG_HEADER_SIZE + sizeof(rescode)) {
warnx("got IMSG_CTL_RESULT with wrong len");
@@ -974,6 +982,23 @@ fmt_ext_community(u_int8_t *data)
log_ext_subtype(type, subtype),
(unsigned long long)be64toh(ext));
return buf;
+   }
+}
+
+const char *
+fmt_set_type(struct ctl_show_set *set)
+{
+   switch (set->type) {
+   case ROA_SET:
+   return "ROA";
+   case PREFIX_SET:
+   return "PREFIX";
+   case ORIGIN_SET:
+   return "ORIGIN";
+   case ASNUM_SET:
+   return "ASNUM";
+   default:
+   return "BULA";
}
 }
 
Index: bgpctl/bgpctl.h
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.h,v
retrieving revision 1.7
diff -u -p -r1.7 bgpctl.h
--- bgpctl/bgpctl.h 2 May 2020 14:33:33 -   1.7
+++ bgpctl/bgpctl.h 3 Dec 2020 20:12:34 -
@@ -30,6 +30,7 @@ struct output {
struct parse_result *);
void(*rib_hash)(struct rde_hashstats *);
void(*rib_mem)(struct rde_memstats *);
+   void(*set)(struct ctl_show_set *);
void(*result)(u_int);
void(*tail)(void);
 };
@@ -53,3 +54,4 @@ const char*fmt_attr(u_int8_t, int);
 const char *fmt_community(u_int16_t, u_int16_t);
 const char *fmt_large_community(u_int32_t, u_int32_t, u_int32_t);
 const char *fmt_ext_community(u_int8_t *);
+const char *fmt_set_type(struct ctl_show_set *);
Index: bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.10
diff -u -p -r1.10 output.c
--- bgpctl/output.c 21 Oct 2020 06:52:45 -  1.10
+++ bgpctl/output.c  

Re: usr.sbin/* getifaddrs ifa_addr NULL check

2020-12-29 Thread Claudio Jeker
On Tue, Dec 29, 2020 at 03:42:20PM +0100, Sebastian Benoit wrote:
> Hi,
> 
> claudios bgpd diff and florian mentioning rad(8) made me look into usr.sbin/
> for getifaddrs() usage. I think these need a NULL check as well.
> 
> ok?

Looks OK to me.
 
> diff --git usr.sbin/ospf6d/parse.y usr.sbin/ospf6d/parse.y
> index f163e24149d..509aa2f2e88 100644
> --- usr.sbin/ospf6d/parse.y
> +++ usr.sbin/ospf6d/parse.y
> @@ -1253,7 +1253,8 @@ get_rtr_id(void)
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
>   if (strncmp(ifa->ifa_name, "carp", 4) == 0)
>   continue;
> - if (ifa->ifa_addr->sa_family != AF_INET)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET)
>   continue;
>   cur = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
>   if ((cur & localnet) == localnet)   /* skip 127/8 */
> diff --git usr.sbin/ospfd/parse.y usr.sbin/ospfd/parse.y
> index a09696504f8..3fbcca0c911 100644
> --- usr.sbin/ospfd/parse.y
> +++ usr.sbin/ospfd/parse.y
> @@ -1469,7 +1469,8 @@ get_rtr_id(void)
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
>   if (strncmp(ifa->ifa_name, "carp", 4) == 0)
>   continue;
> - if (ifa->ifa_addr->sa_family != AF_INET)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET)
>   continue;
>   cur = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
>   if ((cur & localnet) == localnet)   /* skip 127/8 */
> diff --git usr.sbin/pppd/sys-bsd.c usr.sbin/pppd/sys-bsd.c
> index e8deee6d2ff..73f0e287938 100644
> --- usr.sbin/pppd/sys-bsd.c
> +++ usr.sbin/pppd/sys-bsd.c
> @@ -1334,6 +1334,8 @@ get_ether_addr(ipaddr, hwaddr)
>   * address on the same subnet as `ipaddr'.
>   */
>  for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> + if (ifa->ifa_addr == NULL)
> + continue;
>   if (ifa->ifa_addr->sa_family == AF_INET) {
>   ina = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
>   /*
> @@ -1367,6 +1369,8 @@ get_ether_addr(ipaddr, hwaddr)
>   */
>  ifp = ifa;
>  for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> + if (ifa->ifa_addr == NULL)
> + continue;
>   if (strcmp(ifp->ifa_name, ifa->ifa_name) == 0
>   && ifa->ifa_addr->sa_family == AF_LINK) {
>   /*
> @@ -1418,8 +1422,9 @@ GetMask(addr)
>   /*
>* Check the interface's internet address.
>*/
> - if (ifa->ifa_addr->sa_family != AF_INET)
> - continue;
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET)
> + continue;
>   ina = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr;
>   if ((ntohl(ina) & nmask) != (addr & nmask))
>   continue;
> diff --git usr.sbin/rad/frontend.c usr.sbin/rad/frontend.c
> index b69e9b7b0a3..c4dcf46e068 100644
> --- usr.sbin/rad/frontend.c
> +++ usr.sbin/rad/frontend.c
> @@ -731,7 +731,8 @@ get_link_state(char *if_name)
>   return LINK_STATE_UNKNOWN;
>   }
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (ifa->ifa_addr->sa_family != AF_LINK)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_LINK)
>   continue;
>   if (strcmp(if_name, ifa->ifa_name) != 0)
>   continue;
> @@ -969,7 +970,8 @@ interface_has_linklocal_address(char *name)
>   for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
>   if (strcmp(name, ifa->ifa_name) != 0)
>   continue;
> - if (ifa->ifa_addr->sa_family != AF_INET6)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET6)
>   continue;
>  
>   sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
> @@ -1013,7 +1015,8 @@ get_interface_prefixes(struct ra_iface *ra_iface, 
> struct ra_prefix_conf
>   for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
>   if (strcmp(ra_iface->name, ifa->ifa_name) != 0)
>   continue;
> - if (ifa->ifa_addr->sa_family != AF_INET6)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET6)
>   continue;
>  
>   sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
> diff --git usr.sbin/rarpd/rarpd.c usr.sbin/rarpd/rarpd.c
> index f3227883404..7652dcd3c4f 100644
> --- usr.sbin/rarpd/rarpd.c
> +++ usr.sbin/rarpd/rarpd.c
> @@ -200,6 +200,8 @@ init_all(void)
>   error("getifaddrs: %s", strerror(errno));
>  
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> + if (ifa->ifa_addr == NULL)
> + continue;
>   sdl = (struct sockaddr_dl *)ifa->ifa_addr;
>   if (sdl->sdl_family !=

Re: more getifaddrs ifa_addr NULL checks

2020-12-29 Thread Claudio Jeker
On Tue, Dec 29, 2020 at 04:20:04PM +0100, Sebastian Benoit wrote:
> 
> More missing checks, outside of usr.sbin.
> 
> Missing: isakmpd and ifconfig
> I have not yet looked at libc internal use, libpcap and regress/.
> 
> ok?

Looks good to me.
 
> diff --git sbin/iked/parse.y sbin/iked/parse.y
> index aedbb74f3fd..b02ff55d4e7 100644
> --- sbin/iked/parse.y
> +++ sbin/iked/parse.y
> @@ -2166,7 +2166,8 @@ ifa_load(void)
>   err(1, "ifa_load: getifaddrs");
>  
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (!(ifa->ifa_addr->sa_family == AF_INET ||
> + if (ifa->ifa_addr == NULL ||
> + !(ifa->ifa_addr->sa_family == AF_INET ||
>   ifa->ifa_addr->sa_family == AF_INET6 ||
>   ifa->ifa_addr->sa_family == AF_LINK))
>   continue;
> diff --git sbin/ipsecctl/parse.y sbin/ipsecctl/parse.y
> index d61f6d5e151..5121d8bb655 100644
> --- sbin/ipsecctl/parse.y
> +++ sbin/ipsecctl/parse.y
> @@ -1827,7 +1827,8 @@ ifa_load(void)
>   err(1, "ifa_load: getifaddrs");
>  
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (!(ifa->ifa_addr->sa_family == AF_INET ||
> + if (ifa->ifa_addr == NULL ||
> + !(ifa->ifa_addr->sa_family == AF_INET ||
>   ifa->ifa_addr->sa_family == AF_INET6 ||
>   ifa->ifa_addr->sa_family == AF_LINK))
>   continue;
> diff --git sbin/pfctl/pfctl_parser.c sbin/pfctl/pfctl_parser.c
> index 32b14d8b4cb..164b0639a48 100644
> --- sbin/pfctl/pfctl_parser.c
> +++ sbin/pfctl/pfctl_parser.c
> @@ -1352,7 +1352,8 @@ ifa_load(void)
>   err(1, "getifaddrs");
>  
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (!(ifa->ifa_addr->sa_family == AF_INET ||
> + if (ifa->ifa_addr == NULL ||
> + !(ifa->ifa_addr->sa_family == AF_INET ||
>   ifa->ifa_addr->sa_family == AF_INET6 ||
>   ifa->ifa_addr->sa_family == AF_LINK))
>   continue;
> diff --git sbin/route/route.c sbin/route/route.c
> index c13f5b6be07..e1d31902e98 100644
> --- sbin/route/route.c
> +++ sbin/route/route.c
> @@ -506,7 +506,8 @@ setsource(int argc, char **argv)
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
>   if (if_nametoindex(ifa->ifa_name) != ifindex)
>   continue;
> - if (!(ifa->ifa_addr->sa_family == AF_INET ||
> + if (ifa->ifa_addr == NULL ||
> + !(ifa->ifa_addr->sa_family == AF_INET ||
>   ifa->ifa_addr->sa_family == AF_INET6))
>   continue;
>   if ((af != AF_UNSPEC) &&
> diff --git sbin/slaacd/frontend.c sbin/slaacd/frontend.c
> index 6bdfc92339d..0e877d03bdb 100644
> --- sbin/slaacd/frontend.c
> +++ sbin/slaacd/frontend.c
> @@ -584,6 +584,8 @@ update_autoconf_addresses(uint32_t if_index, char* 
> if_name)
>   for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
>   if (strcmp(if_name, ifa->ifa_name) != 0)
>   continue;
> + if (ifa->ifa_addr == NULL)
> + continue;
>  
>   if (ifa->ifa_addr->sa_family == AF_LINK)
>   imsg_link_state.link_state =
> @@ -937,6 +939,8 @@ get_lladdr(char *if_name, struct ether_addr *mac, struct 
> sockaddr_in6 *ll)
>   for (ifa = ifap; ifa != NULL; ifa = ifa->ifa_next) {
>   if (strcmp(if_name, ifa->ifa_name) != 0)
>   continue;
> + if (ifa->ifa_addr == NULL)
> + continue;
>  
>   switch(ifa->ifa_addr->sa_family) {
>   case AF_LINK:
> diff --git usr.bin/netstat/route.c usr.bin/netstat/route.c
> index 9e8e22ba54b..1aef3f9cd3c 100644
> --- usr.bin/netstat/route.c
> +++ usr.bin/netstat/route.c
> @@ -368,7 +368,8 @@ rdomainpr(void)
>  
>   getifaddrs(&ifap);
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (ifa->ifa_addr->sa_family != AF_LINK)
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_LINK)
>   continue;
>   ifd = ifa->ifa_data;
>   if (rdom_if[ifd->ifi_rdomain] == NULL) {
> diff --git usr.bin/rusers/rusers.c usr.bin/rusers/rusers.c
> index 53f50a8ce6a..124ebd6cc9c 100644
> --- usr.bin/rusers/rusers.c
> +++ usr.bin/rusers/rusers.c
> @@ -558,7 +558,8 @@ allhosts(void)
>*/
>   for (i = 0; i < 6; i++) {
>   for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
> - if (ifa->ifa_addr->sa_family != AF_INET ||
> + if (ifa->ifa_addr == NULL ||
> + ifa->ifa_addr->sa_family != AF_INET ||
>   !(ifa->ifa_flags & IFF_BROADCAST) ||
>   !(ifa->ifa_flags & IFF_UP) ||
>

Re: pppoe: input without kernel lock

2020-12-29 Thread Claudio Jeker
On Tue, Dec 29, 2020 at 08:48:28PM +0100, Klemens Nanni wrote:
> Earlier this year `struct pppoe_softc' was annotated with lock comments
> showing no member being protected by KERNEL_LOCK() alone.
> 
> After further review of the code paths starting from pppoeintr() I also
> could not find sleeping points, which must be avoided in the softnet
> thread.  (As part of this, I specifically went through all possible
> malloc(9) calls and verified that none of them use `M_WAITOK'.)
> 
> The only thing I see necessary now is wrapping mbuf queue access with
> if_get(9) to prevent interfaces from disappearing while processing the
> mbuf -- currently the big lock protects against that.
> 
> I've been running with this diff for over four months on an octeon
> edgerouter 4 which updates to snapshots modulo the custom kernel roughly
> once a week.
> 
> Others also successfully tested this on octeon and amd64 based setups.
> 
> Did I miss anything?
> Feedback? Objections? OK?
 
Generally I would prefer to go for direct dispatch and not use netisr.
This removes a queue and a scheduling point and should help reduce the
latency in processing pppoe packets.
 
This does not mean that I'm against this change. I just think it may be
beneficial to move one step further.

> Index: if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.621
> diff -u -p -r1.621 if.c
> --- if.c  15 Dec 2020 03:43:34 -  1.621
> +++ if.c  28 Dec 2020 18:13:02 -
> @@ -907,9 +907,7 @@ if_netisr(void *unused)
>  #endif
>  #if NPPPOE > 0
>   if (n & (1 << NETISR_PPPOE)) {
> - KERNEL_LOCK();
>   pppoeintr();
> - KERNEL_UNLOCK();
>   }
>  #endif
>   t |= n;
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 if_pppoe.c
> --- if_pppoe.c13 Sep 2020 11:00:40 -  1.73
> +++ if_pppoe.c28 Dec 2020 18:13:02 -
> @@ -346,14 +346,29 @@ void
>  pppoeintr(void)
>  {
>   struct mbuf *m;
> + struct ifnet *ifp;
>  
>   NET_ASSERT_LOCKED();
>  
> - while ((m = niq_dequeue(&pppoediscinq)) != NULL)
> + while ((m = niq_dequeue(&pppoediscinq)) != NULL) {
> + ifp = if_get(m->m_pkthdr.ph_ifidx);
> + if (ifp == NULL) {
> + m_freem(m);
> + continue;
> + }
>   pppoe_disc_input(m);
> + if_put(ifp);
> + }
>  
> - while ((m = niq_dequeue(&pppoeinq)) != NULL)
> + while ((m = niq_dequeue(&pppoeinq)) != NULL) {
> + ifp = if_get(m->m_pkthdr.ph_ifidx);
> + if (ifp == NULL) {
> + m_freem(m);
> + continue;
> + }
>   pppoe_data_input(m);
> + if_put(ifp);
> + }
>  }
>  
>  /* Analyze and handle a single received packet while not in session state. */
> 

-- 
:wq Claudio



Re: httpd: another log related leak

2020-12-31 Thread Claudio Jeker
On Thu, Dec 31, 2020 at 11:21:44AM +0100, Theo Buehler wrote:
> msg is allocated by vasprintf, and is leaked on return of server_sendlog.
> vasprintf calculates the length of the string, so we can zap a needless
> call to strlen while there.
> 
> Index: server.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/server.c,v
> retrieving revision 1.121
> diff -u -p -r1.121 server.c
> --- server.c  11 Oct 2020 03:21:44 -  1.121
> +++ server.c  31 Dec 2020 10:06:28 -
> @@ -1251,12 +1251,14 @@ server_sendlog(struct server_config *srv
>   iov[0].iov_base = &srv_conf->id;
>   iov[0].iov_len = sizeof(srv_conf->id);
>   iov[1].iov_base = msg;
> - iov[1].iov_len = strlen(msg) + 1;
> + iov[1].iov_len = ret + 1;
>  
>   if (proc_composev(httpd_env->sc_ps, PROC_LOGGER, cmd, iov, 2) != 0) {
>   log_warn("%s: failed to compose imsg", __func__);
> + free(msg);
>   return;
>   }
> + free(msg);
>  }
>  
>  void
> 

OK claudio@

-- 
:wq Claudio



Re: httpd: call tls_close before closing the socket

2021-01-01 Thread Claudio Jeker
On Fri, Jan 01, 2021 at 09:06:34PM +0100, Theo Buehler wrote:
> httpd(8) leaks resources when clients connect via TLS.  The reason for
> this is that server_close() closes the socket underlying the TLS
> connection before calling tls_close().
> 
> The currently unchecked tls_close() call fails with EBADF when trying to
> write out the close_notify in SSL_shutdown(). The resource leaks themselves
> are bugs in libssl/libtls which will need more investigation. Anyway,
> tls_close() wants an open socket if possible and it wants error
> checking, so move it up and do the usual dance.
> 
> This diff makes a simple httpd run in essentially constant memory when I
> hammer it with many thousand TLS connections.

Is the fd passed to tls_close() non-blocking? If so, this could result in a
busy loop on the while (ret == TLS_WANT_POLLIN || ret == TLS_WANT_POLLOUT).
Also doing these individual "poll" loops outside of the main event loop
can hurt performance since no other fd can be processed during that time.
I wondered about this in some other event based code I wrote. Especially
if tls_close() is called because of some error cleanup code.

> Index: server.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/server.c,v
> retrieving revision 1.122
> diff -u -p -r1.122 server.c
> --- server.c  31 Dec 2020 14:17:12 -  1.122
> +++ server.c  1 Jan 2021 18:03:32 -
> @@ -1333,14 +1333,25 @@ server_close(struct client *clt, const c
>  
>   if (clt->clt_srvbev != NULL)
>   bufferevent_free(clt->clt_srvbev);
> +
> + /* tls_close must be called before the underlying socket is closed. */
> + if (clt->clt_tls_ctx != NULL) {
> + int ret;
> +
> + do {
> + ret = tls_close(clt->clt_tls_ctx);
> + } while (ret == TLS_WANT_POLLIN || ret == TLS_WANT_POLLOUT);
> + if (ret == -1) {
> + log_warnx("%s: tls_close failed (%s)", __func__,
> + tls_error(clt->clt_tls_ctx));
> + }
> + tls_free(clt->clt_tls_ctx);
> + }
> +
>   if (clt->clt_fd != -1)
>   close(clt->clt_fd);
>   if (clt->clt_s != -1)
>   close(clt->clt_s);
> -
> - if (clt->clt_tls_ctx != NULL)
> - tls_close(clt->clt_tls_ctx);
> - tls_free(clt->clt_tls_ctx);
>  
>   server_inflight_dec(clt, __func__);
>  
> 

-- 
:wq Claudio



Re: httpd: call tls_close before closing the socket

2021-01-01 Thread Claudio Jeker
On Sat, Jan 02, 2021 at 12:07:28AM +0100, Theo Buehler wrote:
> On Fri, Jan 01, 2021 at 11:38:32PM +0100, Claudio Jeker wrote:
> > On Fri, Jan 01, 2021 at 09:06:34PM +0100, Theo Buehler wrote:
> > > httpd(8) leaks resources when clients connect via TLS.  The reason for
> > > this is that server_close() closes the socket underlying the TLS
> > > connection before calling tls_close().
> > > 
> > > The currently unchecked tls_close() call fails with EBADF when trying to
> > > write out the close_notify in SSL_shutdown(). The resource leaks themselves
> > > are bugs in libssl/libtls which will need more investigation. Anyway,
> > > tls_close() wants an open socket if possible and it wants error
> > > checking, so move it up and do the usual dance.
> > > 
> > > This diff makes a simple httpd run in essentially constant memory when I
> > > hammer it with many thousand TLS connections.
> > 
> > Is the fd passed to tls_close() non-blocking? If so, this could result in a
> > busy loop on the while (ret == TLS_WANT_POLLIN || ret == TLS_WANT_POLLOUT).
> 
> Thanks, I was just wondering about this as well.  The socket is of
> course non-blocking and I used the blocking idiom.  I do not know how likely
> it is that we hit an actual WANT_POLL* situation but it is conceivable.
> 
> In my few test cases, tls_close() always succeeds the first time around.
> 
> > Also doing these individual "poll" loops outside of the main event loop
> > can hurt performance since no other fd can be processed during that time.
> > I wondered about this in some other event based code I wrote. Especially
> > if tls_close() is called because of some error cleanup code.
> 
> tls_close() is known to need some work. So perhaps we can do this simple
> diff in the interim. This addresses the annoying resource leaks I'm
> seeing and the proper handling with libevent (which I'm unfamiliar with)
> can be done once it's figured out.

This is OK claudio@
 
> Index: server.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/server.c,v
> retrieving revision 1.122
> diff -u -p -r1.122 server.c
> --- server.c  31 Dec 2020 14:17:12 -  1.122
> +++ server.c  1 Jan 2021 22:55:29 -
> @@ -1333,14 +1333,16 @@ server_close(struct client *clt, const c
>  
>   if (clt->clt_srvbev != NULL)
>   bufferevent_free(clt->clt_srvbev);
> +
> + /* tls_close must be called before the underlying socket is closed. */
> + if (clt->clt_tls_ctx != NULL)
> + tls_close(clt->clt_tls_ctx); /* XXX - error handling */
> + tls_free(clt->clt_tls_ctx);
> +
>   if (clt->clt_fd != -1)
>   close(clt->clt_fd);
>   if (clt->clt_s != -1)
>   close(clt->clt_s);
> -
> - if (clt->clt_tls_ctx != NULL)
> - tls_close(clt->clt_tls_ctx);
> - tls_free(clt->clt_tls_ctx);
>  
>   server_inflight_dec(clt, __func__);
>  
> 

-- 
:wq Claudio



use getnameinfo in bgpd to print addresses

2021-01-04 Thread Claudio Jeker
In bgpd most prefixes and addresses are stored as struct bgpd_addr. When
it is printed it uses inet_ntop() which is not ideal since it does not
handle IPv6 scoped_id. Instead convert to a struct sockaddr and use
log_sockaddr() which in turn uses getnameinfo.

Ideally the same should be done for the VPN address types but that is a
bit more complex.

-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.408
diff -u -p -r1.408 bgpd.h
--- bgpd.h  30 Dec 2020 07:29:56 -  1.408
+++ bgpd.h  4 Jan 2021 09:34:44 -
@@ -1371,7 +1371,7 @@ intaid2afi(u_int8_t, u_int16_t *, u_i
 int afi2aid(u_int16_t, u_int8_t, u_int8_t *);
 sa_family_t aid2af(u_int8_t);
 int af2aid(sa_family_t, u_int8_t, u_int8_t *);
-struct sockaddr*addr2sa(struct bgpd_addr *, u_int16_t, socklen_t *);
+struct sockaddr*addr2sa(const struct bgpd_addr *, u_int16_t, socklen_t 
*);
 voidsa2addr(struct sockaddr *, struct bgpd_addr *, u_int16_t *);
 const char *get_baudrate(unsigned long long, char *);
 
Index: util.c
===
RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
retrieving revision 1.55
diff -u -p -r1.55 util.c
--- util.c  21 Oct 2020 06:53:54 -  1.55
+++ util.c  4 Jan 2021 09:33:30 -
@@ -39,14 +39,12 @@ log_addr(const struct bgpd_addr *addr)
 {
static char buf[74];
chartbuf[40];
+   socklen_t   len;
 
switch (addr->aid) {
case AID_INET:
case AID_INET6:
-   if (inet_ntop(aid2af(addr->aid), &addr->ba, buf,
-   sizeof(buf)) == NULL)
-   return ("?");
-   return (buf);
+   return log_sockaddr(addr2sa(addr, 0, &len), len);
case AID_VPN_IPv4:
if (inet_ntop(AF_INET, &addr->vpn4.addr, tbuf,
sizeof(tbuf)) == NULL)
@@ -838,7 +836,7 @@ af2aid(sa_family_t af, u_int8_t safi, u_
 }
 
 struct sockaddr *
-addr2sa(struct bgpd_addr *addr, u_int16_t port, socklen_t *len)
+addr2sa(const struct bgpd_addr *addr, u_int16_t port, socklen_t *len)
 {
static struct sockaddr_storage   ss;
struct sockaddr_in  *sa_in = (struct sockaddr_in *)&ss;



bgpd roa compare function

2021-01-04 Thread Claudio Jeker
This adjusts the roa_cmp() function to result in the same order on big and
little endian systems. This should help with regress tests where the order
matters.

I also change the prefixset_cmp function to use memcmp for both IPv4 and
IPv6 addresses.

OK?
-- 
:wq Claudio

Index: config.c
===
RCS file: /cvs/src/usr.sbin/bgpd/config.c,v
retrieving revision 1.97
diff -u -p -r1.97 config.c
--- config.c29 Dec 2020 15:30:34 -  1.97
+++ config.c4 Jan 2021 10:30:15 -
@@ -551,22 +551,20 @@ prefixset_cmp(struct prefixset_item *a, 
 
switch (a->p.addr.aid) {
case AID_INET:
-   if (ntohl(a->p.addr.v4.s_addr) < ntohl(b->p.addr.v4.s_addr))
-   return (-1);
-   if (ntohl(a->p.addr.v4.s_addr) > ntohl(b->p.addr.v4.s_addr))
-   return (1);
+   i = memcmp(&a->p.addr.v4, &b->p.addr.v4,
+   sizeof(struct in_addr));
break;
case AID_INET6:
i = memcmp(&a->p.addr.v6, &b->p.addr.v6,
sizeof(struct in6_addr));
-   if (i > 0)
-   return (1);
-   if (i < 0)
-   return (-1);
break;
default:
fatalx("%s: unknown af", __func__);
}
+   if (i > 0)
+   return (1);
+   if (i < 0)
+   return (-1);
if (a->p.len < b->p.len)
return (-1);
if (a->p.len > b->p.len)
@@ -587,16 +585,45 @@ RB_GENERATE(prefixset_tree, prefixset_it
 static inline int
 roa_cmp(struct roa *a, struct roa *b)
 {
-   size_t len = 4 + sizeof(a->asnum);
+   int i;
+
+   if (a->aid < b->aid)
+   return (-1);
+   if (a->aid > b->aid)
+   return (1);
 
-   if (a->aid == b->aid) {
-   if (a->aid == AID_INET)
-   len += sizeof(a->prefix.inet);
-   else
-   len += sizeof(a->prefix.inet6);
+   switch (a->aid) {
+   case AID_INET:
+   i = memcmp(&a->prefix.inet, &b->prefix.inet,
+   sizeof(struct in_addr));
+   break;
+   case AID_INET6:
+   i = memcmp(&a->prefix.inet6, &b->prefix.inet6,
+   sizeof(struct in6_addr));
+   break;
+   default:
+   fatalx("%s: unknown af", __func__);
}
+   if (i > 0)
+   return (1);
+   if (i < 0)
+   return (-1);
+   if (a->prefixlen < b->prefixlen)
+   return (-1);
+   if (a->prefixlen > b->prefixlen)
+   return (1);
 
-   return memcmp(&a->aid, &b->aid, len);
+   if (a->asnum < b->asnum)
+   return (-1);
+   if (a->asnum > b->asnum)
+   return (1);
+
+   if (a->maxlen < b->maxlen)
+   return (-1);
+   if (a->maxlen > b->maxlen)
+   return (1);
+
+   return (0);
 }
 
 RB_GENERATE(roa_tree, roa, entry, roa_cmp);



Re: bgpd: struct mrt vs struct mrt_config confusion

2021-01-04 Thread Claudio Jeker
On Mon, Jan 04, 2021 at 12:09:46PM +0100, Theo Buehler wrote:
> Pointed out by llvm scan-build. mrt_config is much larger (> 10x). As
> far as I can tell, this isn't bad. It just overallocates and copies a
> lot of zeroes thanks to the calloc() in parse.y.
> 
> Perhaps it would be better to use sizeof(*xm) instead.

I think this is wrong. There is a difference between struct mrt_config and mrt
but the code uses MRT2MC() in various places to change between the types.
This code could use some work to clean up this mess.
 
> Regress passes with the Makefile diff at the end (is there a better
> way?).

Regress diff is OK. This was the addition of 'bgpctl show sets' that added
the need for getmonotime().
 
> Index: usr.sbin/bgpd/mrt.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
> retrieving revision 1.103
> diff -u -p -r1.103 mrt.c
> --- usr.sbin/bgpd/mrt.c   9 Jan 2020 11:55:25 -   1.103
> +++ usr.sbin/bgpd/mrt.c   4 Jan 2021 10:16:18 -
> @@ -1031,9 +1031,9 @@ mrt_mergeconfig(struct mrt_head *xconf, 
>   LIST_FOREACH(m, nconf, entry) {
>   if ((xm = mrt_get(xconf, m)) == NULL) {
>   /* NEW */
> - if ((xm = malloc(sizeof(struct mrt_config))) == NULL)
> + if ((xm = malloc(sizeof(struct mrt))) == NULL)
>   fatal("mrt_mergeconfig");
> - memcpy(xm, m, sizeof(struct mrt_config));
> + memcpy(xm, m, sizeof(struct mrt));
>   xm->state = MRT_STATE_OPEN;
>   LIST_INSERT_HEAD(xconf, xm, entry);
>   } else {
> Index: usr.sbin/bgpd/parse.y
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
> retrieving revision 1.411
> diff -u -p -r1.411 parse.y
> --- usr.sbin/bgpd/parse.y 29 Dec 2020 15:30:34 -  1.411
> +++ usr.sbin/bgpd/parse.y 4 Jan 2021 10:17:26 -
> @@ -3871,7 +3871,7 @@ add_mrtconfig(enum mrt_type type, char *
>   }
>   }
>  
> - if ((n = calloc(1, sizeof(struct mrt_config))) == NULL)
> + if ((n = calloc(1, sizeof(struct mrt))) == NULL)
>   fatal("add_mrtconfig");
>  
>   n->type = type;
> Index: regress/usr.sbin/bgpd/unittests/Makefile
> ===
> RCS file: /cvs/src/regress/usr.sbin/bgpd/unittests/Makefile,v
> retrieving revision 1.6
> diff -u -p -r1.6 Makefile
> --- regress/usr.sbin/bgpd/unittests/Makefile  29 Dec 2020 16:57:50 -  
> 1.6
> +++ regress/usr.sbin/bgpd/unittests/Makefile  4 Jan 2021 10:37:38 -
> @@ -14,11 +14,11 @@ CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../..
>  LDADD= -lutil
>  DPADD+= ${LIBUTIL}
>  
> -SRCS_rde_sets_test=  rde_sets_test.c rde_sets.c
> +SRCS_rde_sets_test=  rde_sets_test.c rde_sets.c timer.c log.c
>  run-regress-rde_sets_test: rde_sets_test
>   ./rde_sets_test
>  
> -SRCS_rde_trie_test=  rde_trie_test.c rde_trie.c util.c rde_sets.c
> +SRCS_rde_trie_test=  rde_trie_test.c rde_trie.c util.c rde_sets.c timer.c 
> log.c
>  TRIE_TESTS=1 2 3 4 5 6
>  TRIE4_FLAGS=-o
>  TRIE5_FLAGS=-r
> 

-- 
:wq Claudio



Re: pppoe: input without kernel lock

2021-01-04 Thread Claudio Jeker
On Mon, Jan 04, 2021 at 01:46:43AM +0100, Klemens Nanni wrote:
> On Tue, Dec 29, 2020 at 11:18:26PM +0100, Claudio Jeker wrote:
> > Generally I would prefer to go for direct dispatch and not use netisr.
> > This removes a queue and a scheduling point and should help reduce the
> > latency in processing pppoe packets.
> >  
> > This does not mean that I'm against this change. I just think it may be
> > beneficial to move one step further.
> Here's a diff that removes the kernel lock and calls input routines
> directly instead of (de)queuing through netisr.
> 
> Previously, if_netisr() handled the net lock around those calls, now
> if_input_process() does it before calling ether_input(), so no need to
> add or remove NET_*LOCK() anywhere.
> 
> I'm running this on my home router without any regression so far.
> 
> Feedback? Objections? OK?
> NB: I want to commit this separately modulo the previous diff.
> 

Looks good to me. OK claudio@
 
> Index: if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.621
> diff -u -p -r1.621 if.c
> --- if.c  15 Dec 2020 03:43:34 -  1.621
> +++ if.c  3 Jan 2021 21:38:57 -
> @@ -68,7 +68,6 @@
>  #include "pf.h"
>  #include "pfsync.h"
>  #include "ppp.h"
> -#include "pppoe.h"
>  #include "switch.h"
>  #include "if_wg.h"
>  
> @@ -902,13 +901,6 @@ if_netisr(void *unused)
>   if (n & (1 << NETISR_SWITCH)) {
>   KERNEL_LOCK();
>   switchintr();
> - KERNEL_UNLOCK();
> - }
> -#endif
> -#if NPPPOE > 0
> - if (n & (1 << NETISR_PPPOE)) {
> - KERNEL_LOCK();
> - pppoeintr();
>   KERNEL_UNLOCK();
>   }
>  #endif
> Index: if_ethersubr.c
> ===
> RCS file: /cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.267
> diff -u -p -r1.267 if_ethersubr.c
> --- if_ethersubr.c1 Oct 2020 05:14:10 -   1.267
> +++ if_ethersubr.c3 Jan 2021 21:41:17 -
> @@ -532,9 +532,9 @@ ether_input(struct ifnet *ifp, struct mb
>   }
>  #endif
>   if (etype == ETHERTYPE_PPPOEDISC)
> - niq_enqueue(&pppoediscinq, m);
> + pppoe_disc_input(m);
>   else
> - niq_enqueue(&pppoeinq, m);
> + pppoe_data_input(m);
>   return;
>  #endif
>  #ifdef MPLS
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_pppoe.c
> --- if_pppoe.c30 Dec 2020 13:18:07 -  1.75
> +++ if_pppoe.c4 Jan 2021 00:14:30 -
> @@ -143,14 +143,8 @@ struct pppoe_softc {
>   struct timeval sc_session_time; /* [N] time the session was established 
> */
>  };
>  
> -/* incoming traffic will be queued here */
> -struct niqueue pppoediscinq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_PPPOE);
> -struct niqueue pppoeinq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_PPPOE);
> -
>  /* input routines */
> -static void pppoe_disc_input(struct mbuf *);
>  static void pppoe_dispatch_disc_pkt(struct mbuf *);
> -static void pppoe_data_input(struct mbuf *);
>  
>  /* management routines */
>  void pppoeattach(int);
> @@ -341,21 +335,6 @@ pppoe_find_softc_by_hunique(u_int8_t *to
>   return (sc);
>  }
>  
> -/* Interface interrupt handler routine. */
> -void
> -pppoeintr(void)
> -{
> - struct mbuf *m;
> -
> - NET_ASSERT_LOCKED();
> -
> - while ((m = niq_dequeue(&pppoediscinq)) != NULL)
> - pppoe_disc_input(m);
> -
> - while ((m = niq_dequeue(&pppoeinq)) != NULL)
> - pppoe_data_input(m);
> -}
> -
>  /* Analyze and handle a single received packet while not in session state. */
>  static void
>  pppoe_dispatch_disc_pkt(struct mbuf *m)
> @@ -649,7 +628,7 @@ done:
>  }
>  
>  /* Input function for discovery packets. */
> -static void
> +void
>  pppoe_disc_input(struct mbuf *m)
>  {
>   /* avoid error messages if there is not a single pppoe instance */
> @@ -661,7 +640,7 @@ pppoe_disc_input(struct mbuf *m)
>  }
>  
>  /* Input function for data packets */
> -static void
> +void
>  pppoe_data_input(struct mbuf *m)
>  {
>   struct pppoe_softc *sc;
> Index: if_pppoe.h
> ===

minor cleanup in bgpd's process startup

2021-01-04 Thread Claudio Jeker
bgpd will get a new process for RTR handling. Because of this it makes
sense to clean up the startup code a bit and not use flags to indicate
which process to run but instead use the enum bgpd_process.
Additionally change the PFD_PIPE_ROUTE to PFD_PIPE_RDE. The latter is less
confusing since there is also PFD_SOCK_ROUTE.

OK?
-- 
:wq Claudio

Index: bgpd.c
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.c,v
retrieving revision 1.232
diff -u -p -r1.232 bgpd.c
--- bgpd.c  30 Dec 2020 07:00:54 -  1.232
+++ bgpd.c  4 Jan 2021 16:00:30 -
@@ -90,7 +90,7 @@ usage(void)
 }
 
 #define PFD_PIPE_SESSION   0
-#define PFD_PIPE_ROUTE 1
+#define PFD_PIPE_RDE   1
 #define PFD_SOCK_ROUTE 2
 #define PFD_SOCK_PFKEY 3
 #define POLL_MAX   4
@@ -102,6 +102,7 @@ int
 main(int argc, char *argv[])
 {
struct bgpd_config  *conf;
+   enum bgpd_processproc = PROC_MAIN;
struct rde_rib  *rr;
struct peer *p;
struct pollfdpfd[POLL_MAX];
@@ -110,7 +111,6 @@ main(int argc, char *argv[])
char*conffile;
char*saved_argv0;
int  debug = 0;
-   int  rflag = 0, sflag = 0;
int  rfd, keyfd;
int  ch, status;
int  pipe_m2s[2];
@@ -151,10 +151,10 @@ main(int argc, char *argv[])
cmd_opts |= BGPD_OPT_VERBOSE;
break;
case 'R':
-   rflag = 1;
+   proc = PROC_RDE;
break;
case 'S':
-   sflag = 1;
+   proc = PROC_SE;
break;
default:
usage();
@@ -164,7 +164,7 @@ main(int argc, char *argv[])
 
argc -= optind;
argv += optind;
-   if (argc > 0 || (sflag && rflag))
+   if (argc > 0)
usage();
 
if (cmd_opts & BGPD_OPT_NOACTION) {
@@ -184,10 +184,16 @@ main(int argc, char *argv[])
exit(0);
}
 
-   if (rflag)
+   switch (proc) {
+   case PROC_MAIN:
+   break;
+   case PROC_RDE:
rde_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
-   else if (sflag)
+   /* NOTREACHED */
+   case PROC_SE:
session_main(debug, cmd_opts & BGPD_OPT_VERBOSE);
+   /* NOTREACHED */
+   }
 
if (geteuid())
errx(1, "need root privileges");
@@ -278,7 +284,7 @@ BROKEN  if (pledge("stdio rpath wpath cpa
pfd[PFD_SOCK_PFKEY].events = POLLIN;
 
set_pollfd(&pfd[PFD_PIPE_SESSION], ibuf_se);
-   set_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde);
+   set_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde);
 
if (timeout < 0 || timeout > MAX_TIMEOUT)
timeout = MAX_TIMEOUT;
@@ -300,14 +306,14 @@ BROKENif (pledge("stdio rpath wpath cpa
quit = 1;
}
 
-   if (handle_pollfd(&pfd[PFD_PIPE_ROUTE], ibuf_rde) == -1) {
+   if (handle_pollfd(&pfd[PFD_PIPE_RDE], ibuf_rde) == -1) {
log_warnx("main: Lost connection to RDE");
msgbuf_clear(&ibuf_rde->w);
free(ibuf_rde);
ibuf_rde = NULL;
quit = 1;
} else {
-   if (dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE, conf) ==
+   if (dispatch_imsg(ibuf_rde, PFD_PIPE_RDE, conf) ==
-1)
quit = 1;
}
@@ -713,7 +719,7 @@ dispatch_imsg(struct imsgbuf *ibuf, int 
 
switch (imsg.hdr.type) {
case IMSG_KROUTE_CHANGE:
-   if (idx != PFD_PIPE_ROUTE)
+   if (idx != PFD_PIPE_RDE)
log_warnx("route request not from RDE");
else if (imsg.hdr.len != IMSG_HEADER_SIZE +
sizeof(struct kroute_full))
@@ -723,7 +729,7 @@ dispatch_imsg(struct imsgbuf *ibuf, int 
rv = -1;
break;
case IMSG_KROUTE_DELETE:
-   if (idx != PFD_PIPE_ROUTE)
+   if (idx != PFD_PIPE_RDE)
log_warnx("route request not from RDE");
else if (imsg.hdr.len != IMSG_HEADER_SIZE +
sizeof(struct kroute_full))
@@ -733,7 +739,7 @@ dispatch_imsg(struct imsgbuf *ibuf, int 
rv = -1;
break;
case IMSG_KROUTE_FLUSH:
-

bgpd more log_addr cleanup

2021-01-04 Thread Claudio Jeker
So this is probably a sensible next step to clean up log_addr() a bit more.
Teach addr2sa() how to convert the VPN addresses into sockaddrs. The rd
and labelstack still need to be handled by the caller.

OK?
-- 
:wq Claudio

Index: util.c
===
RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
retrieving revision 1.56
diff -u -p -r1.56 util.c
--- util.c  4 Jan 2021 13:40:32 -   1.56
+++ util.c  4 Jan 2021 18:01:52 -
@@ -38,26 +38,21 @@ const char *
 log_addr(const struct bgpd_addr *addr)
 {
static char buf[74];
-   chartbuf[40];
+   struct sockaddr *sa;
socklen_t   len;
 
+   sa = addr2sa(addr, 0, &len);
switch (addr->aid) {
case AID_INET:
case AID_INET6:
-   return log_sockaddr(addr2sa(addr, 0, &len), len);
+   return log_sockaddr(sa, len);
case AID_VPN_IPv4:
-   if (inet_ntop(AF_INET, &addr->vpn4.addr, tbuf,
-   sizeof(tbuf)) == NULL)
-   return ("?");
snprintf(buf, sizeof(buf), "%s %s", log_rd(addr->vpn4.rd),
-   tbuf);
+   log_sockaddr(sa, len));
return (buf);
case AID_VPN_IPv6:
-   if (inet_ntop(aid2af(addr->aid), &addr->vpn6.addr, tbuf,
-   sizeof(tbuf)) == NULL)
-   return ("?");
snprintf(buf, sizeof(buf), "%s %s", log_rd(addr->vpn6.rd),
-   tbuf);
+   log_sockaddr(sa, len));
return (buf);
}
return ("???");
@@ -92,7 +87,7 @@ log_sockaddr(struct sockaddr *sa, sockle
 {
static char buf[NI_MAXHOST];
 
-   if (getnameinfo(sa, len, buf, sizeof(buf), NULL, 0,
+   if (sa == NULL || getnameinfo(sa, len, buf, sizeof(buf), NULL, 0,
NI_NUMERICHOST))
return ("(unknown)");
else
@@ -835,6 +830,10 @@ af2aid(sa_family_t af, u_int8_t safi, u_
return (-1);
 }
 
+/*
+ * Convert a struct bgpd_addr into a struct sockaddr. For VPN addresses
+ * the included label stack is ignored and needs to be handled by the caller.
+ */
 struct sockaddr *
 addr2sa(const struct bgpd_addr *addr, u_int16_t port, socklen_t *len)
 {
@@ -842,10 +841,10 @@ addr2sa(const struct bgpd_addr *addr, u_
struct sockaddr_in  *sa_in = (struct sockaddr_in *)&ss;
struct sockaddr_in6 *sa_in6 = (struct sockaddr_in6 *)&ss;
 
-   if (addr == NULL || addr->aid == AID_UNSPEC)
-   return (NULL);
-
bzero(&ss, sizeof(ss));
+   if (addr == NULL)
+   return ((struct sockaddr *)&ss);
+
switch (addr->aid) {
case AID_INET:
sa_in->sin_family = AF_INET;
@@ -856,6 +855,20 @@ addr2sa(const struct bgpd_addr *addr, u_
case AID_INET6:
sa_in6->sin6_family = AF_INET6;
memcpy(&sa_in6->sin6_addr, &addr->v6,
+   sizeof(sa_in6->sin6_addr));
+   sa_in6->sin6_port = htons(port);
+   sa_in6->sin6_scope_id = addr->scope_id;
+   *len = sizeof(struct sockaddr_in6);
+   break;
+   case AID_VPN_IPv4:
+   sa_in->sin_family = AF_INET;
+   sa_in->sin_addr.s_addr = addr->vpn4.addr.s_addr;
+   sa_in->sin_port = htons(port);
+   *len = sizeof(struct sockaddr_in);
+   break;
+   case AID_VPN_IPv6:
+   sa_in6->sin6_family = AF_INET6;
+   memcpy(&sa_in6->sin6_addr, &addr->vpn6.addr,
sizeof(sa_in6->sin6_addr));
sa_in6->sin6_port = htons(port);
sa_in6->sin6_scope_id = addr->scope_id;



Re: diff: tcp ack improvement

2021-01-05 Thread Claudio Jeker
On Tue, Jan 05, 2021 at 10:16:04AM +0100, Jan Klemkow wrote:
> On Wed, Dec 23, 2020 at 11:59:13AM +, Stuart Henderson wrote:
> > On 2020/12/17 20:50, Jan Klemkow wrote:
> > > ping
> > > 
> > > On Fri, Nov 06, 2020 at 01:10:52AM +0100, Jan Klemkow wrote:
> > > > bluhm and I made some network performance measurements and kernel
> > > > profiling.
> > 
> > I've been running this on my workstation since you sent it out - lots
> > of long-running ssh connections, hourly reposync, daily rsync of base
> > snapshots.
> > 
> > I don't know enough about TCP stack behaviour to really give a meaningful
> > OK, but certainly not seeing any problems with it.
> 
> Thanks, Stuart.  Has someone else tested this diff?  Or, are there some
> opinions or objections about it?  Even bike-shedding is welcome :-)

From my memory TCP uses the ACKs on startup to increase the send window
and so your diff could slow down the initial startup. Not sure if that
matters actually. It can have some impact if userland reads in big blocks
at infrequent intervals since then the ACK clock slows down.

I guess to get coverage it would be best to commit this and then monitor
the lists for possible slowdowns.
 
> Thanks,
> Jan
> 
> > > > Setup:  Linux (iperf) -10gbit-> OpenBSD (relayd) -10gbit-> Linux (iperf)
> > > > 
> > > > We figured out, that the kernel uses a huge amount of processing time
> > > > for sending ACKs to the sender on the receiving interface.  After
> > > > receiving a data segment, we send our two ACK.  The first one in
> > > > tcp_input() direct after receiving.  The second ACK is send out, after
> > > > the userland or the sosplice task read some data out of the socket
> > > > buffer.
> > > > 
> > > > The first ACK in tcp_input() is called after receiving every other data
> > > > segment like it is described in RFC1122:
> > > > 
> > > > 4.2.3.2  When to Send an ACK Segment
> > > > A TCP SHOULD implement a delayed ACK, but an ACK should
> > > > not be excessively delayed; in particular, the delay
> > > > MUST be less than 0.5 seconds, and in a stream of
> > > > full-sized segments there SHOULD be an ACK for at least
> > > > every second segment.
> > > > 
> > > > This advice is based on the paper "Congestion Avoidance and Control":
> > > > 
> > > > 4 THE GATEWAY SIDE OF CONGESTION CONTROL
> > > > The 8 KBps senders were talking to 4.3+BSD receivers
> > > > which would delay an ack for at most one packet (because
> > > > of an ack’s ‘clock’ role, the authors believe that the
> > > > minimum ack frequency should be every other packet).
> > > > 
> > > > Sending the first ACK (on every other packet) costs us too much
> > > > processing time.  Thus, we run into a full socket buffer earlier.  The
> > > > first ACK just acknowledges the received data, but does not update the
> > > > window.  The second ACK, caused by the socket buffer reader, also
> > > > acknowledges the data and also updates the window.  So, the second ACK,
> > > > is worth much more for fast packet processing than the first one.
> > > > 
> > > > The performance improvement is between 33% with splicing and 20% without
> > > > splice:
> > > > 
> > > >                 splicing      relaying
> > > > 
> > > > current         3.1 GBit/s    2.6 GBit/s
> > > > w/o first ack   4.1 GBit/s    3.1 GBit/s
> > > > 
> > > > As far as I understand the implementation of other operating systems:
> > > > Linux has implemented a custom TCP_QUICKACK socket option, to turn this
> > > > kind of feature on and off.  FreeBSD and NetBSD still depend on it, when
> > > > using the New Reno implementation.
> > > > 
> > > > The following diff turns off the direct ACK on every other segment.  We
> > > > are running this diff in production on our own machines at genua and on
> > > > our products for several months now.  We haven't noticed any problems,
> > > > even with interactive network sessions (ssh) nor with bulk traffic.
> > > > 
> > > > Another solution could be a sysctl(3) or an additional socket option,
> > > > similar to Linux, to control this behavior per socket or system wide.
> > > > Also, a counter to ACK every 3rd, 4th... data segment could beat the
> > > > problem.
> > > > 
> > > > bye,
> > > > Jan
> > > > 
> > > > Index: netinet/tcp_input.c
> > > > ===
> > > > RCS file: /cvs/src/sys/netinet/tcp_input.c,v
> > > > retrieving revision 1.365
> > > > diff -u -p -r1.365 tcp_input.c
> > > > --- netinet/tcp_input.c 19 Jun 2020 22:47:22 -  1.365
> > > > +++ netinet/tcp_input.c 5 Nov 2020 23:00:34 -
> > > > @@ -165,8 +165,8 @@ do { \
> > > >  #endif
> > > >  
> > > >  /*
> > > > - * Macro to compute ACK transmission behavior.  Delay the ACK unless
> > > > - * we have already delayed an ACK (must send an ACK every two 
> > > > seg

Change bgpd_addr encoding of VPN v4 and v6 addresses

2021-01-05 Thread Claudio Jeker
While changing log_addr() I noticed that struct bgpd_addr could benefit
from changing the encoding of AID_VPN_IPv4 and AID_VPN_IPv6 addrs.
Instead of having independent route distinguishers and labelstacks use
common fields for those and use the v4 and v6 addresses for the prefix.
This is a bit more compact but also simplifies some code since the
handling of AID_VPN_IPv4 and AID_VPN_IPv6 can be handled in the same
switch case.

I reduced the labelstack size from 21 to 18 (6 instead of 7 labels). Now
in theory you could pack 7 labels into an IPv4 VPN NLRI (8bit prefixlen +
64bit RD + 16bit prefix + 21 * 8bit label = 256) but that is quite silly.
Even 6 labels is more than enough. bgpd itself only allows a single MPLS
label when announcing such networks.

-- 
:wq Claudio

PS: diff is based off /usr/src/usr.sbin

Index: bgpctl/mrtparser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/mrtparser.c,v
retrieving revision 1.13
diff -u -p -r1.13 mrtparser.c
--- bgpctl/mrtparser.c  3 Jul 2019 06:22:01 -   1.13
+++ bgpctl/mrtparser.c  5 Jan 2021 10:05:30 -
@@ -833,14 +833,14 @@ mrt_extract_attr(struct mrt_rib_entry *r
re->nexthop.aid = aid;
memcpy(&tmp, a + 1 + sizeof(u_int64_t),
sizeof(tmp));
-   re->nexthop.vpn4.addr.s_addr = tmp;
+   re->nexthop.v4.s_addr = tmp;
break;
case AID_VPN_IPv6:
if (attr_len < sizeof(u_int64_t) +
sizeof(struct in6_addr))
return (-1);
re->nexthop.aid = aid;
-   memcpy(&re->nexthop.vpn6.addr,
+   memcpy(&re->nexthop.v6,
a + 1 + sizeof(u_int64_t),
sizeof(struct in6_addr));
break;
@@ -979,7 +979,7 @@ mrt_extract_addr(void *msg, u_int len, s
return (-1);
addr->aid = aid;
/* XXX labelstack and rd missing */
-   memcpy(&addr->vpn4.addr, b + sizeof(u_int64_t),
+   memcpy(&addr->v4, b + sizeof(u_int64_t),
sizeof(struct in_addr));
return (sizeof(u_int64_t) + sizeof(struct in_addr));
case AID_VPN_IPv6:
@@ -987,7 +987,7 @@ mrt_extract_addr(void *msg, u_int len, s
return (-1);
addr->aid = aid;
/* XXX labelstack and rd missing */
-   memcpy(&addr->vpn6.addr, b + sizeof(u_int64_t),
+   memcpy(&addr->v6, b + sizeof(u_int64_t),
sizeof(struct in6_addr));
return (sizeof(u_int64_t) + sizeof(struct in6_addr));
default:
Index: bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.409
diff -u -p -r1.409 bgpd.h
--- bgpd/bgpd.h 4 Jan 2021 13:40:32 -   1.409
+++ bgpd/bgpd.h 5 Jan 2021 10:05:30 -
@@ -176,23 +176,6 @@ extern const struct aid aid_vals[];
sizeof(struct pt_entry_vpn6)\
 }
 
-struct vpn4_addr {
-   u_int64_t   rd;
-   struct in_addr  addr;
-   u_int8_tlabelstack[21]; /* max that makes sense */
-   u_int8_tlabellen;
-   u_int8_tpad1;
-   u_int8_tpad2;
-};
-
-struct vpn6_addr {
-   u_int64_t   rd;
-   struct in6_addr addr;
-   u_int8_tlabelstack[21]; /* max that makes sense */
-   u_int8_tlabellen;
-   u_int8_tpad1;
-   u_int8_tpad2;
-};
 
 #define BGP_MPLS_BOS   0x01
 
@@ -200,22 +183,15 @@ struct bgpd_addr {
union {
struct in_addr  v4;
struct in6_addr v6;
-   struct vpn4_addrvpn4;
-   struct vpn6_addrvpn6;
/* maximum size for a prefix is 256 bits */
-   u_int8_taddr8[32];
-   u_int16_t   addr16[16];
-   u_int32_t   addr32[8];
} ba;   /* 128-bit address */
+   u_int64_t   rd; /* route distinguisher for VPN addrs */
u_int32_t   scope_id;   /* iface scope id for v6 */
u_int8_taid;
+   u_int8_tlabellen;   /* size of the labelstack */
+   u_int8_tlabelstack[18]; /* max that makes sense */
 #definev4  ba.v4
 #definev6  ba.v6
-#definevpn4ba.vpn4
-#definevpn6ba.vpn6
-#defineaddr8   ba.addr8
-#defineaddr16  ba.addr16
-#defineaddr32  ba.addr32
 };
 
 #defineDEFAULT_LISTENER0x01
Index: bg

Extend IP_ADD_MEMBERSHIP to support struct ip_mreqn

2021-01-06 Thread Claudio Jeker
Linux and FreeBSD both support the use of struct ip_mreqn in
IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. This struct adds one more field
to pass an interface index to the kernel (instead of using the IP
address).

struct ip_mreqn {
   struct  in_addr imr_multiaddr;  /* IP multicast address of group */
   struct  in_addr imr_address;/* local IP address of interface */
   int imr_ifindex;/* interface index */
};

So if imr_ifindex is not 0 then this value is used to define the outgoing
interface instead of doing a lookup with imr_address.
This is something I want to use in ospfd(8) to support unnumbered
interfaces (or actually point-to-point interfaces using the same source
IP).

-- 
:wq Claudio

Index: net/if_gre.c
===
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.163
diff -u -p -r1.163 if_gre.c
--- net/if_gre.c12 Dec 2020 11:49:02 -  1.163
+++ net/if_gre.c6 Jan 2021 08:31:46 -
@@ -3640,7 +3640,7 @@ nvgre_up(struct nvgre_softc *sc)
 
switch (tunnel->t_af) {
case AF_INET:
-   inm = in_addmulti(&tunnel->t_dst4, ifp0);
+   inm = in_addmulti(&tunnel->t_dst4, ifp0->if_index);
if (inm == NULL) {
error = ECONNABORTED;
goto remove_ucast;
Index: net/if_pfsync.c
===
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.280
diff -u -p -r1.280 if_pfsync.c
--- net/if_pfsync.c 4 Jan 2021 12:48:27 -   1.280
+++ net/if_pfsync.c 6 Jan 2021 08:31:46 -
@@ -1424,7 +1424,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cm
addr.s_addr = INADDR_PFSYNC_GROUP;
 
if ((imo->imo_membership[0] =
-   in_addmulti(&addr, sifp)) == NULL) {
+   in_addmulti(&addr, sifp->if_index)) == NULL) {
sc->sc_sync_ifidx = 0;
return (ENOBUFS);
}
Index: net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_vxlan.c
--- net/if_vxlan.c  21 Aug 2020 22:59:27 -  1.81
+++ net/if_vxlan.c  6 Jan 2021 08:31:46 -
@@ -274,7 +274,7 @@ vxlan_multicast_join(struct ifnet *ifp, 
return (EADDRNOTAVAIL);
 
if ((imo->imo_membership[0] =
-   in_addmulti(&dst4->sin_addr, mifp)) == NULL)
+   in_addmulti(&dst4->sin_addr, mifp->if_index)) == NULL)
return (ENOBUFS);
 
imo->imo_num_memberships++;
Index: netinet/in.c
===
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.170
diff -u -p -r1.170 in.c
--- netinet/in.c27 May 2020 11:19:28 -  1.170
+++ netinet/in.c6 Jan 2021 08:31:46 -
@@ -730,7 +730,7 @@ in_ifinit(struct ifnet *ifp, struct in_i
struct in_addr addr;
 
addr.s_addr = INADDR_ALLHOSTS_GROUP;
-   ia->ia_allhosts = in_addmulti(&addr, ifp);
+   ia->ia_allhosts = in_addmulti(&addr, ifp->if_index);
}
 
 out:
@@ -847,10 +847,15 @@ in_broadcast(struct in_addr in, u_int rt
  * Add an address to the list of IP multicast addresses for a given interface.
  */
 struct in_multi *
-in_addmulti(struct in_addr *ap, struct ifnet *ifp)
+in_addmulti(struct in_addr *ap, unsigned int ifidx)
 {
struct in_multi *inm;
struct ifreq ifr;
+   struct ifnet *ifp;
+
+   ifp = if_get(ifidx);
+   if (ifp == NULL)
+   return (NULL);
 
/*
 * See if address already in list.
@@ -867,14 +872,16 @@ in_addmulti(struct in_addr *ap, struct i
 * and link it into the interface's multicast list.
 */
inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
-   if (inm == NULL)
+   if (inm == NULL) {
+   if_put(ifp);
return (NULL);
+   }
 
inm->inm_sin.sin_len = sizeof(struct sockaddr_in);
inm->inm_sin.sin_family = AF_INET;
inm->inm_sin.sin_addr = *ap;
inm->inm_refcnt = 1;
-   inm->inm_ifidx = ifp->if_index;
+   inm->inm_ifidx = ifidx;
inm->inm_ifma.ifma_addr = sintosa(&inm->inm_sin);
 
/*
@@ -884,6 +891,7 @@ in_addmulti(struct in_addr *ap, struct i
memset(&ifr, 0, sizeof(ifr));
memcpy(&ifr.ifr_addr, &inm->inm_sin, sizeof(inm->inm_sin));
if ((*ifp->if_ioctl)(ifp, SIOCADDMULTI,(caddr_t)&ifr) != 0) {
+   if_put(ifp);
free(inm, M_IPMADDR, sizeof(*inm));

fix ospfd parse.y shift/reduce conflicts

2021-01-06 Thread Claudio Jeker
The dependon statement in ospfd parse.y introduces some troubles since it
holds an empty rule that then conflicts with optnl.
This diff changes dependon into dependon and dependonopt so that in the
place where it is optional dependonopt can be used and in the places where
it must not be optional it isn't. With this the shift/reduce conflicts are
gone. While at it cleanup some other rules and use the same optnl idiom
for area and interface (it is the same one as used by bgpd).

Please test this with your configs to see if this causes any parse errors
(ospfd -n should be enough for this).
-- 
:wq Claudio


Index: parse.y
===
RCS file: /cvs/src/usr.sbin/ospfd/parse.y,v
retrieving revision 1.101
diff -u -p -r1.101 parse.y
--- parse.y 29 Dec 2020 19:44:47 -  1.101
+++ parse.y 6 Jan 2021 10:10:23 -
@@ -144,7 +144,7 @@ typedef struct {
 %token   NUMBER
 %typeyesno no optlist optlist_l option demotecount msec
 %typedeadtime
-%typestring dependon
+%typestring dependon dependonopt
 %typeredistribute
 %typeareaid
 
@@ -297,7 +297,7 @@ conf_main   : ROUTERID STRING {
;
 
 
-redistribute   : no REDISTRIBUTE NUMBER '/' NUMBER optlist dependon {
+redistribute   : no REDISTRIBUTE NUMBER '/' NUMBER optlist dependonopt {
struct redistribute *r;
 
if ((r = calloc(1, sizeof(*r))) == NULL)
@@ -323,7 +323,7 @@ redistribute: no REDISTRIBUTE NUMBER '/
free($7);
$$ = r;
}
-   | no REDISTRIBUTE STRING optlist dependon {
+   | no REDISTRIBUTE STRING optlist dependonopt {
struct redistribute *r;
 
if ((r = calloc(1, sizeof(*r))) == NULL)
@@ -426,8 +426,10 @@ option : METRIC NUMBER {
}
;
 
-dependon   : /* empty */   { $$ = NULL; }
-   | DEPEND ON STRING  {
+dependonopt: /* empty */   { $$ = NULL; }
+   | dependon
+
+dependon   : DEPEND ON STRING  {
struct in_addr   addr;
struct kif  *kif;
 
@@ -599,7 +601,7 @@ area: AREA areaid {
memcpy(&areadefs, defs, sizeof(areadefs));
md_list_copy(&areadefs.md_list, &defs->md_list);
defs = &areadefs;
-   } '{' optnl areaopts_l '}' {
+   } '{' optnl areaopts_l optnl '}' {
area = NULL;
md_list_clr(&defs->md_list);
defs = &globaldefs;
@@ -627,8 +629,8 @@ areaid  : NUMBER {
}
;
 
-areaopts_l : areaopts_l areaoptsl nl
-   | areaoptsl optnl
+areaopts_l : areaopts_l nl areaoptsl
+   | areaoptsl
;
 
 areaoptsl  : interface
@@ -739,13 +741,13 @@ interface : INTERFACE STRING  {
}
;
 
-interface_block: '{' optnl interfaceopts_l '}'
+interface_block: '{' optnl interfaceopts_l optnl '}'
| '{' optnl '}'
-   |
+   | /* empty */
;
 
-interfaceopts_l: interfaceopts_l interfaceoptsl nl
-   | interfaceoptsl optnl
+interfaceopts_l: interfaceopts_l nl interfaceoptsl
+   | interfaceoptsl
;
 
 interfaceoptsl : PASSIVE   { iface->passive = 1; }



Make ospf6d work on point-to-point links

2021-01-06 Thread Claudio Jeker
The code in ospf6d is a bit broken when it comes to point-to-point links.
This diff fixes this by a) using the neighbor address instead of the unset
interface destination address and by b) matching the incoming packet
against all possible IPs of that interface.

I tripped on b) because my P2P interface has more than one link-local
address and the code just likes to select the wrong one.

This works for my case, please check I did not break something else.
-- 
:wq Claudio

Index: lsupdate.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/lsupdate.c,v
retrieving revision 1.18
diff -u -p -r1.18 lsupdate.c
--- lsupdate.c  15 Jul 2020 14:47:41 -  1.18
+++ lsupdate.c  6 Jan 2021 11:28:43 -
@@ -474,7 +474,7 @@ ls_retrans_timer(int fd, short event, vo
/* ls_retrans_list_free retriggers the timer */
return;
} else if (nbr->iface->type == IF_TYPE_POINTOPOINT)
-   memcpy(&addr, &nbr->iface->dst, sizeof(addr));
+   memcpy(&addr, &nbr->addr, sizeof(addr));
else
inet_pton(AF_INET6, AllDRouters, &addr);
} else
Index: packet.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/packet.c,v
retrieving revision 1.17
diff -u -p -r1.17 packet.c
--- packet.c23 Dec 2019 07:33:49 -  1.17
+++ packet.c6 Jan 2021 11:52:08 -
@@ -82,12 +82,9 @@ send_packet(struct iface *iface, struct 
 struct in6_addr *dst)
 {
struct sockaddr_in6 sa6;
-   struct msghdr   msg;
-   struct ioveciov[1];
 
-   /* setup buffer */
+   /* setup sockaddr */
bzero(&sa6, sizeof(sa6));
-
sa6.sin6_family = AF_INET6;
sa6.sin6_len = sizeof(sa6);
sa6.sin6_addr = *dst;
@@ -104,15 +101,8 @@ send_packet(struct iface *iface, struct 
return (-1);
}
 
-   bzero(&msg, sizeof(msg));
-   msg.msg_name = &sa6;
-   msg.msg_namelen = sizeof(sa6);
-   iov[0].iov_base = buf->buf;
-   iov[0].iov_len = ibuf_size(buf);
-   msg.msg_iov = iov;
-   msg.msg_iovlen = 1;
-
-   if (sendmsg(iface->fd, &msg, 0) == -1) {
+   if (sendto(iface->fd, buf->buf, ibuf_size(buf), 0,
+   (struct sockaddr *)&sa6, sizeof(sa6)) == -1) {
log_warn("send_packet: error sending packet on interface %s",
iface->name);
return (-1);
@@ -186,11 +176,16 @@ recv_packet(int fd, short event, void *b
 * AllDRouters is only valid for DR and BDR but this is checked later.
 */
inet_pton(AF_INET6, AllSPFRouters, &addr);
-
if (!IN6_ARE_ADDR_EQUAL(&dest, &addr)) {
inet_pton(AF_INET6, AllDRouters, &addr);
if (!IN6_ARE_ADDR_EQUAL(&dest, &addr)) {
-   if (!IN6_ARE_ADDR_EQUAL(&dest, &iface->addr)) {
+   struct iface_addr *ia;
+
+   TAILQ_FOREACH(ia, &iface->ifa_list, entry) {
+   if (IN6_ARE_ADDR_EQUAL(&dest, &ia->addr))
+   break;
+   }
+   if (ia == NULL) {
log_debug("recv_packet: packet sent to wrong "
"address %s, interface %s",
log_in6addr(&dest), iface->name);



Re: Extend IP_ADD_MEMBERSHIP to support struct ip_mreqn

2021-01-07 Thread Claudio Jeker
On Wed, Jan 06, 2021 at 10:27:42AM +0100, Claudio Jeker wrote:
> Linux and FreeBSD both support the use of struct ip_mreqn in
> IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. This struct adds one more field
> to pass an interface index to the kernel (instead of using the IP
> address).
> 
> struct ip_mreqn {
>struct  in_addr imr_multiaddr;  /* IP multicast address of group */
>struct  in_addr imr_address;/* local IP address of interface */
>int imr_ifindex;/* interface index */
> };
> 
> So if imr_ifindex is not 0 then this value is used to define the outgoing
> interface instead of doing a lookup with imr_address.
> This is something I want to use in ospfd(8) to support unnumbered
> interfaces (or actually point-to-point interfaces using the same source
> IP).
> 

Here is the corresponding ospfd(8) diff to use struct ip_mreqn and the
interface index. With this it should be possible to have the same IP
address set on multiple interfaces.

-- 
:wq Claudio

Index: interface.c
===
RCS file: /cvs/src/usr.sbin/ospfd/interface.c,v
retrieving revision 1.84
diff -u -p -r1.84 interface.c
--- interface.c 2 Nov 2020 00:30:56 -   1.84
+++ interface.c 6 Jan 2021 09:33:38 -
@@ -711,7 +711,7 @@ LIST_HEAD(,if_group_count) ifglist = LIS
 int
 if_join_group(struct iface *iface, struct in_addr *addr)
 {
-   struct ip_mreq   mreq;
+   struct ip_mreqn  mreq;
struct if_group_count   *ifg;
 
switch (iface->type) {
@@ -734,7 +734,7 @@ if_join_group(struct iface *iface, struc
return (0);
 
mreq.imr_multiaddr.s_addr = addr->s_addr;
-   mreq.imr_interface.s_addr = iface->addr.s_addr;
+   mreq.imr_ifindex = iface->ifindex;
 
if (setsockopt(iface->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
(void *)&mreq, sizeof(mreq)) == -1) {
@@ -760,7 +760,7 @@ if_join_group(struct iface *iface, struc
 int
 if_leave_group(struct iface *iface, struct in_addr *addr)
 {
-   struct ip_mreq   mreq;
+   struct ip_mreqn  mreq;
struct if_group_count   *ifg;
 
switch (iface->type) {
@@ -782,7 +782,7 @@ if_leave_group(struct iface *iface, stru
}
 
mreq.imr_multiaddr.s_addr = addr->s_addr;
-   mreq.imr_interface.s_addr = iface->addr.s_addr;
+   mreq.imr_ifindex = iface->ifindex;
 
if (setsockopt(iface->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP,
(void *)&mreq, sizeof(mreq)) == -1) {



Re: Extend IP_ADD_MEMBERSHIP to support struct ip_mreqn

2021-01-07 Thread Claudio Jeker
On Wed, Jan 06, 2021 at 10:27:42AM +0100, Claudio Jeker wrote:
> Linux and FreeBSD both support the use of struct ip_mreqn in
> IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. This struct adds one more field
> to pass an interface index to the kernel (instead of using the IP
> address).
> 
> struct ip_mreqn {
>struct  in_addr imr_multiaddr;  /* IP multicast address of group */
>struct  in_addr imr_address;/* local IP address of interface */
>int imr_ifindex;/* interface index */
> };
> 
> So if imr_ifindex is not 0 then this value is used to define the outgoing
> interface instead of doing a lookup with imr_address.
> This is something I want to use in ospfd(8) to support unnumbered
> interfaces (or actually point-to-point interfaces using the same source
> IP).
> 

This diff is better. I removed the change to in_addmulti() from the
previous version. There is no benefit in passing the interface index to
in_addmulti() (instead of the ifp). in_addmulti() needs the ifp (not just
the index) and it just adds a lot of unnecessary changes to the diff.

-- 
:wq Claudio

Index: netinet/in.h
===
RCS file: /cvs/src/sys/netinet/in.h,v
retrieving revision 1.138
diff -u -p -r1.138 in.h
--- netinet/in.h22 Aug 2020 17:55:30 -  1.138
+++ netinet/in.h6 Jan 2021 08:31:46 -
@@ -360,6 +360,12 @@ struct ip_mreq {
struct  in_addr imr_interface;  /* local IP address of interface */
 };
 
+struct ip_mreqn {
+   struct  in_addr imr_multiaddr;  /* IP multicast address of group */
+   struct  in_addr imr_address;/* local IP address of interface */
+   int imr_ifindex;/* interface index */
+};
+
 /*
  * Argument for IP_PORTRANGE:
  * - which range to search when port is unspecified at bind() or connect()
Index: netinet/ip_output.c
===
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.358
diff -u -p -r1.358 ip_output.c
--- netinet/ip_output.c 20 Dec 2020 21:15:47 -  1.358
+++ netinet/ip_output.c 7 Jan 2021 11:14:37 -
@@ -73,6 +73,7 @@
 #endif /* IPSEC */
 
 int ip_pcbopts(struct mbuf **, struct mbuf *);
+int ip_multicast_if(struct ip_mreqn *, u_int, unsigned int *);
 int ip_setmoptions(int, struct ip_moptions **, struct mbuf *, u_int);
 void ip_mloopback(struct ifnet *, struct mbuf *, struct sockaddr_in *);
 static __inline u_int16_t __attribute__((__unused__))
@@ -1337,6 +1338,51 @@ ip_pcbopts(struct mbuf **pcbopt, struct 
 }
 
 /*
+ * Lookup the interface based on the information in the ip_mreqn struct.
+ */
+int
+ip_multicast_if(struct ip_mreqn *mreq, u_int rtableid, unsigned int *ifidx)
+{
+   struct sockaddr_in sin;
+   struct rtentry *rt;
+
+   /*
+* In case userland provides the imr_ifindex use this as interface.
+* If no interface address was provided, use the interface of
+* the route to the given multicast address.
+*/
+   if (mreq->imr_ifindex != 0) {
+   *ifidx = mreq->imr_ifindex;
+   } else if (mreq->imr_address.s_addr == INADDR_ANY) {
+   memset(&sin, 0, sizeof(sin));
+   sin.sin_len = sizeof(sin);
+   sin.sin_family = AF_INET;
+   sin.sin_addr = mreq->imr_multiaddr;
+   rt = rtalloc(sintosa(&sin), RT_RESOLVE, rtableid);
+   if (!rtisvalid(rt)) {
+   rtfree(rt);
+   return EADDRNOTAVAIL;
+   }
+   *ifidx = rt->rt_ifidx;
+   rtfree(rt);
+   } else {
+   memset(&sin, 0, sizeof(sin));
+   sin.sin_len = sizeof(sin);
+   sin.sin_family = AF_INET;
+   sin.sin_addr = mreq->imr_address;
+   rt = rtalloc(sintosa(&sin), 0, rtableid);
+   if (!rtisvalid(rt) || !ISSET(rt->rt_flags, RTF_LOCAL)) {
+   rtfree(rt);
+   return EADDRNOTAVAIL;
+   }
+   *ifidx = rt->rt_ifidx;
+   rtfree(rt);
+   }
+
+   return 0;
+}
+
+/*
  * Set the IP multicast options in response to user setsockopt().
  */
 int
@@ -1345,12 +1391,12 @@ ip_setmoptions(int optname, struct ip_mo
 {
struct in_addr addr;
struct in_ifaddr *ia;
-   struct ip_mreq *mreq;
+   struct ip_mreqn mreqn;
struct ifnet *ifp = NULL;
struct ip_moptions *imo = *imop;
struct in_multi **immp;
-   struct rtentry *rt;
struct sockaddr_in sin;
+   unsigned int ifidx;
int i, error = 0;
u_char loop;
 
@@ -1438,63 +1484,41 @@ ip_setmoptions(int optname, struct ip_mo
 * Add a multicast group membership.
 * Group must be a valid IP multicast address.
 */
-

rpki-client simplify entity queue handling

2021-01-07 Thread Claudio Jeker
Currently rpki-client keeps all pending work on a queue and only removes
it from the queue once it has been processed. The only bit that the parent
rpki-client process needs from the queue is the type when processing the
response. So instead of passing the id, pass the type back from the parser.

With this the queue only holds entries that can't be processed right now
because the repository is not yet loaded. Additionally the handling of
responses becomes more decoupled.

All in all I think this simplifies the code a fair bit. What do others
think?
-- 
:wq Claudio

Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.88
diff -u -p -r1.88 main.c
--- main.c  21 Dec 2020 11:35:55 -  1.88
+++ main.c  7 Jan 2021 13:22:02 -
@@ -88,6 +88,7 @@ structrepo {
size_t   id; /* identifier (array index) */
 };
 
+size_t entity_queue;
 inttimeout = 60*60;
 volatile sig_atomic_t killme;
 void   suicide(int sig);
@@ -105,7 +106,6 @@ static struct   repotab {
  * and parsed.
  */
 struct entity {
-   size_t   id; /* unique identifier */
enum rtype   type; /* type of entity (not RTYPE_EOF) */
char*uri; /* file or rsync:// URI */
int  has_dgst; /* whether dgst is specified */
@@ -223,7 +223,6 @@ static void
 entity_read_req(int fd, struct entity *ent)
 {
 
-   io_simple_read(fd, &ent->id, sizeof(size_t));
io_simple_read(fd, &ent->type, sizeof(enum rtype));
io_str_read(fd, &ent->uri);
io_simple_read(fd, &ent->has_dgst, sizeof(int));
@@ -244,7 +243,6 @@ entity_buffer_req(char **b, size_t *bsz,
 const struct entity *ent)
 {
 
-   io_simple_buffer(b, bsz, bmax, &ent->id, sizeof(size_t));
io_simple_buffer(b, bsz, bmax, &ent->type, sizeof(enum rtype));
io_str_buffer(b, bsz, bmax, ent->uri);
io_simple_buffer(b, bsz, bmax, &ent->has_dgst, sizeof(int));
@@ -278,12 +276,14 @@ entity_write_req(int fd, const struct en
 static void
 entityq_flush(int fd, struct entityq *q, const struct repo *repo)
 {
-   struct entity   *p;
+   struct entity   *p, *np;
 
-   TAILQ_FOREACH(p, q, entries) {
+   TAILQ_FOREACH_SAFE(p, q, entries, np) {
if (p->repo < 0 || repo->id != (size_t)p->repo)
continue;
entity_write_req(fd, p);
+   TAILQ_REMOVE(q, p, entries);
+   entity_free(p);
}
 }
 
@@ -365,49 +365,18 @@ repo_filename(const struct repo *repo, c
 }
 
 /*
- * Read the next entity from the parser process, removing it from the
- * queue of pending requests in the process.
- * This always returns a valid entity.
- */
-static struct entity *
-entityq_next(int fd, struct entityq *q)
-{
-   size_t   id;
-   struct entity   *entp;
-
-   io_simple_read(fd, &id, sizeof(size_t));
-
-   TAILQ_FOREACH(entp, q, entries)
-   if (entp->id == id)
-   break;
-
-   assert(entp != NULL);
-   TAILQ_REMOVE(q, entp, entries);
-   return entp;
-}
-
-static void
-entity_buffer_resp(char **b, size_t *bsz, size_t *bmax,
-const struct entity *ent)
-{
-
-   io_simple_buffer(b, bsz, bmax, &ent->id, sizeof(size_t));
-}
-
-/*
  * Add the heap-allocated file to the queue for processing.
  */
 static void
 entityq_add(int fd, struct entityq *q, char *file, enum rtype type,
 const struct repo *rp, const unsigned char *dgst,
-const unsigned char *pkey, size_t pkeysz, char *descr, size_t *eid)
+const unsigned char *pkey, size_t pkeysz, char *descr)
 {
struct entity   *p;
 
if ((p = calloc(1, sizeof(struct entity))) == NULL)
err(1, "calloc");
 
-   p->id = (*eid)++;
p->type = type;
p->uri = file;
p->repo = (rp != NULL) ? (ssize_t)rp->id : -1;
@@ -426,15 +395,19 @@ entityq_add(int fd, struct entityq *q, c
err(1, "strdup");
 
filepath_add(file);
-   TAILQ_INSERT_TAIL(q, p, entries);
+
+   entity_queue++;
 
/*
 * Write to the queue if there's no repo or the repo has already
-* been loaded.
+* been loaded else enqueue it for later.
 */
 
-   if (rp == NULL || rp->loaded)
+   if (rp == NULL || rp->loaded) {
entity_write_req(fd, p);
+   entity_free(p);
+   } else
+   TAILQ_INSERT_TAIL(q, p, entries);
 }
 
 /*
@@ -443,7 +416,7 @@ entityq_add(int fd, struct entityq *q, c
  */
 static void
 queue_add_from_mft(int fd, struct entityq *q, const char *mft,
-const struct mftfile *file, enum rtype type, size_t *eid)
+const struct mftfile *file, enum rtype type)
 {
char*cp, *nfile;
 
@@ -461,7 +434,7 @@ queue_add_from_mft(int fd, struct entity
 * that the repository has already been loaded.
 */
 
-   entityq_add(fd, q, n

extend ip(4) to document ip_mreqn

2021-01-07 Thread Claudio Jeker
Here is my attempt to extend ip(4) to also document struct ip_mreqn.
I am not sure what the best way is to document the option to use either
struct ip_mreq or struct ip_mreqn with IP_ADD_MEMBERSHIP.

-- 
:wq Claudio

Index: ip.4
===
RCS file: /cvs/src/share/man/man4/ip.4,v
retrieving revision 1.41
diff -u -p -r1.41 ip.4
--- ip.418 Aug 2016 11:45:18 -  1.41
+++ ip.47 Jan 2021 14:58:54 -
@@ -411,13 +411,21 @@ setsockopt(s, IPPROTO_IP, IP_ADD_MEMBERS
 .Pp
 where
 .Fa mreq
-is the following structure:
+is either the following structure:
 .Bd -literal -offset indent
 struct ip_mreq {
 struct in_addr imr_multiaddr; /* multicast group to join */
 struct in_addr imr_interface; /* interface to join on */
 }
 .Ed
+or
+.Bd -literal -offset indent
+struct ip_mreqn {
+struct in_addr imr_multiaddr; /* multicast group to join */
+struct in_addr imr_address;   /* local IP address of interface */
+intimr_ifindex;   /* interface index to join*/
+};
+.Ed
 .Pp
 .Va imr_interface
 should
@@ -428,6 +436,12 @@ or the
 .Tn IP
 address of a particular multicast-capable interface if
 the host is multihomed.
+.Va imr_ifindex
+of
+.Va struct ip_mreqn
+can be set to the interface index instead of specifying the
+.Tn IP
+address of a particular multicast-capable interface.
 Membership is associated with a single interface;
 programs running on multihomed hosts may need to
 join the same group on more than one interface.



bgpd simplify update path

2021-01-07 Thread Claudio Jeker
When bgpd generates an UPDATE to update or withdraw prefixes it does this
from rde_generate_updates() and then descends into up_generate_update().
Now there is up_test_update() that checks if a new prefix is actually OK
to be distributed. It checks things for route reflectors and the common
communities (NO_EXPORT, ...). There are a few more checks that are pure
peer config checks and those should be moved up to rde_generate_updates().

Last but not least there is this bit about ORIGINATOR_ID which seems
sensible but on second thought I think it is actually wrong and an
extension on top of the RFC. Since I think this code currently does not
have the right withdraw behaviour I decided it is best to just remove it.

This code simplifies the return of up_test_update() to a pure true / false
case and makes up_generate_update() simpler. Also I think doing the peer
checks early on will improve performance.

Please review :)
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.510
diff -u -p -r1.510 rde.c
--- rde.c   30 Dec 2020 07:29:56 -  1.510
+++ rde.c   7 Jan 2021 17:04:53 -
@@ -2814,7 +2814,8 @@ rde_send_kroute(struct rib *rib, struct 
 void
 rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix *old)
 {
-   struct rde_peer *peer;
+   struct rde_peer *peer;
+   u_int8_t aid;
 
/*
 * If old is != NULL we know it was active and should be removed.
@@ -2824,6 +2825,11 @@ rde_generate_updates(struct rib *rib, st
if (old == NULL && new == NULL)
return;
 
+   if (new)
+   aid = new->pt->aid;
+   else
+   aid = old->pt->aid;
+
LIST_FOREACH(peer, &peerlist, peer_l) {
if (peer->conf.id == 0)
continue;
@@ -2831,6 +2837,14 @@ rde_generate_updates(struct rib *rib, st
continue;
if (peer->state != PEER_UP)
continue;
+   /* check if peer actually supports the address family */
+   if (peer->capa.mp[aid] == 0)
+   continue;
+   /* skip peers with special export types */
+   if (peer->conf.export_type == EXPORT_NONE ||
+   peer->conf.export_type == EXPORT_DEFAULT_ROUTE)
+   continue;
+
up_generate_updates(out_rules, peer, new, old);
}
 }
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.123
diff -u -p -r1.123 rde_update.c
--- rde_update.c24 Jan 2020 05:44:05 -  1.123
+++ rde_update.c7 Jan 2021 18:13:45 -
@@ -47,11 +47,9 @@ static struct community  comm_no_expsubco
 static int
 up_test_update(struct rde_peer *peer, struct prefix *p)
 {
-   struct bgpd_addr addr;
struct rde_aspath   *asp;
struct rde_community*comm;
struct rde_peer *prefp;
-   struct attr *attr;
 
if (p == NULL)
/* no prefix available */
@@ -70,10 +68,6 @@ up_test_update(struct rde_peer *peer, st
if (asp->flags & F_ATTR_LOOP)
fatalx("try to send out a looped path");
 
-   pt_getaddr(p->pt, &addr);
-   if (peer->capa.mp[addr.aid] == 0)
-   return (-1);
-
if (!prefp->conf.ebgp && !peer->conf.ebgp) {
/*
 * route reflector redistribution rules:
@@ -90,16 +84,6 @@ up_test_update(struct rde_peer *peer, st
return (0);
}
 
-   /* export type handling */
-   if (peer->conf.export_type == EXPORT_NONE ||
-   peer->conf.export_type == EXPORT_DEFAULT_ROUTE) {
-   /*
-* no need to withdraw old prefix as this will be
-* filtered out as well.
-*/
-   return (-1);
-   }
-
/* well known communities */
if (community_match(comm, &comm_no_advertise, NULL))
return (0);
@@ -110,18 +94,6 @@ up_test_update(struct rde_peer *peer, st
return (0);
}
 
-   /*
-* Don't send messages back to originator
-* this is not specified in the RFC but seems logical.
-*/
-   if ((attr = attr_optget(asp, ATTR_ORIGINATOR_ID)) != NULL) {
-   if (memcmp(attr->data, &peer->remote_bgpid,
-   sizeof(peer->remote_bgpid)) == 0) {
-   /* would cause loop don't send */
-   return (-1);
-   }
-   }
-
return (1);
 }
 
@@ -149,13 +121,8 @@ withdraw:
peer->up_wcnt++;
}
} else {
-   switch (up_test_update(peer, new)) {
-   case 1:
- 

rpki-client check IP and ASnum coverage only on ROAs

2021-01-08 Thread Claudio Jeker
rpki-client is currently very strict about the IP ranges and AS ranges in
certificates. If a child certificate has an uncovered range in its list it
is considered invalid and is removed from the pool (and with it all the ROA
entries as well).

Now rfc8360 relaxes this a bit and mentions that a ROA for 192.0.2.0/24
is valid if that prefix is covered in all certs in the chain. So if we
have this chain:
Cert 1 (TA):
Resources 192.0.2.0/24, 198.51.100.0/24
Cert 2:
Resources 192.0.2.0/24
Cert 3:
Resources 192.0.2.0/24, 198.51.100.0/24
ROA:
Resources 192.0.2.0/24

The rpki-client would currently remove cert3 with an error:
RFC 6487: uncovered IP: 198.51.100.0/24
and 192.0.2.0/24 would be missing in the roa-set.
This diff changes this behaviour. It only reports a warning for
certificates that have uncovered IP or AS ranges but at the same time
it will validate the IP range all the way up to the TA (both for cert and
ROA). With this 192.0.2.0/24 remains in the roa-set even though Cert 3 has
an uncovered IP range included.

This makes it a bit easier to update certificates since they can be
updated independently from each other.

Please test
-- 
:wq Claudio

Index: validate.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/validate.c,v
retrieving revision 1.11
diff -u -p -r1.11 validate.c
--- validate.c  12 Sep 2020 15:46:48 -  1.11
+++ validate.c  8 Jan 2021 13:42:32 -
@@ -37,8 +37,8 @@ tracewarn(const struct auth *a)
 }
 
 /*
- * Walk up the chain of certificates trying to match our AS number to
- * one of the allocations in that chain.
+ * Walk up the full chain of certificates trying to match our AS number to
+ * one of the allocations in all the certs in that chain.
  * Returns 1 if covered or 0 if not.
  */
 static int
@@ -47,26 +47,21 @@ valid_as(struct auth *a, uint32_t min, u
int  c;
 
if (a == NULL)
-   return 0;
+   return 1;
 
/* Does this certificate cover our AS number? */
-   if (a->cert->asz) {
-   c = as_check_covered(min, max,
-   a->cert->as, a->cert->asz);
-   if (c > 0)
-   return 1;
-   else if (c < 0)
-   return 0;
-   }
+   c = as_check_covered(min, max,
+   a->cert->as, a->cert->asz);
+   if (c < 0)
+   return 0;
 
/* If it doesn't, walk up the chain. */
return valid_as(a->parent, min, max);
 }
 
 /*
- * Walk up the chain of certificates (really just the last one, but in
- * the case of inheritence, the ones before) making sure that our IP
- * prefix is covered in the first non-inheriting specification.
+ * Walk up the full chain of certificates making sure that our IP
+ * prefix is covered in all certs non-inheriting specification.
  * Returns 1 if covered or 0 if not.
  */
 static int
@@ -76,14 +71,12 @@ valid_ip(struct auth *a, enum afi afi,
int  c;
 
if (a == NULL)
-   return 0;
+   return 1;
 
/* Does this certificate cover our IP prefix? */
c = ip_addr_check_covered(afi, min, max,
a->cert->ips, a->cert->ipsz);
-   if (c > 0)
-   return 1;
-   else if (c < 0)
+   if (c < 0)
return 0;
 
/* If it doesn't, walk up the chain. */
@@ -173,8 +166,6 @@ valid_cert(const char *fn, struct auth_t
continue;
warnx("%s: RFC 6487: uncovered AS: "
"%u--%u", fn, min, max);
-   tracewarn(a);
-   return 0;
}
 
for (i = 0; i < cert->ipsz; i++) {
@@ -200,8 +191,6 @@ valid_cert(const char *fn, struct auth_t
"(inherit)", fn);
break;
}
-   tracewarn(a);
-   return 0;
}
 
return 1;



Re: bgpd simplify update path

2021-01-09 Thread Claudio Jeker
On Fri, Jan 08, 2021 at 09:42:57PM +0100, Sebastian Benoit wrote:
> Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.01.07 19:34:23 +0100:
> > When bgpd generates an UPDATE to update or withdraw prefixes it does this
> > from rde_generate_updates() and then decends into up_generate_update().
> > Now there is up_test_update() that checks if a new prefix is actually OK
> > to be distributed. It checks things for route reflectors and the common
> > communities (NO_EXPORT, ...). There are a few more checks that are pure
> > peer config checks and those should be moved up to rde_generate_updates().
> > 
> > Last but not least there is this bit about ORIGINATOR_ID which seems
> > sensible but on second thought I think it is actually wrong and an
> > extension on top of the RFC. Since I think this code currently has not the
> > right withdraw behaviour I decided it is the best to just remove it.
> 
> I think it should not matter because the receiving router will do the same
> check (against its own id) and ignore the update:
> 
>   A router [that recognizes the ORIGINATOR_ID attribute] SHOULD
>   ignore a route received with its BGP Identifier as the ORIGINATOR_ID.
>   (RFC 4456)
> 
> However your change is correct because the RFC does say that the receiver
> should make this descision. We do seem to correctly check that when
> receiving updates in rde_reflector().

What I wonder about is that because of return -1 the prefix is actually
not withdrawn from the neighbor. So an old prefix could get stuck on the
peer. This is why I think it is best to remove this.
 
> > This code simplifies the return of up_test_update() to a pure true / false
> > case and make up_generate_update() simpler. Also I think doing the peer
> > checks early on will improve performance.
> 
> ok benno@
> one whitespace error below

Will fix those and then commit. Thanks

> > 
> > Please review :)
> > -- 
> > :wq Claudio
> > 
> > Index: rde.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.510
> > diff -u -p -r1.510 rde.c
> > --- rde.c   30 Dec 2020 07:29:56 -  1.510
> > +++ rde.c   7 Jan 2021 17:04:53 -
> > @@ -2814,7 +2814,8 @@ rde_send_kroute(struct rib *rib, struct 
> >  void
> >  rde_generate_updates(struct rib *rib, struct prefix *new, struct prefix 
> > *old)
> >  {
> > -   struct rde_peer *peer;
> > +   struct rde_peer *peer;
> > +   u_int8_t aid;
> >  
> > /*
> >  * If old is != NULL we know it was active and should be removed.
> > @@ -2824,6 +2825,11 @@ rde_generate_updates(struct rib *rib, st
> > if (old == NULL && new == NULL)
> > return;
> >  
> > +   if (new)
> > +   aid = new->pt->aid;
> > +   else
> > +   aid = old->pt->aid;
> > +
> > LIST_FOREACH(peer, &peerlist, peer_l) {
> > if (peer->conf.id == 0)
> > continue;
> > @@ -2831,6 +2837,14 @@ rde_generate_updates(struct rib *rib, st
> > continue;
> > if (peer->state != PEER_UP)
> > continue;
> > +   /* check if peer actually supports the address family */
> > +   if (peer->capa.mp[aid] == 0)
> > +   continue;
> > +   /* skip peers with special export types */
> 
> spaces instead of tabs
> 
> 
> > +   if (peer->conf.export_type == EXPORT_NONE ||
> > +   peer->conf.export_type == EXPORT_DEFAULT_ROUTE)
> > +   continue;
> > +
> > up_generate_updates(out_rules, peer, new, old);
> > }
> >  }
> > Index: rde_update.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
> > retrieving revision 1.123
> > diff -u -p -r1.123 rde_update.c
> > --- rde_update.c24 Jan 2020 05:44:05 -  1.123
> > +++ rde_update.c7 Jan 2021 18:13:45 -
> > @@ -47,11 +47,9 @@ static struct community  comm_no_expsubco
> >  static int
> >  up_test_update(struct rde_peer *peer, struct prefix *p)
> >  {
> > -   struct bgpd_addr addr;
> > struct rde_aspath   *asp;
> > struct rde_community*comm;
> > struct rde_peer *prefp;
> > -   struct attr *attr;
> >  
> > if (p == NULL)
> > /* no prefix available */
> > @

Re: rpki-client check IP and ASnum coverage only on ROAs

2021-01-09 Thread Claudio Jeker
On Thu, Jan 07, 2021 at 04:11:47PM +, Job Snijders wrote:
> On Fri, Jan 08, 2021 at 03:43:18PM +0100, Claudio Jeker wrote:
> > rpki-client is currently very strict about the ip ranges and as ranges in
> > certificates. If a child certificate has a uncovered range in its list it
> > is considered invalid and is removed from the pool (with it all the ROA
> > entries as well).
> > 
> > Now rfc8360 relaxes this a bit and mentions that a ROA for 192.0.2.0/24
> > is valid if that prefix is covered in all certs in the chain. 
> 
> RFC 8360 makes a lot of sense

Actually after closer inspection RFC 8360 only relaxes this for a new form
of certs that include new types of certificate policy, IP address range
and AS number range types. So this diff is not correct and I probably
need to work on proper RFC 8360 support (even though it seems no CA is
using RFC 8360 ids right now).

-- 
:wq Claudio



Re: Change bgpd_addr encoding of VPN v4 and v6 addresses

2021-01-12 Thread Claudio Jeker
On Tue, Jan 05, 2021 at 11:17:22AM +0100, Claudio Jeker wrote:
> While changing log_addr() I noticed that struct bgpd_addr could benefit
> from changing the encoding of AID_VPN_IPv4 and AID_VPN_IPv6 addrs.
> Instead of having independent route distinguishers and labelstacks use
> common fields for those and use the v4 and v6 addresses for the prefix.
> This is a bit more compact but also simplifies some code since the
> handling of AID_VPN_IPv4 and AID_VPN_IPv6 can be handled in the same
> switch case.
> 
> I reduced the labelstack size from 21 to 18 (6 instead of 7 labels). Now
> in theory you could pack 7 labels into an IPv4 VPN NLRI (8bit prefixlen +
> 64bit RD + 16bit prefix + 21 * 8bit label = 256) but that is quite silly.
> Even 6 labels is more than enough. bgpd itself only allows a single MPLS
> label when announcing such networks.

Ping
 
-- 
:wq Claudio

PS: diff is based off /usr/src/usr.sbin

Index: bgpctl/mrtparser.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/mrtparser.c,v
retrieving revision 1.13
diff -u -p -r1.13 mrtparser.c
--- bgpctl/mrtparser.c  3 Jul 2019 06:22:01 -   1.13
+++ bgpctl/mrtparser.c  5 Jan 2021 09:19:09 -
@@ -833,14 +833,14 @@ mrt_extract_attr(struct mrt_rib_entry *r
re->nexthop.aid = aid;
memcpy(&tmp, a + 1 + sizeof(u_int64_t),
sizeof(tmp));
-   re->nexthop.vpn4.addr.s_addr = tmp;
+   re->nexthop.v4.s_addr = tmp;
break;
case AID_VPN_IPv6:
if (attr_len < sizeof(u_int64_t) +
sizeof(struct in6_addr))
return (-1);
re->nexthop.aid = aid;
-   memcpy(&re->nexthop.vpn6.addr,
+   memcpy(&re->nexthop.v6,
a + 1 + sizeof(u_int64_t),
sizeof(struct in6_addr));
break;
@@ -979,7 +979,7 @@ mrt_extract_addr(void *msg, u_int len, s
return (-1);
addr->aid = aid;
/* XXX labelstack and rd missing */
-   memcpy(&addr->vpn4.addr, b + sizeof(u_int64_t),
+   memcpy(&addr->v4, b + sizeof(u_int64_t),
sizeof(struct in_addr));
return (sizeof(u_int64_t) + sizeof(struct in_addr));
case AID_VPN_IPv6:
@@ -987,7 +987,7 @@ mrt_extract_addr(void *msg, u_int len, s
return (-1);
addr->aid = aid;
/* XXX labelstack and rd missing */
-   memcpy(&addr->vpn6.addr, b + sizeof(u_int64_t),
+   memcpy(&addr->v6, b + sizeof(u_int64_t),
sizeof(struct in6_addr));
return (sizeof(u_int64_t) + sizeof(struct in6_addr));
default:
Index: bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.409
diff -u -p -r1.409 bgpd.h
--- bgpd/bgpd.h 4 Jan 2021 13:40:32 -   1.409
+++ bgpd/bgpd.h 5 Jan 2021 08:23:39 -
@@ -176,23 +176,6 @@ extern const struct aid aid_vals[];
sizeof(struct pt_entry_vpn6)\
 }
 
-struct vpn4_addr {
-   u_int64_t   rd;
-   struct in_addr  addr;
-   u_int8_tlabelstack[21]; /* max that makes sense */
-   u_int8_tlabellen;
-   u_int8_tpad1;
-   u_int8_tpad2;
-};
-
-struct vpn6_addr {
-   u_int64_t   rd;
-   struct in6_addr addr;
-   u_int8_tlabelstack[21]; /* max that makes sense */
-   u_int8_tlabellen;
-   u_int8_tpad1;
-   u_int8_tpad2;
-};
 
 #define BGP_MPLS_BOS   0x01
 
@@ -200,22 +183,15 @@ struct bgpd_addr {
union {
struct in_addr  v4;
struct in6_addr v6;
-   struct vpn4_addrvpn4;
-   struct vpn6_addrvpn6;
/* maximum size for a prefix is 256 bits */
-   u_int8_taddr8[32];
-   u_int16_t   addr16[16];
-   u_int32_t   addr32[8];
} ba;   /* 128-bit address */
+   u_int64_t   rd; /* route distinguisher for VPN addrs */
u_int32_t   scope_id;   /* iface scope id for v6 */
u_int8_taid;
+   u_int8_tlabellen;   /* size of the labelstack */
+   u_int8_tlabelstack[18]; /* max that makes sense */
 #definev4  ba.v4
 #definev6  ba.v6
-#define 

Re: Make ospf6d work on point-to-point links

2021-01-12 Thread Claudio Jeker
On Wed, Jan 06, 2021 at 01:02:50PM +0100, Claudio Jeker wrote:
> The code in ospf6d is a bit broken when it comes to point-to-point links.
> This diff fixes this by a) using the neighbor address instead of the unset
> interface destination address and by b) matching the incomming packet
> against all possible IPs of that interface.
> 
> I tripped on b) because my P2P interface has more than one link-local
> address and the code just likes to select the wrong one.
> 
> This works for my case, please check I did not break something else.

So this seems to work. Does anyone want to OK it? Otherwise I will commit
this later today.

> -- 
> :wq Claudio
> 
> Index: lsupdate.c
> ===
> RCS file: /cvs/src/usr.sbin/ospf6d/lsupdate.c,v
> retrieving revision 1.18
> diff -u -p -r1.18 lsupdate.c
> --- lsupdate.c15 Jul 2020 14:47:41 -  1.18
> +++ lsupdate.c6 Jan 2021 11:28:43 -
> @@ -474,7 +474,7 @@ ls_retrans_timer(int fd, short event, vo
>   /* ls_retrans_list_free retriggers the timer */
>   return;
>   } else if (nbr->iface->type == IF_TYPE_POINTOPOINT)
> - memcpy(&addr, &nbr->iface->dst, sizeof(addr));
> + memcpy(&addr, &nbr->addr, sizeof(addr));
>   else
>   inet_pton(AF_INET6, AllDRouters, &addr);
>   } else
> Index: packet.c
> ===
> RCS file: /cvs/src/usr.sbin/ospf6d/packet.c,v
> retrieving revision 1.17
> diff -u -p -r1.17 packet.c
> --- packet.c  23 Dec 2019 07:33:49 -  1.17
> +++ packet.c  6 Jan 2021 11:52:08 -
> @@ -82,12 +82,9 @@ send_packet(struct iface *iface, struct 
>  struct in6_addr *dst)
>  {
>   struct sockaddr_in6 sa6;
> - struct msghdr   msg;
> - struct ioveciov[1];
>  
> - /* setup buffer */
> + /* setup sockaddr */
>   bzero(&sa6, sizeof(sa6));
> -
>   sa6.sin6_family = AF_INET6;
>   sa6.sin6_len = sizeof(sa6);
>   sa6.sin6_addr = *dst;
> @@ -104,15 +101,8 @@ send_packet(struct iface *iface, struct 
>   return (-1);
>   }
>  
> - bzero(&msg, sizeof(msg));
> - msg.msg_name = &sa6;
> - msg.msg_namelen = sizeof(sa6);
> - iov[0].iov_base = buf->buf;
> - iov[0].iov_len = ibuf_size(buf);
> - msg.msg_iov = iov;
> - msg.msg_iovlen = 1;
> -
> - if (sendmsg(iface->fd, &msg, 0) == -1) {
> + if (sendto(iface->fd, buf->buf, ibuf_size(buf), 0,
> + (struct sockaddr *)&sa6, sizeof(sa6)) == -1) {
>   log_warn("send_packet: error sending packet on interface %s",
>   iface->name);
>   return (-1);
> @@ -186,11 +176,16 @@ recv_packet(int fd, short event, void *b
>* AllDRouters is only valid for DR and BDR but this is checked later.
>*/
>   inet_pton(AF_INET6, AllSPFRouters, &addr);
> -
>   if (!IN6_ARE_ADDR_EQUAL(&dest, &addr)) {
>   inet_pton(AF_INET6, AllDRouters, &addr);
>   if (!IN6_ARE_ADDR_EQUAL(&dest, &addr)) {
> - if (!IN6_ARE_ADDR_EQUAL(&dest, &iface->addr)) {
> + struct iface_addr *ia;
> +
> + TAILQ_FOREACH(ia, &iface->ifa_list, entry) {
> + if (IN6_ARE_ADDR_EQUAL(&dest, &ia->addr))
> + break;
> + }
> + if (ia == NULL) {
>   log_debug("recv_packet: packet sent to wrong "
>   "address %s, interface %s",
>   log_in6addr(&dest), iface->name);
> 



bgpd refactor route decision process

2021-01-12 Thread Claudio Jeker
This diff changes two things:
- First, it moves the kroute update into rde_generate_updates(), simplifying
prefix_evaluate() a little bit.

- Second, it changes prefix_evaluate to take an additional argument for the
old prefix (to be removed). Instead of doing this outside of
prefix_evaluate() with some drawbacks in case the same prefix is removed
and readded, the code is now in prefix_evaluate() and does all the magic
itself.

This is a necessary step to finally fix MED sorting.
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.511
diff -u -p -r1.511 rde.c
--- rde.c   9 Jan 2021 16:49:41 -   1.511
+++ rde.c   12 Jan 2021 16:17:31 -
@@ -2825,6 +2825,9 @@ rde_generate_updates(struct rib *rib, st
if (old == NULL && new == NULL)
return;
 
+   if ((rib->flags & F_RIB_NOFIB) == 0)
+   rde_send_kroute(rib, new, old);
+
if (new)
aid = new->pt->aid;
else
@@ -3533,8 +3536,7 @@ rde_softreconfig_sync_reeval(struct rib_
/* need to re-link the nexthop if not already linked */
if ((p->flags & PREFIX_NEXTHOP_LINKED) == 0)
nexthop_link(p);
-   LIST_REMOVE(p, entry.list.rib);
-   prefix_evaluate(p, re);
+   prefix_evaluate(re, p, p);
}
 }
 
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.235
diff -u -p -r1.235 rde.h
--- rde.h   4 Dec 2020 11:57:13 -   1.235
+++ rde.h   12 Jan 2021 16:17:31 -
@@ -483,10 +483,10 @@ communities_unref(struct rde_community *
communities_unlink(comm);
 }
 
-int community_to_rd(struct community *, u_int64_t *);
+intcommunity_to_rd(struct community *, u_int64_t *);
 
 /* rde_decide.c */
-voidprefix_evaluate(struct prefix *, struct rib_entry *);
+void   prefix_evaluate(struct rib_entry *, struct prefix *, struct prefix *);
 
 /* rde_filter.c */
 void   rde_apply_set(struct filter_set_head *, struct rde_peer *,
Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.78
diff -u -p -r1.78 rde_decide.c
--- rde_decide.c9 Aug 2019 13:44:27 -   1.78
+++ rde_decide.c12 Jan 2021 16:24:36 -
@@ -238,14 +238,16 @@ prefix_cmp(struct prefix *p1, struct pre
  * The to evaluate prefix must not be in the prefix list.
  */
 void
-prefix_evaluate(struct prefix *p, struct rib_entry *re)
+prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
 {
struct prefix   *xp;
 
if (re_rib(re)->flags & F_RIB_NOEVALUATE) {
/* decision process is turned off */
-   if (p != NULL)
-   LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
+   if (old != NULL)
+   LIST_REMOVE(old, entry.list.rib);
+   if (new != NULL)
+   LIST_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
if (re->active) {
/*
 * During reloads it is possible that the decision
@@ -259,19 +261,22 @@ prefix_evaluate(struct prefix *p, struct
return;
}
 
-   if (p != NULL) {
+   if (old != NULL)
+   LIST_REMOVE(old, entry.list.rib);
+   
+   if (new != NULL) {
if (LIST_EMPTY(&re->prefix_h))
-   LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
+   LIST_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
else {
LIST_FOREACH(xp, &re->prefix_h, entry.list.rib) {
-   if (prefix_cmp(p, xp) > 0) {
-   LIST_INSERT_BEFORE(xp, p,
+   if (prefix_cmp(new, xp) > 0) {
+   LIST_INSERT_BEFORE(xp, new,
entry.list.rib);
break;
} else if (LIST_NEXT(xp, entry.list.rib) ==
NULL) {
/* if xp last element ... */
-   LIST_INSERT_AFTER(xp, p,
+   LIST_INSERT_AFTER(xp, new,
entry.list.rib);
break;
}
@@ -290,18 +295,17 @@ prefix_evaluate(struct prefix *p, struct
xp = NULL;
}
 
-   if (re->active != xp) {
-   /* need to generate an update */
-
+   /*
+* If the active prefix changed or the active prefix was remo

Re: bgpd refactor route decision process

2021-01-13 Thread Claudio Jeker
On Wed, Jan 13, 2021 at 11:24:32AM +0100, Denis Fondras wrote:
> Le Tue, Jan 12, 2021 at 05:39:02PM +0100, Claudio Jeker a écrit :
> > This diff changes two things:
> > - First, it move the kroute update into rde_generate_updates() simplifying
> > prefix_evaluate a little bit.
> > 
> > - Second, it changes prefix_evaluate to take an additional argument for the
> > old prefix (to be removed). Instead of doing this outside of
> > prefix_evaluate() with some drawbacks in case the same prefix is removed
> > and readded, the code is now in prefix_evaluate() and does all the magic
> > itself.
> > 
> > Index: rde_decide.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> > retrieving revision 1.78
> > diff -u -p -r1.78 rde_decide.c
> > --- rde_decide.c9 Aug 2019 13:44:27 -   1.78
> > +++ rde_decide.c12 Jan 2021 16:24:36 -
> > @@ -238,14 +238,16 @@ prefix_cmp(struct prefix *p1, struct pre
> >   * The to evaluate prefix must not be in the prefix list.
> >   */
> >  void
> > -prefix_evaluate(struct prefix *p, struct rib_entry *re)
> > +prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix 
> > *old)
> >  {
> > struct prefix   *xp;
> >  
> > if (re_rib(re)->flags & F_RIB_NOEVALUATE) {
> > /* decision process is turned off */
> > -   if (p != NULL)
> > -   LIST_INSERT_HEAD(&re->prefix_h, p, entry.list.rib);
> > +   if (old != NULL)
> > +   LIST_REMOVE(old, entry.list.rib);
> > +   if (new != NULL)
> > +   LIST_INSERT_HEAD(&re->prefix_h, new, entry.list.rib);
> 
> Would it be beneficial to have a p == new test ?

You mean old == new? Not sure if it is worth the trouble. 
Currently this has the benefit that the most recent update is at the head
of the list. Now one could argue that this code is supposed to be as fast
as possible and so skipping the remove & insert could be beneficial.
I'm currently after a bigger issue so I'm happy to leave this for others
:)

> 
> Otherwise OK denis@
> 

-- 
:wq Claudio



more refactor bgpd route decision process

2021-01-13 Thread Claudio Jeker
This is another cleanup round of the route decision process.
This time focusing on prefix_cmp(). Make sure that when using
return (a - b) the result always fits in an int type.
Also make sure the check of the remote_addr at the end is done
properly. The result is probably the same but this is the same
way it is done in many other places.

Unless I made a mistake the result should still be the same.
-- 
:wq Claudio

? obj
Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.79
diff -u -p -r1.79 rde_decide.c
--- rde_decide.c13 Jan 2021 11:34:01 -  1.79
+++ rde_decide.c13 Jan 2021 12:08:21 -
@@ -113,12 +113,12 @@ prefix_cmp(struct prefix *p1, struct pre
struct rde_peer *peer1, *peer2;
struct attr *a;
u_int32_tp1id, p2id;
-   int  p1cnt, p2cnt;
+   int  p1cnt, p2cnt, i;
 
if (p1 == NULL)
-   return (-1);
+   return -1;
if (p2 == NULL)
-   return (1);
+   return 1;
 
asp1 = prefix_aspath(p1);
asp2 = prefix_aspath(p2);
@@ -127,15 +127,15 @@ prefix_cmp(struct prefix *p1, struct pre
 
/* pathes with errors are not eligible */
if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR)
-   return (-1);
+   return -1;
if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR)
-   return (1);
+   return 1;
 
/* only loop free pathes are eligible */
if (asp1->flags & F_ATTR_LOOP)
-   return (-1);
+   return -1;
if (asp2->flags & F_ATTR_LOOP)
-   return (1);
+   return 1;
 
/*
 * 1. check if prefix is eligible a.k.a reachable
@@ -144,14 +144,16 @@ prefix_cmp(struct prefix *p1, struct pre
 */
if (prefix_nexthop(p2) != NULL &&
prefix_nexthop(p2)->state != NEXTHOP_REACH)
-   return (1);
+   return 1;
if (prefix_nexthop(p1) != NULL &&
prefix_nexthop(p1)->state != NEXTHOP_REACH)
-   return (-1);
+   return -1;
 
/* 2. local preference of prefix, bigger is better */
-   if ((asp1->lpref - asp2->lpref) != 0)
-   return (asp1->lpref - asp2->lpref);
+   if (asp1->lpref > asp2->lpref)
+   return 1;
+   if (asp1->lpref < asp2->lpref)
+   return -1;
 
/* 3. aspath count, the shorter the better */
if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0)
@@ -161,12 +163,19 @@ prefix_cmp(struct prefix *p1, struct pre
if ((asp2->origin - asp1->origin) != 0)
return (asp2->origin - asp1->origin);
 
-   /* 5. MED decision, only comparable between the same neighboring AS */
-   if (rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS ||
-   aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath))
+   /*
+* 5. MED decision
+* Only comparable between the same neighboring AS or if
+* 'rde med compare always' is set.
+*/
+   if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS) ||
+   aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) {
/* lowest value wins */
-   if ((asp2->med - asp1->med) != 0)
-   return (asp2->med - asp1->med);
+   if (asp1->med < asp2->med)
+   return 1;
+   if (asp1->med > asp2->med)
+   return -1;
+   }
 
/*
 * 6. EBGP is cooler than IBGP
@@ -187,8 +196,10 @@ prefix_cmp(struct prefix *p1, struct pre
 * a metric that weights a prefix at a very late stage in the
 * decision process.
 */
-   if ((asp1->weight - asp2->weight) != 0)
-   return (asp1->weight - asp2->weight);
+   if (asp1->weight > asp2->weight)
+   return 1;
+   if (asp1->weight < asp2->weight)
+   return -1;
 
/* 8. nexthop costs. NOT YET -> IGNORE */
 
@@ -196,9 +207,12 @@ prefix_cmp(struct prefix *p1, struct pre
 * 9. older route (more stable) wins but only if route-age
 * evaluation is enabled.
 */
-   if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE)
-   if ((p2->lastchange - p1->lastchange) != 0)
-   return (p2->lastchange - p1->lastchange);
+   if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) {
+   if (p1->lastchange < p2->lastchange) /* p1 is older */
+   return 1;
+   if (p1->lastchange > p2->lastchange)
+   return -1;
+   }
 
/* 10. lowest BGP Id wins, use ORIGINATOR_ID if present */
if ((a = attr_optget(asp1, ATTR_ORIGINATOR_ID)) != NULL) {
@@ -211,

bgpd fix route decision for strict med

2021-01-14 Thread Claudio Jeker
Currently bgpd does not properly handle strict med route decisions.
The problem is that the strict MED check only matters for aspaths with the
same neighbor as. The route decision process currently stops as soon as
the current prefix is better than the one checked in the list of prefixes.
Now in some cases this results in unstable decisions because the order of
insertions matters. Depending on the order any route may be selected.
The med.sh regress test I added shows this issue. Depending on the order
any of the 3 routes can be selected as best:

1:
flags ovs destination  gateway     lpref   med aspath origin
*>  N 10.12.1.0/24 10.12.57.4    100    50 64501 64510 i
*   N 10.12.1.0/24 10.12.57.2    100   100 64501 64510 i
*   N 10.12.1.0/24 10.12.57.3    100   100 64502 64510 i

2:
flags ovs destination  gateway     lpref   med aspath origin
*>  N 10.12.1.0/24 10.12.57.2    100   100 64501 64510 i
*   N 10.12.1.0/24 10.12.57.3    100   100 64502 64510 i
*   N 10.12.1.0/24 10.12.57.4    100    50 64501 64510 i

3 (and the actual expected result):
flags ovs destination  gateway     lpref   med aspath origin
*>  N 10.12.1.0/24 10.12.57.3    100   100 64502 64510 i
*   N 10.12.1.0/24 10.12.57.4    100    50 64501 64510 i
*   N 10.12.1.0/24 10.12.57.2    100   100 64501 64510 i

Additionally, removing a route requires reevaluating part of the routes in
some cases. For example, in case 3 if the middle route (with med 50) is
removed then the last route actually becomes best (bgpid is lower).

The following diff fixes this issue hopefully. On insertion if decisions
happen at or after the MED check (step 5) then all remaining routes need
to be checked (until a check before step 5 happens). Routes matching on
med that need to be re-evaluated are put on a redo queue and at the end of
the decision process are put back to get their order right.

Something similar happens when removing a prefix. If the next prefix
differs on a check after the MED check then again all those prefixes need
to be rechecked and maybe re-evaluated.

This change is important but also rather complex. Please test and if
possible validate that it does not cause troubles in your setup.
Btw. this only matters for 'rde med compare strict' (default). 
-- 
:wq Claudio

Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.80
diff -u -p -r1.80 rde_decide.c
--- rde_decide.c14 Jan 2021 08:29:26 -  1.80
+++ rde_decide.c14 Jan 2021 10:16:30 -
@@ -26,7 +26,9 @@
 #include "rde.h"
 #include "log.h"
 
-intprefix_cmp(struct prefix *, struct prefix *);
+intprefix_cmp(struct prefix *, struct prefix *, int *);
+void   prefix_insert(struct prefix *, struct prefix *, struct rib_entry *);
+void   prefix_remove(struct prefix *, struct rib_entry *);
 /*
  * Decision Engine RFC implementation:
  *  Phase 1:
@@ -107,7 +109,7 @@ int prefix_cmp(struct prefix *, struct p
  * already added prefix.
  */
 int
-prefix_cmp(struct prefix *p1, struct prefix *p2)
+prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall)
 {
struct rde_aspath   *asp1, *asp2;
struct rde_peer *peer1, *peer2;
@@ -115,6 +117,16 @@ prefix_cmp(struct prefix *p1, struct pre
u_int32_tp1id, p2id;
int  p1cnt, p2cnt, i;
 
+   /*
+* If a match happens before the MED check then the list is
+* correctly sorted. If a match happens after MED then it
+* may further elements need to be checked to make sure that
+* all path are considered that could affect this path.
+* If the check happens to be on MED signal this by setting
+* testall to 2.
+*/
+   *testall = 0;
+
if (p1 == NULL)
return -1;
if (p2 == NULL)
@@ -166,10 +178,14 @@ prefix_cmp(struct prefix *p1, struct pre
/*
 * 5. MED decision
 * Only comparable between the same neighboring AS or if
-* 'rde med compare always' is set.
+* 'rde med compare always' is set. In the first case
+* set the testall flag since further elements need to be
+* evaluated as well.
 */
if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS) ||
aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) {
+   if (!(rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS))
+   *testall = 2;
/* lowest value wins */
if (asp1->med < asp2->med)
return 1;
@@ -177,6 +193,9 @@ prefix_cmp(struct prefix *p1, struct pre
return -1;
}
 
+   if (!(rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS))
+   

bgpd adjust aspath_neighbor to follow RFC more closely

2021-01-14 Thread Claudio Jeker
The aspath_neighbor function returns the first AS of a path. Now if the
first element is an AS_SET then this does not really make sense.
RFC4271 has this bit in section 9.1.2.2

 Similarly, neighborAS(n) is a function that returns the
 neighbor AS from which the route was received.  If the route is
 learned via IBGP, and the other IBGP speaker didn't originate
 the route, it is the neighbor AS from which the other IBGP
 speaker learned the route.  If the route is learned via IBGP,
 and the other IBGP speaker either (a) originated the route, or
 (b) created the route by aggregation and the AS_PATH attribute
 of the aggregate route is either empty or begins with an
 AS_SET, it is the local AS.

bgpd uses aspath_neighbor() in a few spots:
- in the decision process to decide if MED should be compared or not
- in filters using 'peer-as'
- in the enforce neighbor-as check

I think the above mentioned behaviour is valid for all these cases and so
here is a diff that adjusts aspath_neighbor().

-- 
:wq Claudio

Index: rde_attr.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_attr.c,v
retrieving revision 1.123
diff -u -p -r1.123 rde_attr.c
--- rde_attr.c  24 Jun 2019 06:39:49 -  1.123
+++ rde_attr.c  14 Jan 2021 13:13:15 -
@@ -673,8 +673,13 @@ aspath_length(struct aspath *aspath)
 u_int32_t
 aspath_neighbor(struct aspath *aspath)
 {
-   /* Empty aspath is OK -- internal AS route. */
-   if (aspath->len == 0)
+   /*
+* Empty aspath is OK -- internal AS route.
+* Additionally the RFC specifies that if the path starts with an
+* AS_SET the neighbor AS is also the local AS.
+*/
+   if (aspath->len == 0 ||
+   aspath->data[0] != AS_SEQUENCE)
return (rde_local_as());
return (aspath_extract(aspath->data, 0));
 }



allow bgpd to reject AS_SET segemnts (enforce RFC6472)

2021-01-14 Thread Claudio Jeker
This diff adds 'reject as-set yes' as an option to filter out AS paths
with AS_SET segment elements. In bgpctl they show up with {} elements,
e.g. 174 6762 24835 { 36893 }.

This diff uses the soft-error path from RFC7606 and because of this
prefixes that have such an AS_SET segment will be removed via
treat-as-withdraw. These prefixes can be seen in `bgpctl show rib in error`.

By default this is turned off.
-- 
:wq Claudio

Index: bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.11
diff -u -p -r1.11 output.c
--- bgpctl/output.c 30 Dec 2020 07:31:19 -  1.11
+++ bgpctl/output.c 14 Jan 2021 14:31:45 -
@@ -647,8 +647,8 @@ show_attr(u_char *data, size_t len, stru
case ATTR_ASPATH:
case ATTR_AS4_PATH:
/* prefer 4-byte AS here */
-   e4 = aspath_verify(data, alen, 1);
-   e2 = aspath_verify(data, alen, 0);
+   e4 = aspath_verify(data, alen, 1, 0);
+   e2 = aspath_verify(data, alen, 0, 0);
if (e4 == 0 || e4 == AS_ERR_SOFT) {
path = data;
} else if (e2 == 0 || e2 == AS_ERR_SOFT) {
Index: bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.5
diff -u -p -r1.5 output_json.c
--- bgpctl/output_json.c30 Dec 2020 07:31:19 -  1.5
+++ bgpctl/output_json.c14 Jan 2021 14:31:57 -
@@ -598,8 +598,8 @@ json_attr(u_char *data, size_t len, stru
case ATTR_ASPATH:
case ATTR_AS4_PATH:
/* prefer 4-byte AS here */
-   e4 = aspath_verify(data, alen, 1);
-   e2 = aspath_verify(data, alen, 0);
+   e4 = aspath_verify(data, alen, 1, 0);
+   e2 = aspath_verify(data, alen, 0, 0);
if (e4 == 0 || e4 == AS_ERR_SOFT) {
path = data;
} else if (e2 == 0 || e2 == AS_ERR_SOFT) {
Index: bgpd/bgpd.conf.5
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
retrieving revision 1.205
diff -u -p -r1.205 bgpd.conf.5
--- bgpd/bgpd.conf.516 May 2020 16:58:11 -  1.205
+++ bgpd/bgpd.conf.514 Jan 2021 14:30:43 -
@@ -330,6 +330,20 @@ This renders the decision process nondet
 The default is
 .Ic ignore .
 .Pp
+.It Xo
+.Ic reject Ic as-set
+.Pq Ic yes Ns | Ns Ic no
+.Xc
+If set to
+.Ic yes ,
+.Em AS paths
+attributes containing
+.Em AS_SET
+path segements will be rejected and
+all prefixes will be treated as withdraws.
+The default is
+.Ic no .
+.Pp
 .It Ic router-id Ar dotted-quad
 Set the BGP router ID, which must be non-zero and should be unique
 within the AS.
@@ -1086,6 +1100,21 @@ statement defines the maximum hops the n
 .Pp
 .It Ic passive
 Do not attempt to actively open a TCP connection to the neighbor system.
+.Pp
+.It Xo
+.Ic reject Ic as-set
+.Pq Ic yes Ns | Ns Ic no
+.Xc
+If set to
+.Ic yes ,
+.Em AS paths
+attributes containing
+.Em AS_SET
+path segements will be rejected and
+all prefixes will be treated as withdraws.
+The default is inherited from the global
+.Ic reject Ic as-set
+setting.
 .Pp
 .It Ic remote-as Ar as-number
 Set the AS number of the remote system.
Index: bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.409
diff -u -p -r1.409 bgpd.h
--- bgpd/bgpd.h 4 Jan 2021 13:40:32 -   1.409
+++ bgpd/bgpd.h 14 Jan 2021 14:05:58 -
@@ -65,6 +65,7 @@
 #defineBGPD_FLAG_DECISION_ROUTEAGE 0x0100
 #defineBGPD_FLAG_DECISION_TRANS_AS 0x0200
 #defineBGPD_FLAG_DECISION_MED_ALWAYS   0x0400
+#defineBGPD_FLAG_NO_AS_SET 0x0800
 
 #defineBGPD_LOG_UPDATES0x0001
 
@@ -427,6 +428,7 @@ struct peer_config {
 
 #define PEERFLAG_TRANS_AS  0x01
 #define PEERFLAG_LOG_UPDATES   0x02
+#define PEERFLAG_NO_AS_SET 0x04
 
 enum network_type {
NETWORK_DEFAULT,/* from network statements */
@@ -1346,7 +1348,7 @@ intaspath_snprint(char *, size_t, voi
 int aspath_asprint(char **, void *, u_int16_t);
 size_t  aspath_strlen(void *, u_int16_t);
 u_int32_t   aspath_extract(const void *, int);
-int aspath_verify(void *, u_int16_t, int);
+int aspath_verify(void *, u_int16_t, int, int);
 #define AS_ERR_LEN -1
 #define AS_ERR_TYPE-2
 #define AS_ERR_BAD -3
Index: bgpd/parse.y
===
RCS file: /cvs/src/usr.sbin/bgpd/parse.y,v
retrieving revision 1.411
diff -u -p -r1.411 parse.y
--- bgpd/parse.y29 Dec 2020 15:30:34 -  1.411
+++ bgpd/parse.y14 Jan 2021 14:17:19 -
@@ -623,6 +6

Re: Change bgpd_addr encoding of VPN v4 and v6 addresses

2021-01-14 Thread Claudio Jeker
On Thu, Jan 14, 2021 at 08:22:45PM +0100, Denis Fondras wrote:
> Le Tue, Jan 12, 2021 at 10:06:46AM +0100, Claudio Jeker a écrit :
> > On Tue, Jan 05, 2021 at 11:17:22AM +0100, Claudio Jeker wrote:
> > > While changing log_addr() I noticed that struct bgpd_addr could benefit
> > > from changing the encoding of AID_VPN_IPv4 and AID_VPN_IPv6 addrs.
> > > Instead of having independent route distinguishers and labelstacks use
> > > common fields for those and use the v4 and v6 addresses for the prefix.
> > > This is a bit more compact but also simplifies some code since the
> > > handling of AID_VPN_IPv4 and AID_VPN_IPv6 can be handled in the same
> > > switch case.
> > > 
> > > I reduced the labelstack size from 21 to 18 (6 instead of 7 labels). Now
> > > in theory you could pack 7 labels into an IPv4 VPN NLRI (8bit prefixlen +
> > > 64bit RD + 16bit prefix + 21 * 8bit label = 256) but that is quite silly.
> > > Even 6 labels is more than enough. bgpd itself only allows a single MPLS
> > > label when announcing such networks.
> > 
> > Ping
> >  
> > Index: bgpd/util.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/util.c,v
> > retrieving revision 1.58
> > diff -u -p -r1.58 util.c
> > --- bgpd/util.c 5 Jan 2021 10:00:28 -   1.58
> > +++ bgpd/util.c 5 Jan 2021 10:05:15 -
> > @@ -666,9 +669,17 @@ prefix_compare(const struct bgpd_addr *a
> > mask = htonl(prefixlen2mask(prefixlen));
> > aa = ntohl(a->v4.s_addr & mask);
> > ba = ntohl(b->v4.s_addr & mask);
> > -   if (aa != ba)
> > -   return (aa - ba);
> > -   return (0);
> > +   if (aa > ba)
> > +   return (1);
> > +   if (aa < ba)
> > +   return (1);
> 
> I guess it is -1 here.
> 
> Otherwise OK denis@

Thanks for spotting this glitch. That would have caused some havoc for
sure. 

-- 
:wq Claudio



Re: allow bgpd to reject AS_SET segemnts (enforce RFC6472)

2021-01-14 Thread Claudio Jeker
On Thu, Jan 14, 2021 at 04:28:42PM +0100, Claudio Jeker wrote:
> This diff adds 'reject as-set yes' as an option to filter out AS paths
> with AS_SET segement elements. In bgpctl they show up with {} elements,
> e.g. 174 6762 24835 { 36893 }.
> 
> This diff uses the soft-error path from RFC7606 and because of this
> prefixes that have such an AS_SET segment will be removed via
> treat-as-withdraw. These prefixes can be seen in `bgpctl show rib in error`.
> 
> By default this is turned off.

This version includes the printconf.c bits to show the setting in bgpd -nv.

-- 
:wq Claudio

Index: bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.11
diff -u -p -r1.11 output.c
--- bgpctl/output.c 30 Dec 2020 07:31:19 -  1.11
+++ bgpctl/output.c 14 Jan 2021 14:31:45 -
@@ -647,8 +647,8 @@ show_attr(u_char *data, size_t len, stru
case ATTR_ASPATH:
case ATTR_AS4_PATH:
/* prefer 4-byte AS here */
-   e4 = aspath_verify(data, alen, 1);
-   e2 = aspath_verify(data, alen, 0);
+   e4 = aspath_verify(data, alen, 1, 0);
+   e2 = aspath_verify(data, alen, 0, 0);
if (e4 == 0 || e4 == AS_ERR_SOFT) {
path = data;
} else if (e2 == 0 || e2 == AS_ERR_SOFT) {
Index: bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.5
diff -u -p -r1.5 output_json.c
--- bgpctl/output_json.c30 Dec 2020 07:31:19 -  1.5
+++ bgpctl/output_json.c14 Jan 2021 14:31:57 -
@@ -598,8 +598,8 @@ json_attr(u_char *data, size_t len, stru
case ATTR_ASPATH:
case ATTR_AS4_PATH:
/* prefer 4-byte AS here */
-   e4 = aspath_verify(data, alen, 1);
-   e2 = aspath_verify(data, alen, 0);
+   e4 = aspath_verify(data, alen, 1, 0);
+   e2 = aspath_verify(data, alen, 0, 0);
if (e4 == 0 || e4 == AS_ERR_SOFT) {
path = data;
} else if (e2 == 0 || e2 == AS_ERR_SOFT) {
Index: bgpd/bgpd.conf.5
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.conf.5,v
retrieving revision 1.205
diff -u -p -r1.205 bgpd.conf.5
--- bgpd/bgpd.conf.516 May 2020 16:58:11 -  1.205
+++ bgpd/bgpd.conf.514 Jan 2021 14:30:43 -
@@ -330,6 +330,20 @@ This renders the decision process nondet
 The default is
 .Ic ignore .
 .Pp
+.It Xo
+.Ic reject Ic as-set
+.Pq Ic yes Ns | Ns Ic no
+.Xc
+If set to
+.Ic yes ,
+.Em AS paths
+attributes containing
+.Em AS_SET
+path segements will be rejected and
+all prefixes will be treated as withdraws.
+The default is
+.Ic no .
+.Pp
 .It Ic router-id Ar dotted-quad
 Set the BGP router ID, which must be non-zero and should be unique
 within the AS.
@@ -1086,6 +1100,21 @@ statement defines the maximum hops the n
 .Pp
 .It Ic passive
 Do not attempt to actively open a TCP connection to the neighbor system.
+.Pp
+.It Xo
+.Ic reject Ic as-set
+.Pq Ic yes Ns | Ns Ic no
+.Xc
+If set to
+.Ic yes ,
+.Em AS paths
+attributes containing
+.Em AS_SET
+path segements will be rejected and
+all prefixes will be treated as withdraws.
+The default is inherited from the global
+.Ic reject Ic as-set
+setting.
 .Pp
 .It Ic remote-as Ar as-number
 Set the AS number of the remote system.
Index: bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.409
diff -u -p -r1.409 bgpd.h
--- bgpd/bgpd.h 4 Jan 2021 13:40:32 -   1.409
+++ bgpd/bgpd.h 14 Jan 2021 14:05:58 -
@@ -65,6 +65,7 @@
 #defineBGPD_FLAG_DECISION_ROUTEAGE 0x0100
 #defineBGPD_FLAG_DECISION_TRANS_AS 0x0200
 #defineBGPD_FLAG_DECISION_MED_ALWAYS   0x0400
+#defineBGPD_FLAG_NO_AS_SET 0x0800
 
 #defineBGPD_LOG_UPDATES0x0001
 
@@ -427,6 +428,7 @@ struct peer_config {
 
 #define PEERFLAG_TRANS_AS  0x01
 #define PEERFLAG_LOG_UPDATES   0x02
+#define PEERFLAG_NO_AS_SET 0x04
 
 enum network_type {
NETWORK_DEFAULT,/* from network statements */
@@ -1346,7 +1348,7 @@ intaspath_snprint(char *, size_t, voi
 int aspath_asprint(char **, void *, u_int16_t);
 size_t  aspath_strlen(void *, u_int16_t);
 u_int32_t   aspath_extract(const void *, int);
-int aspath_verify(void *, u_int16_t, int);
+int aspath_verify(void *, u_int16_t, int, int);
 #define AS_ERR_LEN -1
 #define AS_ERR_TYPE-2
 #define AS_ERR_BAD -3
Index: bgpd/parse.y
===
RCS file: /cvs/s

Add if_mreqn support to IP_MULTICAST_IF

2021-01-15 Thread Claudio Jeker
I forgot to add ip_mreqn support to IP_MULTICAST_IF and so the
IP_ADD_MEMBERSHIP change is not fixing all the issues I have.

Linux supports calling IP_MULTICAST_IF with a struct in_addr, a struct
ip_mreq, or a struct ip_mreqn. FreeBSD only does the first and last.
I followed the Linux way because doing that was not that hard. In the end
only the imr_ifindex field and the imr_address field need to be checked
and if the imr_ifindex is 0 then just use the old code. If the imr_ifindex
is set then use this for interface index and break early.

Any opinions about this?
-- 
:wq Claudio

Index: netinet/ip_output.c
===
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.360
diff -u -p -r1.360 ip_output.c
--- netinet/ip_output.c 11 Jan 2021 13:28:53 -  1.360
+++ netinet/ip_output.c 15 Jan 2021 12:20:26 -
@@ -1423,11 +1423,40 @@ ip_setmoptions(int optname, struct ip_mo
/*
 * Select the interface for outgoing multicast packets.
 */
-   if (m == NULL || m->m_len != sizeof(struct in_addr)) {
+   if (m == NULL) {
+   error = EINVAL;
+   break;
+   }
+   if (m->m_len == sizeof(struct in_addr)) {
+   addr = *(mtod(m, struct in_addr *));
+   } else if (m->m_len == sizeof(struct ip_mreq) ||
+   m->m_len == sizeof(struct ip_mreqn)) {
+   memset(&mreqn, 0, sizeof(mreqn));
+   memcpy(&mreqn, mtod(m, void *), m->m_len);
+
+   /*
+* If an interface index is given use this
+* index to set the imo_ifidx but check first
+* that the interface actually exists.
+* In the other case just set the addr to
+* the imr_address and fall through to the
+* regular code.
+*/
+   if (mreqn.imr_ifindex != 0) {
+   ifp = if_get(mreqn.imr_ifindex);
+   if (ifp == NULL) {
+   error = EADDRNOTAVAIL;
+   break;
+   }
+   imo->imo_ifidx = ifp->if_index;
+   if_put(ifp);
+   break;
+   } else
+   addr = mreqn.imr_address;
+   } else {
error = EINVAL;
break;
}
-   addr = *(mtod(m, struct in_addr *));
/*
 * INADDR_ANY is used to remove a previous selection.
 * When no interface is selected, a default one is



Re: Add if_mreqn support to IP_MULTICAST_IF

2021-01-15 Thread Claudio Jeker
On Fri, Jan 15, 2021 at 02:53:17PM +0100, Claudio Jeker wrote:
> I forgot to add ip_mreqn support to IP_MULTICAST_IF and so the
> IP_ADD_MEMBERSHIP change is not fixing all the issues I have.
> 
> Linux supports calling IP_MULTICAST_IF with a struct in_addr, a struct
> ip_mreq, or a struct ip_mreqn. FreeBSD only does the first and last.
> I followed the Linux way because doing that was not that hard. In the end
> only the imr_ifindex field and the imr_address field need to be checked
> and if the imr_ifindex is 0 then just use the old code. If the imr_ifindex
> is set then use this for interface index and break early.
> 
> Any opinions about this?

This is the corresponding diff for ospfd.

Additionally this initializes the imr_address field. It is not used but we
should not send stack garbage to the kernel.

-- 
:wq Claudio

Index: interface.c
===
RCS file: /cvs/src/usr.sbin/ospfd/interface.c,v
retrieving revision 1.85
diff -u -p -r1.85 interface.c
--- interface.c 12 Jan 2021 09:11:09 -  1.85
+++ interface.c 15 Jan 2021 14:00:39 -
@@ -734,6 +734,7 @@ if_join_group(struct iface *iface, struc
return (0);
 
mreq.imr_multiaddr.s_addr = addr->s_addr;
+   mreq.imr_address.s_addr = 0;
mreq.imr_ifindex = iface->ifindex;
 
if (setsockopt(iface->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
@@ -782,6 +783,7 @@ if_leave_group(struct iface *iface, stru
}
 
mreq.imr_multiaddr.s_addr = addr->s_addr;
+   mreq.imr_address.s_addr = 0;
mreq.imr_ifindex = iface->ifindex;
 
if (setsockopt(iface->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP,
@@ -808,11 +810,15 @@ if_leave_group(struct iface *iface, stru
 int
 if_set_mcast(struct iface *iface)
 {
+   struct ip_mreqn  mreq;
+
switch (iface->type) {
case IF_TYPE_POINTOPOINT:
case IF_TYPE_BROADCAST:
+   memset(&mreq, 0, sizeof(mreq));
+   mreq.imr_ifindex = iface->ifindex;
if (setsockopt(iface->fd, IPPROTO_IP, IP_MULTICAST_IF,
-   &iface->addr.s_addr, sizeof(iface->addr.s_addr)) == -1) {
+   &mreq, sizeof(mreq)) == -1) {
log_warn("if_set_mcast: error setting "
"IP_MULTICAST_IF, interface %s", iface->name);
return (-1);



ospfd -fno-common fixes

2021-01-18 Thread Claudio Jeker
This is my try at cleaning up commons in ospfd.
I made one big combined diff but will probably split up a few things
into own commits. E.g. the lsupdate.c and lsreq.c ones.

I had to clean up the control.c code a bit since this was a bit of a mess.
While in bgpd I was able to remove the global bgpd_process variable this
is not really possible in ospfd. Too much code depends on it and unrolling
it just gets messy.

-- 
:wq Claudio

Index: control.c
===
RCS file: /cvs/src/usr.sbin/ospfd/control.c,v
retrieving revision 1.46
diff -u -p -r1.46 control.c
--- control.c   16 Sep 2020 20:50:10 -  1.46
+++ control.c   18 Jan 2021 10:11:05 -
@@ -32,12 +32,20 @@
 #include "log.h"
 #include "control.h"
 
+TAILQ_HEAD(ctl_conns, ctl_conn)ctl_conns = 
TAILQ_HEAD_INITIALIZER(ctl_conns);
+
 #defineCONTROL_BACKLOG 5
 
 struct ctl_conn*control_connbyfd(int);
 struct ctl_conn*control_connbypid(pid_t);
 voidcontrol_close(int);
 
+struct {
+   struct eventev;
+   struct eventevt;
+   int fd;
+} control_state;
+
 int
 control_check(char *path)
 {
@@ -108,8 +116,9 @@ control_init(char *path)
 }
 
 int
-control_listen(void)
+control_listen(int fd)
 {
+   control_state.fd = fd;
 
if (listen(control_state.fd, CONTROL_BACKLOG) == -1) {
log_warn("control_listen: listen");
Index: control.h
===
RCS file: /cvs/src/usr.sbin/ospfd/control.h,v
retrieving revision 1.8
diff -u -p -r1.8 control.h
--- control.h   16 Sep 2020 20:50:10 -  1.8
+++ control.h   18 Jan 2021 10:11:18 -
@@ -23,12 +23,6 @@
 #include 
 #include 
 
-struct {
-   struct eventev;
-   struct eventevt;
-   int fd;
-} control_state;
-
 struct ctl_conn {
TAILQ_ENTRY(ctl_conn)   entry;
struct imsgev   iev;
@@ -36,7 +30,7 @@ struct ctl_conn {
 
 intcontrol_check(char *);
 intcontrol_init(char *);
-intcontrol_listen(void);
+intcontrol_listen(int);
 void   control_accept(int, short, void *);
 void   control_dispatch_imsg(int, short, void *);
 intcontrol_imsg_relay(struct imsg *);
Index: lsreq.c
===
RCS file: /cvs/src/usr.sbin/ospfd/lsreq.c,v
retrieving revision 1.21
diff -u -p -r1.21 lsreq.c
--- lsreq.c 15 Jul 2019 18:26:39 -  1.21
+++ lsreq.c 18 Jan 2021 10:48:04 -
@@ -27,8 +27,6 @@
 #include "log.h"
 #include "ospfe.h"
 
-extern struct imsgev   *iev_rde;
-
 /* link state request packet handling */
 int
 send_ls_req(struct nbr *nbr)
@@ -107,8 +105,7 @@ recv_ls_req(struct nbr *nbr, char *buf, 
case NBR_STA_XCHNG:
case NBR_STA_LOAD:
case NBR_STA_FULL:
-   imsg_compose_event(iev_rde, IMSG_LS_REQ, nbr->peerid,
-   0, -1, buf, len);
+   ospfe_imsg_compose_rde(IMSG_LS_REQ, nbr->peerid, 0, buf, len);
break;
default:
fatalx("recv_ls_req: unknown neighbor state");
Index: lsupdate.c
===
RCS file: /cvs/src/usr.sbin/ospfd/lsupdate.c,v
retrieving revision 1.48
diff -u -p -r1.48 lsupdate.c
--- lsupdate.c  6 May 2020 14:40:54 -   1.48
+++ lsupdate.c  18 Jan 2021 10:48:10 -
@@ -32,9 +32,6 @@
 #include "ospfe.h"
 #include "rde.h"
 
-extern struct ospfd_conf   *oeconf;
-extern struct imsgev   *iev_rde;
-
 struct ibuf *prepare_ls_update(struct iface *);
 intadd_ls_update(struct ibuf *, struct iface *, void *, u_int16_t,
u_int16_t);
@@ -276,8 +273,8 @@ recv_ls_update(struct nbr *nbr, char *bu
"neighbor ID %s", inet_ntoa(nbr->id));
return;
}
-   imsg_compose_event(iev_rde, IMSG_LS_UPD, nbr->peerid, 0,
-   -1, buf, ntohs(lsa.len));
+   ospfe_imsg_compose_rde(IMSG_LS_UPD, nbr->peerid, 0,
+   buf, ntohs(lsa.len));
buf += ntohs(lsa.len);
len -= ntohs(lsa.len);
}
Index: ospfd.c
===
RCS file: /cvs/src/usr.sbin/ospfd/ospfd.c,v
retrieving revision 1.114
diff -u -p -r1.114 ospfd.c
--- ospfd.c 16 Sep 2020 20:50:10 -  1.114
+++ ospfd.c 18 Jan 2021 10:50:40 -
@@ -64,9 +64,10 @@ int  pipe_parent2ospfe[2];
 intpipe_parent2rde[2];
 intpipe_ospfe2rde[2];
 
+enum ospfd_process  ospfd_process;
 struct ospfd_conf  *ospfd_conf = NULL;
-struct imsgev  *iev_ospfe;
-struct imsgev  *iev_rde;
+static struct imsgev   *iev_ospfe;
+static struct imsgev   *iev_rde;
 char   *conffile;
 
 pid_t   ospfe_pid = 0;
Index: ospfd.h
===

Re: IPPROTO_SCTP

2021-01-18 Thread Claudio Jeker
On Mon, Jan 18, 2021 at 12:13:32PM +, Stuart Henderson wrote:
> can I add IPPROTO_SCTP to in.h? only one port wants it at the
> moment, but I think I've seen others in the past.

OK claudio@

> Index: netinet/in.h
> ===
> RCS file: /cvs/src/sys/netinet/in.h,v
> retrieving revision 1.139
> diff -u -p -r1.139 in.h
> --- netinet/in.h  7 Jan 2021 14:51:46 -   1.139
> +++ netinet/in.h  18 Jan 2021 12:12:02 -
> @@ -97,6 +97,7 @@ typedef __in_port_t in_port_t;  /* IP por
>  #define  IPPROTO_PIM 103 /* Protocol indep. 
> multicast */
>  #define  IPPROTO_IPCOMP  108 /* IP Payload Comp. 
> Protocol */
>  #define  IPPROTO_CARP112 /* CARP */
> +#define  IPPROTO_SCTP132 /* SCTP, RFC 4960 */
>  #define  IPPROTO_UDPLITE 136 /* UDP-Lite, RFC 3828 */
>  #define  IPPROTO_MPLS137 /* unicast MPLS packet 
> */
>  #define  IPPROTO_PFSYNC  240 /* PFSYNC */
> 

-- 
:wq Claudio



Re: -fno-common fixes for slaacd, unwind & rad

2021-01-18 Thread Claudio Jeker
On Mon, Jan 18, 2021 at 05:31:21PM +0100, Florian Obser wrote:
> This is my take on -fno-common fixes.
> 
> slaacd, unwind and rad are based on the same template so the fixes
> were similar
> 
> - remove global $daemon_process, just use a const string for
>   setproctitle
> - move ctl_conns to control.c and control_state to frontend.c,
>   control_state needs to be extern because it's shared between
>   frontend.c and control.c. (I see that claudio fixed this differently
>   in ospfd)

It is pretty similar, the only change is that I also removed control_state
from the frontend code and just pass the fd to control_listen(). This way
the full control_state becomes internal.

> - give imsgevs unique names by using proc1_to_proc2 variable names
> - other fixes unique per daemon

I glanced over this and it looks ok.

-- 
:wq Claudio



Re: bpf(4) doesn't have to keep track of nonblocking state itself

2021-01-19 Thread Claudio Jeker
On Tue, Jan 19, 2021 at 10:11:14AM +1000, David Gwynne wrote:
> vfs does it for us.
> 
> ok?

OK claudio@
 
> Index: bpf.c
> ===
> RCS file: /cvs/src/sys/net/bpf.c,v
> retrieving revision 1.202
> diff -u -p -r1.202 bpf.c
> --- bpf.c 17 Jan 2021 02:27:29 -  1.202
> +++ bpf.c 19 Jan 2021 00:10:22 -
> @@ -379,7 +379,6 @@ bpfopen(dev_t dev, int flag, int mode, s
>   sigio_init(&bd->bd_sigio);
>  
>   bd->bd_rtout = 0;   /* no timeout by default */
> - bd->bd_rnonblock = ISSET(flag, FNONBLOCK);
>  
>   bpf_get(bd);
>   LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);
> @@ -497,7 +496,7 @@ bpfread(dev_t dev, struct uio *uio, int 
>   ROTATE_BUFFERS(d);
>   break;
>   }
> - if (d->bd_rnonblock) {
> + if (ISSET(ioflag, IO_NDELAY)) {
>   /* User requested non-blocking I/O */
>   error = EWOULDBLOCK;
>   } else if (d->bd_rtout == 0) {
> @@ -982,10 +981,7 @@ bpfioctl(dev_t dev, u_long cmd, caddr_t 
>   break;
>  
>   case FIONBIO:   /* Non-blocking I/O */
> - if (*(int *)addr)
> - d->bd_rnonblock = 1;
> - else
> - d->bd_rnonblock = 0;
> + /* let vfs to keep track of this */
>   break;
>  
>   case FIOASYNC:  /* Send signal on receive packets */
> Index: bpfdesc.h
> ===
> RCS file: /cvs/src/sys/net/bpfdesc.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 bpfdesc.h
> --- bpfdesc.h 2 Jan 2021 02:46:06 -   1.44
> +++ bpfdesc.h 19 Jan 2021 00:10:22 -
> @@ -80,7 +80,6 @@ struct bpf_d {
>   struct bpf_if  *bd_bif; /* interface descriptor */
>   uint64_tbd_rtout;   /* [m] Read timeout in nanoseconds */
>   u_long  bd_nreaders;/* [m] # threads asleep in bpfread() */
> - int bd_rnonblock;   /* true if nonblocking reads are set */
>   struct bpf_program_smr
>  *bd_rfilter; /* read filter code */
>   struct bpf_program_smr
> 

-- 
:wq Claudio



iscsid initiator cleanup

2021-01-19 Thread Claudio Jeker
In iscsid the initiator is kind of a singleton. So let's make use of this
and remove the initiator argument from all function calls.

This compiles for me but I can't currently test this out.
-- 
:wq Claudio

Index: initiator.c
===
RCS file: /cvs/src/usr.sbin/iscsid/initiator.c,v
retrieving revision 1.15
diff -u -p -r1.15 initiator.c
--- initiator.c 16 Jan 2015 15:57:06 -  1.15
+++ initiator.c 19 Jan 2021 13:09:31 -
@@ -33,7 +33,7 @@
 #include "iscsid.h"
 #include "log.h"
 
-struct initiator *initiator;
+static struct initiator *initiator;
 
 struct task_login {
struct task  task;
@@ -62,7 +62,7 @@ void  initiator_logout_cb(struct connecti
 struct session_params  initiator_sess_defaults;
 struct connection_params   initiator_conn_defaults;
 
-struct initiator *
+void
 initiator_init(void)
 {
if (!(initiator = calloc(1, sizeof(*initiator
@@ -78,24 +78,34 @@ initiator_init(void)
initiator_conn_defaults = iscsi_conn_defaults;
initiator_sess_defaults.MaxConnections = ISCSID_DEF_CONNS;
initiator_conn_defaults.MaxRecvDataSegmentLength = 65536;
-
-   return initiator;
 }
 
 void
-initiator_cleanup(struct initiator *i)
+initiator_cleanup(void)
 {
struct session *s;
 
-   while ((s = TAILQ_FIRST(&i->sessions)) != NULL) {
-   TAILQ_REMOVE(&i->sessions, s, entry);
+   while ((s = TAILQ_FIRST(&initiator->sessions)) != NULL) {
+   TAILQ_REMOVE(&initiator->sessions, s, entry);
session_cleanup(s);
}
free(initiator);
 }
 
 void
-initiator_shutdown(struct initiator *i)
+initiator_set_config(struct initiator_config *ic)
+{
+   initiator->config = *ic;
+}
+
+struct initiator_config *
+initiator_get_config(void)
+{
+   return &initiator->config;
+}
+
+void
+initiator_shutdown(void)
 {
struct session *s;
 
@@ -106,7 +116,7 @@ initiator_shutdown(struct initiator *i)
 }
 
 int
-initiator_isdown(struct initiator *i)
+initiator_isdown(void)
 {
struct session *s;
int inprogres = 0;
@@ -119,6 +129,46 @@ initiator_isdown(struct initiator *i)
 }
 
 struct session *
+initiator_new_session(u_int8_t st)
+{
+   struct session *s;
+
+   if (!(s = calloc(1, sizeof(*s
+   return NULL;
+
+   /* use the same qualifier unless there is a conflict */
+   s->isid_base = initiator->config.isid_base;
+   s->isid_qual = initiator->config.isid_qual;
+   s->cmdseqnum = arc4random();
+   s->itt = arc4random();
+   s->state = SESS_INIT;
+
+   if (st == SESSION_TYPE_DISCOVERY)
+   s->target = 0;
+   else
+   s->target = initiator->target++;
+
+   TAILQ_INIT(&s->connections);
+   TAILQ_INIT(&s->tasks);
+
+   TAILQ_INSERT_HEAD(&initiator->sessions, s, entry);
+
+   return s;
+}
+
+struct session *
+initiator_find_session(char *name)
+{
+   struct session *s;
+
+   TAILQ_FOREACH(s, &initiator->sessions, entry) {
+   if (strcmp(s->config.SessionName, name) == 0)
+   return s;
+   }
+   return NULL;
+}
+
+struct session *
 initiator_t2s(u_int target)
 {
struct session *s;
@@ -128,6 +178,12 @@ initiator_t2s(u_int target)
return s;
}
return NULL;
+}
+
+struct session_head *
+initiator_get_sessions(void)
+{
+   return &initiator->sessions;
 }
 
 void
Index: iscsid.c
===
RCS file: /cvs/src/usr.sbin/iscsid/iscsid.c,v
retrieving revision 1.20
diff -u -p -r1.20 iscsid.c
--- iscsid.c23 Jan 2017 08:40:07 -  1.20
+++ iscsid.c19 Jan 2021 13:15:23 -
@@ -38,7 +38,6 @@ void  main_sig_handler(int, short, void 
 __dead voidusage(void);
 void   shutdown_cb(int, short, void *);
 
-struct initiator *initiator;
 struct event exit_ev;
 int exit_rounds;
 #define ISCSI_EXIT_WAIT 5
@@ -146,13 +145,13 @@ main(int argc, char *argv[])
signal(SIGPIPE, SIG_IGN);
 
control_event_init();
-   initiator = initiator_init();
+   initiator_init();
 
event_dispatch();
 
/* do some cleanup on the way out */
control_cleanup(ctrlsock);
-   initiator_cleanup(initiator);
+   initiator_cleanup();
log_info("exiting.");
return 0;
 }
@@ -162,7 +161,7 @@ shutdown_cb(int fd, short event, void *a
 {
struct timeval tv;
 
-   if (exit_rounds++ >= ISCSI_EXIT_WAIT || initiator_isdown(initiator))
+   if (exit_rounds++ >= ISCSI_EXIT_WAIT || initiator_isdown())
event_loopexit(NULL);
 
timerclear(&tv);
@@ -182,7 +181,7 @@ main_sig_handler(int sig, short event, v
case SIGTERM:
case SIGINT:
case SIGHUP:
-   initiator_shutdown(initiator);
+   initiator_shutdown();
evtimer_set(&exit_ev, shutdown_cb, NULL);

Re: dig(1): replace inet_net_pton(3)

2021-01-20 Thread Claudio Jeker
On Tue, Jan 19, 2021 at 07:49:29PM +0100, Florian Obser wrote:
> When we converted isc_sockaddr_t to sockaddr_storage we also moved to
> inet_net_pton(3). It turns out that was a mistake, at least it's not
> portable for AF_INET6. Effectively revert that part and hand-roll it
> using inet_pton(3).
> 
> OK?

I thought inet_net_pton() for AF_INET would still be ok. Handling the
AF_INET case by hand here seems rather unpleasant.

Since this code uses sockaddr to store the result, why not use
getaddrinfo()?

In bgpd the code does more or less this:
1. extract the /prefixlen like you do.
2. use getaddrinfo() on the prefix
3. if that fails use inet_net_pton(AF_INET) to parse short forms (like 10/8)

Note: I think the bgpd code could also use some further cleanup.

I'm not sure if it makes sense to force this code to be portable by not
using inet_net_pton(3). The code is currently non-portable because it uses
the len fields of the sockaddr structs. So why bother about this function?
In general if we plan to make a portable version of our dig the framework
can provide a working inet_net_pton(3) version.

> p.s. it is kinda telling that isc, who introduced the API is (no
> longer?) using it.
> 
> diff --git dighost.c dighost.c
> index 2d2a52c86e2..2995b7e1602 100644
> --- dighost.c
> +++ dighost.c
> @@ -935,10 +935,16 @@ parse_netprefix(struct sockaddr_storage **sap, int 
> *plen, const char *value) {
>   struct sockaddr_storage *sa = NULL;
>   struct in_addr *in4;
>   struct in6_addr *in6;
> - int prefix_length;
> + int prefix_length = -1, i;
> + char *sep;
> + char buf[INET6_ADDRSTRLEN + sizeof("/128")];
> + const char *errstr;
>  
>   REQUIRE(sap != NULL && *sap == NULL);
>  
> + if (strlcpy(buf, value, sizeof(buf)) >= sizeof(buf))
> + fatal("invalid prefix '%s'\n", value);
> +
>   sa = calloc(1, sizeof(*sa));
>   if (sa == NULL)
>   fatal("out of memory");
> @@ -952,14 +958,36 @@ parse_netprefix(struct sockaddr_storage **sap, int 
> *plen, const char *value) {
>   goto done;
>   }
>  
> - if ((prefix_length = inet_net_pton(AF_INET6, value, in6, sizeof(*in6)))
> - != -1) {
> + sep = strchr(buf, '/');
> + if (sep != NULL) {
> + *sep++ = '\0';
> + prefix_length = strtonum(sep, 0, 128, &errstr);
> + if (errstr != NULL)
> + fatal("invalid address '%s'", value);
> + }
> +
> + if (inet_pton(AF_INET6, buf, in6) == 1) {
>   sa->ss_len = sizeof(struct sockaddr_in6);
>   sa->ss_family = AF_INET6;
> - } else if ((prefix_length = inet_net_pton(AF_INET, value, in4,
> - sizeof(*in4))) != -1) {
> + if (prefix_length > 128 || prefix_length == -1)

prefix_length can't be bigger than 128. But I see why it makes sense to do
the same here :)

> + prefix_length = 128;
> + } else if (inet_pton(AF_INET, buf, in4) == 1) {
>   sa->ss_len = sizeof(struct sockaddr_in);
>   sa->ss_family = AF_INET;
> + if (prefix_length > 32 || prefix_length == -1)
> + prefix_length = 32;

In my opinion 10.0.0.0/75 is not a valid address and should not be
coerced to 10.0.0.0. This should result in an error.

> + } else if (prefix_length != -1) {
> + if (prefix_length > 32)
> + prefix_length = 32;

Same here.

> + for (i = 0; i < 3 ; i++) {
> + if (strlcat(buf, ".0", sizeof(buf)) > sizeof(buf))
> + fatal("invalid address '%s'", value);
> + if (inet_pton(AF_INET, buf, in4) == 1) {
> + sa->ss_len = sizeof(struct sockaddr_in);
> + sa->ss_family = AF_INET;
> + goto done;
> + }
> + }
>   } else
>   fatal("invalid address '%s'", value);
>  

-- 
:wq Claudio



Re: bgpd fix route decision for strict med

2021-01-22 Thread Claudio Jeker
On Thu, Jan 14, 2021 at 12:27:54PM +0100, Claudio Jeker wrote:
> Currently bgpd does not properly handle strict med route decisions.
> The problem is that the strict MED check only matters for aspaths with the
> same neighbor AS. The route decision process currently stops as soon as
> the current prefix is better than the one checked in the list of prefixes.
> Now in some cases this results in unstable decisions because the order of
> insertions matters. Depending on the order any route may be selected.
> The med.sh regress test I added shows this issue. Depending on the order
> any of the 3 routes can be selected as best:
> 
> 1:
> flags ovs destination  gateway  lpref   med aspath origin
> *>  N 10.12.1.0/24 10.12.57.410050 64501 64510 i
> *   N 10.12.1.0/24 10.12.57.2100   100 64501 64510 i
> *   N 10.12.1.0/24 10.12.57.3100   100 64502 64510 i
> 
> 2:
> flags ovs destination  gateway  lpref   med aspath origin
> *>  N 10.12.1.0/24 10.12.57.2100   100 64501 64510 i
> *   N 10.12.1.0/24 10.12.57.3100   100 64502 64510 i
> *   N 10.12.1.0/24 10.12.57.410050 64501 64510 i
> 
> 3 (and the actual expected result):
> flags ovs destination  gateway  lpref   med aspath origin
> *>  N 10.12.1.0/24 10.12.57.3100   100 64502 64510 i
> *   N 10.12.1.0/24 10.12.57.410050 64501 64510 i
> *   N 10.12.1.0/24 10.12.57.2100   100 64501 64510 i
> 
> Additionally, removing a route requires reevaluating part of the routes in
> some cases. For example, in case 3 if the middle route (with med 50) is
> removed then the last route actually becomes best (bgpid is lower).
> 
> The following diff fixes this issue hopefully. On insertion if decisions
> happen at or after the MED check (step 5) then all remaining routes need
> to be checked (until a check before step 5 happens). Routes matching on
> med that need to be re-evaluated are put on a redo queue and at the end of
> the decision process are put back to get their order right.
> 
> Something similar happens when removing a prefix. If the next prefix
> differs on a check after the MED check then again all those prefixes need
> to be rechecked and maybe re-evaluated.
> 
> This change is important but also rather complex. Please test and if
> possible validate that it does not cause troubles in your setup.
> Btw. this only matters for 'rde med compare strict' (default). 

I would really like some feedback on this. This is a major issue in bgpd
that finally needs to be fixed. Please help by testing this out.

-- 
:wq Claudio

Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.80
diff -u -p -r1.80 rde_decide.c
--- rde_decide.c14 Jan 2021 08:29:26 -  1.80
+++ rde_decide.c14 Jan 2021 10:16:30 -
@@ -26,7 +26,9 @@
 #include "rde.h"
 #include "log.h"
 
-intprefix_cmp(struct prefix *, struct prefix *);
+intprefix_cmp(struct prefix *, struct prefix *, int *);
+void   prefix_insert(struct prefix *, struct prefix *, struct rib_entry *);
+void   prefix_remove(struct prefix *, struct rib_entry *);
 /*
  * Decision Engine RFC implementation:
  *  Phase 1:
@@ -107,7 +109,7 @@ int prefix_cmp(struct prefix *, struct p
  * already added prefix.
  */
 int
-prefix_cmp(struct prefix *p1, struct prefix *p2)
+prefix_cmp(struct prefix *p1, struct prefix *p2, int *testall)
 {
struct rde_aspath   *asp1, *asp2;
struct rde_peer *peer1, *peer2;
@@ -115,6 +117,16 @@ prefix_cmp(struct prefix *p1, struct pre
u_int32_tp1id, p2id;
int  p1cnt, p2cnt, i;
 
+   /*
+* If a match happens before the MED check then the list is
+* correctly sorted. If a match happens after MED then it
+* may further elements need to be checked to make sure that
+* all path are considered that could affect this path.
+* If the check happens to be on MED signal this by setting
+* testall to 2.
+*/
+   *testall = 0;
+
if (p1 == NULL)
return -1;
if (p2 == NULL)
@@ -166,10 +178,14 @@ prefix_cmp(struct prefix *p1, struct pre
/*
 * 5. MED decision
 * Only comparable between the same neighboring AS or if
-* 'rde med compare always' is set.
+* 'rde med compare always' is set. In the first case
+* set the testall flag since further elements need to be
+* evaluated as well.
 */
if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS

RTR support for bgpd

2021-01-26 Thread Claudio Jeker
This diff adds initial RTR (RPKI to Router) support to bgpd.
Instead of loading the roa-set table via the configuration bgpd will use
RTR to load the RPKI table from one or multiple RTR servers.
This has the benefit that in large setups only a few systems need to run
rpki-client instead of running it on every router.

Currently only RTR via TCP is supported. Basic 'bgpctl show rtr' output is
available to monitor sessions and 'bgpctl show sets' also shows the right
info. There is a lot more that can be added here but this diff is already
big enough.

Enjoy
-- 
:wq Claudio

Index: bgpctl/bgpctl.8
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
retrieving revision 1.95
diff -u -p -r1.95 bgpctl.8
--- bgpctl/bgpctl.8 10 May 2020 13:38:46 -  1.95
+++ bgpctl/bgpctl.8 26 Jan 2021 08:42:39 -
@@ -33,7 +33,7 @@ program controls the
 .Xr bgpd 8
 daemon.
 Commands may be abbreviated to the minimum unambiguous prefix; for example,
-.Cm s s
+.Cm s su
 for
 .Cm show summary .
 .Pp
@@ -409,6 +409,18 @@ or
 Multiple options can be used at the same time and the
 .Ar neighbor
 filter can be combined with other filters.
+.It Cm show rtr
+Show a list of all
+.Em RTR
+sessions, including information about the session state.
+.It Cm show sets
+Show a list summarizing all
+.Em roa-set ,
+.Em as-set ,
+.Em prefix-set ,
+and
+.Em origin-set
+tables.
 .It Cm show summary
 Show a list of all neighbors, including information about the session state
 and message counters:
Index: bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.264
diff -u -p -r1.264 bgpctl.c
--- bgpctl/bgpctl.c 30 Dec 2020 07:31:19 -  1.264
+++ bgpctl/bgpctl.c 25 Jan 2021 18:06:13 -
@@ -216,6 +216,9 @@ main(int argc, char *argv[])
case SHOW_SET:
imsg_compose(ibuf, IMSG_CTL_SHOW_SET, 0, 0, -1, NULL, 0);
break;
+   case SHOW_RTR:
+   imsg_compose(ibuf, IMSG_CTL_SHOW_RTR, 0, 0, -1, NULL, 0);
+   break;
case SHOW_NEIGHBOR:
case SHOW_NEIGHBOR_TIMERS:
case SHOW_NEIGHBOR_TERSE:
@@ -393,18 +396,19 @@ int
 show(struct imsg *imsg, struct parse_result *res)
 {
struct peer *p;
-   struct ctl_timer*t;
+   struct ctl_timer t;
struct ctl_show_interface   *iface;
struct ctl_show_nexthop *nh;
-   struct ctl_show_set *set;
+   struct ctl_show_set  set;
+   struct ctl_show_rtr  rtr;
struct kroute_full  *kf;
struct ktable   *kt;
struct ctl_show_rib  rib;
+   struct rde_memstats  stats;
+   struct rde_hashstats hash;
u_char  *asdata;
-   struct rde_memstats stats;
-   struct rde_hashstatshash;
-   u_int   rescode, ilen;
-   size_t  aslen;
+   u_intrescode, ilen;
+   size_t   aslen;
 
switch (imsg->hdr.type) {
case IMSG_CTL_SHOW_NEIGHBOR:
@@ -412,9 +416,11 @@ show(struct imsg *imsg, struct parse_res
output->neighbor(p, res);
break;
case IMSG_CTL_SHOW_TIMER:
-   t = imsg->data;
-   if (t->type > 0 && t->type < Timer_Max)
-   output->timer(t);
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(t))
+   errx(1, "wrong imsg len");
+   memcpy(&t, imsg->data, sizeof(t));
+   if (t.type > 0 && t.type < Timer_Max)
+   output->timer(&t);
break;
case IMSG_CTL_SHOW_INTERFACE:
iface = imsg->data;
@@ -463,16 +469,28 @@ show(struct imsg *imsg, struct parse_res
output->attr(imsg->data, ilen, res);
break;
case IMSG_CTL_SHOW_RIB_MEM:
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(stats))
+   errx(1, "wrong imsg len");
memcpy(&stats, imsg->data, sizeof(stats));
output->rib_mem(&stats);
break;
case IMSG_CTL_SHOW_RIB_HASH:
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(hash))
+   errx(1, "wrong imsg len");
memcpy(&hash, imsg->data, sizeof(hash));
output->rib_hash(&hash);
break;
case IMSG_CTL_SHOW_SET:
-   set = imsg->data;
-   output->set(set);
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(set))
+   errx(1, "wrong imsg len");
+   memcpy(&set, imsg->data, sizeof(set));
+   output->set(&set);
+   break;
+   case IMSG_CTL_SHOW_RTR:
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(rtr))
+   errx(1, 

Re: pf route-to issues

2021-01-26 Thread Claudio Jeker
On Tue, Jan 26, 2021 at 12:33:25PM +0100, Alexander Bluhm wrote:
> On Tue, Jan 26, 2021 at 10:39:30AM +1000, David Gwynne wrote:
> > > But what about dup-to?  The packet is duplicated for both directions.
> > > I guess the main use case for dup-to is implementing a monitor port.
> > > There you have to pass packets stateless, otherwise it would not
> > > work anyway.  The strange semantics is not related to this diff.
> > 
> > are you saying i should skip pf_test for all dup-to generated packets?
> 
> I am not sure.
> 
> When we have an in dup-to rule, the incoming packets in request
> direction are dupped and tested with the out ruleset.  The reply
> packets for this state are also dupped, but not tested when they
> leave the dup interface.
> 
> This is inconsistent and cannot work statefully.  Stateful filtering
> with dupped packets does not make sense anyway.  The only working
> config is "pass out on dup-interface no state".
> 
> Do we think this rule should be required?

dup-to is tricky. In general you should run the collector on its own
interface and then 'set skip on $dupif' could work. I would assume that
the copy of the packet is bypassing pf and is just sent directly without
hitting pf_test again. At least for route-to & reply-to I would expect
this behaviour (like I do not expect that I need an extra pass rule to
allow a rdr-to through).
 
> 1. No packet should leave an interface without a rule.
> 
> if (pd->dir == PF_IN || s->rt == PF_DUPTO) {
> if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
> 
> 2. The config says we want a monitor port.  We risk that the
>original packet and the dupped packet match the same rule.
>Stateful filtering cannot work, we do not expect reply packets
>for the dups.
> 
> if (pd->dir == PF_IN && s->rt != PF_DUPTO) {
> if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)

I guess this is for the case where route-to is used in PF_IN. I agree this
should be done so that the state table is properly set. Skipping this for
the copy of dup-to packets makes sense.
Running pf_test() for the same mbuf and direction but with a different ifp
causes more harm than good.

> 3. Some sort of problem was there before, but different.  Don't
>address it now.
> 
> Maybe 2 has less impact for the users and is easy to understand.
> We should document that in the man page.
> 
> > > We are reaching a state where this diff can go in.  I just started
> > > a regress run with it.  OK bluhm@
> > 
> > hopefully i fixed the pfctl error messages up so the regress tests aren't
> > too unhappy.
> 
> pf forward and pf fragment tests pass.  They include route-to and
> reply-to rules.  I have no test for dup-to.  Regress pfctl fails,
> but I think dlg@ has a diff for that.
> 
> bluhm
> 

-- 
:wq Claudio



Re: have pf_route bail out if it resolves a route with RTF_LOCAL set

2021-01-27 Thread Claudio Jeker
On Thu, Jan 28, 2021 at 09:57:33AM +1000, David Gwynne wrote:
> calling if_output with a route to a local IP is confusing, and I'm not
> sure it makes sense anyway.
> 
> this treats an RTF_LOCAL route like an invalid route and drops the
> packet.
> 
> ok?

Isn't this a change of behaviour? I think ip_output() will pass the packet
via loopback back into the system. Which is probably strange for route-to
/ reply-to (since there is divert-to and rdr-to for this). What about
dup-to? 
 
> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1104
> diff -u -p -r1.1104 pf.c
> --- pf.c  27 Jan 2021 23:53:35 -  1.1104
> +++ pf.c  27 Jan 2021 23:55:49 -
> @@ -6054,7 +6054,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   }
>  
>   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
> - if (!rtisvalid(rt)) {
> + if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_LOCAL)) {
>   if (r->rt != PF_DUPTO) {
>   pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
>   0, pd->af, s->rule.ptr, pd->rdomain);
> @@ -6213,7 +6213,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr))
>   dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
>   rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
> - if (!rtisvalid(rt)) {
> + if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_LOCAL)) {
>   if (r->rt != PF_DUPTO) {
>   pf_send_icmp(m0, ICMP6_DST_UNREACH,
>   ICMP6_DST_UNREACH_NOROUTE, 0,
> 

-- 
:wq Claudio



rpki-client remove double checking of hashes

2021-01-28 Thread Claudio Jeker
Initially rpki-client checked the file hash while parsing the file (.roa,
.cert or .crl) but for a while now rpki-client has done the hash check early
during the .mft parsing with mft_check(). After that all files in the
fileandhash attribute are verified and so there is no need to do it again.

All in all this simplifies the code a fair bit. The only problematic case
was the distinction between root cert and regular cert based on the
presence of the digest. Instead use the presence of the public key (from
the TAL). Result is the same, logic is inverse.

So this still works for me.
-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.22
diff -u -p -r1.22 cert.c
--- cert.c  8 Jan 2021 08:09:07 -   1.22
+++ cert.c  28 Jan 2021 14:56:49 -
@@ -973,18 +973,16 @@ out:
  * is also dereferenced.
  */
 static struct cert *
-cert_parse_inner(X509 **xp, const char *fn, const unsigned char *dgst, int ta)
+cert_parse_inner(X509 **xp, const char *fn, int ta)
 {
-   int  rc = 0, extsz, c, sz;
+   int  rc = 0, extsz, c;
size_t   i;
X509*x = NULL;
X509_EXTENSION  *ext = NULL;
ASN1_OBJECT *obj;
struct parse p;
-   BIO *bio = NULL, *shamd;
+   BIO *bio = NULL;
FILE*f;
-   EVP_MD  *md;
-   char mdbuf[EVP_MAX_MD_SIZE];
 
*xp = NULL;
 
@@ -1004,49 +1002,11 @@ cert_parse_inner(X509 **xp, const char *
if ((p.res = calloc(1, sizeof(struct cert))) == NULL)
err(1, NULL);
 
-   /*
-* If we have a digest specified, create an MD chain that will
-* automatically compute a digest during the X509 creation.
-*/
-
-   if (dgst != NULL) {
-   if ((shamd = BIO_new(BIO_f_md())) == NULL)
-   cryptoerrx("BIO_new");
-   if (!BIO_set_md(shamd, EVP_sha256()))
-   cryptoerrx("BIO_set_md");
-   if ((bio = BIO_push(shamd, bio)) == NULL)
-   cryptoerrx("BIO_push");
-   }
-
if ((x = *xp = d2i_X509_bio(bio, NULL)) == NULL) {
cryptowarnx("%s: d2i_X509_bio", p.fn);
goto out;
}
 
-   /*
-* If we have a digest, find it in the chain (we'll already have
-* made it, so assert otherwise) and verify it.
-*/
-
-   if (dgst != NULL) {
-   shamd = BIO_find_type(bio, BIO_TYPE_MD);
-   assert(shamd != NULL);
-
-   if (!BIO_get_md(shamd, &md))
-   cryptoerrx("BIO_get_md");
-   assert(EVP_MD_type(md) == NID_sha256);
-
-   if ((sz = BIO_gets(shamd, mdbuf, EVP_MAX_MD_SIZE)) < 0)
-   cryptoerrx("BIO_gets");
-   assert(sz == SHA256_DIGEST_LENGTH);
-
-   if (memcmp(mdbuf, dgst, SHA256_DIGEST_LENGTH)) {
-   if (verbose > 0)
-   warnx("%s: bad message digest", p.fn);
-   goto out;
-   }
-   }
-
/* Look for X509v3 extensions. */
 
if ((extsz = X509_get_ext_count(x)) < 0)
@@ -1156,10 +1116,10 @@ out:
 }
 
 struct cert *
-cert_parse(X509 **xp, const char *fn, const unsigned char *dgst)
+cert_parse(X509 **xp, const char *fn)
 {
 
-   return cert_parse_inner(xp, fn, dgst, 0);
+   return cert_parse_inner(xp, fn, 0);
 }
 
 struct cert *
@@ -1169,7 +1129,7 @@ ta_parse(X509 **xp, const char *fn, cons
struct cert *p;
int  rc = 0;
 
-   if ((p = cert_parse_inner(xp, fn, NULL, 1)) == NULL)
+   if ((p = cert_parse_inner(xp, fn, 1)) == NULL)
return NULL;
 
if (pkey != NULL) {
Index: cms.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cms.c,v
retrieving revision 1.7
diff -u -p -r1.7 cms.c
--- cms.c   2 Apr 2020 09:16:43 -   1.7
+++ cms.c   28 Jan 2021 15:01:17 -
@@ -36,17 +36,16 @@
  */
 unsigned char *
 cms_parse_validate(X509 **xp, const char *fn,
-const char *oid, const unsigned char *dgst, size_t *rsz)
+const char *oid, size_t *rsz)
 {
const ASN1_OBJECT   *obj;
ASN1_OCTET_STRING   **os = NULL;
-   BIO *bio = NULL, *shamd;
+   BIO *bio = NULL;
CMS_ContentInfo *cms;
FILE*f;
-   char buf[128], mdbuf[EVP_MAX_MD_SIZE];
+   char buf[128];
int  rc = 0, sz;
STACK_OF(X509)  *certs = NULL;
-   EVP_MD  *md;
unsigned char   *res = NULL;
 
*rsz = 0;
@@ -66,46 +65,9 @@ cms_parse_validate(X509 **xp, const char
return

Re: rpki-client remove double checking of hashes

2021-01-28 Thread Claudio Jeker
On Thu, Jan 28, 2021 at 05:19:31PM +0100, Theo Buehler wrote:
> On Thu, Jan 28, 2021 at 04:42:00PM +0100, Claudio Jeker wrote:
> > Initially rpki-client checked the file hash while parsing the file (.roa,
> > .cert or .crl) but for a while now rpki-client has done the hash check early
> > during the .mft parsing with mft_check(). After that all files in the
> > fileandhash attribute are verified and so there is no need to do it again.
> > 
> > All in all this simplifies the code a fair bit. The only problematic case
> > was the distinction between root cert and regular cert based on the
> > presence of the digest. Instead use the presence of the public key (from
> > the TAL). Result is the same, logic is inverse.
> > 
> > So this still works for me.
> 
> Makes sense, ok tb
> 
> Please add the diff below to adjust regress when you land this.

I had the same already prepped in my tree.
 
> Index: test-cert.c
> ===
> RCS file: /cvs/src/regress/usr.sbin/rpki-client/test-cert.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 test-cert.c
> --- test-cert.c   9 Dec 2020 11:22:47 -   1.6
> +++ test-cert.c   28 Jan 2021 16:14:30 -
> @@ -145,7 +145,7 @@ main(int argc, char *argv[])
>   }
>   } else {
>   for (i = 0; i < argc; i++) {
> - p = cert_parse(&xp, argv[i], NULL);
> + p = cert_parse(&xp, argv[i]);
>   if (p == NULL)
>   break;
>   if (verb)
> Index: test-roa.c
> ===
> RCS file: /cvs/src/regress/usr.sbin/rpki-client/test-roa.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 test-roa.c
> --- test-roa.c9 Nov 2020 16:13:02 -   1.7
> +++ test-roa.c28 Jan 2021 16:14:44 -
> @@ -87,7 +87,7 @@ main(int argc, char *argv[])
>   errx(1, "argument missing");
>  
>   for (i = 0; i < argc; i++) {
> - if ((p = roa_parse(&xp, argv[i], NULL)) == NULL)
> + if ((p = roa_parse(&xp, argv[i])) == NULL)
>   break;
>   if (verb)
>   roa_print(p);
> 

-- 
:wq Claudio



Re: unwind(8): use SO_BINDANY

2021-01-29 Thread Claudio Jeker
On Fri, Jan 29, 2021 at 06:29:52PM +0100, Florian Obser wrote:
> Hold off on this for now, claudio pointed out that I might not be
> supposed to use SO_BINDANY like this.

I checked the code and I think using SO_BINDANY should be OK. Still not
sure if unwind can bind(2) before any interface is configured.
 
> On Fri, Jan 29, 2021 at 04:51:46PM +0100, Florian Obser wrote:
> > I want to start unwind earlier, around the time when slaacd comes up,
> > the network is not up at that point. Set SO_BINDANY to be able to
> > already bind udp/53 and tcp/53 on localhost.
> > This will make integration with dhclient easier (I hope).
> > 
> > diff --git unwind.c unwind.c
> > index 00c600560e4..9bfc4dcf3b8 100644
> > --- unwind.c
> > +++ unwind.c
> > @@ -746,6 +746,9 @@ open_ports(void)
> > if (setsockopt(udp4sock, SOL_SOCKET, SO_SNDBUF, &bsize,
> > sizeof(bsize)) == -1)
> > log_warn("setting SO_SNDBUF on socket");
> > +   if (setsockopt(udp4sock, SOL_SOCKET, SO_BINDANY, &opt,
> > +   sizeof(opt)) == -1)
> > +   log_warn("setting SO_BINDANY on socket");
> > if (bind(udp4sock, res0->ai_addr, res0->ai_addrlen)
> > == -1) {
> > close(udp4sock);
> > @@ -767,6 +770,9 @@ open_ports(void)
> > if (setsockopt(udp6sock, SOL_SOCKET, SO_SNDBUF, &bsize,
> > sizeof(bsize)) == -1)
> > log_warn("setting SO_SNDBUF on socket");
> > +   if (setsockopt(udp6sock, SOL_SOCKET, SO_BINDANY, &opt,
> > +   sizeof(opt)) == -1)
> > +   log_warn("setting SO_BINDANY on socket");
> > if (bind(udp6sock, res0->ai_addr, res0->ai_addrlen)
> > == -1) {
> > close(udp6sock);
> > @@ -791,6 +797,9 @@ open_ports(void)
> > if (setsockopt(tcp4sock, SOL_SOCKET, SO_SNDBUF, &bsize,
> > sizeof(bsize)) == -1)
> > log_warn("setting SO_SNDBUF on socket");
> > +   if (setsockopt(tcp4sock, SOL_SOCKET, SO_BINDANY, &opt,
> > +   sizeof(opt)) == -1)
> > +   log_warn("setting SO_BINDANY on socket");
> > if (bind(tcp4sock, res0->ai_addr, res0->ai_addrlen)
> > == -1) {
> > close(tcp4sock);
> > @@ -817,6 +826,9 @@ open_ports(void)
> > if (setsockopt(tcp6sock, SOL_SOCKET, SO_SNDBUF, &bsize,
> > sizeof(bsize)) == -1)
> > log_warn("setting SO_SNDBUF on socket");
> > +   if (setsockopt(tcp6sock, SOL_SOCKET, SO_BINDANY, &opt,
> > +   sizeof(opt)) == -1)
> > +   log_warn("setting SO_BINDANY on socket");
> > if (bind(tcp6sock, res0->ai_addr, res0->ai_addrlen)
> > == -1) {
> > close(tcp6sock);
> > 
> > 
> > -- 
> > I'm not entirely sure you are real.
> > 
> 
> -- 
> I'm not entirely sure you are real.
> 

-- 
:wq Claudio



Re: ftp: make use of getline(3)

2021-01-30 Thread Claudio Jeker
On Sat, Jan 30, 2021 at 11:52:15AM +0100, Hiltjo Posthuma wrote:
> On Sat, Jan 30, 2021 at 12:22:04AM +0100, Christian Weisgerber wrote:
> > Hiltjo Posthuma:
> > 
> > > > @@ -75,19 +74,8 @@ cookie_load(void)
> > > > if (fp == NULL)
> > > > err(1, "cannot open cookie file %s", cookiefile);
> > > > date = time(NULL);
> > > > -   lbuf = NULL;
> > > > -   while ((line = fgetln(fp, &len)) != NULL) {
> > > > -   if (line[len - 1] == '\n') {
> > > > -   line[len - 1] = '\0';
> > > > -   --len;
> > > > -   } else {
> > > > -   if ((lbuf = malloc(len + 1)) == NULL)
> > > > -   err(1, NULL);
> > > > -   memcpy(lbuf, line, len);
> > > > -   lbuf[len] = '\0';
> > > > -   line = lbuf;
> > > > -   }
> > > > -   line[strcspn(line, "\r")] = '\0';
> > > > +   while (getline(&line, &linesize, fp) != -1) {
> > > > +   line[strcspn(line, "\r\n")] = '\0';
> > > >  
> > > 
> > > getline returns the number of characters read including the delimiter.
> > > This size could be used to '\0' terminate the string instead of a
> > > strcspn() call.
> > 
> > A strcspn() call is already there.
> > 
> > -- 
> > Christian "naddy" Weisgerber  na...@mips.inka.de
> 
> Yes, my point is it scans the entire line again for the delimiter, but it is not
> needed, because it is already known after getline() and returned.

But the strcspn will stop at the first \r or \n char while getline() only
checks for \n. So there is still a need to remove \r from the string.
I think the strcspn() is a very nice solution here and I doubt scanning
the line again will cause a performance issue.

-- 
:wq Claudio



Re: sleep_setup/finish simplification

2021-02-01 Thread Claudio Jeker
On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote:
> On 08/12/20(Tue) 10:06, Martin Pieuchot wrote:
> > Diff below aims to simplify the API to put a thread on a sleep queue and
> > reduce it to the following:
> > 
> > sleep_setup();
> > /* check condition or release lock */
> > sleep_finish();
> > 
> > It is motivated by my work to sleep the SCHED_LOCK() but might as well
> > prevent/fix some bugs.
> > 
> > The tricky part of the current implementation is that sleep_setup_signal()
> > can already park/stop the current thread resulting in a context change.
> > Should any custom accounting / lock check happen before that?  At least
> > two lock primitives do so currently:  drm's schedule_timeout() and
> > rwlock's rw_enter().
> > 
> > As a result of this diff various states can be removed and sleep_finish()
> > contains the following magic:
> > 
> > 1. check for signal/parking
> > 2. context switch or remove from sleep queue
> > 3. check for signal/parking
> > 
> > Note that sleep_finish() could be simplified even further but I left
> > that for later to ease the review.
> 
> Updated diff on top of recent changes from claudio@, still ok?

The sleep code makes my head spin but looking at this diff applied the
changes make sense and the order remains consistent.

OK claudio@
 
> Index: dev/dt/dt_dev.c
> ===
> RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 dt_dev.c
> --- dev/dt/dt_dev.c   28 Sep 2020 13:16:58 -  1.10
> +++ dev/dt/dt_dev.c   26 Jan 2021 17:20:11 -
> @@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f
>   return (EMSGSIZE);
>  
>   while (!sc->ds_evtcnt) {
> - sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
> - sleep_setup_signal(&sls);
> - sleep_finish(&sls, !sc->ds_evtcnt);
> - error = sleep_finish_signal(&sls);
> + sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
> + error = sleep_finish(&sls, !sc->ds_evtcnt);
>   if (error == EINTR || error == ERESTART)
>   break;
>   }
> Index: dev/pci/if_myx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_myx.c
> --- dev/pci/if_myx.c  17 Jan 2021 02:52:21 -  1.114
> +++ dev/pci/if_myx.c  26 Jan 2021 17:20:11 -
> @@ -1397,7 +1397,7 @@ myx_down(struct myx_softc *sc)
>   (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL);
>  
>   while (sc->sc_state != MYX_S_OFF) {
> - sleep_setup(&sls, sts, PWAIT, "myxdown");
> + sleep_setup(&sls, sts, PWAIT, "myxdown", 0);
>   membar_consumer();
>   sleep_finish(&sls, sc->sc_state != MYX_S_OFF);
>   }
> Index: dev/pci/drm/drm_linux.c
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.76
> diff -u -p -r1.76 drm_linux.c
> --- dev/pci/drm/drm_linux.c   13 Jan 2021 01:04:49 -  1.76
> +++ dev/pci/drm/drm_linux.c   26 Jan 2021 17:22:50 -
> @@ -110,14 +110,14 @@ schedule_timeout(long timeout)
>  {
>   struct sleep_state sls;
>   unsigned long deadline;
> - int wait, spl;
> + int wait, spl, timo = 0;
>  
>   MUTEX_ASSERT_LOCKED(&sch_mtx);
>   KASSERT(!cold);
>  
> - sleep_setup(&sls, sch_ident, sch_priority, "schto");
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
> - sleep_setup_timeout(&sls, timeout);
> + timo = timeout;
> + sleep_setup(&sls, sch_ident, sch_priority, "schto", timo);
>  
>   wait = (sch_proc == curproc && timeout > 0);
>  
> @@ -125,11 +125,9 @@ schedule_timeout(long timeout)
>   MUTEX_OLDIPL(&sch_mtx) = splsched();
>   mtx_leave(&sch_mtx);
>  
> - sleep_setup_signal(&sls);
> -
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
>   deadline = jiffies + timeout;
> - sleep_finish_all(&sls, wait);
> + sleep_finish(&sls, wait);
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
>   timeout = deadline - jiffies;
>  
> Index: kern/kern_rwlock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 kern_rwlock.c
> --- kern/kern_rwlock.c11 Jan 2021 18:49:38 -  1.46
> +++ kern/kern_rwlock.c26 Jan 2021 17:20:11 -
> @@ -279,15 +279,13 @@ retry:
>   prio = op->wait_prio;
>   if (flags & RW_INTR)
>   prio |= PCATCH;
> - sleep_setup(&sls, rwl, prio, rwl->rwl_name);
> - if (flags & RW_INTR)
> - sleep_setup_signal(&sls);
> + sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);
>  
>   do_sleep = !rw_cas(&rwl->rwl_owner, o, set

Re: sleep_setup/finish simplification

2021-02-01 Thread Claudio Jeker
On Mon, Feb 01, 2021 at 05:11:39PM +0100, Claudio Jeker wrote:
> On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote:
> > On 08/12/20(Tue) 10:06, Martin Pieuchot wrote:
> > > Diff below aims to simplify the API to put a thread on a sleep queue and
> > > reduce it to the following:
> > > 
> > >   sleep_setup();
> > >   /* check condition or release lock */
> > >   sleep_finish();
> > > 
> > > It is motivated by my work to sleep the SCHED_LOCK() but might as well
> > > prevent/fix some bugs.
> > > 
> > > The tricky part of the current implementation is that sleep_setup_signal()
> > > can already park/stop the current thread resulting in a context change.
> > > Should any custom accounting / lock check happen before that?  At least
> > > two lock primitives do so currently:  drm's schedule_timeout() and
> > > rwlock's rw_enter().
> > > 
> > > As a result of this diff various states can be removed and sleep_finish()
> > > contains the following magic:
> > > 
> > >   1. check for signal/parking
> > >   2. context switch or remove from sleep queue
> > >   3. check for signal/parking
> > > 
> > > Note that sleep_finish() could be simplified even further but I left
> > > that for later to ease the review.
> > 
> > Updated diff on top of recent changes from claudio@, still ok?
> 
> The sleep code makes my head spin but looking at this diff applied the
> changes make sense and the order remains consistent.
> 

I may have spoken too fast. I see strange issues in firefox with this diff
in. Need more time to better understand what is going on. For me firefox
hangs on session restore (processes are stuck in poll).

-- 
:wq Claudio



Re: pf route-to: only run pf_test when packets enter and leave the stack

2021-02-01 Thread Claudio Jeker
On Tue, Feb 02, 2021 at 02:52:52PM +1000, David Gwynne wrote:
> this is part of a high level discussion about when pf runs against a
> packet. the options are:
> 
> 1. pf runs when a packet goes over an interface
> or
> 2. pf runs when a packet enters or leaves the network stack.
> 
> for normal packet handling there isn't a difference between these
> options. in the routing case a packet comes in on an interface, pf tests
> it, then the stack processes it and decides to send it out another
> interface, pf tests it again on the way out, the packet goes on the
> wire. for packets handled by the local system, a packet comes in on an
> interface, pf tests it, the stack processes it locally, something
> generates a reply, the stack decides to route that out an interface, pf
> tests it on the way out, the reply packet ends up on the wire.
> 
> in both situations, you get the same sequence of events if you think
> that pf runs when a packet goes over an interface or whether pf runs when
> a packet enters or leaves the stack.
> 
> however, there is a difference if route-to gets involved. if route-to is
> applied on an outbound rule/state, it could change which interface the
> packet should be going over.
> 
> currently the code implements option 1. this means that if route-to
> changes the interface, it reruns pf test for the packet going over the
> new interface. i would like to change it to option 2.
> 
> the main reason i want to change it is that option 1 creates confusion
> for the state table. by default, pf states are floating, meaning that
> packets are matched to states regardless of which interface they're
> going over. if a packet leaving on em0 is rerouted out em1, both
> traversals will end up using the same state, which at best will make the
> accounting look weird, or at worst fail some checks in the state and get
> dropped.
> 
> another reason i want to change this is to make it consistent with
> other changes that are made to a packet. e.g., when nat is applied to
> a packet, we don't run pf_test again with the new addresses.
> 
> the downside to this change is that the pf_test rerun may have been used
> to do things like push a packet out another interface with the first run
> through pf, and pick up a broad "nat all packets leaving this interface"
> rule on the second one.
> 
> however, like most things relating to route-to/reply-to/dup-to, im
> pretty sure at this point it's not used a lot, so the impact is minimal.
> a lot of changes in this space have already been made, so adding another
> simplification is justifiable. if this does remove functionality that
> people need, i believe sashan@ has agreed to help me implement route-to
> on match rules to give more flexibility and composability of rules.
> 
> i've canvassed a few people, and their responses have varied from "i
> don't care, route-to is the worst" to "i thought we did option 2
> anyway". anyone else want to chime in?
> 
> this keeps the behaviour where route-to on a packet coming into the
> stack is pushed past it and immediately forwarded to the output
> interface. the condition for that is greatly simplified now though.
> 
> ok?

For me pf(4) should behave like 2. That was the initial design (pf_test in
ip_input and in ip_output) and with this a forwarded packet is tested when
received and when sent. route-to, reply-to, dup-to were added later and
the change of behaviour using these special shortcuts was never fully
considered. (I'm not even sure if you could use route-to on out rules in
old versions of pf). The state matching was built on the behaviour in 2
and retesting the same packet again results in a lot of state confusion.
The result is erratic behaviour and this is why I think that you're right
and route-to, reply-to should avoid rerunning pf_test() for the same
direction.

So in my opinion this diff is OK.
 
> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1106
> diff -u -p -r1.1106 pf.c
> --- pf.c  1 Feb 2021 00:31:05 -   1.1106
> +++ pf.c  2 Feb 2021 03:44:51 -
> @@ -6033,7 +6033,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   (ifp->if_flags & IFF_LOOPBACK) == 0)
>   ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
>  
> - if (s->rt != PF_DUPTO && pd->kif->pfik_ifp != ifp) {
> + if (s->rt != PF_DUPTO && pd->dir == PF_IN) {
>   if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
>   goto bad;
>   else if (m0 == NULL)
> @@ -6178,7 +6178,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   (ifp->if_flags & IFF_LOOPBACK) == 0)
>   ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
>  
> - if (s->rt != PF_DUPTO && pd->kif->pfik_ifp != ifp) {
> + if (s->rt != PF_DUPTO && pd->dir == PF_IN) {
>   if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
>   goto bad;
>   else

Re: sleep_setup/finish simplification

2021-02-02 Thread Claudio Jeker
On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote:
> On 08/12/20(Tue) 10:06, Martin Pieuchot wrote:
> > Diff below aims to simplify the API to put a thread on a sleep queue and
> > reduce it to the following:
> > 
> > sleep_setup();
> > /* check condition or release lock */
> > sleep_finish();
> > 
> > It is motivated by my work to sleep the SCHED_LOCK() but might as well
> > prevent/fix some bugs.
> > 
> > The tricky part of the current implementation is that sleep_setup_signal()
> > can already park/stop the current thread resulting in a context change.
> > Should any custom accounting / lock check happen before that?  At least
> > two lock primitives do so currently:  drm's schedule_timeout() and
> > rwlock's rw_enter().
> > 
> > As a result of this diff various states can be removed and sleep_finish()
> > contains the following magic:
> > 
> > 1. check for signal/parking
> > 2. context switch or remove from sleep queue
> > 3. check for signal/parking
> > 
> > Note that sleep_finish() could be simplified even further but I left
> > that for later to ease the review.
> 
> Updated diff on top of recent changes from claudio@, still ok?

Found the bug. The timeout for rwsleep() got lost. See below.
 
> Index: dev/dt/dt_dev.c
> ===
> RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 dt_dev.c
> --- dev/dt/dt_dev.c   28 Sep 2020 13:16:58 -  1.10
> +++ dev/dt/dt_dev.c   26 Jan 2021 17:20:11 -
> @@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f
>   return (EMSGSIZE);
>  
>   while (!sc->ds_evtcnt) {
> - sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
> - sleep_setup_signal(&sls);
> - sleep_finish(&sls, !sc->ds_evtcnt);
> - error = sleep_finish_signal(&sls);
> + sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
> + error = sleep_finish(&sls, !sc->ds_evtcnt);
>   if (error == EINTR || error == ERESTART)
>   break;
>   }
> Index: dev/pci/if_myx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_myx.c
> --- dev/pci/if_myx.c  17 Jan 2021 02:52:21 -  1.114
> +++ dev/pci/if_myx.c  26 Jan 2021 17:20:11 -
> @@ -1397,7 +1397,7 @@ myx_down(struct myx_softc *sc)
>   (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL);
>  
>   while (sc->sc_state != MYX_S_OFF) {
> - sleep_setup(&sls, sts, PWAIT, "myxdown");
> + sleep_setup(&sls, sts, PWAIT, "myxdown", 0);
>   membar_consumer();
>   sleep_finish(&sls, sc->sc_state != MYX_S_OFF);
>   }
> Index: dev/pci/drm/drm_linux.c
> ===
> RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
> retrieving revision 1.76
> diff -u -p -r1.76 drm_linux.c
> --- dev/pci/drm/drm_linux.c   13 Jan 2021 01:04:49 -  1.76
> +++ dev/pci/drm/drm_linux.c   26 Jan 2021 17:22:50 -
> @@ -110,14 +110,14 @@ schedule_timeout(long timeout)
>  {
>   struct sleep_state sls;
>   unsigned long deadline;
> - int wait, spl;
> + int wait, spl, timo = 0;
>  
>   MUTEX_ASSERT_LOCKED(&sch_mtx);
>   KASSERT(!cold);
>  
> - sleep_setup(&sls, sch_ident, sch_priority, "schto");
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
> - sleep_setup_timeout(&sls, timeout);
> + timo = timeout;
> + sleep_setup(&sls, sch_ident, sch_priority, "schto", timo);
>  
>   wait = (sch_proc == curproc && timeout > 0);
>  
> @@ -125,11 +125,9 @@ schedule_timeout(long timeout)
>   MUTEX_OLDIPL(&sch_mtx) = splsched();
>   mtx_leave(&sch_mtx);
>  
> - sleep_setup_signal(&sls);
> -
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
>   deadline = jiffies + timeout;
> - sleep_finish_all(&sls, wait);
> + sleep_finish(&sls, wait);
>   if (timeout != MAX_SCHEDULE_TIMEOUT)
>   timeout = deadline - jiffies;
>  
> Index: kern/kern_rwlock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 kern_rwlock.c
> --- kern/kern_rwlock.c11 Jan 2021 18:49:38 -  1.46
> +++ kern/kern_rwlock.c26 Jan 2021 17:20:11 -
> @@ -279,15 +279,13 @@ retry:
>   prio = op->wait_prio;
>   if (flags & RW_INTR)
>   prio |= PCATCH;
> - sleep_setup(&sls, rwl, prio, rwl->rwl_name);
> - if (flags & RW_INTR)
> - sleep_setup_signal(&sls);
> + sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);
>  
>   do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
>  
> - sleep_finish(&sls, do_sleep);
> + error 

Re: iscsid initiator cleanup

2021-02-02 Thread Claudio Jeker
On Tue, Jan 19, 2021 at 02:22:14PM +0100, Claudio Jeker wrote:
> In iscsid the initiator is kind of a singleton. So let's make use of this
> and remove the initiator argument from all function calls.
> 
> This compiles for me but I can't currently test this out.

Updated version for -current.

-- 
:wq Claudio

Index: initiator.c
===
RCS file: /cvs/src/usr.sbin/iscsid/initiator.c,v
retrieving revision 1.15
diff -u -p -r1.15 initiator.c
--- initiator.c 16 Jan 2015 15:57:06 -  1.15
+++ initiator.c 19 Jan 2021 13:09:31 -
@@ -33,7 +33,7 @@
 #include "iscsid.h"
 #include "log.h"
 
-struct initiator *initiator;
+static struct initiator *initiator;
 
 struct task_login {
struct task  task;
@@ -62,7 +62,7 @@ void  initiator_logout_cb(struct connecti
 struct session_params  initiator_sess_defaults;
 struct connection_params   initiator_conn_defaults;
 
-struct initiator *
+void
 initiator_init(void)
 {
if (!(initiator = calloc(1, sizeof(*initiator
@@ -78,24 +78,34 @@ initiator_init(void)
initiator_conn_defaults = iscsi_conn_defaults;
initiator_sess_defaults.MaxConnections = ISCSID_DEF_CONNS;
initiator_conn_defaults.MaxRecvDataSegmentLength = 65536;
-
-   return initiator;
 }
 
 void
-initiator_cleanup(struct initiator *i)
+initiator_cleanup(void)
 {
struct session *s;
 
-   while ((s = TAILQ_FIRST(&i->sessions)) != NULL) {
-   TAILQ_REMOVE(&i->sessions, s, entry);
+   while ((s = TAILQ_FIRST(&initiator->sessions)) != NULL) {
+   TAILQ_REMOVE(&initiator->sessions, s, entry);
session_cleanup(s);
}
free(initiator);
 }
 
 void
-initiator_shutdown(struct initiator *i)
+initiator_set_config(struct initiator_config *ic)
+{
+   initiator->config = *ic;
+}
+
+struct initiator_config *
+initiator_get_config(void)
+{
+   return &initiator->config;
+}
+
+void
+initiator_shutdown(void)
 {
struct session *s;
 
@@ -106,7 +116,7 @@ initiator_shutdown(struct initiator *i)
 }
 
 int
-initiator_isdown(struct initiator *i)
+initiator_isdown(void)
 {
struct session *s;
int inprogres = 0;
@@ -119,6 +129,46 @@ initiator_isdown(struct initiator *i)
 }
 
 struct session *
+initiator_new_session(u_int8_t st)
+{
+   struct session *s;
+
+   if (!(s = calloc(1, sizeof(*s
+   return NULL;
+
+   /* use the same qualifier unless there is a conflict */
+   s->isid_base = initiator->config.isid_base;
+   s->isid_qual = initiator->config.isid_qual;
+   s->cmdseqnum = arc4random();
+   s->itt = arc4random();
+   s->state = SESS_INIT;
+
+   if (st == SESSION_TYPE_DISCOVERY)
+   s->target = 0;
+   else
+   s->target = initiator->target++;
+
+   TAILQ_INIT(&s->connections);
+   TAILQ_INIT(&s->tasks);
+
+   TAILQ_INSERT_HEAD(&initiator->sessions, s, entry);
+
+   return s;
+}
+
+struct session *
+initiator_find_session(char *name)
+{
+   struct session *s;
+
+   TAILQ_FOREACH(s, &initiator->sessions, entry) {
+   if (strcmp(s->config.SessionName, name) == 0)
+   return s;
+   }
+   return NULL;
+}
+
+struct session *
 initiator_t2s(u_int target)
 {
struct session *s;
@@ -128,6 +178,12 @@ initiator_t2s(u_int target)
return s;
}
return NULL;
+}
+
+struct session_head *
+initiator_get_sessions(void)
+{
+   return &initiator->sessions;
 }
 
 void
Index: iscsid.c
===
RCS file: /cvs/src/usr.sbin/iscsid/iscsid.c,v
retrieving revision 1.21
diff -u -p -r1.21 iscsid.c
--- iscsid.c27 Jan 2021 07:21:54 -  1.21
+++ iscsid.c27 Jan 2021 15:35:36 -
@@ -38,7 +38,6 @@ void  main_sig_handler(int, short, void 
 __dead voidusage(void);
 void   shutdown_cb(int, short, void *);
 
-extern struct initiator *initiator;
 struct event exit_ev;
 int exit_rounds;
 #define ISCSI_EXIT_WAIT 5
@@ -146,13 +145,13 @@ main(int argc, char *argv[])
signal(SIGPIPE, SIG_IGN);
 
control_event_init();
-   initiator = initiator_init();
+   initiator_init();
 
event_dispatch();
 
/* do some cleanup on the way out */
control_cleanup(ctrlsock);
-   initiator_cleanup(initiator);
+   initiator_cleanup();
log_info("exiting.");
return 0;
 }
@@ -162,7 +161,7 @@ shutdown_cb(int fd, short event, void *a
 {
struct timeval tv;
 
-   if (exit_rounds++ >= ISCSI_EXIT_WAIT || initiator_isdown(initiator))
+   if (exit_rounds++ >= ISCSI_EXIT_WAIT || initiator_isdown())
event_loopexi

rpki-client rework repository handling

2021-02-02 Thread Claudio Jeker
For RRDP support the repository code needs to be cleaned up and adjusted.
Instead of working with host/module, store the URI (repo->repo) and the
local path (repo->local). This simplifies the communication between
rpki-client main process and the rsync process a fair bit.

Also introduce mkpath() stolen and adjusted from bin/mkdir to create
all directories in a path. In a second step the repository layout will
probably change so that rsync and rrdp can coexist. This is why some code
is currently maybe a bit extra complex.

-- 
:wq Claudio

Index: Makefile
===
RCS file: /cvs/src/usr.sbin/rpki-client/Makefile,v
retrieving revision 1.16
diff -u -p -r1.16 Makefile
--- Makefile8 Jan 2021 08:09:07 -   1.16
+++ Makefile2 Feb 2021 17:24:34 -
@@ -1,8 +1,8 @@
 #  $OpenBSD: Makefile,v 1.16 2021/01/08 08:09:07 claudio Exp $
 
 PROG=  rpki-client
-SRCS=  as.c cert.c cms.c crl.c gbr.c io.c ip.c log.c main.c mft.c output.c \
-   output-bgpd.c output-bird.c output-csv.c output-json.c \
+SRCS=  as.c cert.c cms.c crl.c gbr.c io.c ip.c log.c main.c mft.c mkdir.c \
+   output.c output-bgpd.c output-bird.c output-csv.c output-json.c \
roa.c rsync.c tal.c validate.c x509.c
 MAN=   rpki-client.8
 
Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.38
diff -u -p -r1.38 extern.h
--- extern.h29 Jan 2021 10:13:16 -  1.38
+++ extern.h2 Feb 2021 17:25:27 -
@@ -422,6 +422,8 @@ int  output_json(FILE *, struct vrp_tre
 void   logx(const char *fmt, ...)
__attribute__((format(printf, 1, 2)));
 
+intmkpath(const char *);
+
 #defineRPKI_PATH_OUT_DIR   "/var/db/rpki-client"
 #defineRPKI_PATH_BASE_DIR  "/var/cache/rpki-client"
 
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.91
diff -u -p -r1.91 main.c
--- main.c  29 Jan 2021 10:13:16 -  1.91
+++ main.c  2 Feb 2021 17:57:51 -
@@ -83,10 +83,11 @@
  * An rsync repository.
  */
 struct repo {
-   char*host; /* hostname */
-   char*module; /* module name */
-   int  loaded; /* whether loaded or not */
-   size_t   id; /* identifier (array index) */
+   char*repo;  /* repository rsync URI */
+   char*local; /* local path name */
+   char*notify; /* RRDB notify URI if available */
+   size_t   id; /* identifier (array index) */
+   int  loaded; /* whether loaded or not */
 };
 
 size_t entity_queue;
@@ -288,6 +289,7 @@ repo_lookup(struct msgbuf *msgq, const c
 {
const char  *host, *mod;
size_t   hostsz, modsz, i;
+   char*local;
struct repo *rp;
struct ibuf *b;
 
@@ -295,17 +297,16 @@ repo_lookup(struct msgbuf *msgq, const c
&mod, &modsz, NULL, NULL, NULL, uri))
errx(1, "%s: malformed", uri);
 
+   if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
+   (int)modsz, mod) == -1)
+   err(1, "asprintf");
+
/* Look up in repository table. */
 
for (i = 0; i < rt.reposz; i++) {
-   if (strlen(rt.repos[i].host) != hostsz)
-   continue;
-   if (strlen(rt.repos[i].module) != modsz)
-   continue;
-   if (strncasecmp(rt.repos[i].host, host, hostsz))
-   continue;
-   if (strncasecmp(rt.repos[i].module, mod, modsz))
+   if (strcmp(rt.repos[i].local, local))
continue;
+   free(local);
return &rt.repos[i];
}
 
@@ -317,25 +318,25 @@ repo_lookup(struct msgbuf *msgq, const c
rp = &rt.repos[rt.reposz++];
memset(rp, 0, sizeof(struct repo));
rp->id = rt.reposz - 1;
+   rp->local = local;
 
-   if ((rp->host = strndup(host, hostsz)) == NULL ||
-   (rp->module = strndup(mod, modsz)) == NULL)
-   err(1, "strndup");
-
-   i = rt.reposz - 1;
+   if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL)
+   err(1, "strdup");
 
if (!noop) {
-   logx("%s/%s: pulling from network", rp->host, rp->module);
-   if ((b = ibuf_dynamic(128, UINT_MAX)) == NULL)
+   if (asprintf(&local, "%s", rp->local) == -1)
+   err(1, "asprintf");
+   logx("%s: pulling from network", local);
+   if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
err(1, NULL);
-   io_simple_buffer(b, &i, sizeof(i));
-   io_str_buffer(b, rp->host);
-   io_str_buffer(b, rp->module);
-
+   io_simple_buffer(b, &rp->

Re: sleep_setup/finish simplification

2021-02-03 Thread Claudio Jeker
On Wed, Feb 03, 2021 at 09:43:01AM +0100, Martin Pieuchot wrote:
> On 02/02/21(Tue) 10:45, Claudio Jeker wrote:
> > On Mon, Feb 01, 2021 at 04:25:47PM +0100, Martin Pieuchot wrote:
> > > On 08/12/20(Tue) 10:06, Martin Pieuchot wrote:
> > > > Diff below aims to simplify the API to put a thread on a sleep queue and
> > > > reduce it to the following:
> > > > 
> > > > sleep_setup();
> > > > /* check condition or release lock */
> > > > sleep_finish();
> > > > 
> > > > It is motivated by my work to sleep the SCHED_LOCK() but might as well
> > > > prevent/fix some bugs.
> > > > 
> > > > The tricky part of the current implementation is that 
> > > > sleep_setup_signal()
> > > > can already park/stop the current thread resulting in a context change.
> > > > Should any custom accounting / lock check happen before that?  At least
> > > > two lock primitives do so currently:  drm's schedule_timeout() and
> > > > rwlock's rw_enter().
> > > > 
> > > > As a result of this diff various states can be removed and 
> > > > sleep_finish()
> > > > contains the following magic:
> > > > 
> > > > 1. check for signal/parking
> > > > 2. context switch or remove from sleep queue
> > > > 3. check for signal/parking
> > > > 
> > > > Note that sleep_finish() could be simplified even further but I left
> > > > that for later to ease the review.
> > > 
> > > Updated diff on top of recent changes from claudio@, still ok?
> > 
> > Found the bug. The timeout for rwsleep() got lost. See below.
> 
> Thanks, updated diff addressing your points.
> 

OK claudio@

-- 
:wq Claudio



Re: diff: tcp ack improvement

2021-02-03 Thread Claudio Jeker
On Wed, Feb 03, 2021 at 10:56:38AM +0100, Jan Klemkow wrote:
> On Tue, Jan 05, 2021 at 10:30:33AM +0100, Claudio Jeker wrote:
> > On Tue, Jan 05, 2021 at 10:16:04AM +0100, Jan Klemkow wrote:
> > > On Wed, Dec 23, 2020 at 11:59:13AM +, Stuart Henderson wrote:
> > > > On 2020/12/17 20:50, Jan Klemkow wrote:
> > > > > ping
> > > > > 
> > > > > On Fri, Nov 06, 2020 at 01:10:52AM +0100, Jan Klemkow wrote:
> > > > > > > bluhm and I made some network performance measurements and kernel
> > > > > > profiling.
> > > > 
> > > > I've been running this on my workstation since you sent it out - lots
> > > > of long-running ssh connections, hourly reposync, daily rsync of base
> > > > snapshots.
> > > > 
> > > > I don't know enough about TCP stack behaviour to really give a 
> > > > meaningful
> > > > OK, but certainly not seeing any problems with it.
> > > 
> > > Thanks, Stuart.  Has someone else tested this diff?  Or, are there some
> > > opinions or objections about it?  Even bike-shedding is welcome :-)
> > 
> > From my memory TCP uses the ACKs on startup to increase the send window
> > and so your diff could slow down the initial startup. Not sure if that
> > matters actually. It can have some impact if userland reads in big blocks
> > at infrequent intervals since then the ACK clock slows down.
> > 
> > I guess to get coverage it would be best to commit this and then monitor
> > the lists for possible slowdowns.
> 
> Is there a way to commit this, or to test the diff in snapshots?

Just commit it. OK claudio@
If people see problems we can back it out again.
 
> bye,
> Jan
>  
> > > > > > Setup:  Linux (iperf) -10gbit-> OpenBSD (relayd) -10gbit-> 
> > > > > > Linux (iperf)
> > > > > > 
> > > > > > > We figured out that the kernel uses a huge amount of processing time
> > > > > > > for sending ACKs to the sender on the receiving interface.  After
> > > > > > > receiving a data segment, we send out two ACKs.  The first one is
> > > > > > > sent in tcp_input() directly after receiving.  The second ACK is
> > > > > > > sent out after the userland or the sosplice task reads some data
> > > > > > > out of the socket buffer.
> > > > > > 
> > > > > > > The first ACK in tcp_input() is sent after receiving every other
> > > > > > > data segment, as described in RFC 1122:
> > > > > > 
> > > > > > 4.2.3.2  When to Send an ACK Segment
> > > > > > A TCP SHOULD implement a delayed ACK, but an ACK should
> > > > > > not be excessively delayed; in particular, the delay
> > > > > > MUST be less than 0.5 seconds, and in a stream of
> > > > > > full-sized segments there SHOULD be an ACK for at least
> > > > > > every second segment.
> > > > > > 
> > > > > > This advice is based on the paper "Congestion Avoidance and 
> > > > > > Control":
> > > > > > 
> > > > > > 4 THE GATEWAY SIDE OF CONGESTION CONTROL
> > > > > > The 8 KBps senders were talking to 4.3+BSD receivers
> > > > > > which would delay an ack for atmost one packet (because
> > > > > > of an ack’s clock’ role, the authors believe that the
> > > > > > minimum ack frequency should be every other packet).
> > > > > > 
> > > > > > > Sending the first ACK (on every other packet) costs us too much
> > > > > > > processing time.  Thus, we run into a full socket buffer earlier.
> > > > > > > The first ACK just acknowledges the received data, but does not
> > > > > > > update the window.  The second ACK, caused by the socket buffer
> > > > > > > reader, also acknowledges the data and also updates the window.
> > > > > > > So, the second ACK is worth much more for fast packet processing
> > > > > > > than the first one.
> > > > > > 
> > > > > > The performance improvement is between 33% with splicing and 20% 
> > > > > > witho

rpki-client factor out the parser code into own module

2021-02-03 Thread Claudio Jeker
{
-   pfd.events = POLLIN;
-   if (msgq.queued)
-   pfd.events |= POLLOUT;
-
-   if (poll(&pfd, 1, INFTIM) == -1)
-   err(1, "poll");
-   if ((pfd.revents & (POLLERR|POLLNVAL)))
-   errx(1, "poll: bad descriptor");
-
-   /* If the parent closes, return immediately. */
-
-   if ((pfd.revents & POLLHUP))
-   break;
-
-   /*
-* Start with read events.
-* This means that the parent process is sending us
-* something we need to parse.
-* We don't actually parse it til we have space in our
-* outgoing buffer for responding, though.
-*/
-
-   if ((pfd.revents & POLLIN)) {
-   io_socket_blocking(fd);
-   entp = calloc(1, sizeof(struct entity));
-   if (entp == NULL)
-   err(1, NULL);
-   entity_read_req(fd, entp);
-   TAILQ_INSERT_TAIL(&q, entp, entries);
-   io_socket_nonblocking(fd);
-   }
-
-   if (pfd.revents & POLLOUT) {
-   switch (msgbuf_write(&msgq)) {
-   case 0:
-   errx(1, "write: connection closed");
-   case -1:
-   err(1, "write");
-   }
-   }
-
-   /*
-* If there's nothing to parse, then stop waiting for
-* the write signal.
-*/
-
-   if (TAILQ_EMPTY(&q)) {
-   pfd.events &= ~POLLOUT;
-   continue;
-   }
-
-   entp = TAILQ_FIRST(&q);
-   assert(entp != NULL);
-
-   if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
-   err(1, NULL);
-   io_simple_buffer(b, &entp->type, sizeof(entp->type));
-
-   switch (entp->type) {
-   case RTYPE_TAL:
-   if ((tal = tal_parse(entp->uri, entp->descr)) == NULL)
-   goto out;
-   tal_buffer(b, tal);
-   tal_free(tal);
-   break;
-   case RTYPE_CER:
-   if (entp->has_pkey)
-   cert = proc_parser_root_cert(entp, store, ctx,
-   &auths, &crlt);
-   else
-   cert = proc_parser_cert(entp, store, ctx,
-   &auths, &crlt);
-   c = (cert != NULL);
-   io_simple_buffer(b, &c, sizeof(int));
-   if (cert != NULL)
-   cert_buffer(b, cert);
-   /*
-* The parsed certificate data "cert" is now
-* managed in the "auths" table, so don't free
-* it here (see the loop after "out").
-*/
-   break;
-   case RTYPE_MFT:
-   mft = proc_parser_mft(entp, store, ctx, &auths, &crlt);
-   c = (mft != NULL);
-   io_simple_buffer(b, &c, sizeof(int));
-   if (mft != NULL)
-   mft_buffer(b, mft);
-   mft_free(mft);
-   break;
-   case RTYPE_CRL:
-   proc_parser_crl(entp, store, ctx, &crlt);
-   break;
-   case RTYPE_ROA:
-   roa = proc_parser_roa(entp, store, ctx, &auths, &crlt);
-   c = (roa != NULL);
-   io_simple_buffer(b, &c, sizeof(int));
-   if (roa != NULL)
-   roa_buffer(b, roa);
-   roa_free(roa);
-   break;
-   case RTYPE_GBR:
-   proc_parser_gbr(entp, store, ctx, &auths, &crlt);
-   break;
-   default:
-   abort();
-   }
-
-   ibuf_close(&msgq, b);
-   TAILQ_REMOVE(&q, entp, entries);
-   entity_free(entp);
-   }
-
-   rc = 0;
-out:
-   while ((entp = TAILQ_FIRST(&q)) != NULL) {
-   TAILQ_REMOVE(&q, entp, entries);
-   entity_free(entp);
-   }
-
-   /* XXX free auths and crl tree */
-
-   X509_STORE_CTX_free(ctx);
-   X509_STORE_free(store);
-
-

Re: rpki-client factor out the parser code into own module

2021-02-04 Thread Claudio Jeker
On Wed, Feb 03, 2021 at 10:20:47PM +0100, Theo Buehler wrote:
> On Wed, Feb 03, 2021 at 08:08:20PM +0100, Claudio Jeker wrote:
> > This is just shuffling code around and adds a few definitions to extern.h.
> > The goal is to reduce the amount of code in main.c. I constantly get lost
> > in all the parsing and parent functions. Also, I want to extend the
> > repository code, and this makes space for that.
> > 
> > Compiles and works for me :)
> 
> ok tb
> 
> I think you can garbage collect all openssl includes in main.c.
>  and  also seem unused.

 is needed for fts_open(3) according to the man page.
I did remove  and added .
 
> In parser.c I would add openssl/asn1.h (e.g. for the sk_ business) and
> use x509.h instead of x509v3.h.

Done, I also removed evp.h since there is no EVP usage in parser.c

-- 
:wq Claudio



rpki-client, simplify main process

2021-02-04 Thread Claudio Jeker
Instead of passing around variables all the way down to entity_write_req()
and repo_lookup() use global variables. Especially for the repository
handling this will become more complex with the introduction of RRDP.
Also shuffle code around a bit so that all entity queue functions are
together.

OK?
-- 
:wq Claudio

Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.95
diff -u -p -r1.95 main.c
--- main.c  4 Feb 2021 09:57:37 -   1.95
+++ main.c  4 Feb 2021 10:33:48 -
@@ -114,10 +114,9 @@ filepathcmp(struct filepath *a, struct f
 
 RB_HEAD(filepath_tree, filepath);
 RB_PROTOTYPE(filepath_tree, filepath, entry, filepathcmp);
-struct filepath_tree  fpt = RB_INITIALIZER(&fpt);
 
-static voidentityq_flush(struct msgbuf *, struct entityq *,
-   const struct repo *);
+static struct filepath_treefpt = RB_INITIALIZER(&fpt);
+static struct msgbuf   procq, rsyncq;
 
 const char *bird_tablename = "ROAS";
 
@@ -204,34 +203,23 @@ entity_read_req(int fd, struct entity *e
 }
 
 /*
- * Like entity_write_req() but into a buffer.
+ * Write the queue entity.
  * Matched by entity_read_req().
  */
 static void
-entity_buffer_req(struct ibuf *b, const struct entity *ent)
+entity_write_req(const struct entity *ent)
 {
+   struct ibuf *b;
 
+   if ((b = ibuf_dynamic(sizeof(*ent), UINT_MAX)) == NULL)
+   err(1, NULL);
io_simple_buffer(b, &ent->type, sizeof(ent->type));
io_str_buffer(b, ent->uri);
io_simple_buffer(b, &ent->has_pkey, sizeof(int));
if (ent->has_pkey)
io_buf_buffer(b, ent->pkey, ent->pkeysz);
io_str_buffer(b, ent->descr);
-}
-
-/*
- * Write the queue entity.
- * Simply a wrapper around entity_buffer_req().
- */
-static void
-entity_write_req(struct msgbuf *msgq, const struct entity *ent)
-{
-   struct ibuf *b;
-
-   if ((b = ibuf_dynamic(sizeof(*ent), UINT_MAX)) == NULL)
-   err(1, NULL);
-   entity_buffer_req(b, ent);
-   ibuf_close(msgq, b);
+   ibuf_close(&procq, b);
 }
 
 /*
@@ -239,24 +227,67 @@ entity_write_req(struct msgbuf *msgq, co
  * repo, then flush those into the parser process.
  */
 static void
-entityq_flush(struct msgbuf *msgq, struct entityq *q, const struct repo *repo)
+entityq_flush(struct entityq *q, const struct repo *repo)
 {
struct entity   *p, *np;
 
TAILQ_FOREACH_SAFE(p, q, entries, np) {
if (p->repo < 0 || repo->id != (size_t)p->repo)
continue;
-   entity_write_req(msgq, p);
+   entity_write_req(p);
TAILQ_REMOVE(q, p, entries);
entity_free(p);
}
 }
 
 /*
+ * Add the heap-allocated file to the queue for processing.
+ */
+static void
+entityq_add(struct entityq *q, char *file, enum rtype type,
+const struct repo *rp, const unsigned char *pkey, size_t pkeysz,
+char *descr)
+{
+   struct entity   *p;
+
+   if ((p = calloc(1, sizeof(struct entity))) == NULL)
+   err(1, "calloc");
+
+   p->type = type;
+   p->uri = file;
+   p->repo = (rp != NULL) ? (ssize_t)rp->id : -1;
+   p->has_pkey = pkey != NULL;
+   if (p->has_pkey) {
+   p->pkeysz = pkeysz;
+   if ((p->pkey = malloc(pkeysz)) == NULL)
+   err(1, "malloc");
+   memcpy(p->pkey, pkey, pkeysz);
+   }
+   if (descr != NULL)
+   if ((p->descr = strdup(descr)) == NULL)
+   err(1, "strdup");
+
+   filepath_add(file);
+
+   entity_queue++;
+
+   /*
+* Write to the queue if there's no repo or the repo has already
+* been loaded else enqueue it for later.
+*/
+
+   if (rp == NULL || rp->loaded) {
+   entity_write_req(p);
+   entity_free(p);
+   } else
+   TAILQ_INSERT_TAIL(q, p, entries);
+}
+
+/*
  * Look up a repository, queueing it for discovery if not found.
  */
 static const struct repo *
-repo_lookup(struct msgbuf *msgq, const char *uri)
+repo_lookup(const char *uri)
 {
const char  *host, *mod;
size_t   hostsz, modsz, i;
@@ -303,7 +334,7 @@ repo_lookup(struct msgbuf *msgq, const c
io_simple_buffer(b, &rp->id, sizeof(rp->id));
io_str_buffer(b, local);
io_str_buffer(b, rp->repo);
-   ibuf_close(msgq, b);
+   ibuf_close(&rsyncq, b);
free(local);
} else {
rp->loaded = 1;
@@ -329,54 +360,11 @@ repo_filename(const struct repo *repo, c
 }
 
 /*
- * Add the heap-allocated file to the queue for processing.
- */
-static void
-entityq_add(struct msgbuf *msgq, struct entityq *q, char *file, enum rtype 
type,
-const struct repo *rp, const unsigned char *pkey, size_t pkeysz,
-char *descr)
-{
-   struct entity   

Re: tcpbench -D

2021-02-04 Thread Claudio Jeker
On Thu, Feb 04, 2021 at 11:45:26AM +0100, Alexander Bluhm wrote:
> Hi,
> 
> I would like to analyse tcpbench(1) TCP connections.  So I copied
> the nc -D socket debug option.
> 
> ok?

Fine with me. OK claudio@
 
> Index: usr.bin/tcpbench/tcpbench.1
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/tcpbench/tcpbench.1,v
> retrieving revision 1.28
> diff -u -p -r1.28 tcpbench.1
> --- usr.bin/tcpbench/tcpbench.1   4 May 2020 12:13:09 -   1.28
> +++ usr.bin/tcpbench/tcpbench.1   3 Feb 2021 22:52:07 -
> @@ -24,7 +24,7 @@
>  .Nm
>  .Fl l
>  .Nm
> -.Op Fl 46RUuv
> +.Op Fl 46DRUuv
>  .Op Fl B Ar buf
>  .Op Fl b Ar sourceaddr
>  .Op Fl k Ar kvars
> @@ -39,7 +39,7 @@
>  .Nm
>  .Bk -words
>  .Fl s
> -.Op Fl 46Uuv
> +.Op Fl 46DUuv
>  .Op Fl B Ar buf
>  .Op Fl k Ar kvars
>  .Op Fl p Ar port
> @@ -111,6 +111,8 @@ stream.
>  .It Fl b Ar sourceaddr
>  Specify the IP address to send the packets from,
>  which is useful on machines with multiple interfaces.
> +.It Fl D
> +Enable debugging on the socket.
>  .It Fl k Ar kvars
>  Specify one or more kernel variables to monitor; multiple variables must be
>  separated with commas.
> Index: usr.bin/tcpbench/tcpbench.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/usr.bin/tcpbench/tcpbench.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 tcpbench.c
> --- usr.bin/tcpbench/tcpbench.c   4 May 2020 12:13:09 -   1.63
> +++ usr.bin/tcpbench/tcpbench.c   4 Feb 2021 10:37:42 -
> @@ -65,6 +65,7 @@
>  
>  /* Our tcpbench globals */
>  struct {
> + int   Dflag;/* Socket debug */
>   int   Sflag;/* Socket buffer size */
>   u_int rflag;/* Report rate (ms) */
>   int   sflag;/* True if server */
> @@ -198,10 +199,10 @@ usage(void)
>  {
>   fprintf(stderr,
>   "usage: tcpbench -l\n"
> - "   tcpbench [-46RUuv] [-B buf] [-b sourceaddr] [-k kvars] [-n 
> connections]\n"
> + "   tcpbench [-46DRUuv] [-B buf] [-b sourceaddr] [-k kvars] [-n 
> connections]\n"
>   "[-p port] [-r interval] [-S space] [-T 
> toskeyword]\n"
>   "[-t secs] [-V rtable] hostname\n"
> - "   tcpbench -s [-46Uuv] [-B buf] [-k kvars] [-p port] [-r 
> interval]\n"
> + "   tcpbench -s [-46DUuv] [-B buf] [-k kvars] [-p port] [-r 
> interval]\n"
>   "[-S space] [-T toskeyword] [-V rtable] 
> [hostname]\n");
>   exit(1);
>  }
> @@ -857,6 +858,11 @@ server_init(struct addrinfo *aitop)
>   warn("socket");
>   continue;
>   }
> + if (ptb->Dflag) {
> + if (setsockopt(sock, SOL_SOCKET, SO_DEBUG,
> + &ptb->Dflag, sizeof(ptb->Dflag)))
> + err(1, "setsockopt SO_DEBUG");
> + }
>   if (ptb->Tflag != -1 && ai->ai_family == AF_INET) {
>   if (setsockopt(sock, IPPROTO_IP, IP_TOS,
>   &ptb->Tflag, sizeof(ptb->Tflag)))
> @@ -970,6 +976,11 @@ client_init(struct addrinfo *aitop, int 
>   warn("socket");
>   continue;
>   }
> + if (ptb->Dflag) {
> + if (setsockopt(sock, SOL_SOCKET, SO_DEBUG,
> + &ptb->Dflag, sizeof(ptb->Dflag)))
> + err(1, "setsockopt SO_DEBUG");
> + }
>   if (aib != NULL) {
>   saddr_ntop(aib->ai_addr, aib->ai_addrlen,
>   tmp, sizeof(tmp));
> @@ -1138,6 +1149,7 @@ main(int argc, char **argv)
>   setvbuf(stdout, NULL, _IOLBF, 0);
>   ptb = &tcpbench;
>   ptb->dummybuf_len = 0;
> + ptb->Dflag = 0;
>   ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Rflag = ptb->Uflag = 0;
>   ptb->kvmh  = NULL;
>   ptb->kvars = NULL;
> @@ -1147,7 +1159,8 @@ main(int argc, char **argv)
>   aib = NULL;
>   secs = 0;
>  
> - while ((ch = getopt(argc, argv, "46b:B:hlk:n:p:Rr:sS:t:T:uUvV:")) != 
> -1) {
> + while ((ch = getopt(argc, argv, "46b:B:Dhlk:n:p:Rr:sS:t:T:uUvV:"))
> + != -1) {
>   switch (ch) {
>   case '4':
>   family = PF_INET;
> @@ -1157,6 +1170,9 @@ main(int argc, char **argv)
>   break;
>   case 'b':
>   srcbind = optarg;
> + break;
> + case 'D':
> + ptb->Dflag = 1;
>   break;
>   case 'l':
>   list_kvars();
> 

-- 
:wq Claudio



Re: reference trpt(8) in getsockopt(2)

2021-02-04 Thread Claudio Jeker
On Thu, Feb 04, 2021 at 12:30:17PM +0100, Alexander Bluhm wrote:
> Hi,
> 
> I always forget the name of trpt(8).  It should be referenced in the
> SO_DEBUG section of getsockopt(2).
> 
> ok?

Yes please. Also should we export the tcp_debug buffer via sysctl so that
trpt can run without kern.allowkmem?

> 
> Index: lib/libc/sys/getsockopt.2
> ===
> RCS file: /data/mirror/openbsd/cvs/src/lib/libc/sys/getsockopt.2,v
> retrieving revision 1.56
> diff -u -p -r1.56 getsockopt.2
> --- lib/libc/sys/getsockopt.2 22 Jul 2019 15:34:07 -  1.56
> +++ lib/libc/sys/getsockopt.2 4 Feb 2021 10:52:16 -
> @@ -182,6 +182,8 @@ get the protocol of the socket (get only
>  .Pp
>  .Dv SO_DEBUG
>  enables debugging in the underlying protocol modules.
> +Transliterate the protocol trace with
> +.Xr trpt 8 .
>  .Dv SO_REUSEADDR
>  indicates that the rules used in validating addresses supplied in a
>  .Xr bind 2
> 

-- 
:wq Claudio



rpki-client call a file a file

2021-02-04 Thread Claudio Jeker
The uri field in the entity queue struct is never a URI but always a local
path to the file in the repo. Rename the field so I'm less confused.
Compiler agrees with my change.

-- 
:wq Claudio

? http.c
? http.diff
? obj
Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.40
diff -u -p -r1.40 extern.h
--- extern.h4 Feb 2021 08:10:24 -   1.40
+++ extern.h4 Feb 2021 14:07:02 -
@@ -265,7 +265,7 @@ enum rtype {
  */
 struct entity {
enum rtype   type; /* type of entity (not RTYPE_EOF) */
-   char*uri; /* file or rsync:// URI */
+   char*file; /* local path to file */
ssize_t  repo; /* repo index or <0 if w/o repo */
int  has_pkey; /* whether pkey/sz is specified */
unsigned char   *pkey; /* public key (optional) */
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.96
diff -u -p -r1.96 main.c
--- main.c  4 Feb 2021 13:38:27 -   1.96
+++ main.c  4 Feb 2021 14:07:02 -
@@ -180,7 +180,7 @@ entity_free(struct entity *ent)
return;
 
free(ent->pkey);
-   free(ent->uri);
+   free(ent->file);
free(ent->descr);
free(ent);
 }
@@ -195,7 +195,7 @@ entity_read_req(int fd, struct entity *e
 {
 
io_simple_read(fd, &ent->type, sizeof(enum rtype));
-   io_str_read(fd, &ent->uri);
+   io_str_read(fd, &ent->file);
io_simple_read(fd, &ent->has_pkey, sizeof(int));
if (ent->has_pkey)
io_buf_read_alloc(fd, (void **)&ent->pkey, &ent->pkeysz);
@@ -214,7 +214,7 @@ entity_write_req(const struct entity *en
if ((b = ibuf_dynamic(sizeof(*ent), UINT_MAX)) == NULL)
err(1, NULL);
io_simple_buffer(b, &ent->type, sizeof(ent->type));
-   io_str_buffer(b, ent->uri);
+   io_str_buffer(b, ent->file);
io_simple_buffer(b, &ent->has_pkey, sizeof(int));
if (ent->has_pkey)
io_buf_buffer(b, ent->pkey, ent->pkeysz);
@@ -254,7 +254,7 @@ entityq_add(struct entityq *q, char *fil
err(1, "calloc");
 
p->type = type;
-   p->uri = file;
+   p->file = file;
p->repo = (rp != NULL) ? (ssize_t)rp->id : -1;
p->has_pkey = pkey != NULL;
if (p->has_pkey) {
Index: parser.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/parser.c,v
retrieving revision 1.3
diff -u -p -r1.3 parser.c
--- parser.c4 Feb 2021 08:58:19 -   1.3
+++ parser.c4 Feb 2021 14:07:02 -
@@ -57,10 +57,10 @@ proc_parser_roa(struct entity *entp,
STACK_OF(X509)  *chain;
STACK_OF(X509_CRL)  *crls;
 
-   if ((roa = roa_parse(&x509, entp->uri)) == NULL)
+   if ((roa = roa_parse(&x509, entp->file)) == NULL)
return NULL;
 
-   a = valid_ski_aki(entp->uri, auths, roa->ski, roa->aki);
+   a = valid_ski_aki(entp->file, auths, roa->ski, roa->aki);
 
build_chain(a, &chain);
build_crls(a, crlt, &crls);
@@ -76,7 +76,7 @@ proc_parser_roa(struct entity *entp,
c = X509_STORE_CTX_get_error(ctx);
X509_STORE_CTX_cleanup(ctx);
if (verbose > 0 || c != X509_V_ERR_UNABLE_TO_GET_CRL)
-   warnx("%s: %s", entp->uri,
+   warnx("%s: %s", entp->file,
X509_verify_cert_error_string(c));
X509_free(x509);
roa_free(roa);
@@ -94,7 +94,7 @@ proc_parser_roa(struct entity *entp,
 * the code around roa_read() to check the "valid" field itself.
 */
 
-   if (valid_roa(entp->uri, auths, roa))
+   if (valid_roa(entp->file, auths, roa))
roa->valid = 1;
 
return roa;
@@ -120,10 +120,10 @@ proc_parser_mft(struct entity *entp, X50
struct auth *a;
STACK_OF(X509)  *chain;
 
-   if ((mft = mft_parse(&x509, entp->uri)) == NULL)
+   if ((mft = mft_parse(&x509, entp->file)) == NULL)
return NULL;
 
-   a = valid_ski_aki(entp->uri, auths, mft->ski, mft->aki);
+   a = valid_ski_aki(entp->file, auths, mft->ski, mft->aki);
build_chain(a, &chain);
 
if (!X509_STORE_CTX_init(ctx, store, x509, chain))
@@ -135,7 +135,7 @@ proc_parser_mft(struct entity *entp, X50
if (X509_verify_cert(ctx) <= 0) {
c = X509_STORE_CTX_get_error(ctx);
X509_STORE_CTX_cleanup(ctx);
-   warnx("%s: %s", entp->uri, X509_verify_cert_error_string(c));
+   warnx("%s: %s", entp->file, X509_verify_cert_error_string(c));
mft_free(mft);
X509_free(x509);
sk_X509_free(chain);
@@ -146,7 +146,7 @@ proc_parser_mft(struct 

rpki-client remove debug code

2021-02-04 Thread Claudio Jeker
This bit of debug code to understand the progress of rpki-client is no
longer helpful. Most of the time this is a stuck rsync that causes delays
and those are now nicely handled by an internal timeout.
I propose to remove this.

-- 
:wq Claudio

Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.97
diff -u -p -r1.97 main.c
--- main.c  4 Feb 2021 14:32:01 -   1.97
+++ main.c  4 Feb 2021 17:44:30 -
@@ -738,7 +738,7 @@ main(int argc, char *argv[])
 {
int  rc = 1, c, proc, st, rsync,
 fl = SOCK_STREAM | SOCK_CLOEXEC;
-   size_t   i, j, outsz = 0, talsz = 0;
+   size_t   i, outsz = 0, talsz = 0;
pid_tprocpid, rsyncpid;
int  fd[2];
struct entityq   q;
@@ -952,24 +952,10 @@ main(int argc, char *argv[])
if (procq.queued)
pfd[1].events = POLLOUT;
 
-   if ((c = poll(pfd, 2, verbose ? 1 : INFTIM)) == -1) {
+   if ((c = poll(pfd, 2, INFTIM)) == -1) {
if (errno == EINTR)
continue;
err(1, "poll");
-   }
-
-   /* Debugging: print some statistics if we stall. */
-
-   if (c == 0) {
-   for (i = j = 0; i < rt.reposz; i++)
-   if (!rt.repos[i].loaded) {
-   logx("pending repo %s",
-   rt.repos[i].local);
-   j++;
-   }
-   logx("period stats: %zu pending repos", j);
-   logx("period stats: %zu pending entries", entity_queue);
-   continue;
}
 
if ((pfd[0].revents & (POLLERR|POLLNVAL)) ||



rpki-client parse and check caRepository Subject Information Access

2021-02-05 Thread Claudio Jeker
RPKI certificates have 3 possible Subject Information Access URIs that we
may be interested in:
- 1.3.6.1.5.5.7.48.5 (caRepository)
- 1.3.6.1.5.5.7.48.10 (rpkiManifest)
- 1.3.6.1.5.5.7.48.13 (rpkiNotify)

rpkiManifest points to the .mft file inside the caRepository.
Because of this, caRepository is the base URI for all the files below
this certificate. rpkiNotify points to an RRDP endpoint where the XML
data also contains URIs that again need to match the caRepository. If not,
something strange is going on.

Since the caRepository data is useful, extract it from the cert and also
do a simple strstr() check to ensure that rpkiManifest starts with
caRepository.

Currently the data is not used further than that but I want to add it to
the repository information as a next step.
-- 
:wq Claudio

Index: cert.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
retrieving revision 1.24
diff -u -p -r1.24 cert.c
--- cert.c  4 Feb 2021 08:58:19 -   1.24
+++ cert.c  5 Feb 2021 13:26:14 -
@@ -194,6 +194,7 @@ sbgp_sia_resource_mft(struct parse *p,
p->fn);
return 0;
}
+
if (strcasecmp(d + dsz - 4, ".mft") != 0) {
warnx("%s: RFC 6487 section 4.8.8: SIA: "
"invalid rsync URI suffix", p->fn);
@@ -214,6 +215,43 @@ sbgp_sia_resource_mft(struct parse *p,
 }
 
 /*
+ * Parse the SIA manifest, 4.8.8.1.
+ * Returns zero on failure, non-zero on success.
+ */
+static int
+sbgp_sia_resource_carepo(struct parse *p,
+   const unsigned char *d, size_t dsz)
+{
+   size_t i;
+
+   if (p->res->repo != NULL) {
+   warnx("%s: RFC 6487 section 4.8.8: SIA: "
+   "CA repository already specified", p->fn);
+   return 0;
+   }
+
+   /* Make sure it's an rsync:// address. */
+   if (dsz <= 8 || strncasecmp(d, "rsync://", 8)) {
+   warnx("%s: RFC 6487 section 4.8.8: not using rsync schema",
+   p->fn);
+   return 0;
+   }
+
+   /* make sure only US-ASCII chars are in the URL */
+   for (i = 0; i < dsz; i++) {
+   if (isalnum(d[i]) || ispunct(d[i]))
+   continue;
+   warnx("%s: invalid URI", p->fn);
+   return 0;
+   }
+
+   if ((p->res->repo = strndup((const char *)d, dsz)) == NULL)
+   err(1, NULL);
+
+   return 1;
+}
+
+/*
  * Parse the SIA entries, 4.8.8.1.
  * There may be multiple different resources at this location, so throw
  * out all but the matching resource type. Currently only two entries
@@ -271,11 +309,13 @@ sbgp_sia_resource_entry(struct parse *p,
/*
 * Ignore all but manifest and RRDP notify URL.
 * Things we may see:
+*  - 1.3.6.1.5.5.7.48.5 (caRepository)
 *  - 1.3.6.1.5.5.7.48.10 (rpkiManifest)
 *  - 1.3.6.1.5.5.7.48.13 (rpkiNotify)
-*  - 1.3.6.1.5.5.7.48.5 (CA repository)
 */
-   if (strcmp(buf, "1.3.6.1.5.5.7.48.10") == 0)
+   if (strcmp(buf, "1.3.6.1.5.5.7.48.5") == 0)
+   rc = sbgp_sia_resource_carepo(p, d, plen);
+   else if (strcmp(buf, "1.3.6.1.5.5.7.48.10") == 0)
rc = sbgp_sia_resource_mft(p, d, plen);
else if (strcmp(buf, "1.3.6.1.5.5.7.48.13") == 0)
rc = sbgp_sia_resource_notify(p, d, plen);
@@ -317,6 +357,12 @@ sbgp_sia_resource(struct parse *p, const
goto out;
}
 
+   if (strstr(p->res->mft, p->res->repo) != p->res->mft) {
+   warnx("%s: RFC 6487 section 4.8.8: SIA: "
+   "conflicting URIs for caRepository and rpkiManifest",
+   p->fn);
+   goto out;
+   }
rc = 1;
 out:
sk_ASN1_TYPE_pop_free(seq, ASN1_TYPE_free);
@@ -1172,6 +1218,7 @@ cert_free(struct cert *p)
return;
 
free(p->crl);
+   free(p->repo);
free(p->mft);
free(p->notify);
free(p->ips);
@@ -1230,6 +1277,7 @@ cert_buffer(struct ibuf *b, const struct
 
io_str_buffer(b, p->mft);
io_str_buffer(b, p->notify);
+   io_str_buffer(b, p->repo);
io_str_buffer(b, p->crl);
io_str_buffer(b, p->aki);
io_str_buffer(b, p->ski);
@@ -1297,6 +1345,7 @@ cert_read(int fd)
io_str_read(fd, &p->mft);
assert(p->mft);
io_str_read(fd, &p->notify);
+   io_str_read(fd, &p->repo);
io_str_read(fd, &p->crl);
io_str_read(fd, &p->aki);
io_str_read(fd, &p->ski);
Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.41
diff -u -p -r1.41 extern.h
--- extern.h4 Feb 2021 14:32:01 -   1.41
+++ extern.h5 Feb 2021 13:20:29 -
@@ -112,6 +112,7 @@ struct cert {
size_t   ipsz; /* length of "ips" */
struct ce

change rpki-client repository code

2021-02-08 Thread Claudio Jeker
Split the repository code into two parts:

- fetch of the trust anchors (the certs referenced by TAL files)
- fetch of the MFT files of a repository

While the two things kind of look similar there are some differences.

- TA files are loaded via rsync or https URI (only one file needs to be
  loaded)
- MFT files need everything inside the repository to be loaded since they
  refer to other files (.roa, .cer, .crl). These repositories are
  synced once with rsync and many MFTs may be part of a repo. Also, these
  repositories can be synced via rsync or RRDP.

To simplify these diverse options it is time to split the code up.
Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup
code into subfunctions repo_alloc() and repo_fetch() (both are also used
by ta_lookup()). Use the caRepository URI to figure out the base URI.
Simplify rsync_uri_parse() into rsync_base_uri(), which clips off excess
directories from the URI (else thousands of individual rsync calls would
be made against the RIR's CA repos).

The big change is that the layout of the cache directory is changed.
The cache will now have two base directories:
- ta/ (for all trust anchors)
- rsync/ (for all other repositories)

-- 
:wq Claudio

Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.42
diff -u -p -r1.42 extern.h
--- extern.h8 Feb 2021 09:22:53 -   1.42
+++ extern.h8 Feb 2021 13:44:22 -
@@ -392,9 +392,7 @@ void proc_parser(int) __attribute__((n
 
 /* Rsync-specific. */
 
-int rsync_uri_parse(const char **, size_t *,
-   const char **, size_t *, const char **, size_t *,
-   enum rtype *, const char *);
+char   *rsync_base_uri(const char *);
 voidproc_rsync(char *, char *, int) __attribute__((noreturn));
 
 /* Logging (though really used for OpenSSL errors). */
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.98
diff -u -p -r1.98 main.c
--- main.c  5 Feb 2021 12:26:52 -   1.98
+++ main.c  8 Feb 2021 13:50:20 -
@@ -78,11 +78,12 @@
  * An rsync repository.
  */
 struct repo {
-   char*repo;  /* repository rsync URI */
-   char*local; /* local path name */
-   char*notify; /* RRDB notify URI if available */
-   size_t   id; /* identifier (array index) */
-   int  loaded; /* whether loaded or not */
+   char*repouri;   /* CA repository base URI */
+   char*local; /* local path name */
+   char*uris[2];   /* URIs to fetch from */
+   size_t   id;/* identifier (array index) */
+   int  uriidx;/* which URI is fetched */
+   int  loaded;/* whether loaded or not */
 };
 
 size_t entity_queue;
@@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil
 }
 
 /*
- * Look up a repository, queueing it for discovery if not found.
+ * Allocat a new repository be extending the repotable.
  */
-static const struct repo *
-repo_lookup(const char *uri)
+static struct repo *
+repo_alloc(void)
 {
-   const char  *host, *mod;
-   size_t   hostsz, modsz, i;
-   char*local;
-   struct repo *rp;
-   struct ibuf *b;
-
-   if (!rsync_uri_parse(&host, &hostsz,
-   &mod, &modsz, NULL, NULL, NULL, uri))
-   errx(1, "%s: malformed", uri);
-
-   if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
-   (int)modsz, mod) == -1)
-   err(1, "asprintf");
-
-   /* Look up in repository table. */
-
-   for (i = 0; i < rt.reposz; i++) {
-   if (strcmp(rt.repos[i].local, local))
-   continue;
-   free(local);
-   return &rt.repos[i];
-   }
+   struct repo *rp;
 
rt.repos = reallocarray(rt.repos,
rt.reposz + 1, sizeof(struct repo));
@@ -320,28 +300,99 @@ repo_lookup(const char *uri)
rp = &rt.repos[rt.reposz++];
memset(rp, 0, sizeof(struct repo));
rp->id = rt.reposz - 1;
-   rp->local = local;
 
-   if ((rp->repo = strndup(uri, mod + modsz - uri)) == NULL)
-   err(1, "strdup");
+   return rp;
+}
 
-   if (!noop) {
-   if (asprintf(&local, "%s", rp->local) == -1)
-   err(1, "asprintf");
-   logx("%s: pulling from network", local);
-   if ((b = ibuf_dynamic(256, UINT_MAX)) == NULL)
-   err(1, NULL);
-   io_simple_buffer(b, &rp->id, sizeof(rp->id));
-   io_str_buffer(b, local);
-   io_str_buffer(b, rp->repo);
-   ibuf_close(&rsyncq, b);
-   free(local);

Re: diff: tcp ack improvement

2021-02-08 Thread Claudio Jeker
On Mon, Feb 08, 2021 at 07:46:46PM +0100, Alexander Bluhm wrote:
> On Mon, Feb 08, 2021 at 07:03:59PM +0100, Jan Klemkow wrote:
> > On Mon, Feb 08, 2021 at 03:42:54PM +0100, Alexander Bluhm wrote:
> > > On Wed, Feb 03, 2021 at 11:20:04AM +0100, Claudio Jeker wrote:
> > > > Just commit it. OK claudio@
> > > > If people see problems we can back it out again.
> > > 
> > > This has huge impact on TCP performance.
> > > 
> > > http://bluhm.genua.de/perform/results/2021-02-07T00%3A01%3A40Z/perform.html
> > > 
> > > For a single TCP connection between two OpenBSD boxes, throughput
> > > drops by 77% from 3.1 GBit/sec to 710 MBit/sec.  But with 100
> > > parallel connections the throughput overall increases by 5%.
> > 
> > For single connections our kernel is limited to send out 4 max TCP
> > segments.  I don't see that, because I just measured with 10 and 30
> > streams in parallel.
> > 
> > FreeBSD disabled it 20 years ago.
> > https://github.com/freebsd/freebsd-src/commit/d912c694ee00de5ea0f46743295a0fc603cab562
> 
> TCP_MAXBURST was added together with SACK in rev 1.12 of tcp_output.c
> to our code base.
> 
> 
> revision 1.12
> date: 1998/11/17 19:23:02;  author: provos;  state: Exp;  lines: +239 -14;
> NewReno, SACK and FACK support for TCP, adapted from code for BSDI
> by Hari Balakrishnan (h...@lcs.mit.edu), Tom Henderson (t...@cs.berkeley.edu)
> and Venkat Padmanabhan (padma...@cs.berkeley.edu) as part of the
> Daedalus research group at the University of California,
> (http://daedalus.cs.berkeley.edu). [I was able to do this on time spent
> at the Center for Information Technology Integration (citi.umich.edu)]
> 
> 
> > I would suggest to remove the whole feature.
> 
> Sending 4 segments per call to tcp_output() cannot scale.  Bandwidth
> increases, window size grows, but segment size is 1500 for decades.
> 
> With this diff on top of jan's delay ACK behavior I get 4.1 GBit/sec
> over a single TCP connection using tcpbench -S100.  Before both
> changes it was only 3.0.
> 
> I recommend removing TCP_MAXBURST like FreeBSD did.
> 

I agree that this maxburst limit is no longer adequate. TCP New Reno
RFC6582 has the following:

   In Section 3.2, step 3 above, it is noted that implementations should
   take measures to avoid a possible burst of data when leaving fast
   recovery, in case the amount of new data that the sender is eligible
   to send due to the new value of the congestion window is large.  This
   can arise during NewReno when ACKs are lost or treated as pure window
   updates, thereby causing the sender to underestimate the number of
   new segments that can be sent during the recovery procedure.
   Specifically, bursts can occur when the FlightSize is much less than
   the new congestion window when exiting from fast recovery.  One
   simple mechanism to avoid a burst of data when leaving fast recovery
   is to limit the number of data packets that can be sent in response
   to a single acknowledgment.  (This is known as "maxburst_" in ns-2
   [NS].)  Other possible mechanisms for avoiding bursts include rate-
   based pacing, or setting the slow start threshold to the resultant
   congestion window and then resetting the congestion window to
   FlightSize.  A recommendation on the general mechanism to avoid
   excessively bursty sending patterns is outside the scope of this
   document.

While I agree that bursts need to be limited I think the implementation of
TCP_MAXBURST is bad. Since FreeBSD removed the code I guess nobody really
ran into issues of additional packet loss because of the bursts. So go
ahead and remove it. OK claudio@

-- 
:wq Claudio



ocspcheck try all returned addresses from getaddrinfo

2021-02-09 Thread Claudio Jeker
Running regress/usr.sbin/ocspcheck with a resolv.conf that has
'family inet6 inet4' fails because ocspcheck only tries to contact ::1.
The following diff fixes the issue by not breaking out early from the
getaddrinfo loop over the results. With this the regress test works
and I guess it may help in some real life cases as well.

OK?
-- 
:wq Claudio

Index: ocspcheck.c
===
RCS file: /cvs/src/usr.sbin/ocspcheck/ocspcheck.c,v
retrieving revision 1.28
diff -u -p -r1.28 ocspcheck.c
--- ocspcheck.c 16 Oct 2020 01:16:55 -  1.28
+++ ocspcheck.c 9 Feb 2021 09:49:54 -
@@ -113,7 +113,6 @@ host_dns(const char *s, struct addr vec[
 
dspew("DNS returns %s for %s\n", vec[vecsz].ip, s);
vecsz++;
-   break;
}
 
freeaddrinfo(res0);



Re: RTR support for bgpd

2021-02-10 Thread Claudio Jeker
On Tue, Jan 26, 2021 at 10:31:40AM +0100, Claudio Jeker wrote:
> This diff adds initial RTR (RPKI to Router) support to bgpd.
> Instead of loading the roa-set table via the configuration bgpd will use
> RTR to load the RPKI table from one or multiple RTR servers.
> This has the benefit that in large setups only a few systems need to run
> rpki-client instead of running it on every router.
> 
> Currently only RTR via TCP is supported. Basic 'bgpctl show rtr' output is
> available to monitor sessions and 'bgpctl show sets' also shows the right
> info. There is a lot more that can be added here but this diff is already
> big enough.
> 
> Enjoy

I know this is a large diff but I would like to have somebody review it
before commit. This will affect any kind of RPKI usage via roa-set or rtr
session.

-- 
:wq Claudio

Index: bgpctl/bgpctl.8
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.8,v
retrieving revision 1.95
diff -u -p -r1.95 bgpctl.8
--- bgpctl/bgpctl.8 10 May 2020 13:38:46 -  1.95
+++ bgpctl/bgpctl.8 26 Jan 2021 08:42:39 -
@@ -33,7 +33,7 @@ program controls the
 .Xr bgpd 8
 daemon.
 Commands may be abbreviated to the minimum unambiguous prefix; for example,
-.Cm s s
+.Cm s su
 for
 .Cm show summary .
 .Pp
@@ -409,6 +409,18 @@ or
 Multiple options can be used at the same time and the
 .Ar neighbor
 filter can be combined with other filters.
+.It Cm show rtr
+Show a list of all
+.Em RTR
+sessions, including information about the session state.
+.It Cm show sets
+Show a list summarizing all
+.Em roa-set ,
+.Em as-set ,
+.Em prefix-set ,
+and
+.Em origin-set
+tables.
 .It Cm show summary
 Show a list of all neighbors, including information about the session state
 and message counters:
Index: bgpctl/bgpctl.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/bgpctl.c,v
retrieving revision 1.264
diff -u -p -r1.264 bgpctl.c
--- bgpctl/bgpctl.c 30 Dec 2020 07:31:19 -  1.264
+++ bgpctl/bgpctl.c 25 Jan 2021 18:06:13 -
@@ -216,6 +216,9 @@ main(int argc, char *argv[])
case SHOW_SET:
imsg_compose(ibuf, IMSG_CTL_SHOW_SET, 0, 0, -1, NULL, 0);
break;
+   case SHOW_RTR:
+   imsg_compose(ibuf, IMSG_CTL_SHOW_RTR, 0, 0, -1, NULL, 0);
+   break;
case SHOW_NEIGHBOR:
case SHOW_NEIGHBOR_TIMERS:
case SHOW_NEIGHBOR_TERSE:
@@ -393,18 +396,19 @@ int
 show(struct imsg *imsg, struct parse_result *res)
 {
struct peer *p;
-   struct ctl_timer*t;
+   struct ctl_timer t;
struct ctl_show_interface   *iface;
struct ctl_show_nexthop *nh;
-   struct ctl_show_set *set;
+   struct ctl_show_set  set;
+   struct ctl_show_rtr  rtr;
struct kroute_full  *kf;
struct ktable   *kt;
struct ctl_show_rib  rib;
+   struct rde_memstats  stats;
+   struct rde_hashstats hash;
u_char  *asdata;
-   struct rde_memstats stats;
-   struct rde_hashstatshash;
-   u_int   rescode, ilen;
-   size_t  aslen;
+   u_intrescode, ilen;
+   size_t   aslen;
 
switch (imsg->hdr.type) {
case IMSG_CTL_SHOW_NEIGHBOR:
@@ -412,9 +416,11 @@ show(struct imsg *imsg, struct parse_res
output->neighbor(p, res);
break;
case IMSG_CTL_SHOW_TIMER:
-   t = imsg->data;
-   if (t->type > 0 && t->type < Timer_Max)
-   output->timer(t);
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(t))
+   errx(1, "wrong imsg len");
+   memcpy(&t, imsg->data, sizeof(t));
+   if (t.type > 0 && t.type < Timer_Max)
+   output->timer(&t);
break;
case IMSG_CTL_SHOW_INTERFACE:
iface = imsg->data;
@@ -463,16 +469,28 @@ show(struct imsg *imsg, struct parse_res
output->attr(imsg->data, ilen, res);
break;
case IMSG_CTL_SHOW_RIB_MEM:
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(stats))
+   errx(1, "wrong imsg len");
memcpy(&stats, imsg->data, sizeof(stats));
output->rib_mem(&stats);
break;
case IMSG_CTL_SHOW_RIB_HASH:
+   if (imsg->hdr.len < IMSG_HEADER_SIZE + sizeof(hash))
+   errx(1, "wrong imsg len");
memcpy(&hash, imsg->data, sizeof(hash));
output->rib_hash(&hash);
break;
case I

Re: change rpki-client repository code

2021-02-12 Thread Claudio Jeker
On Mon, Feb 08, 2021 at 05:15:40PM +0100, Claudio Jeker wrote:
> Split the repository code into two parts:
> 
> - fetch of the trust anchors (the certs referenced by TAL files)
> - fetch of the MFT files of a repository
> 
> While the two things kind of look similar there are some differences.
> 
> - TA files are loaded via rsync or https URI (only one file needs to be
>   loaded)
> - MFT files need everything inside the repository to be loaded since they
>   reference other files (.roa, .cer, .crl). These repositories are
>   synced once with rsync and many mft may be part of a repo. Also these
>   repositories can be synced via rsync or RRDP
> 
> To simplify these diverse options it is time to split the code up.
> Introduce a ta_lookup() along with repo_lookup(). Refactor the repo_lookup
> code into subfunctions repo_alloc() and repo_fetch() (both are also used
> by ta_lookup()). Use the caRepository URI to figure out the base URI.
> Simplify rsync_uri_parse() into rsync_base_uri() which clips off excess
> directories from the URI (else thousands of individual rsync calls would
> be made against the RIR's CA repos).
> 
> The big change is that the layout of the cache directory is changed.
> The cache will now have two base directories:
> - ta/ (for all trust anchors)
> - rsync/ (for all other repositories)
> 

My plan at the moment is that rpki-client will split the cache directory
into three parts. ta/, rsync/, and rrdp/. This is done to ensure that data
does not get mixed up. Once this is in then my next step is to support
https:// links in TAL files and fetch the trust anchor via https instead
of rsync. Later RRDP will follow.

-- 
:wq Claudio

Index: extern.h
===
RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
retrieving revision 1.42
diff -u -p -r1.42 extern.h
--- extern.h8 Feb 2021 09:22:53 -   1.42
+++ extern.h8 Feb 2021 13:44:22 -
@@ -392,9 +392,7 @@ void proc_parser(int) __attribute__((n
 
 /* Rsync-specific. */
 
-int rsync_uri_parse(const char **, size_t *,
-   const char **, size_t *, const char **, size_t *,
-   enum rtype *, const char *);
+char   *rsync_base_uri(const char *);
 voidproc_rsync(char *, char *, int) __attribute__((noreturn));
 
 /* Logging (though really used for OpenSSL errors). */
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.98
diff -u -p -r1.98 main.c
--- main.c  5 Feb 2021 12:26:52 -   1.98
+++ main.c  8 Feb 2021 13:50:20 -
@@ -78,11 +78,12 @@
  * An rsync repository.
  */
 struct repo {
-   char*repo;  /* repository rsync URI */
-   char*local; /* local path name */
-   char*notify; /* RRDB notify URI if available */
-   size_t   id; /* identifier (array index) */
-   int  loaded; /* whether loaded or not */
+   char*repouri;   /* CA repository base URI */
+   char*local; /* local path name */
+   char*uris[2];   /* URIs to fetch from */
+   size_t   id;/* identifier (array index) */
+   int  uriidx;/* which URI is fetched */
+   int  loaded;/* whether loaded or not */
 };
 
 size_t entity_queue;
@@ -284,33 +285,12 @@ entityq_add(struct entityq *q, char *fil
 }
 
 /*
- * Look up a repository, queueing it for discovery if not found.
+ * Allocat a new repository be extending the repotable.
  */
-static const struct repo *
-repo_lookup(const char *uri)
+static struct repo *
+repo_alloc(void)
 {
-   const char  *host, *mod;
-   size_t   hostsz, modsz, i;
-   char*local;
-   struct repo *rp;
-   struct ibuf *b;
-
-   if (!rsync_uri_parse(&host, &hostsz,
-   &mod, &modsz, NULL, NULL, NULL, uri))
-   errx(1, "%s: malformed", uri);
-
-   if (asprintf(&local, "%.*s/%.*s", (int)hostsz, host,
-   (int)modsz, mod) == -1)
-   err(1, "asprintf");
-
-   /* Look up in repository table. */
-
-   for (i = 0; i < rt.reposz; i++) {
-   if (strcmp(rt.repos[i].local, local))
-   continue;
-   free(local);
-   return &rt.repos[i];
-   }
+   struct repo *rp;
 
rt.repos = reallocarray(rt.repos,
rt.reposz + 1, sizeof(struct repo));
@@ -320,28 +300,99 @@ repo_lookup(const char *uri)
rp = &rt.repos[rt.reposz++];
memset(rp, 0, sizeof(struct repo));
rp->id = rt.reposz - 1;
-   rp->local = local;
 
-   if ((rp->

Re: snmpd: Add end of sequence tests

2021-02-12 Thread Claudio Jeker
On Fri, Feb 12, 2021 at 10:03:21AM +0100, Martijn van Duren wrote:
> ping
> 
> On Sun, 2021-01-31 at 11:57 +0100, Martijn van Duren wrote:
> > Now that ober_scanf_elements supports '$' let's use it.
> > 
> > Here's a first stab by adding it to snmpd.
> > Passing regress and a few manual checks.
> > 
> > 'e' still doesn't consume the element, but I've talked it over with
> > rob@, who said that shouldn't get in the way of using this new feature.
> > 
> > OK?

Looks reasonable and I guess you verified the layout of all those ASN.1
messages to ensure the $ is at the right place.

Side note: I wonder why } does not imply the $? At least now with S it
would be possible to enforce this.
I like that you closed a lot of open { format strings. What about these:

> > -   if (ober_scanf_elements(usm, "{xiixpxx", &engineid, &enginelen,
> > +   if (ober_scanf_elements(usm, "{xiixpxx$", &engineid, &enginelen,

Wouldn't it be better to use "{xiixpxx$}" here?

> > Index: snmpe.c
> > ===
> > RCS file: /cvs/src/usr.sbin/snmpd/snmpe.c,v
> > retrieving revision 1.68
> > diff -u -p -r1.68 snmpe.c
> > --- snmpe.c 22 Jan 2021 06:33:27 -  1.68
> > +++ snmpe.c 31 Jan 2021 10:55:49 -
> > @@ -220,7 +220,7 @@ snmpe_parse(struct snmp_message *msg)
> > case SNMP_V2:
> > if (env->sc_min_seclevel != 0)
> > goto badversion;
> > -   if (ober_scanf_elements(a, "se", &comn, &msg->sm_pdu) != 0)
> > +   if (ober_scanf_elements(a, "seS$", &comn, &msg->sm_pdu) != 
> > 0)
> > goto parsefail;
> > if (strlcpy(msg->sm_community, comn,
> >     sizeof(msg->sm_community)) >= 
> > sizeof(msg->sm_community)) {
> > @@ -230,7 +230,7 @@ snmpe_parse(st? tm_udp.c
> > Index: snmpe.c
> > ===
> > RCS file: /cvs/src/usr.sbin/snmpd/snmpe.c,v
> > retrieving revision 1.68
> > diff -u -p -r1.68 snmpe.c
> > --- snmpe.c 22 Jan 2021 06:33:27 -  1.68
> > +++ snmpe.c 31 Jan 2021 10:55:49 -
> > @@ -220,7 +220,7 @@ snmpe_parse(struct snmp_message *msg)
> > case SNMP_V2:
> > if (env->sc_min_seclevel != 0)
> > goto badversion;
> > -   if (ober_scanf_elements(a, "se", &comn, &msg->sm_pdu) != 0)
> > +   if (ober_scanf_elements(a, "seS$", &comn, &msg->sm_pdu) != 
> > 0)
> > goto parsefail;
> > if (strlcpy(msg->sm_community, comn,
> >     sizeof(msg->sm_community)) >= 
> > sizeof(msg->sm_community)) {
> > @@ -230,7 +230,7 @@ snmpe_parse(struct snmp_message *msg)
> > }
> > break;
> > case SNMP_V3:
> > -   if (ober_scanf_elements(a, "{iisi}e",
> > +   if (ober_scanf_elements(a, "{iisi$}e",
> >     &msg->sm_msgid, &msg->sm_max_msg_size, &flagstr,
> >     &msg->sm_secmodel, &a) != 0)
> > goto parsefail;
> > @@ -248,7 +248,7 @@ snmpe_parse(struct snmp_message *msg)
> > goto parsefail;
> > }
> >  
> > -   if (ober_scanf_elements(a, "{xxe",
> > +   if (ober_scanf_elements(a, "{xxeS$}$",
> >     &msg->sm_ctxengineid, &msg->sm_ctxengineid_len,
> >     &ctxname, &len, &msg->sm_pdu) != 0)
> > goto parsefail;
> > @@ -370,7 +370,7 @@ snmpe_parse(struct snmp_message *msg)
> > }
> >  
> > /* SNMP PDU */
> > -   if (ober_scanf_elements(a, "iiie{et",
> > +   if (ober_scanf_elements(a, "iiie{etS$}$",
> >     &req, &errval, &erridx, &msg->sm_pduend,
> >     &msg->sm_varbind, &class, &type) != 0) {
> > stats->snmp_silentdrops++;
> > @@ -429,7 +429,7 @@ snmpe_parsevarbinds(struct snmp_message 
> >  
> > for (i = 1; varbind != NULL && i < SNMPD_MAXVARBIND;
> >     varbind = varbind->be_next, i++) {
> > -   if (ober_scanf_elements(varbind, "{oe}", &o, &value) == -1) 
> > {
> > +   if (ober_scanf_elements(varbind, "{oeS$}", &o, &value) == 
> > -1) {
> > stats->snmp_inasnparseerrs++;
> > msg->sm_errstr = "invalid varbind";
> > goto varfail;
> > Index: traphandler.c
> > ===
> > RCS file: /cvs/src/usr.sbin/snmpd/traphandler.c,v
> > retrieving revision 1.20
> > diff -u -p -r1.20 traphandler.c
> > --- traphandler.c   22 Jan 2021 06:33:27 -  1.20
> > +++ traphandler.c   31 Jan 2021 10:55:49 -
> > @@ -67,7 +67,7 @@ traphandler_parse(struct snmp_message *m
> > struct privsep  *ps = &snmpd_env->sc_ps;
> > struct snmp_stats   *stats = &snmpd_env->sc_stats;
> > struct ber  

  1   2   3   4   5   6   7   8   9   10   >