from:"Claudio Jeker"

bgpd: cleanup optparamlen handling in session_open

2023-10-27 Thread Claudio Jeker

In the big ibuf API refactor I also broke the optparamlen handling
by using one variable for two things.

All the size handling in session_open() can be simplified since
ibuf_size() is cheap to call.

I think the result is cleaner than the code before. It is still somewhat
funky because there are a fair amount of conditions to cover now.

-- 
:wq Claudio

Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.452
diff -u -p -r1.452 session.c
--- session.c   27 Oct 2023 09:40:27 -  1.452
+++ session.c   27 Oct 2023 11:00:13 -
@@ -1471,8 +1471,9 @@ session_open(struct peer *p)
 {
struct bgp_msg  *buf;
struct ibuf *opb;
-   uint16_t len, optparamlen = 0, holdtime;
-   uint8_t  i, op_type;
+   size_t   optparamlen;
+   uint16_t holdtime;
+   uint8_t  i;
int  errs = 0, extlen = 0;
int  mpcapa = 0;
 
@@ -1556,16 +1557,16 @@ session_open(struct peer *p)
if (optparamlen == 0) {
/* nothing */
} else if (optparamlen + 2 >= 255) {
-   /* RFC9072: 2 byte length instead of 1 + 3 byte extra header */
-   optparamlen += sizeof(op_type) + 2 + 3;
+   /* RFC9072: use 255 as magic size and request extra header */
optparamlen = 255;
extlen = 1;
} else {
-   optparamlen += sizeof(op_type) + 1;
+   /* regular capabilities header */
+   optparamlen += 2;
}
 
-   len = MSGSIZE_OPEN_MIN + optparamlen;
-   if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
+   if (errs || (buf = session_newmsg(OPEN,
+   MSGSIZE_OPEN_MIN + optparamlen)) == NULL) {
ibuf_free(opb);
bgp_fsm(p, EVNT_CON_FATAL);
return;
@@ -1584,20 +1585,19 @@ session_open(struct peer *p)
errs += ibuf_add_n8(buf->buf, optparamlen);
 
if (extlen) {
-   /* write RFC9072 extra header */
+   /* RFC9072 extra header which spans over the capabilities hdr */
errs += ibuf_add_n8(buf->buf, OPT_PARAM_EXT_LEN);
-   errs += ibuf_add_n16(buf->buf, optparamlen - 3);
+   errs += ibuf_add_n16(buf->buf, ibuf_size(opb) + 1 + 2);
}
 
if (optparamlen) {
errs += ibuf_add_n8(buf->buf, OPT_PARAM_CAPABILITIES);
 
-   optparamlen = ibuf_size(opb);
if (extlen) {
/* RFC9072: 2-byte extended length */
-   errs += ibuf_add_n16(buf->buf, optparamlen);
+   errs += ibuf_add_n16(buf->buf, ibuf_size(opb));
} else {
-   errs += ibuf_add_n8(buf->buf, optparamlen);
+   errs += ibuf_add_n8(buf->buf, ibuf_size(opb));
}
errs += ibuf_add_buf(buf->buf, opb);
}

bgpd fix holdtime in session_open

2023-10-27 Thread Claudio Jeker

While looking for something else I noticed this error.

The holdtime is written into the buffer with ibuf_add_n16() so the htons()
call is done in the ibuf add call. So there is no need for the htons()
when assigning holdtime.

-- 
:wq Claudio

Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.451
diff -u -p -u -5 -r1.451 session.c
--- session.c   19 Oct 2023 07:02:45 -  1.451
+++ session.c   27 Oct 2023 09:24:20 -
@@ -1570,13 +1570,13 @@ session_open(struct peer *p)
bgp_fsm(p, EVNT_CON_FATAL);
return;
}
 
if (p->conf.holdtime)
-   holdtime = htons(p->conf.holdtime);
+   holdtime = p->conf.holdtime;
else
-   holdtime = htons(conf->holdtime);
+   holdtime = conf->holdtime;
 
errs += ibuf_add_n8(buf->buf, 4);
errs += ibuf_add_n16(buf->buf, p->conf.local_short_as);
errs += ibuf_add_n16(buf->buf, holdtime);
/* is already in network byte order */

Re: ibuf free fd on close

2023-10-24 Thread Claudio Jeker

On Tue, Oct 24, 2023 at 03:50:47PM +0200, Theo Buehler wrote:
> On Tue, Oct 24, 2023 at 03:01:26PM +0200, Claudio Jeker wrote:
> > When I added ibuf_get_fd() the idea was to make sure that ibuf_free() will
> > close any fd still on the buffer. This way even if a fd is unexpectedly
> > passed nothing will happen.
> > 
> > That code was disabled at start because userland was not fully ready. In
> > particular rpki-client did not handle that well. All of this is to my
> > knowledge fixed so there is no reason to keep the NOTYET :)
> > 
> > With this users need to use ibuf_fd_get() to take the fd off the ibuf.
> > Code not doing so will break because ibuf_free() will close the fd which
> > is probably still in use somewhere else.
> 
> Nothing in base outside of libutil seems to reach directly for the fd
> (checked by compiling with that struct member renamed in the public
> header).
> 
> The internal uses are addressed by this diff, so
> 
> ok tb
> 
> I can put the fd rename through a bulk to catch some ports in a couple
> of days but I don't think there is a need to wait.

Thanks. Do we have a list of ports that use ibuf / imsg? 

-- 
:wq Claudio

ibuf free fd on close

2023-10-24 Thread Claudio Jeker

When I added ibuf_fd_get() the idea was to make sure that ibuf_free() will
close any fd still on the buffer. This way even if a fd is unexpectedly
passed nothing will happen.

That code was disabled at start because userland was not fully ready. In
particular rpki-client did not handle that well. All of this is to my
knowledge fixed so there is no reason to keep the NOTYET :)

With this users need to use ibuf_fd_get() to take the fd off the ibuf.
Code not doing so will break because ibuf_free() will close the fd which
is probably still in use somewhere else.

-- 
:wq Claudio

Index: imsg-buffer.c
===
RCS file: /cvs/src/lib/libutil/imsg-buffer.c,v
retrieving revision 1.16
diff -u -p -r1.16 imsg-buffer.c
--- imsg-buffer.c   19 Jun 2023 17:19:50 -  1.16
+++ imsg-buffer.c   24 Oct 2023 12:55:44 -
@@ -294,10 +294,8 @@ ibuf_free(struct ibuf *buf)
 {
if (buf == NULL)
return;
-#ifdef NOTYET
if (buf->fd != -1)
close(buf->fd);
-#endif
freezero(buf->buf, buf->size);
free(buf);
 }
@@ -314,9 +312,7 @@ ibuf_fd_get(struct ibuf *buf)
int fd;
 
fd = buf->fd;
-#ifdef NOTYET
buf->fd = -1;
-#endif
return (fd);
 }
 
@@ -480,11 +476,6 @@ static void
 ibuf_dequeue(struct msgbuf *msgbuf, struct ibuf *buf)
 {
TAILQ_REMOVE(>bufs, buf, entry);
-
-   if (buf->fd != -1) {
-   close(buf->fd);
-   buf->fd = -1;
-   }
 
msgbuf->queued--;
ibuf_free(buf);

fix an error in flowspec_get_addr()

2023-10-23 Thread Claudio Jeker

So flowspec_get_addr() in the IPv6 case is utterly complicated,
since matching can be done on some sub-part of the prefix.
So there is this shift_right() call that takes care of this special
offset.

Now the shift_right call uses *olen but should actually use xoff instead.
*olen is set much later in the code.

This should fix:
https://github.com/openbgpd-portable/openbgpd-portable/security/code-scanning/2
-- 
:wq Claudio

Index: flowspec.c
===
RCS file: /cvs/src/usr.sbin/bgpd/flowspec.c,v
retrieving revision 1.4
diff -u -p -r1.4 flowspec.c
--- flowspec.c  19 Apr 2023 09:31:58 -  1.4
+++ flowspec.c  23 Oct 2023 10:44:22 -
@@ -366,7 +366,7 @@ flowspec_get_addr(const uint8_t *flow, i
if (extract_prefix(comp + 2, complen - 2, buf, xlen,
sizeof(buf)) == -1)
return -1;
-   shift_right(addr->v6.s6_addr, buf, *olen, xlen);
+   shift_right(addr->v6.s6_addr, buf, xoff, xlen);
*plen = comp[0];
if (olen != NULL)
*olen = comp[1];

Re: 7.3: Squid 6.3 with multiple workers - net.unix.dgram.sendspace too low

2023-10-19 Thread Claudio Jeker

On Thu, Oct 19, 2023 at 03:09:08PM +0200, Silamael Darkomen wrote:
> Hi,
> 
> Today I upgraded to the brand new Squid version 6.3 from ports and noticed,
> that Squid no longer starts properly if configured with multiple worker
> processes.
> 
> After some debugging the limit from net.unix.dgram.sendspace came up as
> cause. The 2k default is way to low.
> In ktrace I saw sendmessage calls with messages slightly over 4k.
> 
> Increasing this limit to 16k as net.unix.dgram.recvspace fixes the problem
> and Squid can start.
> 
> Perhaps this historically low limit should be adjusted accordingly?
> All for all other Unix sockets, sendspace and recvspace share the same
> limits, just dgram sockets are out of line.
> 
> PS: Tested this with 7.3 but 7.4 seems to have the same limitations.
> 

It is a SOCK_DGRAM socket, it is supposed to be limited.
From the man-page:
A SOCK_DGRAM socket supports datagrams (connectionless,
unreliable messages of a fixed (typically small) maximum length).

The program should set the socket buffer size via setsockopt() using
SO_SNDBUF. It seems squid just YOLOs this and hopes for the best.

So the best way to fix this is in squid itself.
-- 
:wq Claudio

Re: bgpd convert rtr_proto.c to new ibuf API

2023-10-19 Thread Claudio Jeker

On Thu, Oct 19, 2023 at 12:59:17PM +0200, Theo Buehler wrote:
> On Thu, Oct 19, 2023 at 10:41:07AM +0200, Claudio Jeker wrote:
> > More ibuf cleanup. rtr_proto.c still uses ibuf_add() where it could use
> > the new functions.
> > 
> > Two bits I'm unsure about:
> > - I had to change some sizeof() to use native types (I especially dislike
> >   the sizeof(struct rtr_header).
> 
> Yes, that's not very nice.
> 
> > - ibuf_add_nXX() can fail if the value is too large. Which should be
> >   impossible but still maybe it would be better to check for errors.
> 
> While it should be impossible, the length calculations are non-trivial,
> so it seems wiser to check.
> 
> It's a bit longer than what you have now, but maybe it's an option
> to combine the length calculation with the errs += idiom.
> 
>   len += sizeof(rs->version);
>   len += sizeof(type);
>   len += sizeof(session_id);
>   len += sizeof(len);
> 
>   if ((buf = ibuf_open(len)) == NULL)
>   return NULL;
> 
>   errs += ibuf_add_n8(buf, rs->version);
>   errs += ibuf_add_n8(buf, type);
>   errs += ibuf_add_n16(buf, session_id);
>   errs += ibuf_add_n32(buf, len);
> 
>   if (errs) {
>   ibuf_free(ibuf);
>   return NULL;
>   }
> 
> I'm ok with the diff as it is and you can ponder how you want to shave
> this particular Yak.

I like my yaks shaved like this...

The sizeof yak is still in queue... not sure about it.
-- 
:wq Claudio

Index: rtr_proto.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
retrieving revision 1.18
diff -u -p -r1.18 rtr_proto.c
--- rtr_proto.c 19 Oct 2023 11:12:10 -  1.18
+++ rtr_proto.c 19 Oct 2023 11:24:56 -
@@ -233,6 +233,7 @@ rtr_newmsg(struct rtr_session *rs, enum 
 uint16_t session_id)
 {
struct ibuf *buf;
+   int saved_errno;
 
if (len > RTR_MAX_LEN) {
errno = ERANGE;
@@ -240,15 +241,23 @@ rtr_newmsg(struct rtr_session *rs, enum 
}
len += sizeof(struct rtr_header);
if ((buf = ibuf_open(len)) == NULL)
-   return NULL;
-
-   /* cannot fail with fixed buffers */
-   ibuf_add_n8(buf, rs->version);
-   ibuf_add_n8(buf, type);
-   ibuf_add_n16(buf, session_id);
-   ibuf_add_n32(buf, len);
+   goto fail;
+   if (ibuf_add_n8(buf, rs->version) == -1)
+   goto fail;
+   if (ibuf_add_n8(buf, type) == -1)
+   goto fail;
+   if (ibuf_add_n16(buf, session_id) == -1)
+   goto fail;
+   if (ibuf_add_n32(buf, len) == -1)
+   goto fail;
 
return buf;
+
+ fail:
+   saved_errno = errno;
+   ibuf_free(buf);
+   errno = saved_errno;
+   return NULL;
 }
 
 /*
@@ -271,22 +280,27 @@ rtr_send_error(struct rtr_session *rs, e
 
buf = rtr_newmsg(rs, ERROR_REPORT, 2 * sizeof(uint32_t) + len + mlen,
err);
-   if (buf == NULL) {
-   log_warn("rtr %s: send error report", log_rtr(rs));
-   return;
-   }
-
-   /* cannot fail with fixed buffers */
-   ibuf_add_n32(buf, len);
-   ibuf_add(buf, pdu, len);
-   ibuf_add_n32(buf, mlen);
-   ibuf_add(buf, msg, mlen);
+   if (buf == NULL)
+   goto fail;
+   if (ibuf_add_n32(buf, len) == -1)
+   goto fail;
+   if (ibuf_add(buf, pdu, len) == -1)
+   goto fail;
+   if (ibuf_add_n32(buf, mlen) == -1)
+   goto fail;
+   if (ibuf_add(buf, msg, mlen) == -1)
+   goto fail;
ibuf_close(>w, buf);
 
log_warnx("rtr %s: sending error report[%u] %s", log_rtr(rs), err,
msg ? msg : "");
 
rtr_fsm(rs, RTR_EVNT_SEND_ERROR);
+   return;
+
+ fail:
+   log_warn("rtr %s: send error report", log_rtr(rs));
+   ibuf_free(buf);
 }
 
 static void
@@ -309,15 +323,17 @@ rtr_send_serial_query(struct rtr_session
struct ibuf *buf;
 
buf = rtr_newmsg(rs, SERIAL_QUERY, sizeof(uint32_t), rs->session_id);
-   if (buf == NULL) {
-   log_warn("rtr %s: send serial query", log_rtr(rs));
-   rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
-   return;
-   }
-
-   /* cannot fail with fixed buffers */
-   ibuf_add_n32(buf, rs->serial);
+   if (buf == NULL)
+   goto fail;
+   if (ibuf_add_n32(buf, rs->serial) == -1)
+   goto fail;
ibuf_close(>w, buf);
+   return;
+
+ fail:
+   log_warn("rtr %s: send serial query", log_rtr(rs));
+   ibuf_free(buf);
+   rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
 }
 
 /*

bgpd convert rtr_proto.c to new ibuf API

2023-10-19 Thread Claudio Jeker

More ibuf cleanup. rtr_proto.c still uses ibuf_add() where it could use
the new functions.

Two bits I'm unsure about:
- I had to change some sizeof() to use native types (I especially dislike
  the sizeof(struct rtr_header)).
- ibuf_add_nXX() can fail if the value is too large. Which should be
  impossible but still maybe it would be better to check for errors.

-- 
:wq Claudio

Index: rtr_proto.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rtr_proto.c,v
retrieving revision 1.17
diff -u -p -r1.17 rtr_proto.c
--- rtr_proto.c 16 Aug 2023 08:26:35 -  1.17
+++ rtr_proto.c 19 Oct 2023 08:35:49 -
@@ -233,24 +233,21 @@ rtr_newmsg(struct rtr_session *rs, enum 
 uint16_t session_id)
 {
struct ibuf *buf;
-   struct rtr_header rh;
 
if (len > RTR_MAX_LEN) {
errno = ERANGE;
return NULL;
}
-   len += sizeof(rh);
+   len += sizeof(struct rtr_header);
if ((buf = ibuf_open(len)) == NULL)
return NULL;
 
-   memset(, 0, sizeof(rh));
-   rh.version = rs->version;
-   rh.type = type;
-   rh.session_id = htons(session_id);
-   rh.length = htonl(len);
-
/* cannot fail with fixed buffers */
-   ibuf_add(buf, , sizeof(rh));
+   ibuf_add_n8(buf, rs->version);
+   ibuf_add_n8(buf, type);
+   ibuf_add_n16(buf, session_id);
+   ibuf_add_n32(buf, len);
+
return buf;
 }
 
@@ -264,7 +261,6 @@ rtr_send_error(struct rtr_session *rs, e
 {
struct ibuf *buf;
size_t mlen = 0;
-   uint32_t hdrlen;
 
rs->last_sent_error = err;
if (msg) {
@@ -273,7 +269,7 @@ rtr_send_error(struct rtr_session *rs, e
} else
memset(rs->last_sent_msg, 0, sizeof(rs->last_sent_msg));
 
-   buf = rtr_newmsg(rs, ERROR_REPORT, 2 * sizeof(hdrlen) + len + mlen,
+   buf = rtr_newmsg(rs, ERROR_REPORT, 2 * sizeof(uint32_t) + len + mlen,
err);
if (buf == NULL) {
log_warn("rtr %s: send error report", log_rtr(rs));
@@ -281,11 +277,9 @@ rtr_send_error(struct rtr_session *rs, e
}
 
/* cannot fail with fixed buffers */
-   hdrlen = ntohl(len);
-   ibuf_add(buf, , sizeof(hdrlen));
+   ibuf_add_n32(buf, len);
ibuf_add(buf, pdu, len);
-   hdrlen = ntohl(mlen);
-   ibuf_add(buf, , sizeof(hdrlen));
+   ibuf_add_n32(buf, mlen);
ibuf_add(buf, msg, mlen);
ibuf_close(>w, buf);
 
@@ -313,9 +307,8 @@ static void
 rtr_send_serial_query(struct rtr_session *rs)
 {
struct ibuf *buf;
-   uint32_t s;
 
-   buf = rtr_newmsg(rs, SERIAL_QUERY, sizeof(s), rs->session_id);
+   buf = rtr_newmsg(rs, SERIAL_QUERY, sizeof(uint32_t), rs->session_id);
if (buf == NULL) {
log_warn("rtr %s: send serial query", log_rtr(rs));
rtr_send_error(rs, INTERNAL_ERROR, "out of memory", NULL, 0);
@@ -323,8 +316,7 @@ rtr_send_serial_query(struct rtr_session
}
 
/* cannot fail with fixed buffers */
-   s = htonl(rs->serial);
-   ibuf_add(buf, , sizeof(s));
+   ibuf_add_n32(buf, rs->serial);
ibuf_close(>w, buf);
 }

bgpd session.c convert to new ibuf API

2023-10-18 Thread Claudio Jeker

This is a bit overdue. Convert session.c to also use the new ibuf API.
This simplifies some code since there is no need for local variables.
Also kill the struct msg_header and especially msg_open. They are of very
little use.

Regress passes so I think this should be fine :)
-- 
:wq Claudio

Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.450
diff -u -p -r1.450 session.c
--- session.c   17 Oct 2023 17:58:15 -  1.450
+++ session.c   18 Oct 2023 15:09:29 -
@@ -1324,24 +1324,26 @@ session_capa_add(struct ibuf *opb, uint8
 {
int errs = 0;
 
-   errs += ibuf_add(opb, _code, sizeof(capa_code));
-   errs += ibuf_add(opb, _len, sizeof(capa_len));
+   errs += ibuf_add_n8(opb, capa_code);
+   errs += ibuf_add_n8(opb, capa_len);
return (errs);
 }
 
 int
 session_capa_add_mp(struct ibuf *buf, uint8_t aid)
 {
-   uint8_t  safi, pad = 0;
uint16_t afi;
+   uint8_t  safi;
int  errs = 0;
 
-   if (aid2afi(aid, , ) == -1)
-   fatalx("session_capa_add_mp: bad afi/safi pair");
-   afi = htons(afi);
-   errs += ibuf_add(buf, , sizeof(afi));
-   errs += ibuf_add(buf, , sizeof(pad));
-   errs += ibuf_add(buf, , sizeof(safi));
+   if (aid2afi(aid, , ) == -1) {
+   log_warn("session_capa_add_afi: bad AID");
+   return (-1);
+   }
+
+   errs += ibuf_add_n16(buf, afi);
+   errs += ibuf_add_zero(buf, 1);
+   errs += ibuf_add_n8(buf, safi);
 
return (errs);
 }
@@ -1356,13 +1358,12 @@ session_capa_add_afi(struct peer *p, str
 
if (aid2afi(aid, , )) {
log_warn("session_capa_add_afi: bad AID");
-   return (1);
+   return (-1);
}
 
-   afi = htons(afi);
-   errs += ibuf_add(b, , sizeof(afi));
-   errs += ibuf_add(b, , sizeof(safi));
-   errs += ibuf_add(b, , sizeof(flags));
+   errs += ibuf_add_n16(b, afi);
+   errs += ibuf_add_n8(b, safi);
+   errs += ibuf_add_n8(b, flags);
 
return (errs);
 }
@@ -1370,21 +1371,19 @@ session_capa_add_afi(struct peer *p, str
 struct bgp_msg *
 session_newmsg(enum msg_type msgtype, uint16_t len)
 {
+   u_char   marker[MSGSIZE_HEADER_MARKER];
struct bgp_msg  *msg;
-   struct msg_headerhdr;
struct ibuf *buf;
int  errs = 0;
 
-   memset(, 0xff, sizeof(hdr.marker));
-   hdr.len = htons(len);
-   hdr.type = msgtype;
+   memset(marker, 0xff, sizeof(marker));
 
if ((buf = ibuf_open(len)) == NULL)
return (NULL);
 
-   errs += ibuf_add(buf, , sizeof(hdr.marker));
-   errs += ibuf_add(buf, , sizeof(hdr.len));
-   errs += ibuf_add(buf, , sizeof(hdr.type));
+   errs += ibuf_add(buf, marker, sizeof(marker));
+   errs += ibuf_add_n16(buf, len);
+   errs += ibuf_add_n8(buf, msgtype);
 
if (errs || (msg = calloc(1, sizeof(*msg))) == NULL) {
ibuf_free(buf);
@@ -1472,8 +1471,7 @@ session_open(struct peer *p)
 {
struct bgp_msg  *buf;
struct ibuf *opb;
-   struct msg_open  msg;
-   uint16_t len, optparamlen = 0;
+   uint16_t len, optparamlen = 0, holdtime;
uint8_t  i, op_type;
int  errs = 0, extlen = 0;
int  mpcapa = 0;
@@ -1501,10 +1499,8 @@ session_open(struct peer *p)
p->conf.role != ROLE_NONE &&
(p->capa.ann.mp[AID_INET] || p->capa.ann.mp[AID_INET6] ||
mpcapa == 0)) {
-   uint8_t val;
-   val = role2capa(p->conf.role);
errs += session_capa_add(opb, CAPA_ROLE, 1);
-   errs += ibuf_add(opb, , 1);
+   errs += ibuf_add_n8(opb, role2capa(p->conf.role));
}
 
/* graceful restart and End-of-RIB marker, RFC 4724 */
@@ -1520,19 +1516,14 @@ session_open(struct peer *p)
/* Only set the R-flag if no graceful restart is ongoing */
if (!rst)
hdr |= CAPA_GR_R_FLAG;
-   hdr = htons(hdr);
-
errs += session_capa_add(opb, CAPA_RESTART, sizeof(hdr));
-   errs += ibuf_add(opb, , sizeof(hdr));
+   errs += ibuf_add_n16(opb, hdr);
}
 
/* 4-bytes AS numbers, RFC6793 */
if (p->capa.ann.as4byte) {  /* 4 bytes data */
-   uint32_tnas;
-
-   nas = htonl(p->conf.local_as);
-   errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
-   errs += ibuf_add(opb, , sizeof(nas));
+   errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(uint32_t));
+   errs += ibuf_add_n32(opb,

Re: log.c use buffered IO

2023-10-18 Thread Claudio Jeker

On Tue, Oct 17, 2023 at 10:06:54AM +0200, Sebastian Benoit wrote:
> Theo Buehler(t...@theobuehler.org) on 2023.10.17 09:13:15 +0200:
> > On Mon, Oct 16, 2023 at 12:19:17PM +0200, Claudio Jeker wrote:
> > > I dislike how log.c does all these asprintf() calls with dubious
> > > workaround calls in case asprintf() fails.
> > 
> > You're not alone.
> > 
> > > IMO it is easier to use the stdio provided buffers and fflush() to get
> > > "atomic" writes on stderr. At least from my understanding this is the
> > > reason to go all this lenght to do a single fprintf call.
> > 
> > This makes sense, but I don't know the history here.
> 
> as far as i can remember, it was done so it would still be able to work
> somewhat when out of memeory.
>  

After some input off-list here another idea.
Require that logit() is called with a \n and by that simplify a lot of
code around it. vsyslog() handles both so having the \n should not cause
any breakage.

Now logit() is mostly used internally but in bgpd it is also used in
logmsg.c and parse.y. Fixing those is simple.

Also this uses a stack buffer for all the log_* cases now. This should
make the code more thread safe.

Also this removes vlog() from the API.  I had a quick look at all the
other log.c users and apart from ldapd this can be added to all of them
without much issues. Nothing else uses vlog() and the logit() is also very
minimal (mostly the same parse.y change as below).

-- 
:wq Claudio

Index: log.c
===
RCS file: /cvs/src/usr.sbin/bgpd/log.c,v
retrieving revision 1.64
diff -u -p -r1.64 log.c
--- log.c   21 Mar 2017 12:06:55 -  1.64
+++ log.c   18 Oct 2023 07:10:32 -
@@ -26,6 +26,8 @@
 
 #include "log.h"
 
+#define MAX_LOGLEN 4096
+
 static int  debug;
 static int  verbose;
 static const char  *log_procname;
@@ -68,30 +70,15 @@ void
 logit(int pri, const char *fmt, ...)
 {
va_list ap;
+   int saved_errno = errno;
 
va_start(ap, fmt);
-   vlog(pri, fmt, ap);
-   va_end(ap);
-}
-
-void
-vlog(int pri, const char *fmt, va_list ap)
-{
-   char*nfmt;
-   int  saved_errno = errno;
-
if (debug) {
-   /* best effort in out of mem situations */
-   if (asprintf(, "%s\n", fmt) == -1) {
-   vfprintf(stderr, fmt, ap);
-   fprintf(stderr, "\n");
-   } else {
-   vfprintf(stderr, nfmt, ap);
-   free(nfmt);
-   }
+   vfprintf(stderr, fmt, ap);
fflush(stderr);
} else
vsyslog(pri, fmt, ap);
+   va_end(ap);
 
errno = saved_errno;
 }
@@ -99,26 +86,18 @@ vlog(int pri, const char *fmt, va_list a
 void
 log_warn(const char *emsg, ...)
 {
-   char*nfmt;
-   va_list  ap;
-   int  saved_errno = errno;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
/* best effort to even work in out of memory situations */
if (emsg == NULL)
-   logit(LOG_ERR, "%s", strerror(saved_errno));
+   logit(LOG_ERR, "%s\n", strerror(saved_errno));
else {
va_start(ap, emsg);
-
-   if (asprintf(, "%s: %s", emsg,
-   strerror(saved_errno)) == -1) {
-   /* we tried it... */
-   vlog(LOG_ERR, emsg, ap);
-   logit(LOG_ERR, "%s", strerror(saved_errno));
-   } else {
-   vlog(LOG_ERR, nfmt, ap);
-   free(nfmt);
-   }
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_ERR, "%s: %s\n", fmtbuf, strerror(saved_errno));
}
 
errno = saved_errno;
@@ -127,53 +106,65 @@ log_warn(const char *emsg, ...)
 void
 log_warnx(const char *emsg, ...)
 {
-   va_list  ap;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
va_start(ap, emsg);
-   vlog(LOG_ERR, emsg, ap);
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_ERR, "%s\n", fmtbuf);
+
+   errno = saved_errno;
 }
 
 void
 log_info(const char *emsg, ...)
 {
-   va_list  ap;
+   charfmtbuf[MAX_LOGLEN];
+   va_list ap;
+   int saved_errno = errno;
 
va_start(ap, emsg);
-   vlog(LOG_INFO, emsg, ap);
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
va_end(ap);
+   logit(LOG_INFO, "%s\n", fmtbuf);
+
+   errno = saved_errno;
 }
 
 void
 log_debug(const char *emsg, ...)

bgpd cleanup around mask2prefixlen

2023-10-17 Thread Claudio Jeker

Looking at fixing portable I realized that some bits around mask2prefixlen
can be cleaned up.

First in session.c the plen != 0xff check is not needed since it never can
happen.

2nd the checks for sin_len and sin6_len == 0 are also impossible. A
sockaddr can not have length 0. A default route would at least have
sin_len == 4 but on OpenBSD even that no longer happens.

Lastly, check for RTF_HOST first, then for sa_in != NULL. If RTF_HOST is set
the netmask is irrelevant.

-- 
:wq Claudio

Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.306
diff -u -p -r1.306 kroute.c
--- kroute.c16 Oct 2023 10:25:45 -  1.306
+++ kroute.c17 Oct 2023 13:18:42 -
@@ -2422,8 +2422,6 @@ mask2prefixlen4(struct sockaddr_in *sa_i
 {
in_addr_t ina;
 
-   if (sa_in->sin_len == 0)
-   return (0);
ina = sa_in->sin_addr.s_addr;
if (ina == 0)
return (0);
@@ -2437,8 +2435,6 @@ mask2prefixlen6(struct sockaddr_in6 *sa_
uint8_t *ap, *ep;
u_intl = 0;
 
-   if (sa_in6->sin6_len == 0)
-   return (0);
/*
 * sin6_len is the size of the sockaddr so subtract the offset of
 * the possibly truncated sin6_addr struct.
@@ -3096,20 +3092,20 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt
switch (sa->sa_family) {
case AF_INET:
sa_in = (struct sockaddr_in *)rti_info[RTAX_NETMASK];
-   if (sa_in != NULL) {
-   kf->prefixlen = mask2prefixlen4(sa_in);
-   } else if (rtm->rtm_flags & RTF_HOST)
+   if (rtm->rtm_flags & RTF_HOST)
kf->prefixlen = 32;
+   else if (sa_in != NULL)
+   kf->prefixlen = mask2prefixlen4(sa_in);
else
kf->prefixlen =
prefixlen_classful(kf->prefix.v4.s_addr);
break;
case AF_INET6:
sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK];
-   if (sa_in6 != NULL) {
-   kf->prefixlen = mask2prefixlen6(sa_in6);
-   } else if (rtm->rtm_flags & RTF_HOST)
+   if (rtm->rtm_flags & RTF_HOST)
kf->prefixlen = 128;
+   else if (sa_in6 != NULL)
+   kf->prefixlen = mask2prefixlen6(sa_in6);
else
fatalx("in6 net addr without netmask");
break;
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.449
diff -u -p -r1.449 session.c
--- session.c   16 Oct 2023 10:25:46 -  1.449
+++ session.c   16 Oct 2023 17:14:41 -
@@ -1241,8 +1241,7 @@ get_alternate_addr(struct bgpd_addr *loc
plen = mask2prefixlen(
match->ifa_addr->sa_family,
match->ifa_netmask);
-   if (plen != 0xff &&
-   prefix_compare(local, remote, plen) == 0)
+   if (prefix_compare(local, remote, plen) == 0)
connected = 1;
}
break;

log.c use buffered IO

2023-10-16 Thread Claudio Jeker

I dislike how log.c does all these asprintf() calls with dubious
workaround calls in case asprintf() fails.
IMO it is easier to use the stdio provided buffers and fflush() to get
"atomic" writes on stderr. At least from my understanding this is the
reason to go to all this length to do a single fprintf call.
We need this since in privsep daemons can log from multiple processes
concurrently and we want the log to be not too mangled.

While there also use one static buffer to handle log_warn() and vfatalc()
where the error message is first expanded and then printed with
logit(LOG_ERR, "%s: %s", fmtbuf, strerror(saved_errno));

Any opinions?
-- 
:wq Claudio

Index: log.c
===
RCS file: /cvs/src/usr.sbin/bgpd/log.c,v
retrieving revision 1.64
diff -u -p -r1.64 log.c
--- log.c   21 Mar 2017 12:06:55 -  1.64
+++ log.c   16 Oct 2023 09:42:55 -
@@ -29,6 +29,7 @@
 static int  debug;
 static int  verbose;
 static const char  *log_procname;
+static char logbuf[4096], fmtbuf[4096];
 
 void
 log_init(int n_debug, int facility)
@@ -41,6 +42,8 @@ log_init(int n_debug, int facility)
 
if (!debug)
openlog(__progname, LOG_PID | LOG_NDELAY, facility);
+   else
+   setvbuf(stderr, logbuf, _IOFBF, sizeof(logbuf));
 
tzset();
 }
@@ -77,18 +80,11 @@ logit(int pri, const char *fmt, ...)
 void
 vlog(int pri, const char *fmt, va_list ap)
 {
-   char*nfmt;
int  saved_errno = errno;
 
if (debug) {
-   /* best effort in out of mem situations */
-   if (asprintf(, "%s\n", fmt) == -1) {
-   vfprintf(stderr, fmt, ap);
-   fprintf(stderr, "\n");
-   } else {
-   vfprintf(stderr, nfmt, ap);
-   free(nfmt);
-   }
+   vfprintf(stderr, fmt, ap);
+   fprintf(stderr, "\n");
fflush(stderr);
} else
vsyslog(pri, fmt, ap);
@@ -99,7 +95,6 @@ vlog(int pri, const char *fmt, va_list a
 void
 log_warn(const char *emsg, ...)
 {
-   char*nfmt;
va_list  ap;
int  saved_errno = errno;
 
@@ -108,16 +103,8 @@ log_warn(const char *emsg, ...)
logit(LOG_ERR, "%s", strerror(saved_errno));
else {
va_start(ap, emsg);
-
-   if (asprintf(, "%s: %s", emsg,
-   strerror(saved_errno)) == -1) {
-   /* we tried it... */
-   vlog(LOG_ERR, emsg, ap);
-   logit(LOG_ERR, "%s", strerror(saved_errno));
-   } else {
-   vlog(LOG_ERR, nfmt, ap);
-   free(nfmt);
-   }
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
+   logit(LOG_ERR, "%s: %s", fmtbuf, strerror(saved_errno));
va_end(ap);
}
 
@@ -159,21 +146,20 @@ log_debug(const char *emsg, ...)
 static void
 vfatalc(int code, const char *emsg, va_list ap)
 {
-   static char s[BUFSIZ];
const char  *sep;
 
if (emsg != NULL) {
-   (void)vsnprintf(s, sizeof(s), emsg, ap);
+   (void)vsnprintf(fmtbuf, sizeof(fmtbuf), emsg, ap);
sep = ": ";
} else {
-   s[0] = '\0';
+   fmtbuf[0] = '\0';
sep = "";
}
if (code)
logit(LOG_CRIT, "fatal in %s: %s%s%s",
-   log_procname, s, sep, strerror(code));
+   log_procname, fmtbuf, sep, strerror(code));
else
-   logit(LOG_CRIT, "fatal in %s%s%s", log_procname, sep, s);
+   logit(LOG_CRIT, "fatal in %s%s%s", log_procname, sep, fmtbuf);
 }
 
 void

Re: Improve IPv6 link-local support in bgpd

2023-10-16 Thread Claudio Jeker

On Mon, Oct 16, 2023 at 09:23:12AM +0200, Claudio Jeker wrote:
> This diff fixes a few more things when establishing connections with
> link-local IPv6 addresses. In get_alternate_addr() the interface scope
> of the connection is recovered and then passed to the RDE. The RDE can
> then use this scope id to insert link-local addresses with the correct
> scope.
> 
> I built a regress test for this which passes with this diff.
> Now probably more is needed because IPv6 link-local addresses are a gift
> that keep on giving. One thing to implement on top of this is template
> matching for link local -- which allows to auto-configure sessions more
> easily. This will probably follow soon.
> 

Here is the regress test I made.
-- 
:wq Claudio

Index: Makefile
===
RCS file: /cvs/src/regress/usr.sbin/bgpd/integrationtests/Makefile,v
retrieving revision 1.22
diff -u -p -r1.22 Makefile
--- Makefile12 Oct 2023 09:18:56 -  1.22
+++ Makefile13 Oct 2023 07:43:57 -
@@ -1,8 +1,9 @@
 # $OpenBSD: Makefile,v 1.22 2023/10/12 09:18:56 claudio Exp $
 
-REGRESS_TARGETS=   network_statement md5 ovs mrt pftable \
-   maxprefix maxprefixout maxcomm \
-   as0 med eval_all policy l3vpn attr ixp
+REGRESS_TARGETS=   network_statement md5 ovs policy pftable \
+   mrt maxprefix maxprefixout maxcomm l3vpn \
+   ixp lladdr \
+   as0 med eval_all attr
 
 BGPD ?=/usr/sbin/bgpd
 
@@ -42,6 +43,9 @@ l3vpn:
${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12 13 
14
 
 ixp:
+   ${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12
+
+lladdr:
${SUDO} ksh ${.CURDIR}/$@.sh ${BGPD} ${.CURDIR} 11 12 pair11 pair12
 
 .if ! exists(/usr/local/bin/exabgp)
Index: bgpd.lladdr.rdomain1.conf
===
RCS file: bgpd.lladdr.rdomain1.conf
diff -N bgpd.lladdr.rdomain1.conf
--- /dev/null   1 Jan 1970 00:00:00 -
+++ bgpd.lladdr.rdomain1.conf   16 Oct 2023 07:07:08 -
@@ -0,0 +1,23 @@
+AS 420001
+router-id 42.0.0.1
+fib-update yes
+
+network 2001:db8:1::/48 set community 0:1
+network 2001:db8:11::/48 set community 0:11
+
+neighbor fe80::c0fe:2%pair11 {
+   descr   "RDOMAIN2"
+   remote-as   420002
+   local-address   fe80::c0fe:1%pair11
+}
+neighbor fe80::beef:2%gif11 {
+   descr   "RDOMAIN2_2"
+   remote-as   420002
+   local-address   fe80::beef:1%gif11
+}
+
+
+allow from any
+deny to any
+allow to fe80::c0fe:2%pair11 community 0:1
+allow to fe80::beef:2%gif11 community 0:11
Index: bgpd.lladdr.rdomain2.conf
===
RCS file: bgpd.lladdr.rdomain2.conf
diff -N bgpd.lladdr.rdomain2.conf
--- /dev/null   1 Jan 1970 00:00:00 -
+++ bgpd.lladdr.rdomain2.conf   16 Oct 2023 07:07:24 -
@@ -0,0 +1,23 @@
+AS 420002
+router-id 42.0.0.2
+fib-update yes
+
+network 2001:db8:2::/48 set community 0:1
+network 2001:db8:12::/48 set community 0:11
+
+neighbor fe80::c0fe:1%pair12 {
+   descr   "RDOMAIN1"
+   remote-as   420001
+   local-address   fe80::c0fe:2%pair12
+}
+
+neighbor fe80::beef:1%gif12 {
+   descr   "RDOMAIN1_2"
+   remote-as   420001
+   local-address   fe80::beef:2%gif12
+}
+
+allow from any
+deny to any
+allow to fe80::c0fe:1%pair12 community 0:1
+allow to fe80::beef:1%gif12 community 0:11
Index: lladdr.rdomain1.ok
===
RCS file: lladdr.rdomain1.ok
diff -N lladdr.rdomain1.ok
--- /dev/null   1 Jan 1970 00:00:00 -
+++ lladdr.rdomain1.ok  16 Oct 2023 07:09:31 -
@@ -0,0 +1,36 @@
+flags: * = Valid, > = Selected, I = via IBGP, A = Announced,
+   S = Stale, E = Error
+origin validation state: N = not-found, V = valid, ! = invalid
+aspa validation state: ? = unknown, V = valid, ! = invalid
+origin: i = IGP, e = EGP, ? = Incomplete
+
+flags  vs destination  gateway  lpref   med aspath origin
+AI*>  N-? 2001:db8:1::/48  ::100 0 i
+*>N-? 2001:db8:2::/48  fe80::c0fe:2%pair11   100 0 420002 i
+AI*>  N-? 2001:db8:11::/48 ::100 0 i
+*>N-? 2001:db8:12::/48 fe80::beef:2%gif11   100 0 420002 i
+flags: B = BGP, C = Connected, S = Static
+   N = BGP Nexthop reachable via this route
+   r = reject route, b = blackhole route
+
+flags prio destination  gateway 
+B   48 2001:db8:2::/48  fe80::c0fe:2%pair11
+B   48 2001:db8:12::/48 fe80::beef:2%gif11
+   route to: 2001:db8:2::
+destination: 2001:db8:2::
+   mask: ff

Improve IPv6 link-local support in bgpd

2023-10-16 Thread Claudio Jeker

This diff fixes a few more things when establishing connections with
link-local IPv6 addresses. In get_alternate_addr() the interface scope
of the connection is recovered and then passed to the RDE. The RDE can
then use this scope id to insert link-local addresses with the correct
scope.

I built a regress test for this which passes with this diff.
Now probably more is needed because IPv6 link-local addresses are a gift
that keeps on giving. One thing to implement on top of this is template
matching for link local -- which allows sessions to be auto-configured more
easily. This will probably follow soon.

-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.477
diff -u -p -r1.477 bgpd.h
--- bgpd.h  30 Aug 2023 08:16:28 -  1.477
+++ bgpd.h  9 Oct 2023 13:50:36 -
@@ -796,6 +796,7 @@ struct session_up {
struct bgpd_addrremote_addr;
struct capabilities capa;
uint32_tremote_bgpid;
+   unsigned intif_scope;
uint16_tshort_as;
 };
 
@@ -1439,6 +1440,7 @@ void   kr_ifinfo(char *);
 voidkr_net_reload(u_int, uint64_t, struct network_head *);
 int kr_reload(void);
 int get_mpe_config(const char *, u_int *, u_int *);
+uint8_t mask2prefixlen(sa_family_t, struct sockaddr *);
 
 /* log.c */
 voidlog_peer_info(const struct peer_config *, const char *, ...)
Index: kroute.c
===
RCS file: /cvs/src/usr.sbin/bgpd/kroute.c,v
retrieving revision 1.305
diff -u -p -r1.305 kroute.c
--- kroute.c1 Jun 2023 09:47:34 -   1.305
+++ kroute.c9 Oct 2023 13:54:25 -
@@ -168,8 +168,6 @@ struct kroute6  *kroute6_match(struct kta
 voidkroute_detach_nexthop(struct ktable *, struct knexthop *);
 
 uint8_tprefixlen_classful(in_addr_t);
-uint8_tmask2prefixlen(in_addr_t);
-uint8_tmask2prefixlen6(struct sockaddr_in6 *);
 uint64_t   ift2ifm(uint8_t);
 const char *get_media_descr(uint64_t);
 const char *get_linkstate(uint8_t, int);
@@ -2419,21 +2417,28 @@ prefixlen_classful(in_addr_t ina)
return (8);
 }
 
-uint8_t
-mask2prefixlen(in_addr_t ina)
+static uint8_t
+mask2prefixlen4(struct sockaddr_in *sa_in)
 {
+   in_addr_t ina;
+
+   if (sa_in->sin_len == 0)
+   return (0);
+   ina = sa_in->sin_addr.s_addr;
if (ina == 0)
return (0);
else
return (33 - ffs(ntohl(ina)));
 }
 
-uint8_t
+static uint8_t
 mask2prefixlen6(struct sockaddr_in6 *sa_in6)
 {
uint8_t *ap, *ep;
u_intl = 0;
 
+   if (sa_in6->sin6_len == 0)
+   return (0);
/*
 * sin6_len is the size of the sockaddr so subtract the offset of
 * the possibly truncated sin6_addr struct.
@@ -2480,6 +2485,19 @@ mask2prefixlen6(struct sockaddr_in6 *sa_
return (l);
 }
 
+uint8_t
+mask2prefixlen(sa_family_t af, struct sockaddr *mask)
+{
+   switch (af) {
+   case AF_INET:
+   return mask2prefixlen4((struct sockaddr_in *)mask);
+   case AF_INET6:
+   return mask2prefixlen6((struct sockaddr_in6 *)mask);
+   default:
+   fatalx("%s: unsupported af", __func__);
+   }
+}
+
 const struct if_status_description
if_status_descriptions[] = LINK_STATE_DESCRIPTIONS;
 const struct ifmedia_description
@@ -3079,9 +3097,7 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt
case AF_INET:
sa_in = (struct sockaddr_in *)rti_info[RTAX_NETMASK];
if (sa_in != NULL) {
-   if (sa_in->sin_len != 0)
-   kf->prefixlen =
-   mask2prefixlen(sa_in->sin_addr.s_addr);
+   kf->prefixlen = mask2prefixlen4(sa_in);
} else if (rtm->rtm_flags & RTF_HOST)
kf->prefixlen = 32;
else
@@ -3091,8 +3107,7 @@ dispatch_rtmsg_addr(struct rt_msghdr *rt
case AF_INET6:
sa_in6 = (struct sockaddr_in6 *)rti_info[RTAX_NETMASK];
if (sa_in6 != NULL) {
-   if (sa_in6->sin6_len != 0)
-   kf->prefixlen = mask2prefixlen6(sa_in6);
+   kf->prefixlen = mask2prefixlen6(sa_in6);
} else if (rtm->rtm_flags & RTF_HOST)
kf->prefixlen = 128;
else
Index: logmsg.c
===
RCS file: /cvs/src/usr.sbin/bgpd/logmsg.c,v
retrieving revision 1.9
diff -u -p -r1.9 logmsg.c
--- logmsg.c24 Aug 2022 17:14:02 -  1.9
+++ logmsg.c14 Oct 2023 09:49:36 -
@@ -60,55 +60,54 @@ log_fmt_peer(const struct peer_config

Re: l2vpn pseudowire and bridge type interface

2023-10-14 Thread Claudio Jeker

On Sat, Oct 14, 2023 at 11:07:11AM +0200, Wouter Prins wrote:
> hello list,
> 
> Was wondering if the veb interface is supported as a bridge for pseudowires?
> The manpage doesn't mention anything about the type of
> bridge interface required (bridge/veb)?

I have not tried it but I think mpw(4) should work with veb(4) just fine.

-- 
:wq Claudio

Re: bgpd, fix log_peer_info() and friends

2023-10-14 Thread Claudio Jeker

On Fri, Oct 13, 2023 at 07:16:13PM +0200, Theo Buehler wrote:
> On Fri, Oct 13, 2023 at 07:01:06PM +0200, Claudio Jeker wrote:
> > Extending the format string with the peer info is a bad idea.
> > The reason is DNS^WIPv6 and scoped addresses which add a % to the
> > string returned by log_fmt_peer.
> > 
> > So instead vasprintf() the emsg and then just use logit().
> 
> Ugh. That's nasty.
> 
> Diff reads fine. There's a change of behavior in that logit() does
> asprintf() but will not fatal on asprintf() failure.

In a way that was already the case before. vlog() is doing this asprintf
dance. Which is dumb.

I'm tempted to rewrite more of this and just use buffered stdio and
fflush() to get the atomic behaviour.

> ok tb
> 
> > -- 
> > :wq Claudio
> > 
> > Index: logmsg.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/logmsg.c,v
> > retrieving revision 1.9
> > diff -u -p -r1.9 logmsg.c
> > --- logmsg.c24 Aug 2022 17:14:02 -  1.9
> > +++ logmsg.c13 Oct 2023 16:43:55 -
> > @@ -60,55 +60,54 @@ log_fmt_peer(const struct peer_config *p
> >  void
> >  log_peer_info(const struct peer_config *peer, const char *emsg, ...)
> >  {
> > -   char*p, *nfmt;
> > +   char*p, *msg;
> > va_list  ap;
> >  
> > p = log_fmt_peer(peer);
> > -   if (asprintf(, "%s: %s", p, emsg) == -1)
> > -   fatal(NULL);
> > va_start(ap, emsg);
> > -   vlog(LOG_INFO, nfmt, ap);
> > +   if (vasprintf(, emsg, ap) == -1)
> > +   fatal(NULL);
> > va_end(ap);
> > +   logit(LOG_INFO, "%s: %s", p, msg);
> > +   free(msg);
> > free(p);
> > -   free(nfmt);
> >  }
> >  
> >  void
> >  log_peer_warn(const struct peer_config *peer, const char *emsg, ...)
> >  {
> > -   char*p, *nfmt;
> > +   char*p, *msg;
> > va_list  ap;
> > +   int  saved_errno = errno;
> >  
> > p = log_fmt_peer(peer);
> > if (emsg == NULL) {
> > -   if (asprintf(, "%s: %s", p, strerror(errno)) == -1)
> > -   fatal(NULL);
> > +   logit(LOG_ERR, "%s: %s", p, strerror(saved_errno));
> > } else {
> > -   if (asprintf(, "%s: %s: %s", p, emsg, strerror(errno)) ==
> > -   -1)
> > +   va_start(ap, emsg);
> > +   if (vasprintf(, emsg, ap) == -1)
> > fatal(NULL);
> > +   va_end(ap);
> > +   logit(LOG_ERR, "%s: %s: %s", p, msg, strerror(saved_errno));
> > +   free(msg);
> > }
> > -   va_start(ap, emsg);
> > -   vlog(LOG_ERR, nfmt, ap);
> > -   va_end(ap);
> > free(p);
> > -   free(nfmt);
> >  }
> >  
> >  void
> >  log_peer_warnx(const struct peer_config *peer, const char *emsg, ...)
> >  {
> > -   char*p, *nfmt;
> > +   char*p, *msg;
> > va_list  ap;
> >  
> > p = log_fmt_peer(peer);
> > -   if (asprintf(, "%s: %s", p, emsg) == -1)
> > -   fatal(NULL);
> > va_start(ap, emsg);
> > -   vlog(LOG_ERR, nfmt, ap);
> > +   if (vasprintf(, emsg, ap) == -1)
> > +   fatal(NULL);
> > va_end(ap);
> > +   logit(LOG_ERR, "%s: %s", p, msg);
> > +   free(msg);
> > free(p);
> > -   free(nfmt);
> >  }
> >  
> >  void
> > 
> 

-- 
:wq Claudio

bgpd, fix log_peer_info() and friends

2023-10-13 Thread Claudio Jeker

Extending the format string with the peer info is a bad idea.
The reason is DNS^WIPv6 and scoped addresses which add a % to the
string returned by log_fmt_peer.

So instead vasprintf() the emsg and then just use logit().
-- 
:wq Claudio

Index: logmsg.c
===
RCS file: /cvs/src/usr.sbin/bgpd/logmsg.c,v
retrieving revision 1.9
diff -u -p -r1.9 logmsg.c
--- logmsg.c24 Aug 2022 17:14:02 -  1.9
+++ logmsg.c13 Oct 2023 16:43:55 -
@@ -60,55 +60,54 @@ log_fmt_peer(const struct peer_config *p
 void
 log_peer_info(const struct peer_config *peer, const char *emsg, ...)
 {
-   char*p, *nfmt;
+   char*p, *msg;
va_list  ap;
 
p = log_fmt_peer(peer);
-   if (asprintf(, "%s: %s", p, emsg) == -1)
-   fatal(NULL);
va_start(ap, emsg);
-   vlog(LOG_INFO, nfmt, ap);
+   if (vasprintf(, emsg, ap) == -1)
+   fatal(NULL);
va_end(ap);
+   logit(LOG_INFO, "%s: %s", p, msg);
+   free(msg);
free(p);
-   free(nfmt);
 }
 
 void
 log_peer_warn(const struct peer_config *peer, const char *emsg, ...)
 {
-   char*p, *nfmt;
+   char*p, *msg;
va_list  ap;
+   int  saved_errno = errno;
 
p = log_fmt_peer(peer);
if (emsg == NULL) {
-   if (asprintf(, "%s: %s", p, strerror(errno)) == -1)
-   fatal(NULL);
+   logit(LOG_ERR, "%s: %s", p, strerror(saved_errno));
} else {
-   if (asprintf(, "%s: %s: %s", p, emsg, strerror(errno)) ==
-   -1)
+   va_start(ap, emsg);
+   if (vasprintf(, emsg, ap) == -1)
fatal(NULL);
+   va_end(ap);
+   logit(LOG_ERR, "%s: %s: %s", p, msg, strerror(saved_errno));
+   free(msg);
}
-   va_start(ap, emsg);
-   vlog(LOG_ERR, nfmt, ap);
-   va_end(ap);
free(p);
-   free(nfmt);
 }
 
 void
 log_peer_warnx(const struct peer_config *peer, const char *emsg, ...)
 {
-   char*p, *nfmt;
+   char*p, *msg;
va_list  ap;
 
p = log_fmt_peer(peer);
-   if (asprintf(, "%s: %s", p, emsg) == -1)
-   fatal(NULL);
va_start(ap, emsg);
-   vlog(LOG_ERR, nfmt, ap);
+   if (vasprintf(, emsg, ap) == -1)
+   fatal(NULL);
va_end(ap);
+   logit(LOG_ERR, "%s: %s", p, msg);
+   free(msg);
free(p);
-   free(nfmt);
 }
 
 void

OpenBGPD 8.3 released

2023-10-13 Thread Claudio Jeker

We have released OpenBGPD 8.3, which will be arriving in the
OpenBGPD directory of your local OpenBSD mirror soon.

This release includes the following changes to the previous release:

* bgpd 8.1 and 8.2 could send a bad COMMUNITY attribute when
  non-transitive ext-communities are present. A workaround is to
  add a filter rule to clear non-transitive ext-communities:
match to ebgp set ext-community delete ovs *
  This fix is included in OpenBSD 7.4.

* Fix a possible fatal error in the RDE when "announce add-path send all"
  is used. The error is triggered by an ineligible path which is wrongly
  distributed.

* Fix selection of the local nexthop for the alternate address family.
  This is used by 'announce IPv6 unicast' over an IPv4 session or
  vice-versa.

OpenBGPD-portable is known to compile and run on FreeBSD and the
Linux distributions Alpine, Debian, Fedora, RHEL/CentOS and Ubuntu.
It is our hope that packagers take interest and help adapt OpenBGPD-portable
to more distributions.

We welcome feedback and improvements from the broader community.
Thanks to all of the contributors who helped make this release
possible.

bgpd: announce add-path send all can hit a fatal error

2023-10-12 Thread Claudio Jeker

I optimized "announce add-path send all" to not always re-evaluate all
possible prefixes. While doing that I introduced a small bug. The problem
is that the new prefix passed to up_generate_addpath_all() could be not
eligible. This causes up_process_prefix() -> up_test_update() to error
out.

The below fix should solve this twice. The rde_update.c diff ensures that
up_generate_addpath_all() checks the new path to be eligible and is the
minimal fix. The rde_decide.c diff fixes the problem at the root.
I came to the conclusion that I want both.

Btw. the other up_generate_* functions don't have this issue since they
use prefix_best() to get new and that already calls prefix_eligible().
-- 
:wq Claudio

Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.101
diff -u -p -r1.101 rde_decide.c
--- rde_decide.c13 Mar 2023 16:52:42 -  1.101
+++ rde_decide.c12 Oct 2023 12:56:59 -
@@ -568,9 +568,12 @@ prefix_evaluate(struct rib_entry *re, st
 * to be passed on (not only a change of the best prefix).
 * rde_generate_updates() will then take care of distribution.
 */
-   if (rde_evaluate_all())
-   if ((new != NULL && prefix_eligible(new)) || old != NULL)
+   if (rde_evaluate_all()) {
+   if (new != NULL && !prefix_eligible(new))
+   new = NULL;
+   if (new != NULL || old != NULL)
rde_generate_updates(re, new, old, EVAL_ALL);
+   }
 }
 
 void
@@ -578,7 +581,7 @@ prefix_evaluate_nexthop(struct prefix *p
 enum nexthop_state oldstate)
 {
struct rib_entry *re = prefix_re(p);
-   struct prefix   *newbest, *oldbest;
+   struct prefix   *newbest, *oldbest, *new, *old;
struct rib  *rib;
 
/* Skip non local-RIBs or RIBs that are flagged as noeval. */
@@ -608,6 +611,7 @@ prefix_evaluate_nexthop(struct prefix *p
 * Re-evaluate the prefix by removing the prefix then updating the
 * nexthop state and reinserting the prefix again.
 */
+   old = p;
oldbest = prefix_best(re);
prefix_remove(p, re);
 
@@ -618,6 +622,9 @@ prefix_evaluate_nexthop(struct prefix *p
 
prefix_insert(p, NULL, re);
newbest = prefix_best(re);
+   new = p;
+   if (!prefix_eligible(new))
+   new = NULL;
 
/*
 * If the active prefix changed or the active prefix was removed
@@ -631,7 +638,7 @@ prefix_evaluate_nexthop(struct prefix *p
 */
if ((rib->flags & F_RIB_NOFIB) == 0)
rde_send_kroute(rib, newbest, oldbest);
-   rde_generate_updates(re, p, p, EVAL_DEFAULT);
+   rde_generate_updates(re, new, old, EVAL_DEFAULT);
return;
}
 
@@ -641,5 +648,5 @@ prefix_evaluate_nexthop(struct prefix *p
 * rde_generate_updates() will then take care of distribution.
 */
if (rde_evaluate_all())
-   rde_generate_updates(re, p, p, EVAL_ALL);
+   rde_generate_updates(re, new, old, EVAL_ALL);
 }
Index: rde_update.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_update.c,v
retrieving revision 1.163
diff -u -p -r1.163 rde_update.c
--- rde_update.c12 Jul 2023 14:45:43 -  1.163
+++ rde_update.c12 Oct 2023 12:54:05 -
@@ -352,6 +352,11 @@ up_generate_addpath_all(struct rde_peer 
all = 1;
}
 
+   if (new != NULL && !prefix_eligible(new)) {
+   /* only allow valid prefixes */
+   new = NULL;
+   }
+
if (old != NULL) {
/* withdraw stale paths */
p = prefix_adjout_get(peer, old->path_id_tx, old->pt);

bgpd: fix handling of non-transitive ext communities

2023-10-10 Thread Claudio Jeker

See https://github.com/openbgpd-portable/openbgpd-portable/issues/64

The handling of non-transitive ext communities causes encoding errors
for regular communities. The problem is that the start and end points of
the loop are calculated like this:

for (l = 0; l < comm->nentries; l++) {
cp = &comm->communities[l];
if (ebgp && non_transitive_ext_community(cp))
continue;
if ((uint8_t)cp->flags == t) {
num++;
if (start == -1)
start = l;
}
if ((uint8_t)cp->flags > t)
break;
}
end = l;

But in the write out loop the non_transitive_ext_community() check
is only done inside the ext-community block. As a result
COMMUNITY_TYPE_BASIC communities can walk over their respective end of the
list into the COMMUNITY_TYPE_EXT communities (if the non-transitive
ext-comm is the first ext-community).

The fix is to move the non_transitive_ext_community() check into
if ((uint8_t)cp->flags == t) { } block. Since then we know the type is
correct.

-- 
:wq Claudio

Index: rde_community.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
retrieving revision 1.13
diff -u -p -r1.13 rde_community.c
--- rde_community.c 12 Jul 2023 14:45:43 -  1.13
+++ rde_community.c 10 Oct 2023 13:43:03 -
@@ -556,10 +556,9 @@ community_writebuf(struct rde_community 
start = -1;
for (l = 0; l < comm->nentries; l++) {
cp = >communities[l];
-
-   if (ebgp && non_transitive_ext_community(cp))
-   continue;
if ((uint8_t)cp->flags == t) {
+   if (ebgp && non_transitive_ext_community(cp))
+   continue;
num++;
if (start == -1)
start = l;

Re: wg destroy hangs

2023-10-08 Thread Claudio Jeker

On Wed, Oct 04, 2023 at 11:31:47PM +0200, Alexander Bluhm wrote:
> On Wed, Oct 04, 2023 at 11:03:27PM +0300, Vitaliy Makkoveev wrote:
> > On Wed, Oct 04, 2023 at 09:13:59PM +0200, Alexander Bluhm wrote:
> > > On Wed, Oct 04, 2023 at 08:42:48PM +0200, Kirill Miazine wrote:
> > > > > If it happens again, could you send a 'ps axlww | grep ifconfig'
> > > > > output?  Then we see the wait channel where it hangs in the kernel.
> > > > > 
> > > > > $ ps axlww
> > > > >UID   PID  PPID CPU PRI  NI   VSZ   RSS WCHAN   STAT   TT   
> > > > > TIME COMMAND
> > > > 
> > > > Here it happened again:
> > > > 
> > > >  0 75339 23922   0  10   0   360   296 wg_ifq  D+Up00:00.00 
> > > > ifconfig wg1 destroy
> > > 
> > > wg_peer_destroy()
> > >   ...
> > > NET_LOCK();
> > > while (!ifq_empty(&sc->sc_if.if_snd)) {
> > > NET_UNLOCK();
> > > tsleep_nsec(sc, PWAIT, "wg_ifq", 1000);
> > > NET_LOCK();
> > > }
> > > NET_UNLOCK();
> > > 
> > > This net lock dance looks fishy.  And the sleep has a timeout of 1
> > > millisecond.  But that may be per packet.  So if you have a
> > > long queue or the queue refills somehow, it will take forever.
> > > 
> > > I think the difference in the usage is constant traffic that keeps
> > > the send queue full.  The timeout hides the problem when there are
> > > only a few packets.
> > > 
> > 
> > This should ensure wg_qstart() will not dereference dying `peer'. Looks
> > crappy and potentially could block forever, but should work. However
> > netlock is unnecessary here. netlocked wg_output() could fill `if_snd'
> > while netlock released before tsleep(), so it serializes nothing but
> > stops packets processing.
> > 
> > Kirill, does this diff help? 
> 
> I doubt that it changes much.  When netlock is not taken, the queue
> can still be filled with packets.
> 
> Removing this ugly netlock makes sense anyway.  But without any
> synchronisation just reading a variable feels wrong.  Can we add a
> read once like for mq_len in sys/mbuf.h?  And the ifq_set_maxlen()
> also looks very unsafe.  For mbuf queues I added a mutex, interface
> queues should do the same.
> 
> ok?

I reverted this diff since it breaks the API. There are numerous cases
where ifq_set_maxlen() is called before the mutex is initialized. So doing
this does not work.

I also question this diff in general. ifq_set_maxlen() is not called
concurrently, it is called when the interface is attached. So there is
no need for a mutex here.
 
Also the added READ_ONCE() seems not to be needed.
ifiq_len() is unused and ifiq_empty() is only used by ifiq_process().
So maybe that call should be moved into the mutex protected block, the
task should only run when the ifiq_ml has data enqueued.

> Index: net/ifq.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 ifq.c
> --- net/ifq.c 30 Jul 2023 05:39:52 -  1.50
> +++ net/ifq.c 4 Oct 2023 21:04:20 -
> @@ -529,6 +529,14 @@ ifq_hdatalen(struct ifqueue *ifq)
>   return (len);
>  }
>  
> +void
> +ifq_set_maxlen(struct ifqueue *ifq, unsigned int maxlen)
> +{
> + mtx_enter(>ifq_mtx);
> + ifq->ifq_maxlen = maxlen;
> + mtx_leave(>ifq_mtx);
> +}
> +
>  unsigned int
>  ifq_purge(struct ifqueue *ifq)
>  {
> Index: net/ifq.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/ifq.h,v
> retrieving revision 1.38
> diff -u -p -r1.38 ifq.h
> --- net/ifq.h 30 Jul 2023 05:39:52 -  1.38
> +++ net/ifq.h 4 Oct 2023 21:09:04 -
> @@ -435,6 +435,7 @@ void   ifq_deq_commit(struct ifqueue *, 
>  void  ifq_deq_rollback(struct ifqueue *, struct mbuf *);
>  struct mbuf  *ifq_dequeue(struct ifqueue *);
>  int   ifq_hdatalen(struct ifqueue *);
> +void  ifq_set_maxlen(struct ifqueue *, unsigned int);
>  void  ifq_mfreem(struct ifqueue *, struct mbuf *);
>  void  ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
>  unsigned int  ifq_purge(struct ifqueue *);
> @@ -448,9 +449,8 @@ intifq_deq_sleep(struct ifqueue *, st
>const char *, volatile unsigned int *,
>volatile unsigned int *);
>  
> -#define  ifq_len(_ifq)   ((_ifq)->ifq_len)
> -#define  ifq_empty(_ifq) (ifq_len(_ifq) == 0)
> -#define  ifq_set_maxlen(_ifq, _l)((_ifq)->ifq_maxlen = (_l))
> +#define ifq_len(_ifq)READ_ONCE((_ifq)->ifq_len)
> +#define ifq_empty(_ifq)  (ifq_len(_ifq) == 0)
>  
>  static inline int
>  ifq_is_priq(struct ifqueue *ifq)
> @@ -490,8 +490,8 @@ intifiq_input(struct ifiqueue *, stru
>  int   ifiq_enqueue(struct ifiqueue *, struct mbuf *);
>  void  ifiq_add_data(struct ifiqueue *, struct if_data *);
>  
> -#define

OpenBGPD 8.2 released

2023-10-02 Thread Claudio Jeker

We have released OpenBGPD 8.2, which will be arriving in the
OpenBGPD directory of your local OpenBSD mirror soon.

This release includes the following changes to the previous release:

* Update ASPA support to follow draft-ietf-sidrops-aspa-verification-16
  and draft-ietf-sidrops-aspa-profile-16 by making the ASPA lookup
  tables AFI-agnostic.

* Fix a fatal error in the Linux netlink parser which was triggered
  because of a mismatched netlink message size.

* Rework UPDATE message generation to use the new ibuf API instead
  of the hand-rolled solution before. 

* Improve error message in bgpctl for features not supported by the
  portable version of OpenBGPD.

* Adjusted example GRACEFUL_SHUTDOWN filter rule in the example config
  to only match on ebgp sessions.

OpenBGPD-portable is known to compile and run on FreeBSD and the
Linux distributions Alpine, Debian, Fedora, RHEL/CentOS and Ubuntu.
It is our hope that packagers take interest and help adapt OpenBGPD-portable
to more distributions.

We welcome feedback and improvements from the broader community.
Thanks to all of the contributors who helped make this release
possible.

Re: vscsi(4): make `vscsi_filtops' mpsafe

2023-09-28 Thread Claudio Jeker

On Thu, Sep 28, 2023 at 01:58:45PM +0300, Vitaliy Makkoveev wrote:
> filt_vscsiread() checks `sc_ccb_i2t' protected by `sc_state_mtx'
> mutex(9), so use it to protect `sc_klist' knotes list too.
> 
> Tested with iscsid(8).

Your diff removes a device_unref(&sc->sc_dev) call in filt_vscsidetach()
which seems dubious to me since the reference is still taken in
vscsikqfilter().
 
> Index: sys/dev/vscsi.c
> ===
> RCS file: /cvs/src/sys/dev/vscsi.c,v
> retrieving revision 1.61
> diff -u -p -r1.61 vscsi.c
> --- sys/dev/vscsi.c   2 Jul 2022 08:50:41 -   1.61
> +++ sys/dev/vscsi.c   28 Sep 2023 10:47:19 -
> @@ -27,13 +27,18 @@
>  #include 
>  #include 
>  #include 
> -#include 
> +#include 
>  
>  #include 
>  #include 
>  
>  #include 
>  
> +/*
> + * Locks used to protect struct members and global data
> + *   s   sc_state_mtx
> + */
> +
>  int  vscsi_match(struct device *, void *, void *);
>  void vscsi_attach(struct device *, struct device *, void *);
>  void vscsi_shutdown(void *);
> @@ -64,14 +69,13 @@ struct vscsi_softc {
>  
>   struct scsi_iopool  sc_iopool;
>  
> - struct vscsi_ccb_list   sc_ccb_i2t;
> + struct vscsi_ccb_list   sc_ccb_i2t; /* [s] */
>   struct vscsi_ccb_list   sc_ccb_t2i;
>   int sc_ccb_tag;
>   struct mutexsc_poll_mtx;
>   struct rwlock   sc_ioc_lock;
>  
> - struct selinfo  sc_sel;
> - struct mutexsc_sel_mtx;
> + struct klistsc_klist;   /* [s] */
>  };
>  
>  #define DEVNAME(_s) ((_s)->sc_dev.dv_xname)
> @@ -110,12 +114,16 @@ voidvscsi_ccb_put(void *, void *);
>  
>  void filt_vscsidetach(struct knote *);
>  int  filt_vscsiread(struct knote *, long);
> +int  filt_vscsimodify(struct kevent *, struct knote *);
> +int  filt_vscsiprocess(struct knote *, struct kevent *);
>  
>  const struct filterops vscsi_filtops = {
> - .f_flags= FILTEROP_ISFD,
> + .f_flags= FILTEROP_ISFD | FILTEROP_MPSAFE,
>   .f_attach   = NULL,
>   .f_detach   = filt_vscsidetach,
>   .f_event= filt_vscsiread,
> + .f_modify   = filt_vscsimodify,
> + .f_process  = filt_vscsiprocess,
>  };
>  
>  
> @@ -133,15 +141,15 @@ vscsi_attach(struct device *parent, stru
>  
>   printf("\n");
>  
> - mtx_init(>sc_state_mtx, IPL_BIO);
> + mtx_init(>sc_state_mtx, IPL_MPFLOOR);
>   sc->sc_state = VSCSI_S_CLOSED;
>  
>   TAILQ_INIT(>sc_ccb_i2t);
>   TAILQ_INIT(>sc_ccb_t2i);
>   mtx_init(>sc_poll_mtx, IPL_BIO);
> - mtx_init(>sc_sel_mtx, IPL_BIO);
>   rw_init(>sc_ioc_lock, "vscsiioc");
>   scsi_iopool_init(>sc_iopool, sc, vscsi_ccb_get, vscsi_ccb_put);
> + klist_init_mutex(>sc_klist, >sc_state_mtx);
>  
>   saa.saa_adapter = _switch;
>   saa.saa_adapter_softc = sc;
> @@ -181,6 +189,7 @@ vscsi_cmd(struct scsi_xfer *xs)
>   running = 1;
>   TAILQ_INSERT_TAIL(>sc_ccb_i2t, ccb, ccb_entry);
>   }
> + knote_locked(>sc_klist, 0);
>   mtx_leave(>sc_state_mtx);
>  
>   if (!running) {
> @@ -189,8 +198,6 @@ vscsi_cmd(struct scsi_xfer *xs)
>   return;
>   }
>  
> - selwakeup(>sc_sel);
> -
>   if (polled) {
>   mtx_enter(>sc_poll_mtx);
>   while (ccb->ccb_xs != NULL)
> @@ -530,13 +537,10 @@ int
>  vscsikqfilter(dev_t dev, struct knote *kn)
>  {
>   struct vscsi_softc *sc = DEV2SC(dev);
> - struct klist *klist;
>  
>   if (sc == NULL)
>   return (ENXIO);
>  
> - klist = >sc_sel.si_note;
> -
>   switch (kn->kn_filter) {
>   case EVFILT_READ:
>   kn->kn_fop = _filtops;
> @@ -547,10 +551,7 @@ vscsikqfilter(dev_t dev, struct knote *k
>   }
>  
>   kn->kn_hook = sc;
> -
> - mtx_enter(>sc_sel_mtx);
> - klist_insert_locked(klist, kn);
> - mtx_leave(>sc_sel_mtx);
> + klist_insert(>sc_klist, kn);
>  
>   /* device ref is given to the knote in the klist */
>  
> @@ -561,27 +562,42 @@ void
>  filt_vscsidetach(struct knote *kn)
>  {
>   struct vscsi_softc *sc = kn->kn_hook;
> - struct klist *klist = >sc_sel.si_note;
> -
> - mtx_enter(>sc_sel_mtx);
> - klist_remove_locked(klist, kn);
> - mtx_leave(>sc_sel_mtx);
>  
> - device_unref(>sc_dev);
> + klist_remove(>sc_klist, kn);
>  }
>  
>  int
>  filt_vscsiread(struct knote *kn, long hint)
>  {
>   struct vscsi_softc *sc = kn->kn_hook;
> - int event = 0;
> +
> + return (!TAILQ_EMPTY(>sc_ccb_i2t));
> +}
> +
> +int
> +filt_vscsimodify(struct kevent *kev, struct knote *kn)
> +{
> + struct vscsi_softc *sc = kn->kn_hook;
> + int active;
> +
> + mtx_enter(>sc_state_mtx);
> + active = knote_modify(kev, kn);
> + mtx_leave(>sc_state_mtx);
> +
> + return (active);
> +}
> +
> +int
> +filt_vscsiprocess(struct knote *kn,

adjust example bgpd.conf GRACEFUL_SHUTDOWN rule

2023-09-27 Thread Claudio Jeker

Graceful Shutdown should only be done on eBGP sessions.
If you alter the local-pref on ibgp sessions it is possible to produce
loops or other network instabilities. Now if all iBGP routers apply the
same rule it is fine but if not you can get into trouble.

So better adjust our example and only apply the rule to ebgp peers.
Btw. RFC8326 mentions explicitly that GRACEFUL_SHUTDOWN should only be
applied for eBGP sessions.
-- 
:wq Claudio


Index: etc/examples/bgpd.conf
===
RCS file: /cvs/src/etc/examples/bgpd.conf,v
retrieving revision 1.21
diff -u -p -r1.21 bgpd.conf
--- etc/examples/bgpd.conf  2 Feb 2021 00:34:03 -   1.21
+++ etc/examples/bgpd.conf  19 Sep 2023 07:38:36 -
@@ -111,7 +111,7 @@ allow from any inet6 prefixlen 16 - 48
 
 # Honor requests to gracefully shutdown BGP sessions
 # https://tools.ietf.org/html/rfc8326
-match from any community GRACEFUL_SHUTDOWN set { localpref 0 }
+match from ebgp community GRACEFUL_SHUTDOWN set { localpref 0 }
 
 deny quick from any prefix-set bogons

Re: rpki-client: refactor sbgp_assysnum() and sbgp_ipaddrblk()

2023-09-25 Thread Claudio Jeker

On Mon, Sep 25, 2023 at 05:02:06PM +0200, Theo Buehler wrote:
> On Mon, Sep 25, 2023 at 04:43:31PM +0200, Claudio Jeker wrote:
> > On Mon, Sep 25, 2023 at 04:38:48PM +0200, Theo Buehler wrote:
> > > On Mon, Sep 25, 2023 at 02:47:37PM +0200, Claudio Jeker wrote:
> > > > On Sat, Sep 23, 2023 at 01:23:34PM +0200, Theo Buehler wrote:
> > > > > This is a second chunk split out of the diff mentioned in my previous
> > > > > mail. It factors the parsing of ASIdentifiers and IPAddrBlocks out of
> > > > > sbgp_assysnum() and sbgp_ipaddrblk() and makes the latter only extract
> > > > > the info from the X509_EXTENSION. This should not change anything, but
> > > > > the logic is a bit tricky.
> > > > > 
> > > > > We could initialize *as and *asz, as well as *ips and *ipsz to NULL/0,
> > > > > at the top of the two new sbgp_parse_*.
> > > > 
> > > > It looks indeed like nothing is changed. The thing I dislike a bit is how
> > > > **as and *asz are updated inside the sbgp_parse_* functions. There is
> > > > return 0 before and after the calloc / recallocarray calls and so it
> > > > depends a lot on the caller to be careful here. The code right now is 
> > > > ok.
> > > 
> > > Thanks for that clue. I didn't particularly like my diff either.  The
> > > below is better, has less churn and should be easier to review. This way
> > > the caller doesn't have to be careful.
> > > 
> > > I left the currently existing variables asz and ipsz untouched since it
> > > becomes too confusing. I want to rename asz -> sz and new_asz -> asz in
> > > a follow-up, similarly for ipsz.
> > 
> > Indeed much better. OK claudio@
> 
> And here's the rename. It is mechanical apart from two lines where I
> fixed the order of the variable declarations and from a line I
> unwrapped.

OK claudio@
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.116
> diff -u -p -r1.116 cert.c
> --- cert.c25 Sep 2023 14:56:20 -  1.116
> +++ cert.c25 Sep 2023 14:58:57 -
> @@ -159,7 +159,7 @@ sbgp_parse_assysnum(const char *fn, cons
>  {
>   const ASIdOrRanges  *aors = NULL;
>   struct cert_as  *as = NULL;
> - size_t   asz, new_asz = 0;
> + size_t   asz = 0, sz;
>   int  i;
>  
>   assert(*out_as == NULL && *out_asz == 0);
> @@ -178,11 +178,11 @@ sbgp_parse_assysnum(const char *fn, cons
>  
>   switch (asidentifiers->asnum->type) {
>   case ASIdentifierChoice_inherit:
> - asz = 1;
> + sz = 1;
>   break;
>   case ASIdentifierChoice_asIdsOrRanges:
>   aors = asidentifiers->asnum->u.asIdsOrRanges;
> - asz = sk_ASIdOrRange_num(aors);
> + sz = sk_ASIdOrRange_num(aors);
>   break;
>   default:
>   warnx("%s: RFC 3779 section 3.2.3.2: ASIdentifierChoice: "
> @@ -190,21 +190,21 @@ sbgp_parse_assysnum(const char *fn, cons
>   goto out;
>   }
>  
> - if (asz == 0) {
> + if (sz == 0) {
>   warnx("%s: RFC 6487 section 4.8.11: empty asIdsOrRanges", fn);
>   goto out;
>   }
> - if (asz >= MAX_AS_SIZE) {
> + if (sz >= MAX_AS_SIZE) {
>   warnx("%s: too many AS number entries: limit %d",
>   fn, MAX_AS_SIZE);
>   goto out;
>   }
> - as = calloc(asz, sizeof(struct cert_as));
> + as = calloc(sz, sizeof(struct cert_as));
>   if (as == NULL)
>   err(1, NULL);
>  
>   if (aors == NULL) {
> - if (!sbgp_as_inherit(fn, as, _asz))
> + if (!sbgp_as_inherit(fn, as, ))
>   goto out;
>   }
>  
> @@ -214,11 +214,11 @@ sbgp_parse_assysnum(const char *fn, cons
>   aor = sk_ASIdOrRange_value(aors, i);
>   switch (aor->type) {
>   case ASIdOrRange_id:
> - if (!sbgp_as_id(fn, as, _asz, aor->u.id))
> + if (!sbgp_as_id(fn, as, , aor->u.id))
>   goto out;
>   break;
>   case ASIdOrRange_range:
> - if (!sbgp_as_range(fn, as, _asz, aor->u.range))
> + if (!sbgp_as_range(fn, as, , aor->u.range))
>   goto out;
>

Re: rpki-client: refactor sbgp_assysnum() and sbgp_ipaddrblk()

2023-09-25 Thread Claudio Jeker

On Mon, Sep 25, 2023 at 04:38:48PM +0200, Theo Buehler wrote:
> On Mon, Sep 25, 2023 at 02:47:37PM +0200, Claudio Jeker wrote:
> > On Sat, Sep 23, 2023 at 01:23:34PM +0200, Theo Buehler wrote:
> > > This is a second chunk split out of the diff mentioned in my previous
> > > mail. It factors the parsing of ASIdentifiers and IPAddrBlocks out of
> > > sbgp_assysnum() and sbgp_ipaddrblk() and makes the latter only extract
> > > the info from the X509_EXTENSION. This should not change anything, but
> > > the logic is a bit tricky.
> > > 
> > > We could initialize *as and *asz, as well as *ips and *ipsz to NULL/0,
> > > at the top of the two new sbgp_parse_*.
> > 
> > It looks indeed like nothing is changed. The thing I dislike a bit is how
> > **as and *asz are updated inside the sbgp_parse_* functions. There is
> > return 0 before and after the calloc / recallocarray calls and so it
> > depends a lot on the caller to be careful here. The code right now is ok.
> 
> Thanks for that clue. I didn't particularly like my diff either.  The
> below is better, has less churn and should be easier to review. This way
> the caller doesn't have to be careful.
> 
> I left the currently existing variables asz and ipsz untouched since it
> becomes too confusing. I want to rename asz -> sz and new_asz -> asz in
> a follow-up, similarly for ipsz.

Indeed much better. OK claudio@
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 cert.c
> --- cert.c12 Sep 2023 09:33:30 -  1.115
> +++ cert.c25 Sep 2023 14:29:56 -
> @@ -153,40 +153,26 @@ sbgp_as_inherit(const char *fn, struct c
>   return append_as(fn, ases, asz, );
>  }
>  
> -/*
> - * Parse RFC 6487 4.8.11 X509v3 extension, with syntax documented in RFC
> - * 3779 starting in section 3.2.
> - * Returns zero on failure, non-zero on success.
> - */
> -static int
> -sbgp_assysnum(struct parse *p, X509_EXTENSION *ext)
> +int
> +sbgp_parse_assysnum(const char *fn, const ASIdentifiers *asidentifiers,
> +struct cert_as **out_as, size_t *out_asz)
>  {
> - ASIdentifiers   *asidentifiers = NULL;
>   const ASIdOrRanges  *aors = NULL;
> - size_t   asz;
> - int  i, rc = 0;
> + struct cert_as  *as = NULL;
> + size_t   asz, new_asz = 0;
> + int  i;
>  
> - if (!X509_EXTENSION_get_critical(ext)) {
> - warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> - "extension not critical", p->fn);
> - goto out;
> - }
> -
> - if ((asidentifiers = X509V3_EXT_d2i(ext)) == NULL) {
> - warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> - "failed extension parse", p->fn);
> - goto out;
> - }
> + assert(*out_as == NULL && *out_asz == 0);
>  
>   if (asidentifiers->rdi != NULL) {
>   warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> - "should not have RDI values", p->fn);
> + "should not have RDI values", fn);
>   goto out;
>   }
>  
>   if (asidentifiers->asnum == NULL) {
>   warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> - "no AS number resource set", p->fn);
> + "no AS number resource set", fn);
>   goto out;
>   }
>  
> @@ -200,26 +186,25 @@ sbgp_assysnum(struct parse *p, X509_EXTE
>   break;
>   default:
>   warnx("%s: RFC 3779 section 3.2.3.2: ASIdentifierChoice: "
> - "unknown type %d", p->fn, asidentifiers->asnum->type);
> + "unknown type %d", fn, asidentifiers->asnum->type);
>   goto out;
>   }
>  
>   if (asz == 0) {
> - warnx("%s: RFC 6487 section 4.8.11: empty asIdsOrRanges",
> - p->fn);
> + warnx("%s: RFC 6487 section 4.8.11: empty asIdsOrRanges", fn);
>   goto out;
>   }
>   if (asz >= MAX_AS_SIZE) {
>   warnx("%s: too many AS number entries: limit %d",
> - p->fn, MAX_AS_SIZE);
> + fn, MAX_AS_SIZE);
>   goto out;
>   }
> - p->res->as = calloc(asz, sizeof(struct cert_as));
&g

Re: rpki-client: refactor sbgp_assysnum() and sbgp_ipaddrblk()

2023-09-25 Thread Claudio Jeker

On Sat, Sep 23, 2023 at 01:23:34PM +0200, Theo Buehler wrote:
> This is a second chunk split out of the diff mentioned in my previous
> mail. It factors the parsing of ASIdentifiers and IPAddrBlocks out of
> sbgp_assysnum() and sbgp_ipaddrblk() and makes the latter only extract
> the info from the X509_EXTENSION. This should not change anything, but
> the logic is a bit tricky.
> 
> We could initialize *as and *asz, as well as *ips and *ipsz to NULL/0,
> at the top of the two new sbgp_parse_*.

It looks indeed like nothing is changed. The thing I dislike a bit is how
**as and *asz are updated inside the sbgp_parse_* functions. There is
return 0 before and after the calloc / recallocarray calls and so it
depends a lot on the caller to be careful here. The code right now is ok.
One minor nit though:
 
> Index: cert.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/cert.c,v
> retrieving revision 1.115
> diff -u -p -r1.115 cert.c
> --- cert.c12 Sep 2023 09:33:30 -  1.115
> +++ cert.c23 Sep 2023 11:03:48 -

> +/*
> + * Parse RFC 6487 4.8.11 X509v3 extension, with syntax documented in RFC
> + * 3779 starting in section 3.2.
> + * Returns zero on failure, non-zero on success.
> + */
> +static int
> +sbgp_assysnum(struct parse *p, X509_EXTENSION *ext)
> +{
> + ASIdentifiers   *asidentifiers = NULL;
> + int  rc = 0;
> +
> + if (!X509_EXTENSION_get_critical(ext)) {
> + warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> + "extension not critical", p->fn);
> + goto out;
> + }
> +
> + if ((asidentifiers = X509V3_EXT_d2i(ext)) == NULL) {
> + warnx("%s: RFC 6487 section 4.8.11: autonomousSysNum: "
> + "failed extension parse", p->fn);
> + goto out;
> + }
> +
> + if (!sbgp_parse_assysnum(p->fn, asidentifiers, >res->as, 
> >res->asz))

This line is over 80 chars.

Apart from that OK.
-- 
:wq Claudio

Re: rpki-client: pass talid to callers of cert_parse_ee_cert()

2023-09-25 Thread Claudio Jeker

On Sat, Sep 23, 2023 at 12:59:48PM +0200, Theo Buehler wrote:
> This is a boring mechanical diff that splits some of the noise out of a
> larger diff that Job will send out and explain in detail soon. In itself
> it changes nothing. For a given product we will need to know the
> originating TA for additional checks in cert_parse_ee_cert().
> 
> The callers of cert_parse_ee_cert() are *_parse(), except cert_parse()
> and crl_parse(), which are special anyway.
> 
> In !filemode the talid is known to the caller of proc_parser_* (since
> struct entp contains it). proc_parser_* later recovers this info from
> struct auth returned by valid_ski_aki() but that's only possible after
> *_parse() was called.
> 
> So pass the full struct entp * to proc_parser_*() instead of only the
> entp->mftaki and then pass entp->talid and entp->mftaki where needed.
> 
> In filemode the talid is unknown at the point when *_parse() is called,
> so set it to -1 to indicate that.
> 
> There are various other ways of achieving what Job's plan needs. For
> example, we could replace X509 ** with struct cert_ip ** in *_parse and
> do the check in proc_parser_* instead of cert_parse_ee_cert(). The
> resulting complexity is about the same and unless there are strong
> concerns or objections I'd like to do it the way below.
> 
> Regress needs a trivial adjustment that I will commit at the same time.

I see nothing that speaks against this. OK claudio@
 
> Index: aspa.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/aspa.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 aspa.c
> --- aspa.c10 Jul 2023 12:02:37 -  1.22
> +++ aspa.c23 Sep 2023 09:59:32 -
> @@ -159,7 +159,8 @@ aspa_parse_econtent(const unsigned char 
>   * Returns the payload or NULL if the file was malformed.
>   */
>  struct aspa *
> -aspa_parse(X509 **x509, const char *fn, const unsigned char *der, size_t len)
> +aspa_parse(X509 **x509, const char *fn, int talid, const unsigned char *der,
> +size_t len)
>  {
>   struct parse p;
>   size_t   cmsz;
> Index: extern.h
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> retrieving revision 1.189
> diff -u -p -r1.189 extern.h
> --- extern.h  12 Sep 2023 09:33:30 -  1.189
> +++ extern.h  23 Sep 2023 09:59:32 -
> @@ -624,33 +624,33 @@ void cert_insert_brks(struct brk_tree 
>  enum rtypertype_from_file_extension(const char *);
>  void  mft_buffer(struct ibuf *, const struct mft *);
>  void  mft_free(struct mft *);
> -struct mft   *mft_parse(X509 **, const char *, const unsigned char *,
> +struct mft   *mft_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  struct mft   *mft_read(struct ibuf *);
>  int   mft_compare(const struct mft *, const struct mft *);
>  
>  void  roa_buffer(struct ibuf *, const struct roa *);
>  void  roa_free(struct roa *);
> -struct roa   *roa_parse(X509 **, const char *, const unsigned char *,
> +struct roa   *roa_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  struct roa   *roa_read(struct ibuf *);
>  void  roa_insert_vrps(struct vrp_tree *, struct roa *,
>   struct repo *);
>  
>  void  gbr_free(struct gbr *);
> -struct gbr   *gbr_parse(X509 **, const char *, const unsigned char *,
> +struct gbr   *gbr_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  
>  void  geofeed_free(struct geofeed *);
> -struct geofeed   *geofeed_parse(X509 **, const char *, char *, size_t);
> +struct geofeed   *geofeed_parse(X509 **, const char *, int, char *, 
> size_t);
>  
>  void  rsc_free(struct rsc *);
> -struct rsc   *rsc_parse(X509 **, const char *, const unsigned char *,
> +struct rsc   *rsc_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  
>  void  takey_free(struct takey *);
>  void  tak_free(struct tak *);
> -struct tak   *tak_parse(X509 **, const char *, const unsigned char *,
> +struct tak   *tak_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  struct tak   *tak_read(struct ibuf *);
>  
> @@ -658,7 +658,7 @@ void   aspa_buffer(struct ibuf *, const 
>  void  aspa_free(struct aspa *);
>  void  aspa_insert_vaps(struct vap_tree *, struct aspa *,
>   struct repo *);
> -struct aspa  *aspa_parse(X509 **, const char *, const unsigned char *,
> +struct aspa  *aspa_parse(X509 **, const char *, int, const unsigned char *,
>   size_t);
>  struct aspa  *aspa_read(struct ibuf *);
>  
> Index: filemode.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/filemode.c,v
> retrieving revision 1.34
> diff -u

Re: update bsd.regress.mk(5)

2023-09-24 Thread Claudio Jeker

On Sun, Sep 24, 2023 at 04:22:30PM +0200, Theo Buehler wrote:
> On Sun, Sep 24, 2023 at 03:17:11PM +0200, Claudio Jeker wrote:
> > Try to document how REGRESS_LOG and REGRESS_FAIL_EARLY interact.
> 
> We could make it fully precise with a few more words.
> 

Done

-- 
:wq Claudio

Index: bsd.regress.mk.5
===
RCS file: /cvs/src/share/man/man5/bsd.regress.mk.5,v
retrieving revision 1.24
diff -u -p -r1.24 bsd.regress.mk.5
--- bsd.regress.mk.531 Mar 2022 17:27:22 -  1.24
+++ bsd.regress.mk.524 Sep 2023 16:58:58 -
@@ -79,11 +79,20 @@ If this variable is set to anything but
 the
 .Cm regress
 target will abort as soon as a test fails.
+Defaults to
+.Dq yes
+unless
+.Ev REGRESS_LOG
+is set.
 .It Ev REGRESS_LOG
 Points to the fully-qualified path of a file to which regression
 results are appended.
 Defaults to
 .Pa /dev/null .
+If set to any other path,
+.Ev REGRESS_FAIL_EARLY
+defaults to
+.Dq no .
 .It Ev REGRESS_ROOT_TARGETS
 Targets for which root access is required to run the test.
 The

update bsd.regress.mk(5)

2023-09-24 Thread Claudio Jeker

Try to document how REGRESS_LOG and REGRESS_FAIL_EARLY interact.

-- 
:wq Claudio

Index: bsd.regress.mk.5
===
RCS file: /cvs/src/share/man/man5/bsd.regress.mk.5,v
retrieving revision 1.24
diff -u -p -r1.24 bsd.regress.mk.5
--- bsd.regress.mk.531 Mar 2022 17:27:22 -  1.24
+++ bsd.regress.mk.524 Sep 2023 13:15:27 -
@@ -79,11 +79,20 @@ If this variable is set to anything but
 the
 .Cm regress
 target will abort as soon as a test fails.
+By default the value is
+.Dq yes
+unless
+.Ev REGRESS_LOG
+is set.
 .It Ev REGRESS_LOG
 Points to the fully-qualified path of a file to which regression
 results are appended.
 Defaults to
 .Pa /dev/null .
+If set
+.Ev REGRESS_FAIL_EARLY
+defaults to
+.Dq no .
 .It Ev REGRESS_ROOT_TARGETS
 Targets for which root access is required to run the test.
 The

Re: scheduler_start: move static timeout structs into callback functions

2023-09-17 Thread Claudio Jeker

On Sat, Sep 16, 2023 at 11:31:45AM -0500, Scott Cheloha wrote:
> The schedcpu() and loadavg() timeout structures are already hidden
> from the global namespace.  We can further simplify scheduler_start()
> by moving the structures into the callback functions and statically
> initializing them with TIMEOUT_INITIALIZER(9).
> 
> ok?

Sure.
 
> Index: sched_bsd.c
> ===
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.86
> diff -u -p -r1.86 sched_bsd.c
> --- sched_bsd.c   10 Sep 2023 03:08:05 -  1.86
> +++ sched_bsd.c   16 Sep 2023 16:24:33 -
> @@ -117,9 +117,9 @@ roundrobin(struct clockintr *cl, void *c
>   * 1, 5, and 15 minute intervals.
>   */
>  void
> -update_loadavg(void *arg)
> +update_loadavg(void *unused)
>  {
> - struct timeout *to = (struct timeout *)arg;
> + static struct timeout to = TIMEOUT_INITIALIZER(update_loadavg, NULL);
>   CPU_INFO_ITERATOR cii;
>   struct cpu_info *ci;
>   u_int i, nrun = 0;
> @@ -135,7 +135,7 @@ update_loadavg(void *arg)
>   nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
>   }
>  
> - timeout_add_sec(to, 5);
> + timeout_add_sec(, 5);
>  }
>  
>  /*
> @@ -227,9 +227,9 @@ fixpt_t   ccpu = 0.95122942450071400909 * 
>   * Recompute process priorities, every second.
>   */
>  void
> -schedcpu(void *arg)
> +schedcpu(void *unused)
>  {
> - struct timeout *to = (struct timeout *)arg;
> + static struct timeout to = TIMEOUT_INITIALIZER(schedcpu, NULL);
>   fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
>   struct proc *p;
>   int s;
> @@ -280,7 +280,7 @@ schedcpu(void *arg)
>   SCHED_UNLOCK(s);
>   }
>   wakeup();
> - timeout_add_sec(to, 1);
> + timeout_add_sec(, 1);
>  }
>  
>  /*
> @@ -726,23 +726,14 @@ sysctl_hwperfpolicy(void *oldp, size_t *
>  }
>  #endif
>  
> +/*
> + * Start the scheduler's periodic timeouts.
> + */
>  void
>  scheduler_start(void)
>  {
> - static struct timeout schedcpu_to;
> - static struct timeout loadavg_to;
> -
> - /*
> -  * We avoid polluting the global namespace by keeping the scheduler
> -  * timeouts static in this function.
> -  * We setup the timeout here and kick schedcpu once to make it do
> -  * its job.
> -  */
> - timeout_set(_to, schedcpu, _to);
> - timeout_set(_to, update_loadavg, _to);
> -
> - schedcpu(_to);
> - update_loadavg(_to);
> + schedcpu(NULL);
> + update_loadavg(NULL);
>  
>  #ifndef SMALL_KERNEL
>   if (perfpolicy == PERFPOL_AUTO)

-- 
:wq Claudio

Re: ps.1/kvm documentation

2023-09-11 Thread Claudio Jeker

On Mon, Sep 11, 2023 at 11:02:00AM +0200, Marc Espie wrote:
> I was reading through ps.1, which has two slightly different options
>  -H  Also display information about kernel visible threads.
>  -k  Also display information about kernel threads.
> 
> It's not at all obvious what the difference between these options might be.
> 
 
kernel threads == kthread(9) created threads

Those should have K in STAT and the name is in () like:
 3141 ??  RK/1   4057:57.90 (idle1)

kernel visible threads == __tfork_thread(3) created threads for userland
applications. For example:

43838  556612 ??  IpU  0:01.58 firefox (firefox/Cache2 I/O)
43838  415551 ??  IpU  0:00.01 firefox (firefox/Cookie)
43838  377915 ??  IpU  0:00.01 firefox (firefox/Worker Launcher)

These threads all share the same PID but have different TID.

I think the "kernel visible" is there to tell that pure userland threads
can not be reported by ps. I think go routines are such an example.

> From the log:
> 
> revision 1.77
> date: 2011/09/25 00:29:59;  author: guenther;  state: Exp;  lines: +5 -3;
> Add -H option to show rthreads, hiding them by default
> 
> Diff from uwe@
> 
> so slightly more info.
> 
> Looking at the code, now this is KERN_PROC_KTHREAD vs KERN_PROC_SHOW_THREADS
> in kvm_getprocs(3).
> 
> Now KERN_PROC_KTHREAD is documented, but there is nothing about
> KERN_PROC_SHOW_THREADS.
> 
> The code around (dothreads) in kvm* doesn't make things really obvious.
> 

-- 
:wq Claudio

Re: btop++

2023-09-04 Thread Claudio Jeker

On Mon, Sep 04, 2023 at 11:13:01AM +0200, Jos Dehaes wrote:
> Hi,
> 
> I ported btop++ (a resource monitor program like top/htop) to macOS,
> FreeBSD, and now OpenBSD (pull request:
> https://github.com/aristocratos/btop/pull/607).
> 
> I would appreciate if anyone could have a look and tell me what stupid
> things I did and how I can do it better (I don't have much OpenBSD
> experience).
> 
> I have some questions that I can't seem to find an answer to:
> 
>- is it possible to get the temperature per CPU thread? Right now I'm
>giving both threads on same physical core same temperature

There is no physical temp sensor per thread. Only core temperatures make
sense. So what you do is correct.

>- same question about CPU usage, it seems only possible to get info on
>physical core granularity

In OpenBSD SMT cores show up as independent CPUs. So a 2 core 4 thread
cpu has cpu0 - cpu3.

>- Similar question about disk I/O, I can only find a way to get disk I/O
>per disk, so I'm giving all mountpoints the same disk I/O.

From my knowledge there are no per-mount stats. In general they do not
really matter.

>- Is it possible to get disk I/O info per process? Linux and macOS can
>do that, in FreeBSD I also could not find an API for it.

You can look at getrusage ru_inblock/ru_oublock and p_uru_inblock /
p_uru_oublock from struct kinfo_proc. Not perfect but there is nothing
else.
 
> How could we go about including this in OpenBSD (ports or native package)?
> 
> (Please CC me as I'm not subscribed to the list)
> 
> Thx,
> Jos

-- 
:wq Claudio

Re: sched_cpu_init: no need to NULL-check clockintr pointers

2023-08-31 Thread Claudio Jeker

On Thu, Aug 31, 2023 at 10:30:45AM -0500, Scott Cheloha wrote:
> We don't actually need these NULL-checks, my mistake.
> 
> sched_init_cpu() is only ever run once for a given cpu_info.
> 
> ok?

Yes please. OK claudio@
 
> Index: kern_sched.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.87
> diff -u -p -r1.87 kern_sched.c
> --- kern_sched.c  29 Aug 2023 16:19:34 -  1.87
> +++ kern_sched.c  31 Aug 2023 15:24:02 -
> @@ -88,26 +88,15 @@ sched_init_cpu(struct cpu_info *ci)
>  
>   spc->spc_idleproc = NULL;
>  
> - if (spc->spc_itimer == NULL) {
> - spc->spc_itimer = clockintr_establish(>ci_queue,
> - itimer_update);
> - if (spc->spc_itimer == NULL) {
> - panic("%s: clockintr_establish itimer_update",
> - __func__);
> - }
> - }
> - if (spc->spc_profclock == NULL) {
> - spc->spc_profclock = clockintr_establish(>ci_queue,
> - profclock);
> - if (spc->spc_profclock == NULL)
> - panic("%s: clockintr_establish profclock", __func__);
> - }
> - if (spc->spc_roundrobin == NULL) {
> - spc->spc_roundrobin = clockintr_establish(>ci_queue,
> - roundrobin);
> - if (spc->spc_roundrobin == NULL)
> - panic("%s: clockintr_establish roundrobin", __func__);
> - }
> + spc->spc_itimer = clockintr_establish(>ci_queue, itimer_update);
> + if (spc->spc_itimer == NULL)
> + panic("%s: clockintr_establish itimer_update", __func__);
> + spc->spc_profclock = clockintr_establish(>ci_queue, profclock);
> + if (spc->spc_profclock == NULL)
> + panic("%s: clockintr_establish profclock", __func__);
> + spc->spc_roundrobin = clockintr_establish(>ci_queue, roundrobin);
> + if (spc->spc_roundrobin == NULL)
> + panic("%s: clockintr_establish roundrobin", __func__);
>  
>   kthread_create_deferred(sched_kthreads_create, ci);
>  

-- 
:wq Claudio

introduce ctl_results CTL_RES_OPNOTSUPP for portable

2023-08-30 Thread Claudio Jeker

In portable some features are disabled and requests by bgpctl should
result in an error that is reasonable.

For example bgpctl show fib returns "permission denied" in
kroute-disabled.c which is not great. So let's have an explicit error for
unavailable features in -portable.

-- 
:wq Claudio

Index: bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.476
diff -u -p -r1.476 bgpd.h
--- bgpd.h  16 Aug 2023 08:26:35 -  1.476
+++ bgpd.h  30 Aug 2023 07:35:34 -
@@ -691,7 +691,8 @@ enum ctl_results {
CTL_RES_NOMEM,
CTL_RES_BADPEER,
CTL_RES_BADSTATE,
-   CTL_RES_NOSUCHRIB
+   CTL_RES_NOSUCHRIB,
+   CTL_RES_OPNOTSUPP,
 };
 
 /* needed for session.h parse prototype */
@@ -1709,7 +1710,8 @@ static const char * const ctl_res_strerr
"out of memory",
"not a cloned peer",
"peer still active, down peer first",
-   "no such RIB"
+   "no such RIB",
+   "operation not supported",
 };
 
 static const char * const timernames[] = {

Re: __predict_{true,false} is this right?

2023-08-22 Thread Claudio Jeker

On Tue, Aug 22, 2023 at 02:33:39PM +0200, Peter J. Philipp wrote:
> Hi,
> 
> I have an outstanding bug report that I send to deraadt and claudio, in
> reading the code I came across these macros:
> 
> https://github.com/openbsd/src/blame/master/sys/sys/cdefs.h
> 
> lines 195 and 196.  Now my question, does this not sorta look wrong?
> 
> Shouldn't these values be a little more unique?  As in not the same?

No. Think about how exp is expanded.

if (__predict_false(x == 0)) -> if ((x == 0) != 0)

The __predict_xyz macros do not alter the outcome of the if statement, they
are just a hint.

-- 
:wq Claudio

Re: EVFILT_TIMER add support for different timer precisions NOTE_{,U,N,M}SECONDS

2023-08-08 Thread Claudio Jeker

On Tue, Aug 08, 2023 at 10:40:06AM -0500, Scott Cheloha wrote:
> On Sat, Aug 05, 2023 at 01:33:05AM -0400, A Tammy wrote:
> > 
> > On 8/5/23 00:49, Scott Cheloha wrote:
> > > On Sat, Aug 05, 2023 at 12:17:48AM -0400, aisha wrote:
> > >> On 22/09/10 01:53PM, Visa Hankala wrote:
> > >>> On Wed, Aug 31, 2022 at 04:48:37PM -0400, aisha wrote:
> >  I've added a patch which adds support for NOTE_{,U,M,N}SECONDS for
> >  EVFILT_TIMER in the kqueue interface.
> > >>> It sort of makes sense to add an option to specify timeouts in
> > >>> sub-millisecond precision. It feels complete overengineering to add
> > >>> multiple time units on the level of the kernel interface. However,
> > >>> it looks that FreeBSD and NetBSD have already done this following
> > >>> macOS' lead...
> > >>>
> >  I've also added the NOTE_ABSTIME but haven't done any actual 
> >  implementation
> >  there as I am not sure how the `data` field should be interpreted (is 
> >  it
> >  absolute time in seconds since epoch?).
> > >>> I think FreeBSD and NetBSD take NOTE_ABSTIME as time since the epoch.
> > >>>
> > >>> Below is a revised patch that takes into account some corner cases.
> > >>> It tries to be API-compatible with FreeBSD and NetBSD. I have adjusted
> > >>> the NOTE_{,M,U,N}SECONDS flags so that they are enum-like.
> > >>>
> > >>> The manual page bits are from NetBSD.
> > >>>
> > >>> It is quite late to introduce a feature like this within this release
> > >>> cycle. Until now, the timer code has ignored the fflags field. There
> > >>> might be pieces of software that are careless with struct kevent and
> > >>> that would break as a result of this patch. Programs that are widely
> > >>> used on different BSDs are probably fine already, though.
> > >> 
> > >> Sorry, I had forgotten this patch for a long time!!! I've been running 
> > >> with this for a while now and it's been working nicely.
> > > 
> > > Where is this being used in ports?  I think having "one of each" for
> > > seconds, milliseconds, microseconds, and nanoseconds is (as visa
> > > noted) way, way over-the-top.
> > 
> > I was using it with a port that I sent out a while ago but never got
> > into tree (was before I joined the project) -
> > https://marc.info/?l=openbsd-ports=165715874509440=2
> 
> If nothing in ports is using this I am squeamish about adding it.
> Once we add it, we're stuck maintaining it, warts and all.
> 
> If www/workflow were in the tree I could see the upside.  Is it in
> ports?
> 
> It looks like workflow actually wants timerfd(2) from Linux and is
> simulating timerfd(2) with EVFILT_TIMER and NOTE_NSECONDS:
> 
> https://github.com/sogou/workflow/blob/80b3dfbad2264bcd79ba37811c66421490e337d2/src/kernel/poller.c#L227
> 
> I think timerfd(2) is the superior interface here.  It keeps the POSIX
> interval timer semantics without all the signal delivery baggage.  It
> also supports multiple clocks and starting a periodic timeout from an
> absolute starting time.
> 
> So, if the goal is "add www/workflow to ports", adding timerfd(2) might
> be the right thing.

I don't think that this is a good move. Adding timerfd(2) will result in
the need to add all those magic fd interfaces linux invents on a weekly
basis. I would not go down that rabbit-hole unless there is really really no
alternative.

-- 
:wq Claudio

bgpd, be more careful with shutdown reason

2023-08-04 Thread Claudio Jeker

When copying the shutdown reason from ctl_neighbor into the peer struct
the strlcpy needs a NUL terminated string input. This may not be the case
so we should be more careful here.
I see two ways to fix this.
a) force in a NUL before calling strlcpy() as done below.
b) use memcpy() and then force terminate p->conf.reason.

What is the preferred way?
-- 
:wq Claudio

? obj
Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.112
diff -u -p -r1.112 control.c
--- control.c   4 Aug 2023 09:20:12 -   1.112
+++ control.c   4 Aug 2023 09:35:37 -
@@ -388,14 +388,20 @@ control_dispatch_msg(struct pollfd *pfd,
control_result(c, CTL_RES_OK);
break;
case IMSG_CTL_NEIGHBOR_DOWN:
-   p->conf.down = 1;
+   neighbor->reason[
+   sizeof(neighbor->reason) - 1] =
+   '\0';
strlcpy(p->conf.reason,
neighbor->reason,
sizeof(p->conf.reason));
+   p->conf.down = 1;
session_stop(p, ERR_CEASE_ADMIN_DOWN);
control_result(c, CTL_RES_OK);
break;
case IMSG_CTL_NEIGHBOR_CLEAR:
+   neighbor->reason[
+   sizeof(neighbor->reason) - 1] =
+   '\0';
strlcpy(p->conf.reason,
neighbor->reason,
sizeof(p->conf.reason));

Re: vmd: handle EAGAIN from imsg_flush

2023-08-04 Thread Claudio Jeker

On Thu, Aug 03, 2023 at 07:01:51PM -0400, Dave Voutila wrote:
> 
> Claudio Jeker  writes:
> 
> > On Thu, Aug 03, 2023 at 04:20:47PM -0400, Dave Voutila wrote:
> >> Found this while working on some virtio stuff. My original
> >> implementation as part of the multi-process redesign didn't handle if
> >> imsg_flush failed due to resource shortages (EAGAIN), so adopt the
> >> do/while idiom used by other daemons like iked(8).
> >>
> >> Manifests with errors from the vm process looking like:
> >>
> >>   virtio_pci_io: imsg_flush (write)
> >>   vmd: pci i/o access function failed
> >>
> >> ok?
> >
> > There are way to many imsg_flush() calls in this code. I don't think this
> > is designed the right way. imsg_flush() should almost never be used.
> > Busy loop on EAGAIN will bite you further down the road.
> >
> 
> Bite me how?

We had massive EAGAIN spins in relayd. Busy loop on EAGAIN is almost
always wrong. A possible fix is to poll on the fd until it is writable --
while this does not burn CPU for nothing it still does not allow any other
event from happening either. Have a look at what atomicio.c does; you may
need to add something similar for imsg_flush().
 
> This is all for the synchronous imsg channel to the virtio device
> process, not the async one. What's going to actually perform the flush
> for me if not imsg_flush?

This is where I'm a bit confused. There is no such concept of a
synchronous imsg channel. imsg are by nature asynchronous. You pass
messages between processes (especially in a libevent driven daemon).
It seems this code requires some kind of synchronisation but blocking
all processing during that time seems like a very big hammer.

I don't fully grasp vmd but looking at the code and seeing that atomicio
is mixed with libevent is a red flag. Also why is the synchronous channel
set non-blocking? If it is synchronous it should most probably be blocking
(which would also solve the EAGAIN issue).
 
> >> diff refs/heads/master refs/heads/vmd-imsg_flush
> >> commit - 40d57955f2f1a3a65c42ea374f86c74cf879d76d
> >> commit + 2c42dedb675f013276cdbd47464f656e2451b92c
> >> blob - 798b5fea6d589d43083c134a040df6272037869f
> >> blob + ff1ab5fdcb52866f10a14df38d30abde277619df
> >> --- usr.sbin/vmd/virtio.c
> >> +++ usr.sbin/vmd/virtio.c
> >> @@ -825,8 +825,11 @@ virtio_shutdown(struct vmd_vm *vm)
> >>if (ret == -1)
> >>fatalx("%s: failed to send shutdown to device",
> >>__func__);
> >> -  if (imsg_flush(ibuf) == -1)
> >> -  fatalx("%s: imsg_flush", __func__);
> >> +  do {
> >> +  ret = imsg_flush(ibuf);
> >> +  } while (ret == -1 && errno == EAGAIN);
> >> +  if (ret == -1)
> >> +  fatal("%s: imsg_flush", __func__);
> >>}
> >>
> >>/*
> >> @@ -1132,8 +1135,11 @@ vionet_dump(int fd)
> >>__func__, dev->vionet.idx);
> >>return (-1);
> >>}
> >> -  if (imsg_flush(ibuf) == -1) {
> >> -  log_warnx("%s: imsg_flush", __func__);
> >> +  do {
> >> +  ret = imsg_flush(ibuf);
> >> +  } while (ret == -1 && errno == EAGAIN);
> >> +  if (ret == -1) {
> >> +  log_warn("%s: imsg_flush", __func__);
> >>return (-1);
> >>}
> >>
> >> @@ -1189,12 +1195,14 @@ vioblk_dump(int fd)
> >>__func__, dev->vioblk.idx);
> >>return (-1);
> >>}
> >> -  if (imsg_flush(ibuf) == -1) {
> >> -  log_warnx("%s: imsg_flush", __func__);
> >> +  do {
> >> +  ret = imsg_flush(ibuf);
> >> +  } while (ret == -1 && errno == EAGAIN);
> >> +  if (ret == -1) {
> >> +  log_warn("%s: imsg_flush", __func__);
> >>return (-1);
> >>}
> >>
> >> -
> >>sz = atomicio(read, dev->sync_fd, , sizeof(temp));
> >>if (sz != sizeof(temp)) {
> >>log_warnx("%s: failed to dump vioblk[%d]", __func__,
> >> @@ -1660,8 +1668,11 @@ virtio_pci_io(int dir, uint16_t reg, uint32_t 
> >> *d

Re: vmd: handle EAGAIN from imsg_flush

2023-08-03 Thread Claudio Jeker

On Thu, Aug 03, 2023 at 04:20:47PM -0400, Dave Voutila wrote:
> Found this while working on some virtio stuff. My original
> implementation as part of the multi-process redesign didn't handle if
> imsg_flush failed due to resource shortages (EAGAIN), so adopt the
> do/while idiom used by other daemons like iked(8).
> 
> Manifests with errors from the vm process looking like:
> 
>   virtio_pci_io: imsg_flush (write)
>   vmd: pci i/o access function failed
> 
> ok?

There are way too many imsg_flush() calls in this code. I don't think this
is designed the right way. imsg_flush() should almost never be used.
Busy loop on EAGAIN will bite you further down the road.
 
> diff refs/heads/master refs/heads/vmd-imsg_flush
> commit - 40d57955f2f1a3a65c42ea374f86c74cf879d76d
> commit + 2c42dedb675f013276cdbd47464f656e2451b92c
> blob - 798b5fea6d589d43083c134a040df6272037869f
> blob + ff1ab5fdcb52866f10a14df38d30abde277619df
> --- usr.sbin/vmd/virtio.c
> +++ usr.sbin/vmd/virtio.c
> @@ -825,8 +825,11 @@ virtio_shutdown(struct vmd_vm *vm)
>   if (ret == -1)
>   fatalx("%s: failed to send shutdown to device",
>   __func__);
> - if (imsg_flush(ibuf) == -1)
> - fatalx("%s: imsg_flush", __func__);
> + do {
> + ret = imsg_flush(ibuf);
> + } while (ret == -1 && errno == EAGAIN);
> + if (ret == -1)
> + fatal("%s: imsg_flush", __func__);
>   }
> 
>   /*
> @@ -1132,8 +1135,11 @@ vionet_dump(int fd)
>   __func__, dev->vionet.idx);
>   return (-1);
>   }
> - if (imsg_flush(ibuf) == -1) {
> - log_warnx("%s: imsg_flush", __func__);
> + do {
> + ret = imsg_flush(ibuf);
> + } while (ret == -1 && errno == EAGAIN);
> + if (ret == -1) {
> + log_warn("%s: imsg_flush", __func__);
>   return (-1);
>   }
> 
> @@ -1189,12 +1195,14 @@ vioblk_dump(int fd)
>   __func__, dev->vioblk.idx);
>   return (-1);
>   }
> - if (imsg_flush(ibuf) == -1) {
> - log_warnx("%s: imsg_flush", __func__);
> + do {
> + ret = imsg_flush(ibuf);
> + } while (ret == -1 && errno == EAGAIN);
> + if (ret == -1) {
> + log_warn("%s: imsg_flush", __func__);
>   return (-1);
>   }
> 
> -
>   sz = atomicio(read, dev->sync_fd, , sizeof(temp));
>   if (sz != sizeof(temp)) {
>   log_warnx("%s: failed to dump vioblk[%d]", __func__,
> @@ -1660,8 +1668,11 @@ virtio_pci_io(int dir, uint16_t reg, uint32_t *data, u
>   " device", __func__);
>   return (ret);
>   }
> - if (imsg_flush(ibuf) == -1) {
> - log_warnx("%s: imsg_flush (write)", __func__);
> + do {
> + ret = imsg_flush(ibuf);
> + } while (ret == -1 && errno == EAGAIN);
> + if (ret == -1) {
> + log_warn("%s: imsg_flush (write)", __func__);
>   return (-1);
>   }
>   } else {
> @@ -1675,8 +1686,11 @@ virtio_pci_io(int dir, uint16_t reg, uint32_t *data, u
>   " device", __func__);
>   return (ret);
>   }
> - if (imsg_flush(ibuf) == -1) {
> - log_warnx("%s: imsg_flush (read)", __func__);
> + do {
> + ret = imsg_flush(ibuf);
> + } while (ret == -1 && errno == EAGAIN);
> + if (ret == -1) {
> + log_warn("%s: imsg_flush (read)", __func__);
>   return (-1);
>   }
> 

-- 
:wq Claudio

iked, ibuf_length vs ibuf_size

2023-08-03 Thread Claudio Jeker

iked has a special version of ibuf_size() called ibuf_length(). In the
long run I want to remove this special case. The problem is that
ibuf_length(NULL) returns 0 while ibuf_size() fails.
Allowing the NULL pointer here results in bad code since it is no longer
obvious if a buffer is initialised or not.

So here is a first step on cleaning up this mess. It switches all
ibuf_length() calls to ibuf_size() where it is obvious that the argument
is not NULL (e.g. when ibuf_data(buf) is used at the same time).
Also in some cases the check should actually be if buf == NULL since
in those cases the buf is later allocated. (ikev2_pld.c and
ikev2.c::ikev2_sa_responder()).

Please double check if I did not introduce some error.
-- 
:wq Claudio


Index: ca.c
===
RCS file: /cvs/src/sbin/iked/ca.c,v
retrieving revision 1.95
diff -u -p -r1.95 ca.c
--- ca.c28 Jun 2023 14:10:24 -  1.95
+++ ca.c28 Jul 2023 11:29:25 -
@@ -207,7 +207,7 @@ int
 ca_certbundle_add(struct ibuf *buf, struct iked_id *id)
 {
uint8_t  type = id->id_type;
-   size_t   len = ibuf_length(id->id_buf);
+   size_t   len = ibuf_size(id->id_buf);
void*val = ibuf_data(id->id_buf);
 
if (id == NULL ||
@@ -416,16 +416,16 @@ ca_setcert(struct iked *env, struct iked
/* Must send the cert and a valid Id to the ca process */
if (procid == PROC_CERT) {
if (id == NULL || id->id_type == IKEV2_ID_NONE ||
-   ibuf_length(id->id_buf) > IKED_ID_SIZE)
+   ibuf_size(id->id_buf) > IKED_ID_SIZE)
return (-1);
bzero(, sizeof(idb));
 
/* Convert to a static Id */
idb.id_type = id->id_type;
idb.id_offset = id->id_offset;
-   idb.id_length = ibuf_length(id->id_buf);
+   idb.id_length = ibuf_size(id->id_buf);
memcpy(_data, ibuf_data(id->id_buf),
-   ibuf_length(id->id_buf));
+   ibuf_size(id->id_buf));
 
iov[iovcnt].iov_base = 
iov[iovcnt].iov_len = sizeof(idb);
@@ -491,13 +491,13 @@ ca_setreq(struct iked *env, struct iked_
if (ikev2_policy2id(localid, , 1) != 0)
return (-1);
 
-   if (ibuf_length(id.id_buf) > IKED_ID_SIZE)
+   if (ibuf_size(id.id_buf) > IKED_ID_SIZE)
return (-1);
bzero(, sizeof(idb));
idb.id_type = id.id_type;
idb.id_offset = id.id_offset;
-   idb.id_length = ibuf_length(id.id_buf);
-   memcpy(_data, ibuf_data(id.id_buf), ibuf_length(id.id_buf));
+   idb.id_length = ibuf_size(id.id_buf);
+   memcpy(_data, ibuf_data(id.id_buf), ibuf_size(id.id_buf));
iov[iovcnt].iov_base = 
iov[iovcnt].iov_len = sizeof(idb);
iovcnt++;
@@ -637,7 +637,7 @@ ca_getcert(struct iked *env, struct imsg
ret = ca_pubkey_serialize(certkey, );
if (ret == 0) {
ptr = ibuf_data(key.id_buf);
-   len = ibuf_length(key.id_buf);
+   len = ibuf_size(key.id_buf);
type = key.id_type;
break;
}
@@ -668,7 +668,7 @@ ca_getcert(struct iked *env, struct imsg
ret = ca_validate_pubkey(env, , NULL, 0, );
if (ret == 0) {
ptr = ibuf_data(key.id_buf);
-   len = ibuf_length(key.id_buf);
+   len = ibuf_size(key.id_buf);
type = key.id_type;
}
break;
@@ -1060,18 +1060,18 @@ ca_reload(struct iked *env)
}
}
 
-   if (ibuf_length(env->sc_certreq)) {
+   if (ibuf_size(env->sc_certreq)) {
env->sc_certreqtype = IKEV2_CERT_X509_CERT;
iov[0].iov_base = >sc_certreqtype;
iov[0].iov_len = sizeof(env->sc_certreqtype);
iovcnt++;
iov[1].iov_base = ibuf_data(env->sc_certreq);
-   iov[1].iov_len = ibuf_length(env->sc_certreq);
+   iov[1].iov_len = ibuf_size(env->sc_certreq);
iovcnt++;
 
log_debug("%s: loaded %zu ca certificate%s", __func__,
-   ibuf_length(env->sc_certreq) / SHA_DIGEST_LENGTH,
-   ibuf_length(env->sc_certreq) == SHA_DIGEST_LENGTH ?
+   ibuf_size(env->sc_certreq) / SHA_DIGEST_LENGTH,
+   ibuf_size(env->sc_certreq) == SHA_DIGEST_LENGTH ?
"" : "s");
 
(void)proc_composev(>sc_ps, PROC_IKEV2, IMSG_CERTREQ,
@@ -1252,7 +1252,7 @@ ca_cert_local(struct iked *env, X509  *c
int  ret = 0;

Re: uvm_loadav: don't recompute schedstate_percpu.spc_nrun

2023-08-03 Thread Claudio Jeker

On Thu, Aug 03, 2023 at 10:53:24AM +0200, Claudio Jeker wrote:
> On Thu, Aug 03, 2023 at 10:13:57AM +0200, Martin Pieuchot wrote:
> > On 02/08/23(Wed) 14:22, Claudio Jeker wrote:
> > > On Mon, Jul 31, 2023 at 10:21:11AM -0500, Scott Cheloha wrote:
> > > > On Fri, Jul 28, 2023 at 07:36:41PM -0500, Scott Cheloha wrote:
> > > > > claudio@ notes that uvm_loadav() pointlessly walks the allproc list to
> > > > > recompute schedstate_percpu.spn_nrun for each CPU.
> > > > > 
> > > > > We can just use the value instead of recomputing it.
> > > > 
> > > > Whoops, off-by-one.  The current load averaging code includes the
> > > > running thread in the nrun count if it is *not* the idle thread.
> > > 
> > > Yes, with this the loadavg seems to be consistent and following the number
> > > of running processes. The code seems to behave like before (with all its
> > > quirks).
> > > 
> > > OK claudio@, this is a good first step. Now I think this code should later
> > > be moved into kern_sched.c or sched_bsd.c and removed from uvm. Not sure 
> > > why
> > > the load calculation is part of memory management...
> > > 
> > > On top of this I wonder about the per-CPU load calculation. In my opinion
> > > it is wrong to skip the calculation if the CPU is idle. Because of this
> > > there is no decay for idle CPUs and that feels wrong to me.
> > > Do we have a userland utility that reports spc_ldavg?
> > 
> > I don't understand why the SCHED_LOCK() is needed.  Since I'm really
> > against adding new uses for it, could you comment on that?
> 
> The question is how sloppy do we want to be. This code looks at
> ci_schedstate (spc_idleproc and spc_nrun) and ci_curproc so the be correct
> this needs to lock the scheduler. Do we really want that, hell no.
  
How about this. Kill the spc_ldavg calculation. Its use is more than
questionable. The cpu selection code using this is not working well and
process stealing will do the rest.
Also use sched_cpu_idle to know if a cpu is idle.

-- 
:wq Claudio

Index: kern/kern_sched.c
===
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.81
diff -u -p -r1.81 kern_sched.c
--- kern/kern_sched.c   27 Jul 2023 17:52:53 -  1.81
+++ kern/kern_sched.c   3 Aug 2023 08:41:38 -
@@ -373,7 +373,6 @@ sched_choosecpu_fork(struct proc *parent
 {
 #ifdef MULTIPROCESSOR
struct cpu_info *choice = NULL;
-   fixpt_t load, best_load = ~0;
int run, best_run = INT_MAX;
struct cpu_info *ci;
struct cpuset set;
@@ -407,13 +406,10 @@ sched_choosecpu_fork(struct proc *parent
while ((ci = cpuset_first()) != NULL) {
cpuset_del(, ci);
 
-   load = ci->ci_schedstate.spc_ldavg;
run = ci->ci_schedstate.spc_nrun;
 
-   if (choice == NULL || run < best_run ||
-   (run == best_run & < best_load)) {
+   if (choice == NULL || run < best_run) {
choice = ci;
-   best_load = load;
best_run = run;
}
}
@@ -605,11 +601,6 @@ sched_proc_to_cpu_cost(struct cpu_info *
 */
if (CPU_IS_PRIMARY(ci))
cost += sched_cost_runnable;
-
-   /*
-* Higher load on the destination means we don't want to go there.
-*/
-   cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
 
/*
 * If the proc is on this cpu already, lower the cost by how much
Index: sys/sched.h
===
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.58
diff -u -p -r1.58 sched.h
--- sys/sched.h 25 Jul 2023 18:16:19 -  1.58
+++ sys/sched.h 3 Aug 2023 08:42:39 -
@@ -110,7 +110,6 @@ struct schedstate_percpu {
struct clockintr *spc_profclock; /* [o] profclock handle */
 
u_int spc_nrun; /* procs on the run queues */
-   fixpt_t spc_ldavg;  /* shortest load avg. for this cpu */
 
volatile uint32_t spc_whichqs;
volatile u_int spc_spinning;/* this cpu is currently spinning */
Index: uvm/uvm_meter.c
===
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.46
diff -u -p -r1.46 uvm_meter.c
--- uvm/uvm_meter.c 2 Aug 2023 13:54:45 -   1.46
+++ uvm/uvm_meter.c 3 Aug 2023 10:12:02 -
@@ -70,7 +70,7 @@ struct loadavg averunnable;
  * 5 second intervals.
  */
 
-static fixpt_t cexp[3] = {
+static const fixpt_t cexp[3] = {
0.9200444146293232 * FSCALE,/* exp(-1/1

Re: uvm_loadav: don't recompute schedstate_percpu.spc_nrun

2023-08-03 Thread Claudio Jeker

On Thu, Aug 03, 2023 at 10:13:57AM +0200, Martin Pieuchot wrote:
> On 02/08/23(Wed) 14:22, Claudio Jeker wrote:
> > On Mon, Jul 31, 2023 at 10:21:11AM -0500, Scott Cheloha wrote:
> > > On Fri, Jul 28, 2023 at 07:36:41PM -0500, Scott Cheloha wrote:
> > > > claudio@ notes that uvm_loadav() pointlessly walks the allproc list to
> > > > recompute schedstate_percpu.spn_nrun for each CPU.
> > > > 
> > > > We can just use the value instead of recomputing it.
> > > 
> > > Whoops, off-by-one.  The current load averaging code includes the
> > > running thread in the nrun count if it is *not* the idle thread.
> > 
> > Yes, with this the loadavg seems to be consistent and following the number
> > of running processes. The code seems to behave like before (with all its
> > quirks).
> > 
> > OK claudio@, this is a good first step. Now I think this code should later
> > be moved into kern_sched.c or sched_bsd.c and removed from uvm. Not sure why
> > the load calculation is part of memory management...
> > 
> > On top of this I wonder about the per-CPU load calculation. In my opinion
> > it is wrong to skip the calculation if the CPU is idle. Because of this
> > there is no decay for idle CPUs and that feels wrong to me.
> > Do we have a userland utility that reports spc_ldavg?
> 
> I don't understand why the SCHED_LOCK() is needed.  Since I'm really
> against adding new uses for it, could you comment on that?

The question is how sloppy do we want to be. This code looks at
ci_schedstate (spc_idleproc and spc_nrun) and ci_curproc so to be correct
this needs to lock the scheduler. Do we really want that, hell no.
 
> > > Index: uvm_meter.c
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> > > retrieving revision 1.44
> > > diff -u -p -r1.44 uvm_meter.c
> > > --- uvm_meter.c   21 Jun 2023 21:16:21 -  1.44
> > > +++ uvm_meter.c   31 Jul 2023 15:20:37 -
> > > @@ -102,43 +102,29 @@ uvm_loadav(struct loadavg *avg)
> > >  {
> > >   CPU_INFO_ITERATOR cii;
> > >   struct cpu_info *ci;
> > > - int i, nrun;
> > > - struct proc *p;
> > > - int nrun_cpu[MAXCPUS];
> > > + struct schedstate_percpu *spc;
> > > + u_int i, nrun = 0, nrun_cpu;
> > > + int s;
> > >  
> > > - nrun = 0;
> > > - memset(nrun_cpu, 0, sizeof(nrun_cpu));
> > >  
> > > - LIST_FOREACH(p, , p_list) {
> > > - switch (p->p_stat) {
> > > - case SSTOP:
> > > - case SSLEEP:
> > > - break;
> > > - case SRUN:
> > > - case SONPROC:
> > > - if (p == p->p_cpu->ci_schedstate.spc_idleproc)
> > > - continue;
> > > - /* FALLTHROUGH */
> > > - case SIDL:
> > > - nrun++;
> > > - if (p->p_cpu)
> > > - nrun_cpu[CPU_INFO_UNIT(p->p_cpu)]++;
> > > - }
> > > + SCHED_LOCK(s);
> > > + CPU_INFO_FOREACH(cii, ci) {
> > > + spc = >ci_schedstate;
> > > + nrun_cpu = spc->spc_nrun;
> > > + if (ci->ci_curproc != spc->spc_idleproc)
> > > + nrun_cpu++;
> > > + if (nrun_cpu == 0)
> > > + continue;
> > > + spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
> > > + nrun_cpu * FSCALE *
> > > + (FSCALE - cexp[0])) >> FSHIFT;
> > > + nrun += nrun_cpu;
> > >   }
> > > + SCHED_UNLOCK(s);
> > >  
> > >   for (i = 0; i < 3; i++) {
> > >   avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
> > >   nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
> > > - }
> > > -
> > > - CPU_INFO_FOREACH(cii, ci) {
> > > - struct schedstate_percpu *spc = >ci_schedstate;
> > > -
> > > - if (nrun_cpu[CPU_INFO_UNIT(ci)] == 0)
> > > - continue;
> > > - spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
> > > - nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
> > > - (FSCALE - cexp[0])) >> FSHIFT;
> > >   }
> > >  }
> > >  
> > 
> > -- 
> > :wq Claudio
> > 
> 

-- 
:wq Claudio

Re: [v2]: uvm_meter, schedcpu: make uvm_meter() an independent timeout

2023-08-02 Thread Claudio Jeker

On Wed, Aug 02, 2023 at 10:15:20AM -0500, Scott Cheloha wrote:
> Now that the proc0 wakeup(9) is gone we can retry the other part of
> the uvm_meter() patch.
> 
> uvm_meter() is meant to run every 5 seconds, but for historical
> reasons it is called from schedcpu() and it is scheduled against the
> UTC clock.  schedcpu() and uvm_meter() have different periods, so
> uvm_meter() ought to be a separate timeout.  uvm_meter() is started
> alongside schedcpu() so the two will still run in sync.
> 
> v1: https://marc.info/?l=openbsd-tech&m=168710929409153&w=2
> 
> ok?

I would prefer if uvm_meter is killed and the load calculation moved to the
scheduler.
 
> Index: sys/uvm/uvm_meter.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 uvm_meter.c
> --- sys/uvm/uvm_meter.c   2 Aug 2023 13:54:45 -   1.46
> +++ sys/uvm/uvm_meter.c   2 Aug 2023 15:13:49 -
> @@ -85,10 +85,12 @@ void uvmexp_read(struct uvmexp *);
>   * uvm_meter: calculate load average
>   */
>  void
> -uvm_meter(void)
> +uvm_meter(void *unused)
>  {
> - if ((gettime() % 5) == 0)
> - uvm_loadav();
> + static struct timeout to = TIMEOUT_INITIALIZER(uvm_meter, NULL);
> +
> + timeout_add_sec(, 5);
> + uvm_loadav();
>  }
>  
>  /*
> Index: sys/uvm/uvm_extern.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_extern.h,v
> retrieving revision 1.170
> diff -u -p -r1.170 uvm_extern.h
> --- sys/uvm/uvm_extern.h  21 Jun 2023 21:16:21 -  1.170
> +++ sys/uvm/uvm_extern.h  2 Aug 2023 15:13:49 -
> @@ -414,7 +414,7 @@ void  uvmspace_free(struct vmspace *);
>  struct vmspace   *uvmspace_share(struct process *);
>  int  uvm_share(vm_map_t, vaddr_t, vm_prot_t,
>   vm_map_t, vaddr_t, vsize_t);
> -void uvm_meter(void);
> +void uvm_meter(void *);
>  int  uvm_sysctl(int *, u_int, void *, size_t *, 
>   void *, size_t, struct proc *);
>  struct vm_page   *uvm_pagealloc(struct uvm_object *,
> Index: sys/kern/sched_bsd.c
> ===
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.78
> diff -u -p -r1.78 sched_bsd.c
> --- sys/kern/sched_bsd.c  25 Jul 2023 18:16:19 -  1.78
> +++ sys/kern/sched_bsd.c  2 Aug 2023 15:13:50 -
> @@ -235,7 +235,6 @@ schedcpu(void *arg)
>   }
>   SCHED_UNLOCK(s);
>   }
> - uvm_meter();
>   wakeup();
>   timeout_add_sec(to, 1);
>  }
> @@ -688,6 +687,7 @@ scheduler_start(void)
>  
>   rrticks_init = hz / 10;
>   schedcpu(_to);
> + uvm_meter(NULL);
>  
>  #ifndef SMALL_KERNEL
>   if (perfpolicy == PERFPOL_AUTO)
> Index: share/man/man9/uvm_init.9
> ===
> RCS file: /cvs/src/share/man/man9/uvm_init.9,v
> retrieving revision 1.7
> diff -u -p -r1.7 uvm_init.9
> --- share/man/man9/uvm_init.9 21 Jun 2023 21:16:21 -  1.7
> +++ share/man/man9/uvm_init.9 2 Aug 2023 15:13:50 -
> @@ -168,7 +168,7 @@ argument is ignored.
>  .Ft void
>  .Fn uvm_kernacc "caddr_t addr" "size_t len" "int rw"
>  .Ft void
> -.Fn uvm_meter
> +.Fn uvm_meter "void *arg"
>  .Ft int
>  .Fn uvm_sysctl "int *name" "u_int namelen" "void *oldp" "size_t *oldlenp" 
> "void *newp " "size_t newlen" "struct proc *p"
>  .Ft int
> @@ -212,7 +212,7 @@ access, in the kernel address space.
>  .Pp
>  The
>  .Fn uvm_meter
> -function calculates the load average and wakes up the swapper if necessary.
> +timeout updates system load averages every five seconds.
>  .Pp
>  The
>  .Fn uvm_sysctl

-- 
:wq Claudio

Re: uvm_loadav: don't recompute schedstate_percpu.spc_nrun

2023-08-02 Thread Claudio Jeker

On Mon, Jul 31, 2023 at 10:21:11AM -0500, Scott Cheloha wrote:
> On Fri, Jul 28, 2023 at 07:36:41PM -0500, Scott Cheloha wrote:
> > claudio@ notes that uvm_loadav() pointlessly walks the allproc list to
> > recompute schedstate_percpu.spn_nrun for each CPU.
> > 
> > We can just use the value instead of recomputing it.
> 
> Whoops, off-by-one.  The current load averaging code includes the
> running thread in the nrun count if it is *not* the idle thread.

Yes, with this the loadavg seems to be consistent and following the number
of running processes. The code seems to behave like before (with all its
quirks).

OK claudio@, this is a good first step. Now I think this code should later
be moved into kern_sched.c or sched_bsd.c and removed from uvm. Not sure why
the load calculation is part of memory management...

On top of this I wonder about the per-CPU load calculation. In my opinion
it is wrong to skip the calculation if the CPU is idle. Because of this
there is no decay for idle CPUs and that feels wrong to me.
Do we have a userland utility that reports spc_ldavg?

> Index: uvm_meter.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 uvm_meter.c
> --- uvm_meter.c   21 Jun 2023 21:16:21 -  1.44
> +++ uvm_meter.c   31 Jul 2023 15:20:37 -
> @@ -102,43 +102,29 @@ uvm_loadav(struct loadavg *avg)
>  {
>   CPU_INFO_ITERATOR cii;
>   struct cpu_info *ci;
> - int i, nrun;
> - struct proc *p;
> - int nrun_cpu[MAXCPUS];
> + struct schedstate_percpu *spc;
> + u_int i, nrun = 0, nrun_cpu;
> + int s;
>  
> - nrun = 0;
> - memset(nrun_cpu, 0, sizeof(nrun_cpu));
>  
> - LIST_FOREACH(p, , p_list) {
> - switch (p->p_stat) {
> - case SSTOP:
> - case SSLEEP:
> - break;
> - case SRUN:
> - case SONPROC:
> - if (p == p->p_cpu->ci_schedstate.spc_idleproc)
> - continue;
> - /* FALLTHROUGH */
> - case SIDL:
> - nrun++;
> - if (p->p_cpu)
> - nrun_cpu[CPU_INFO_UNIT(p->p_cpu)]++;
> - }
> + SCHED_LOCK(s);
> + CPU_INFO_FOREACH(cii, ci) {
> + spc = >ci_schedstate;
> + nrun_cpu = spc->spc_nrun;
> + if (ci->ci_curproc != spc->spc_idleproc)
> + nrun_cpu++;
> + if (nrun_cpu == 0)
> + continue;
> + spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
> + nrun_cpu * FSCALE *
> + (FSCALE - cexp[0])) >> FSHIFT;
> + nrun += nrun_cpu;
>   }
> + SCHED_UNLOCK(s);
>  
>   for (i = 0; i < 3; i++) {
>   avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
>   nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
> - }
> -
> - CPU_INFO_FOREACH(cii, ci) {
> - struct schedstate_percpu *spc = >ci_schedstate;
> -
> - if (nrun_cpu[CPU_INFO_UNIT(ci)] == 0)
> - continue;
> - spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
> - nrun_cpu[CPU_INFO_UNIT(ci)] * FSCALE *
> - (FSCALE - cexp[0])) >> FSHIFT;
>   }
>  }
>  

-- 
:wq Claudio

uvm_meter remove wakeup of swapper

2023-08-01 Thread Claudio Jeker

Now that the issue in inteldrm was resolved we can finally remove this
old wakeup of the swapper.

OK?
-- 
:wq Claudio

Index: uvm_meter.c
===
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.44
diff -u -p -r1.44 uvm_meter.c
--- uvm_meter.c 21 Jun 2023 21:16:21 -  1.44
+++ uvm_meter.c 1 Aug 2023 09:22:22 -
@@ -82,15 +82,13 @@ void uvm_total(struct vmtotal *);
 void uvmexp_read(struct uvmexp *);
 
 /*
- * uvm_meter: calculate load average and wake up the swapper (if needed)
+ * uvm_meter: calculate load average
  */
 void
 uvm_meter(void)
 {
if ((gettime() % 5) == 0)
uvm_loadav();
-   if (proc0.p_slptime > (maxslp / 2))
-   wakeup();
 }
 
 /*

Re: uvm_meter: remove wakeup of proc0

2023-08-01 Thread Claudio Jeker

On Mon, Jul 31, 2023 at 08:31:41PM -0500, Scott Cheloha wrote:
> On Mon, Jul 31, 2023 at 10:04:44PM +0200, Claudio Jeker wrote:
> > On Mon, Jul 31, 2023 at 09:49:30PM +0200, Claudio Jeker wrote:
> > > On Mon, Jul 31, 2023 at 08:03:41PM +0300, Vitaliy Makkoveev wrote:
> > > > This is the culprit:
> > > > 
> > > > schedule_timeout_uninterruptible(long timeout)
> > > > {
> > > > tsleep(curproc, PWAIT, "schtou", timeout);
> > > > return 0;
> > > > }
> > > > 
> > > 
> > > Please give this a try. I think on initialization
> > > intel_dp_wait_source_oui() is called before intel_dp->last_oui_write is
> > > set and this results in a 10min timeout because our jiffies are set to
> > > ULONG_MAX - (10 * 60 * HZ);
> > 
> > After talking with kettenis@ I think the following diff is better.
> > Starting with 0 jiffies should fix this issue.
> > Unless we want to do the linux madness and set it to
> > ((unsigned long)(unsigned int) (-300*HZ))
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: kern_clock.c
> > ===
> > RCS file: /cvs/src/sys/kern/kern_clock.c,v
> > retrieving revision 1.109
> > diff -u -p -r1.109 kern_clock.c
> > --- kern_clock.c25 Jul 2023 18:16:19 -  1.109
> > +++ kern_clock.c31 Jul 2023 20:01:57 -
> > @@ -84,7 +84,7 @@ int   profhz;
> >  intprofprocs;
> >  intticks = INT_MAX - (15 * 60 * HZ);
> >  
> > -volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);
> > +volatile unsigned long jiffies;
> >  
> >  /*
> >   * Initialize clock frequencies and start both clocks running.
> > 
> 
> I think this is backwards.
> 
> Why are we changing the initial value of jiffies (wide) to resolve a
> problem with the initialization of one struct (narrow)?  Changing the
> initial value of jiffies just masks the root cause.
> 
> Isn't the right thing here to initialize the last-write timestamp when
> the struct is allocated?

This is all in code that is regularly synced with linux and so any local
change there is less than ideal. So it is better to alter the way jiffies
is initialized. jiffies is only there for drm so I don't think it is that
wide of a change.
Btw. on linux jiffies is initialized to:
#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
and so the behaviour is different on 32 vs 64bit systems (which is the
worst possible default they could choose). So on the more common 64bit
systems the wrap around no longer happens early and bugs are introduced
because of this without anyone noticing.

Somebody could report this upstream since it is a bug in the intel drm
codebase.
-- 
:wq Claudio

Re: uvm_meter: remove wakeup of proc0

2023-07-31 Thread Claudio Jeker

On Mon, Jul 31, 2023 at 09:49:30PM +0200, Claudio Jeker wrote:
> On Mon, Jul 31, 2023 at 08:03:41PM +0300, Vitaliy Makkoveev wrote:
> > This is the culprit:
> > 
> > schedule_timeout_uninterruptible(long timeout)
> > {
> > tsleep(curproc, PWAIT, "schtou", timeout);
> > return 0;
> > }
> > 
> 
> Please give this a try. I think on initialization
> intel_dp_wait_source_oui() is called before intel_dp->last_oui_write is
> set and this results in a 10min timeout because our jiffies are set to
> ULONG_MAX - (10 * 60 * HZ);

After talking with kettenis@ I think the following diff is better.
Starting with 0 jiffies should fix this issue.
Unless we want to do the linux madness and set it to
((unsigned long)(unsigned int) (-300*HZ))

-- 
:wq Claudio

Index: kern_clock.c
===
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.109
diff -u -p -r1.109 kern_clock.c
--- kern_clock.c25 Jul 2023 18:16:19 -  1.109
+++ kern_clock.c31 Jul 2023 20:01:57 -
@@ -84,7 +84,7 @@ int   profhz;
 intprofprocs;
 intticks = INT_MAX - (15 * 60 * HZ);
 
-volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);
+volatile unsigned long jiffies;
 
 /*
  * Initialize clock frequencies and start both clocks running.

Re: uvm_meter: remove wakeup of proc0

2023-07-31 Thread Claudio Jeker

On Mon, Jul 31, 2023 at 08:03:41PM +0300, Vitaliy Makkoveev wrote:
> This is the culprit:
> 
> schedule_timeout_uninterruptible(long timeout)
> {
> tsleep(curproc, PWAIT, "schtou", timeout);
> return 0;
> }
> 

Please give this a try. I think on initialization
intel_dp_wait_source_oui() is called before intel_dp->last_oui_write is
set and this results in a 10min timeout because our jiffies are set to
ULONG_MAX - (10 * 60 * HZ);

-- 
:wq Claudio

Index: intel_dp.c
===
RCS file: /cvs/src/sys/dev/pci/drm/i915/display/intel_dp.c,v
retrieving revision 1.14
diff -u -p -r1.14 intel_dp.c
--- intel_dp.c  28 Jul 2023 06:56:32 -  1.14
+++ intel_dp.c  31 Jul 2023 19:39:37 -
@@ -2228,6 +2228,8 @@ void intel_dp_wait_source_oui(struct int
struct drm_i915_private *i915 = dp_to_i915(intel_dp);
 
drm_dbg_kms(&i915->drm, "Performing OUI wait\n");
+   if (intel_dp->last_oui_write == 0)
+   intel_dp->last_oui_write = jiffies;
wait_remaining_ms_from_jiffies(intel_dp->last_oui_write, 30);
 }

Re: uvm_meter: remove wakeup of proc0

2023-07-31 Thread Claudio Jeker

On Mon, Jul 31, 2023 at 08:03:41PM +0300, Vitaliy Makkoveev wrote:
> This is the culprit:
> 
> schedule_timeout_uninterruptible(long timeout)
> {
> tsleep(curproc, PWAIT, "schtou", timeout);
> return 0;
> }

Not really. The problem is lower in intel_dp_wait_source_oui().
Which either missed the wakeup and still hit the
schedule_timeout_uninterruptible() codepath or the wakeup() was issued
before the tsleep(). In any case, something is not quite correct in that
codepath. Will look into it.

-- 
:wq Claudio

Re: uvm_meter: remove wakeup of proc0

2023-07-31 Thread Claudio Jeker

On Sat, Jul 29, 2023 at 03:00:59PM +0300, Vitaliy Makkoveev wrote:
> On Sat, Jul 29, 2023 at 11:16:14AM +0200, Claudio Jeker wrote:
> > proc0 aka the swapper does not do anything. So there is no need to wake it
> > up. Now the problem is that last time this was tried some inteldrm systems
> > did hang during bootup because the drm code unexpectedly depended on this
> > wakeup.
> > 
> > I think I fixed all possible cases of this in the drm stack and so it is
> > time to retry this. People with affected machines please give this a try.
> > 
> 
> Hi,
> 
> With this diff "inteldrm0: msi, ALDERLAKE_P, gen 12" sticks after "root
> on ...", "inteldrm0: apic 4 int 16, G45, gen 4" works fine.

Would it be possible to get a backtrace of proc0 from the system that
hangs?

I think the simplest way is to:
1. boot -d
2. in ddb:
w db_console 1
c
3. once you hang on the "root on" line, hit ctrl-alt-esc
4. in ddb:
tr /t 0

Thanks.
-- 
:wq Claudio
 
> > -- 
> > :wq Claudio
> > 
> > Index: uvm/uvm_meter.c
> > ===
> > RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> > retrieving revision 1.44
> > diff -u -p -r1.44 uvm_meter.c
> > --- uvm/uvm_meter.c 21 Jun 2023 21:16:21 -  1.44
> > +++ uvm/uvm_meter.c 29 Jul 2023 07:48:44 -
> > @@ -89,8 +89,6 @@ uvm_meter(void)
> >  {
> > if ((gettime() % 5) == 0)
> > uvm_loadav();
> > -   if (proc0.p_slptime > (maxslp / 2))
> > -   wakeup(&proc0);
> >  }
> >  
> >  /*
> > 
>

uvm_meter: remove wakeup of proc0

2023-07-29 Thread Claudio Jeker

proc0 aka the swapper does not do anything. So there is no need to wake it
up. Now the problem is that last time this was tried some inteldrm systems
did hang during bootup because the drm code unexpectedly depended on this
wakeup.

I think I fixed all possible cases of this in the drm stack and so it is
time to retry this. People with affected machines please give this a try.

-- 
:wq Claudio

Index: uvm/uvm_meter.c
===
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.44
diff -u -p -r1.44 uvm_meter.c
--- uvm/uvm_meter.c 21 Jun 2023 21:16:21 -  1.44
+++ uvm/uvm_meter.c 29 Jul 2023 07:48:44 -
@@ -89,8 +89,6 @@ uvm_meter(void)
 {
if ((gettime() % 5) == 0)
uvm_loadav();
-   if (proc0.p_slptime > (maxslp / 2))
-   wakeup(&proc0);
 }
 
 /*

iked: add print_hexbuf() to hexdump an ibuf

2023-07-28 Thread Claudio Jeker

As suggested by tb@ add print_hexbuf() to hexdump an ibuf.
Use this in places where a full ibuf is dumped. In some cases
print_hex() is still used because the length is not the full
ibuf or an offset is used.

-- 
:wq Claudio

Index: iked.h
===
RCS file: /cvs/src/sbin/iked/iked.h,v
retrieving revision 1.222
diff -u -p -r1.222 iked.h
--- iked.h  18 Jul 2023 15:07:41 -  1.222
+++ iked.h  28 Jul 2023 09:59:30 -
@@ -1242,6 +1242,7 @@ const char *
 voidlc_idtype(char *);
 voidprint_hex(const uint8_t *, off_t, size_t);
 voidprint_hexval(const uint8_t *, off_t, size_t);
+voidprint_hexbuf(struct ibuf *);
 const char *
 print_bits(unsigned short, unsigned char *);
 int sockaddr_cmp(struct sockaddr *, struct sockaddr *, int);
Index: ikev2.c
===
RCS file: /cvs/src/sbin/iked/ikev2.c,v
retrieving revision 1.375
diff -u -p -r1.375 ikev2.c
--- ikev2.c 28 Jul 2023 07:31:38 -  1.375
+++ ikev2.c 28 Jul 2023 10:00:48 -
@@ -1443,7 +1443,7 @@ ikev2_init_ike_sa_peer(struct iked *env,
 
log_debug("%s: added cookie, len %zu", __func__,
ibuf_size(cookie));
-   print_hex(ibuf_data(cookie), 0, ibuf_size(cookie));
+   print_hexbuf(cookie);
 
if (ikev2_next_payload(pld, len, IKEV2_PAYLOAD_SA) == -1)
goto done;
@@ -5738,7 +5738,7 @@ ikev2_sa_keys(struct iked *env, struct i
 
log_debug("%s: DHSECRET with %zu bytes", SPI_SA(sa, __func__),
ibuf_length(dhsecret));
-   print_hex(ibuf_data(dhsecret), 0, ibuf_length(dhsecret));
+   print_hexbuf(dhsecret);
 
if (!key) {
/*
@@ -5810,7 +5810,7 @@ ikev2_sa_keys(struct iked *env, struct i
}
 
log_debug("%s: S with %zu bytes", SPI_SA(sa, __func__), ibuf_length(s));
-   print_hex(ibuf_data(s), 0, ibuf_length(s));
+   print_hexbuf(s);
 
/*
 * Get the size of the key material we need and the number
@@ -5850,31 +5850,27 @@ ikev2_sa_keys(struct iked *env, struct i
 
log_debug("%s: SK_d with %zu bytes", __func__,
ibuf_length(sa->sa_key_d));
-   print_hex(ibuf_data(sa->sa_key_d), 0, ibuf_length(sa->sa_key_d));
+   print_hexbuf(sa->sa_key_d);
if (!isaead) {
log_debug("%s: SK_ai with %zu bytes", __func__,
ibuf_length(sa->sa_key_iauth));
-   print_hex(ibuf_data(sa->sa_key_iauth), 0,
-   ibuf_length(sa->sa_key_iauth));
+   print_hexbuf(sa->sa_key_iauth);
log_debug("%s: SK_ar with %zu bytes", __func__,
ibuf_length(sa->sa_key_rauth));
-   print_hex(ibuf_data(sa->sa_key_rauth), 0,
-   ibuf_length(sa->sa_key_rauth));
+   print_hexbuf(sa->sa_key_rauth);
}
log_debug("%s: SK_ei with %zu bytes", __func__,
ibuf_length(sa->sa_key_iencr));
-   print_hex(ibuf_data(sa->sa_key_iencr), 0,
-   ibuf_length(sa->sa_key_iencr));
+   print_hexbuf(sa->sa_key_iencr);
log_debug("%s: SK_er with %zu bytes", __func__,
ibuf_length(sa->sa_key_rencr));
-   print_hex(ibuf_data(sa->sa_key_rencr), 0,
-   ibuf_length(sa->sa_key_rencr));
+   print_hexbuf(sa->sa_key_rencr);
log_debug("%s: SK_pi with %zu bytes", __func__,
ibuf_length(sa->sa_key_iprf));
-   print_hex(ibuf_data(sa->sa_key_iprf), 0, ibuf_length(sa->sa_key_iprf));
+   print_hexbuf(sa->sa_key_iprf);
log_debug("%s: SK_pr with %zu bytes", __func__,
ibuf_length(sa->sa_key_rprf));
-   print_hex(ibuf_data(sa->sa_key_rprf), 0, ibuf_length(sa->sa_key_rprf));
+   print_hexbuf(sa->sa_key_rprf);
 
ret = 0;
 
@@ -5954,11 +5950,11 @@ ikev2_prfplus(struct iked_hash *prf, str
 
log_debug("%s: T%d with %zu bytes", __func__,
pad, ibuf_length(t1));
-   print_hex(ibuf_data(t1), 0, ibuf_length(t1));
+   print_hexbuf(t1);
}
 
log_debug("%s: Tn with %zu bytes", __func__, ibuf_length(t));
-   print_hex(ibuf_data(t), 0, ibuf_length(t));
+   print_hexbuf(t);
 
ibuf_free(t1);
 
Index: ikev2_msg.c
===
RCS file: /cvs/src/sbin/iked/ikev2_msg.c,v
retrieving revision 1.98
diff -u -p -r1.98 ikev2_msg.c
--- ikev2_msg.c 28 Jul 2023 07:31:38 -  1.98
+++ ikev2_msg.c 28 Jul 2023 10:01:22 -
@@ -446,7 +446,7 @@ ikev2_msg_encrypt(struct iked *env, stru
goto done;
 
log_debug("%s: padded length %zu", __func__, ibuf_size(src));
-   print_hex(ibuf_data(src), 0, ibuf_size(src));
+   print_hexbuf(src);
 
cipher_setkey(sa->sa_encr, ibuf_data(encr), ibuf_length(encr));
cipher_setiv(sa->sa_encr, NULL, 0); /*

iked: more ibuf cleanup

2023-07-27 Thread Claudio Jeker

Use ibuf_data() instead of direct access to ibuf->buf.
In some cases use ibuf_add_buf().

-- 
:wq Claudio

Index: crypto.c
===
RCS file: /cvs/src/sbin/iked/crypto.c,v
retrieving revision 1.44
diff -u -p -r1.44 crypto.c
--- crypto.c6 Jun 2023 13:27:49 -   1.44
+++ crypto.c27 Jul 2023 13:28:59 -
@@ -327,7 +327,7 @@ hash_free(struct iked_hash *hash)
 void
 hash_init(struct iked_hash *hash)
 {
-   HMAC_Init_ex(hash->hash_ctx, hash->hash_key->buf,
+   HMAC_Init_ex(hash->hash_ctx, ibuf_data(hash->hash_key),
ibuf_length(hash->hash_key), hash->hash_priv, NULL);
 }
 
@@ -572,7 +572,7 @@ cipher_init(struct iked_cipher *encr, in
encr->encr_saltlength), encr->encr_saltlength);
if (nonce == NULL)
return (-1);
-   if (ibuf_add(nonce, ibuf_data(encr->encr_iv) , 
ibuf_size(encr->encr_iv)) != 0)
+   if (ibuf_add_buf(nonce, encr->encr_iv) != 0)
goto done;
if (EVP_CipherInit_ex(encr->encr_ctx, NULL, NULL,
ibuf_data(encr->encr_key), ibuf_data(nonce), enc) != 1)
Index: dh.c
===
RCS file: /cvs/src/sbin/iked/dh.c,v
retrieving revision 1.32
diff -u -p -r1.32 dh.c
--- dh.c3 Dec 2022 22:34:35 -   1.32
+++ dh.c27 Jul 2023 08:27:36 -
@@ -401,7 +401,7 @@ dh_create_exchange(struct dh_group *grou
if (buf == NULL)
return -1;
*bufp = buf;
-   return (group->exchange(group, buf->buf));
+   return (group->exchange(group, ibuf_data(buf)));
 }
 
 int
@@ -419,7 +419,7 @@ dh_create_shared(struct dh_group *group,
if (buf == NULL)
return -1;
*secretp = buf;
-   return (group->shared(group, buf->buf, exchange->buf));
+   return (group->shared(group, ibuf_data(buf), ibuf_data(exchange)));
 }
 
 int
@@ -801,7 +801,7 @@ kemsx_create_exchange2(struct dh_group *
buf = ibuf_new(NULL, need);
if (buf == NULL)
return -1;
-   cp = buf->buf;
+   cp = ibuf_data(buf);
memcpy(cp, kemsx->public,
crypto_kem_sntrup761_PUBLICKEYBYTES);
cp += crypto_kem_sntrup761_PUBLICKEYBYTES;
@@ -819,8 +819,8 @@ kemsx_create_exchange2(struct dh_group *
buf = ibuf_new(NULL, need);
if (buf == NULL)
return -1;
-   cp = buf->buf;
-   pk = iexchange->buf;
+   cp = ibuf_data(buf);
+   pk = ibuf_data(iexchange);
crypto_kem_sntrup761_enc(cp, kemsx->kemkey, pk);
cp += crypto_kem_sntrup761_CIPHERTEXTBYTES;
}
@@ -850,7 +850,7 @@ kemsx_create_shared2(struct dh_group *gr
return (-1);
 
have = ibuf_size(exchange);
-   cp = exchange->buf;
+   cp = ibuf_data(exchange);
if (kemsx->initiator) {
/* input */
need = crypto_kem_sntrup761_CIPHERTEXTBYTES +
@@ -878,7 +878,7 @@ kemsx_create_shared2(struct dh_group *gr
EVP_DigestInit_ex(ctx, EVP_sha512(), NULL) != 1 ||
EVP_DigestUpdate(ctx, kemsx->kemkey, sizeof(kemsx->kemkey)) != 1 ||
EVP_DigestUpdate(ctx, shared, sizeof(shared)) != 1 ||
-   EVP_DigestFinal_ex(ctx, buf->buf, ) != 1) {
+   EVP_DigestFinal_ex(ctx, ibuf_data(buf), ) != 1) {
EVP_MD_CTX_free(ctx);
ibuf_free(buf);
return (-1);
Index: ikev2.c
===
RCS file: /cvs/src/sbin/iked/ikev2.c,v
retrieving revision 1.374
diff -u -p -r1.374 ikev2.c
--- ikev2.c 18 Jul 2023 15:07:41 -  1.374
+++ ikev2.c 27 Jul 2023 13:28:15 -
@@ -5738,14 +5738,14 @@ ikev2_sa_keys(struct iked *env, struct i
 
log_debug("%s: DHSECRET with %zu bytes", SPI_SA(sa, __func__),
ibuf_length(dhsecret));
-   print_hex(dhsecret->buf, 0, ibuf_length(dhsecret));
+   print_hex(ibuf_data(dhsecret), 0, ibuf_length(dhsecret));
 
if (!key) {
/*
 * Set PRF key to generate SKEYSEED = prf(Ni | Nr, g^ir)
 */
-   if ((ninr = ibuf_new(sa->sa_inonce->buf, ilen)) == NULL ||
-   ibuf_add(ninr, sa->sa_rnonce->buf, rlen) != 0) {
+   if ((ninr = ibuf_new(ibuf_data(sa->sa_inonce), ilen)) == NULL ||
+   ibuf_add(ninr, ibuf_data(sa->sa_rnonce), rlen) != 0) {
log_info("%s: failed to get nonce key buffer",
SPI_SA(sa, __func__));
goto done;
@@ -5755,15 +5755,15 @@ ikev2_sa_keys(struct iked *env, struct i
/*
 * Set PRF key to generate SKEYSEED = prf(key, g^ir | Ni | Nr)

Re: inetd echo localhost

2023-07-21 Thread Claudio Jeker

On Fri, Jul 21, 2023 at 03:17:35PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Jul 20, 2023 at 09:57:00PM +0200, Alexander Bluhm wrote:
> > Hi,
> > 
> > I wonder why UDP echo does not work with inetd on 127.0.0.1.
> > 
> > Note that it is default off.  One of my regress machines has it
> > enabled for other tests.  There perl dist/Net-Ping/t/510_ping_udp.t
> > expects that UDP echo works on 127.0.0.1.
> > 
> > It was disabled with this commit:
> > 
> > revision 1.65
> > date: 2000/08/01 19:02:05;  author: itojun;  state: Exp;  lines: +47 -11;
> > be more paranoid about UDP-based echo services validation.  namely,
> > reject the following sources:
> > 0.0.0.0/8 127.0.0.0/8 240.0.0.0/4 255.0.0.0/8
> > ff00::/8 ::/128
> > ::ffff:0.0.0.0/96 and ::0.0.0.0/96 obeys IPv4 rule.
> > reserved port, or NFS port.
> > hint from deraadt.
> > 
> > 
> > Note that IPv6 echo to ::1 works fine.  Only IPv4 echo to 127.0.0.1
> > is broken.
> > 
> > I cannot see the security reason for disabling 127/8.
> > Loops are prevented by blocking privileged ports.
> > Echo to a local interface address through loopback is still allowed.
> > The kernel checks that 127/8 does not come from extern.
> > 127.0.0.1 should be handled like ::1 .
> > 
> > The feature was introduced together with IPv6 mapped addresses.
> > See cvs diff -r1.64 -r1.65 inetd.c
> > There it made sense to be paranoid about the IPv4 compatibility part
> > of the IPv6 address.  But this feature was removed decades ago.
> > So it could be a left over.
> > 
> > Should we also disable ::1 IPv6?
> > Or allow 127.0.0.1 only?
> > Or remove the case 127 completely?
> > 
> 
> It's better to have similar behaviour for both ipv4 and ipv6 cases. I
> see no reason to disable localhost.

Now hold your horses. This was done because of RPC / NFS and especially
portmap. Neither of these protocols work over IPv6 so there is no reason
to block ::1.
 
> ok mvs
> 
> > bluhm
> > 
> > Index: usr.sbin/inetd/inetd.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/inetd/inetd.c,v
> > retrieving revision 1.164
> > diff -u -p -r1.164 inetd.c
> > --- usr.sbin/inetd/inetd.c  19 Apr 2023 12:58:16 -  1.164
> > +++ usr.sbin/inetd/inetd.c  20 Jul 2023 19:52:39 -
> > @@ -444,7 +444,7 @@ dg_badinput(struct sockaddr *sa)
> > if (IN_MULTICAST(in.s_addr))
> > goto bad;
> > switch ((in.s_addr & 0xff000000) >> 24) {
> > -   case 0: case 127: case 255:
> > +   case 0: case 255:
> > goto bad;
> > }
> > if (dg_broadcast())
> > 
> 

-- 
:wq Claudio

Re: bgpd: adjust ctl_neighbor usage

2023-07-20 Thread Claudio Jeker

On Thu, Jul 20, 2023 at 05:22:25PM +0200, Theo Buehler wrote:
> On Thu, Jul 20, 2023 at 05:06:00PM +0200, Claudio Jeker wrote:
> > I think it is better to use a safe idiom when matching against a peer name
> > instead of forcefully NUL terminating the string somewhere unrelated.
> > By default all these string buffers use the same size so strncmp() will
> > not clip since the peer description is enforced by bgpd to be smaller.
> >
> > Another option would be to move
> > neighbor->descr[PEER_DESCR_LEN - 1] = 0;
> > into the match functions. At least then it is certainly done.
> 
> I prefer strncmp(). So this diff is ok.
> 
> However this makes me wonder: in your earlier diff today you adjusted
> the strlcpy with source neighbor->reason. If we can't be sure that
> neighbor->descr is NUL terminated, why can we assume this to be the
> case for neighbor->reason? (strlcpy walks the source string until it
> hits NUL).

Good point, I guess in that code we need to do this enforced NUL
termination before calling strlcpy().
I think in general we should be more careful with data sent via the
control socket. That will be a big project.
 
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: control.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
> > retrieving revision 1.111
> > diff -u -p -r1.111 control.c
> > --- control.c   20 Jul 2023 11:10:03 -  1.111
> > +++ control.c   20 Jul 2023 14:04:33 -
> > @@ -314,7 +314,6 @@ control_dispatch_msg(struct pollfd *pfd,
> > if (imsg.hdr.len == IMSG_HEADER_SIZE +
> > sizeof(struct ctl_neighbor)) {
> > neighbor = imsg.data;
> > -   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
> > } else {
> > neighbor = NULL;
> > }
> > @@ -370,7 +369,6 @@ control_dispatch_msg(struct pollfd *pfd,
> > }
> >  
> > neighbor = imsg.data;
> > -   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
> >  
> > matched = 0;
> > RB_FOREACH(p, peer_head, peers) {
> > @@ -474,7 +472,6 @@ control_dispatch_msg(struct pollfd *pfd,
> >  
> > ribreq = imsg.data;
> > neighbor = &ribreq->neighbor;
> > -   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
> >  
> > /* check if at least one neighbor exists */
> > RB_FOREACH(p, peer_head, peers)
> > Index: rde.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.608
> > diff -u -p -r1.608 rde.c
> > --- rde.c   12 Jul 2023 14:45:42 -  1.608
> > +++ rde.c   20 Jul 2023 14:01:47 -
> > @@ -2947,7 +2947,8 @@ rde_match_peer(struct rde_peer *p, struc
> > return 0;
> > } else if (n && n->descr[0]) {
> > s = n->is_group ? p->conf.group : p->conf.descr;
> > -   if (strcmp(s, n->descr))
> > +   /* cannot trust n->descr to be properly terminated */
> > +   if (strncmp(s, n->descr, sizeof(n->descr)))
> > return 0;
> > }
> > return 1;
> > Index: session.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
> > retrieving revision 1.446
> > diff -u -p -r1.446 session.c
> > --- session.c   12 Jul 2023 14:45:43 -  1.446
> > +++ session.c   20 Jul 2023 14:01:25 -
> > @@ -3461,7 +3461,8 @@ peer_matched(struct peer *p, struct ctl_
> > return 0;
> > } else if (n && n->descr[0]) {
> > s = n->is_group ? p->conf.group : p->conf.descr;
> > -   if (strcmp(s, n->descr))
> > +   /* cannot trust n->descr to be properly terminated */
> > +   if (strncmp(s, n->descr, sizeof(n->descr)))
> > return 0;
> > }
> > return 1;
> > 
> 

-- 
:wq Claudio

bgpd: adjust ctl_neighbor usage

2023-07-20 Thread Claudio Jeker

I think it is better to use a safe idiom when matching against a peer name
instead of forcefully NUL terminating the string somewhere unrelated.
By default all these string buffers use the same size so strncmp() will
not clip since the peer description is enforced by bgpd to be smaller.

Another option would be to move
neighbor->descr[PEER_DESCR_LEN - 1] = 0;
into the match functions. At least then it is certainly done.

-- 
:wq Claudio

Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.111
diff -u -p -r1.111 control.c
--- control.c   20 Jul 2023 11:10:03 -  1.111
+++ control.c   20 Jul 2023 14:04:33 -
@@ -314,7 +314,6 @@ control_dispatch_msg(struct pollfd *pfd,
if (imsg.hdr.len == IMSG_HEADER_SIZE +
sizeof(struct ctl_neighbor)) {
neighbor = imsg.data;
-   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
} else {
neighbor = NULL;
}
@@ -370,7 +369,6 @@ control_dispatch_msg(struct pollfd *pfd,
}
 
neighbor = imsg.data;
-   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
 
matched = 0;
RB_FOREACH(p, peer_head, peers) {
@@ -474,7 +472,6 @@ control_dispatch_msg(struct pollfd *pfd,
 
ribreq = imsg.data;
neighbor = &ribreq->neighbor;
-   neighbor->descr[PEER_DESCR_LEN - 1] = 0;
 
/* check if at least one neighbor exists */
RB_FOREACH(p, peer_head, peers)
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.608
diff -u -p -r1.608 rde.c
--- rde.c   12 Jul 2023 14:45:42 -  1.608
+++ rde.c   20 Jul 2023 14:01:47 -
@@ -2947,7 +2947,8 @@ rde_match_peer(struct rde_peer *p, struc
return 0;
} else if (n && n->descr[0]) {
s = n->is_group ? p->conf.group : p->conf.descr;
-   if (strcmp(s, n->descr))
+   /* cannot trust n->descr to be properly terminated */
+   if (strncmp(s, n->descr, sizeof(n->descr)))
return 0;
}
return 1;
Index: session.c
===
RCS file: /cvs/src/usr.sbin/bgpd/session.c,v
retrieving revision 1.446
diff -u -p -r1.446 session.c
--- session.c   12 Jul 2023 14:45:43 -  1.446
+++ session.c   20 Jul 2023 14:01:25 -
@@ -3461,7 +3461,8 @@ peer_matched(struct peer *p, struct ctl_
return 0;
} else if (n && n->descr[0]) {
s = n->is_group ? p->conf.group : p->conf.descr;
-   if (strcmp(s, n->descr))
+   /* cannot trust n->descr to be properly terminated */
+   if (strncmp(s, n->descr, sizeof(n->descr)))
return 0;
}
return 1;

bgpd, use right buffer in strlcpy

2023-07-20 Thread Claudio Jeker

Use the destination for the buffer len not the source.

-- 
:wq Claudio

Index: control.c
===
RCS file: /cvs/src/usr.sbin/bgpd/control.c,v
retrieving revision 1.110
diff -u -p -r1.110 control.c
--- control.c   20 Apr 2023 12:53:27 -  1.110
+++ control.c   20 Jul 2023 10:51:54 -
@@ -393,14 +393,14 @@ control_dispatch_msg(struct pollfd *pfd,
p->conf.down = 1;
strlcpy(p->conf.reason,
neighbor->reason,
-   sizeof(neighbor->reason));
+   sizeof(p->conf.reason));
session_stop(p, ERR_CEASE_ADMIN_DOWN);
control_result(c, CTL_RES_OK);
break;
case IMSG_CTL_NEIGHBOR_CLEAR:
strlcpy(p->conf.reason,
neighbor->reason,
-   sizeof(neighbor->reason));
+   sizeof(p->conf.reason));
p->IdleHoldTime =
INTERVAL_IDLE_HOLD_INITIAL;
p->errcnt = 0;

iked: s/ibuf_cat/ibuf_add_buf/

2023-07-18 Thread Claudio Jeker

ibuf_cat() is the same as ibuf_add_buf() so use the latter.

-- 
:wq Claudio

Index: eap.c
===
RCS file: /cvs/src/sbin/iked/eap.c,v
retrieving revision 1.24
diff -u -p -r1.24 eap.c
--- eap.c   23 May 2023 13:57:14 -  1.24
+++ eap.c   18 Jul 2023 13:11:27 -
@@ -112,7 +112,7 @@ eap_identity_request(struct iked *env, s
if ((pld = ikev2_add_payload(e)) == NULL)
goto done;
firstpayload = IKEV2_PAYLOAD_IDr;
-   if (ibuf_cat(e, id->id_buf) != 0)
+   if (ibuf_add_buf(e, id->id_buf) != 0)
goto done;
len = ibuf_size(id->id_buf);
 
@@ -127,7 +127,7 @@ eap_identity_request(struct iked *env, s
if ((cert = ibuf_reserve(e, sizeof(*cert))) == NULL)
goto done;
cert->cert_type = certid->id_type;
-   if (ibuf_cat(e, certid->id_buf) != 0)
+   if (ibuf_add_buf(e, certid->id_buf) != 0)
goto done;
len = ibuf_size(certid->id_buf) + sizeof(*cert);
 
@@ -142,7 +142,7 @@ eap_identity_request(struct iked *env, s
if ((cert = ibuf_reserve(e, sizeof(*cert))) == NULL)
goto done;
cert->cert_type = sa->sa_scert[i].id_type;
-   if (ibuf_cat(e, sa->sa_scert[i].id_buf) != 0)
+   if (ibuf_add_buf(e, sa->sa_scert[i].id_buf) != 0)
goto done;
len = ibuf_size(sa->sa_scert[i].id_buf) + sizeof(*cert);
}
@@ -157,7 +157,7 @@ eap_identity_request(struct iked *env, s
if ((auth = ibuf_reserve(e, sizeof(*auth))) == NULL)
goto done;
auth->auth_method = sa->sa_localauth.id_type;
-   if (ibuf_cat(e, sa->sa_localauth.id_buf) != 0)
+   if (ibuf_add_buf(e, sa->sa_localauth.id_buf) != 0)
goto done;
len = ibuf_size(sa->sa_localauth.id_buf) + sizeof(*auth);
 
Index: iked.h
===
RCS file: /cvs/src/sbin/iked/iked.h,v
retrieving revision 1.221
diff -u -p -r1.221 iked.h
--- iked.h  16 Jul 2023 15:21:46 -  1.221
+++ iked.h  18 Jul 2023 13:11:31 -
@@ -1268,7 +1268,6 @@ struct ibuf *
 ibuf_new(const void *, size_t);
 struct ibuf *
 ibuf_static(void);
-int ibuf_cat(struct ibuf *, struct ibuf *);
 size_t  ibuf_length(struct ibuf *);
 int ibuf_setsize(struct ibuf *, size_t);
 struct ibuf *
Index: ikev2.c
===
RCS file: /cvs/src/sbin/iked/ikev2.c,v
retrieving revision 1.373
diff -u -p -r1.373 ikev2.c
--- ikev2.c 16 Jul 2023 15:21:46 -  1.373
+++ ikev2.c 18 Jul 2023 13:13:37 -
@@ -1609,7 +1609,7 @@ ikev2_init_ike_auth(struct iked *env, st
if ((pld = ikev2_add_payload(e)) == NULL)
goto done;
firstpayload = IKEV2_PAYLOAD_IDi;
-   if (ibuf_cat(e, id->id_buf) != 0)
+   if (ibuf_add_buf(e, id->id_buf) != 0)
goto done;
len = ibuf_size(id->id_buf);
 
@@ -1623,7 +1623,7 @@ ikev2_init_ike_auth(struct iked *env, st
goto done;
if ((pld = ikev2_add_payload(e)) == NULL)
goto done;
-   if (ibuf_cat(e, peerid.id_buf) != 0)
+   if (ibuf_add_buf(e, peerid.id_buf) != 0)
goto done;
len = ibuf_size(peerid.id_buf);
}
@@ -1639,7 +1639,7 @@ ikev2_init_ike_auth(struct iked *env, st
if ((cert = ibuf_reserve(e, sizeof(*cert))) == NULL)
goto done;
cert->cert_type = certid->id_type;
-   if (ibuf_cat(e, certid->id_buf) != 0)
+   if (ibuf_add_buf(e, certid->id_buf) != 0)
goto done;
len = ibuf_size(certid->id_buf) + sizeof(*cert);
 
@@ -1654,7 +1654,7 @@ ikev2_init_ike_auth(struct iked *env, st
if ((cert = ibuf_reserve(e, sizeof(*cert))) == NULL)
goto done;
cert->cert_type = sa->sa_scert[i].id_type;
-   if (ibuf_cat(e, sa->sa_scert[i].id_buf) != 0)
+   if (ibuf_add_buf(e, sa->sa_scert[i].id_buf) != 0)
goto done;
len = ibuf_size(sa->sa_scert[i].id_buf) + sizeof(*cert);
}
@@ -1679,7 +1679,7 @@ ikev2_init_ike_auth(struct iked *env, st
if ((auth = ibuf_reserve(e, sizeof(*auth))) == NULL)
goto done;
auth->auth_method = sa->sa_localauth.id_type;
-   if (ibuf_cat(e, sa->sa_localauth.id_buf) != 0)
+   if (ibuf_add_buf(e, sa->sa_localauth.id_buf) != 0)
goto done;
len = ibuf_size(sa->sa_localauth.id_buf) + sizeof(*auth);
 
@@ -2212,7 +2212,7 @@

Re: vnode: drop comment, nonsensical where it is

2023-07-17 Thread Claudio Jeker

On Wed, Jul 12, 2023 at 12:25:19PM +0200, thib4711 wrote:
> The line comment in struct vnode is fine;
> 
> diff --git sys/sys/vnode.h sys/sys/vnode.h
> index 30787afddd8..b2f0fa4b60c 100644
> --- sys/sys/vnode.h
> +++ sys/sys/vnode.h
> @@ -74,12 +74,7 @@ enum vtagtype  {
>  "unused", "unused", "unused", "ISOFS", "unused", \
>  "EXT2FS", "VFS", "NTFS", "UDF", "FUSEFS", "TMPFS"
>  
> -/*
> - * Each underlying filesystem allocates its own private area and hangs
> - * it from v_data.  If non-null, this area is freed in getnewvnode().
> - */
>  LIST_HEAD(buflists, buf);
> -
>  RBT_HEAD(buf_rb_bufs, buf);
>  
>  struct namecache;
> 

Yes, this comment is not helpful (especially since v_data is cleaned up by
the reclaim function).

OK claudio@
-- 
:wq Claudio

Re: vfs: drop a bunch of cast macros

2023-07-17 Thread Claudio Jeker

On Mon, Jul 17, 2023 at 11:05:03AM +0200, Sebastien Marie wrote:
> On Wed, Jul 12, 2023 at 12:26:01PM +0200, thib4711 wrote:
> > make it obvious in the vfsops assignment that an op isn't supported.
> 
> I agree that it is more readable.
> 
> ok semarie@

OK claudio@ as well. Semarie can you commit this?
 
> thanks.
> -- 
> Sebastien Marie
> 
> > diff --git sys/isofs/cd9660/cd9660_extern.h sys/isofs/cd9660/cd9660_extern.h
> > index 2a5348e1768..bd8154a27bd 100644
> > --- sys/isofs/cd9660/cd9660_extern.h
> > +++ sys/isofs/cd9660/cd9660_extern.h
> > @@ -94,10 +94,8 @@ int cd9660_vptofh(struct vnode *, struct fid *);
> >  int cd9660_init(struct vfsconf *);
> >  int cd9660_check_export(struct mount *, struct mbuf *, int *,
> >   struct ucred **);
> > -#define cd9660_sysctl ((int (*)(int *, u_int, void *, size_t *, void *, \
> > -size_t, struct proc *))eopnotsupp)
> >  
> > -int cd9660_mountroot(void); 
> > +int cd9660_mountroot(void);
> >  
> >  extern const struct vops   cd9660_vops;
> >  extern const struct vops   cd9660_specvops;
> > diff --git sys/isofs/cd9660/cd9660_vfsops.c sys/isofs/cd9660/cd9660_vfsops.c
> > index ef0ffbbb152..b844a2ff709 100644
> > --- sys/isofs/cd9660/cd9660_vfsops.c
> > +++ sys/isofs/cd9660/cd9660_vfsops.c
> > @@ -72,7 +72,7 @@ const struct vfsops cd9660_vfsops = {
> > .vfs_fhtovp = cd9660_fhtovp,
> > .vfs_vptofh = cd9660_vptofh,
> > .vfs_init   = cd9660_init,
> > -   .vfs_sysctl = cd9660_sysctl,
> > +   .vfs_sysctl = (void *)eopnotsupp,
> > .vfs_checkexp   = cd9660_check_export,
> >  };
> >  
> > diff --git sys/msdosfs/msdosfs_vfsops.c sys/msdosfs/msdosfs_vfsops.c
> > index 0de37665dfd..6b90195b5e5 100644
> > --- sys/msdosfs/msdosfs_vfsops.c
> > +++ sys/msdosfs/msdosfs_vfsops.c
> > @@ -762,27 +762,18 @@ msdosfs_check_export(struct mount *mp, struct mbuf 
> > *nam, int *exflagsp,
> > return (0);
> >  }
> >  
> > -#define msdosfs_vget ((int (*)(struct mount *, ino_t, struct vnode **)) \
> > - eopnotsupp)
> > -
> > -#define msdosfs_quotactl ((int (*)(struct mount *, int, uid_t, caddr_t, \
> > -   struct proc *))eopnotsupp)
> > -
> > -#define msdosfs_sysctl ((int (*)(int *, u_int, void *, size_t *, void *, \
> > -size_t, struct proc *))eopnotsupp)
> > -
> >  const struct vfsops msdosfs_vfsops = {
> > .vfs_mount  = msdosfs_mount,
> > .vfs_start  = msdosfs_start,
> > .vfs_unmount= msdosfs_unmount,
> > .vfs_root   = msdosfs_root,
> > -   .vfs_quotactl   = msdosfs_quotactl,
> > +   .vfs_quotactl   = (void *)eopnotsupp,
> > .vfs_statfs = msdosfs_statfs,
> > .vfs_sync   = msdosfs_sync,
> > -   .vfs_vget   = msdosfs_vget,
> > +   .vfs_vget   = (void *)eopnotsupp,
> > .vfs_fhtovp = msdosfs_fhtovp,
> > .vfs_vptofh = msdosfs_vptofh,
> > .vfs_init   = msdosfs_init,
> > -   .vfs_sysctl = msdosfs_sysctl,
> > +   .vfs_sysctl = (void *)eopnotsupp,
> > .vfs_checkexp   = msdosfs_check_export,
> >  };
> > 
> 

-- 
:wq Claudio

iked: more ibuf cleanup

2023-07-16 Thread Claudio Jeker

Rename ibuf_get() to ibuf_getdata() by merging the two functions together.
I want to use ibuf_get() as part of the ibuf API so this needs to move.
Also use ibuf_add_zero() in place of ibuf_reserve() and remove a check
for buf->buf == NULL in ibuf_length() which has nothing to do there.

-- 
:wq Claudio

Index: iked.h
===
RCS file: /cvs/src/sbin/iked/iked.h,v
retrieving revision 1.220
diff -u -p -r1.220 iked.h
--- iked.h  28 Jun 2023 14:10:24 -  1.220
+++ iked.h  16 Jul 2023 13:45:20 -
@@ -1271,9 +1271,8 @@ struct ibuf *
 int ibuf_cat(struct ibuf *, struct ibuf *);
 size_t  ibuf_length(struct ibuf *);
 int ibuf_setsize(struct ibuf *, size_t);
-void   *ibuf_getdata(struct ibuf *, size_t);
 struct ibuf *
-ibuf_get(struct ibuf *, size_t);
+ibuf_getdata(struct ibuf *, size_t);
 struct ibuf *
 ibuf_dup(struct ibuf *);
 struct ibuf *
Index: ikev2.c
===
RCS file: /cvs/src/sbin/iked/ikev2.c,v
retrieving revision 1.372
diff -u -p -r1.372 ikev2.c
--- ikev2.c 28 Jun 2023 14:10:24 -  1.372
+++ ikev2.c 16 Jul 2023 13:54:01 -
@@ -5829,16 +5829,20 @@ ikev2_sa_keys(struct iked *env, struct i
goto done;
}
 
-   /* ibuf_get() returns a new buffer from the next read offset */
-   if ((sa->sa_key_d = ibuf_get(t, hash_length(prf))) == NULL ||
+   /* ibuf_getdata() returns a new buffer from the next read offset */
+   if ((sa->sa_key_d = ibuf_getdata(t, hash_length(prf))) == NULL ||
(!isaead &&
-   (sa->sa_key_iauth = ibuf_get(t, hash_keylength(integr))) == NULL) ||
+   (sa->sa_key_iauth = ibuf_getdata(t, hash_keylength(integr))) ==
+   NULL) ||
(!isaead &&
-   (sa->sa_key_rauth = ibuf_get(t, hash_keylength(integr))) == NULL) ||
-   (sa->sa_key_iencr = ibuf_get(t, cipher_keylength(encr))) == NULL ||
-   (sa->sa_key_rencr = ibuf_get(t, cipher_keylength(encr))) == NULL ||
-   (sa->sa_key_iprf = ibuf_get(t, hash_length(prf))) == NULL ||
-   (sa->sa_key_rprf = ibuf_get(t, hash_length(prf))) == NULL) {
+   (sa->sa_key_rauth = ibuf_getdata(t, hash_keylength(integr))) ==
+   NULL) ||
+   (sa->sa_key_iencr = ibuf_getdata(t, cipher_keylength(encr))) ==
+   NULL ||
+   (sa->sa_key_rencr = ibuf_getdata(t, cipher_keylength(encr))) ==
+   NULL ||
+   (sa->sa_key_iprf = ibuf_getdata(t, hash_length(prf))) == NULL ||
+   (sa->sa_key_rprf = ibuf_getdata(t, hash_length(prf))) == NULL) {
log_debug("%s: failed to get SA keys", SPI_SA(sa, __func__));
goto done;
}
@@ -6307,13 +6311,13 @@ ikev2_childsa_negotiate(struct iked *env
csa->csa_spi.spi_size = 4;
}
 
-   if (encrxf && (csa->csa_encrkey = ibuf_get(keymat,
+   if (encrxf && (csa->csa_encrkey = ibuf_getdata(keymat,
encrxf->xform_keylength / 8)) == NULL) {
log_debug("%s: failed to get CHILD SA encryption key",
__func__);
goto done;
}
-   if (integrxf && (csa->csa_integrkey = ibuf_get(keymat,
+   if (integrxf && (csa->csa_integrkey = ibuf_getdata(keymat,
integrxf->xform_keylength / 8)) == NULL) {
log_debug("%s: failed to get CHILD SA integrity key",
__func__);
@@ -6340,13 +6344,13 @@ ikev2_childsa_negotiate(struct iked *env
csb->csa_local = csa->csa_peer;
csb->csa_peer = csa->csa_local;
 
-   if (encrxf && (csb->csa_encrkey = ibuf_get(keymat,
+   if (encrxf && (csb->csa_encrkey = ibuf_getdata(keymat,
encrxf->xform_keylength / 8)) == NULL) {
log_debug("%s: failed to get CHILD SA encryption key",
__func__);
goto done;
}
-   if (integrxf && (csb->csa_integrkey = ibuf_get(keymat,
+   if (integrxf && (csb->csa_integrkey = ibuf_getdata(keymat,
integrxf->xform_keylength / 8)) == NULL) {
log_debug("%s: failed to get CHILD SA integrity key",
__func__);
Index: imsg_util.c
===
RCS file: /cvs/src/sbin/iked/imsg_util.c,v
retrieving revision 1.19
diff -u -p -r1.19 imsg_util.c
--- imsg_util.c 19 Jun 2023 17:19:50 -  1.19
+++ imsg_util.c 16 Jul 2023 13:59:29 -
@@ -55,7 +55,7 @@ ibuf_new(const void *data, size_t len)
return (buf);
 
if (data == NULL) {
-   if (ibuf_reserve(buf, len) == NULL) {
+   if (ibuf_add_zero(buf, len) != 0) {

Re: bgpd: cleanup mrt.c

2023-07-14 Thread Claudio Jeker

On Thu, Jul 13, 2023 at 11:36:22AM +0200, Theo Buehler wrote:
> On Thu, Jul 13, 2023 at 10:04:33AM +0200, Claudio Jeker wrote:
> > This is a follow-up to use more of the new ibuf API to write the mrt 
> > message.
> > 
> > This removes all of the DUMP_XYZ macros and replaces them with
> > ibuf_add_nX() calls. Also unify the error handling by using
> > goto fail; in all cases and use a more generic log_warn() there once.
> 
> The conversions all look correct, so that's ok.
> 
> There are a few silent failures and a few double logs. The former should
> be avoided and I think this should be done in this diff. I'm not sure
> how much effort should be invested in fully avoiding the latter. It's a
> bit messy:
> 
> The only caller mrt_dump_entry_v2_rib() already logs an ibuf failure on
> error, so there's no need to add a log to mrt_dump_entry_v2_rib()'s fail
> label.
> 
> In mrt_dump_hdr_se() there is no log after the fail: label. I think it
> needs one, otherwise mrt_dump_{bgp_msg,state}() could fail silently.
> 
> Most callers of mrt_dump_hdr_rde() will log an ibuf error on failure, so
> you probably want to remove the log in the early return after ibuf_dynamic.
> 
> Also, add a log (or goto fail) on mrt_dump_hdr_rde() failure in
> mrt_dump_entry(). In particular, there is often a double log for the
> 'unsupported type' case in mrt_dump_hdr_rde()...

This is a cleaned up version following what you mentioned. I marked the
internal functions with static and removed most logging from them and
instead log only on the primary functions.

I did not do this for those default cases in various switches. These
errors are more like asserts and IIRC once were fatalx() calls but they
got demoted to warnings to reduce the number of fatal errors in bgpd. 
Since these code paths are in most cases unreachable (unless a bug is
introduced) I'm fine with the double logging.

-- 
:wq Claudio

Index: mrt.c
===
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.115
diff -u -p -r1.115 mrt.c
--- mrt.c   12 Jul 2023 14:45:42 -  1.115
+++ mrt.c   14 Jul 2023 09:33:35 -
@@ -34,55 +34,19 @@
 #include "mrt.h"
 #include "log.h"
 
-int mrt_attr_dump(struct ibuf *, struct rde_aspath *, struct rde_community *,
-struct bgpd_addr *, int);
-int mrt_dump_entry_mp(struct mrt *, struct prefix *, uint16_t,
-struct rde_peer*);
-int mrt_dump_entry(struct mrt *, struct prefix *, uint16_t, struct rde_peer*);
-int mrt_dump_entry_v2(struct mrt *, struct rib_entry *, uint32_t);
-int mrt_dump_peer(struct ibuf *, struct rde_peer *);
-int mrt_dump_hdr_se(struct ibuf **, struct peer *, uint16_t, uint16_t,
-uint32_t, int);
-int mrt_dump_hdr_rde(struct ibuf **, uint16_t type, uint16_t, uint32_t);
-int mrt_open(struct mrt *, time_t);
-
-#define DUMP_BYTE(x, b)
\
-   do {\
-   u_char  t = (b);\
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump1: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_SHORT(x, s)   \
-   do {\
-   uint16_tt;  \
-   t = htons((s)); \
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump2: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_LONG(x, l)
\
-   do {\
-   uint32_tt;  \
-   t = htonl((l)); \
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump3: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_NLONG(x, l)   \
-   do {\
-   uint32_tt = (l);

bgpd: cleanup mrt.c

2023-07-13 Thread Claudio Jeker

This is a follow-up to use more of the new ibuf API to write the mrt message.

This removes all of the DUMP_XYZ macros and replaces them with
ibuf_add_nX() calls. Also unify the error handling by using
goto fail; in all cases and use a more generic log_warn() there once.
-- 
:wq Claudio

Index: mrt.c
===
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.115
diff -u -p -r1.115 mrt.c
--- mrt.c   12 Jul 2023 14:45:42 -  1.115
+++ mrt.c   13 Jul 2023 07:57:23 -
@@ -46,44 +46,6 @@ int mrt_dump_hdr_se(struct ibuf **, stru
 int mrt_dump_hdr_rde(struct ibuf **, uint16_t type, uint16_t, uint32_t);
 int mrt_open(struct mrt *, time_t);
 
-#define DUMP_BYTE(x, b)
\
-   do {\
-   u_char  t = (b);\
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump1: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_SHORT(x, s)   \
-   do {\
-   uint16_tt;  \
-   t = htons((s)); \
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump2: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_LONG(x, l)
\
-   do {\
-   uint32_tt;  \
-   t = htonl((l)); \
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump3: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
-#define DUMP_NLONG(x, l)   \
-   do {\
-   uint32_tt = (l);\
-   if (ibuf_add((x), , sizeof(t)) == -1) {   \
-   log_warn("mrt_dump4: ibuf_add error");  \
-   goto fail;  \
-   }   \
-   } while (0)
-
 #define RDEIDX 0
 #define SEIDX  1
 #define TYPE2IDX(x)((x == MRT_TABLE_DUMP ||\
@@ -248,13 +210,16 @@ mrt_dump_state(struct mrt *mrt, uint16_t
2 * sizeof(short), 0) == -1)
return;
 
-   DUMP_SHORT(buf, old_state);
-   DUMP_SHORT(buf, new_state);
+   if (ibuf_add_n16(buf, old_state) == -1)
+   goto fail;
+   if (ibuf_add_n16(buf, new_state) == -1)
+   goto fail;
 
ibuf_close(>wbuf, buf);
return;
 
 fail:
+   log_warn("%s: ibuf error", __func__);
ibuf_free(buf);
 }
 
@@ -330,39 +295,48 @@ mrt_attr_dump(struct ibuf *buf, struct r
return (-1);
if (!v2) {
if (aid2afi(nexthop->aid, , ))
-   return (-1);
-   DUMP_SHORT(nhbuf, afi);
-   DUMP_BYTE(nhbuf, safi);
+   goto fail;
+   if (ibuf_add_n16(nhbuf, afi) == -1)
+   goto fail;
+   if (ibuf_add_n8(nhbuf, safi) == -1)
+   goto fail;
}
switch (nexthop->aid) {
case AID_INET6:
-   DUMP_BYTE(nhbuf, sizeof(struct in6_addr));
+   if (ibuf_add_n8(nhbuf, sizeof(struct in6_addr)) == -1)
+   goto fail;
if (ibuf_add(nhbuf, >v6,
sizeof(struct in6_addr)) == -1)
goto fail;
break;
case AID_VPN_IPv4:
-   DUMP_BYTE(nhbuf, sizeof(uint64_t) +
-   sizeof(struct in_addr));
-   DUMP_NLONG(nhbuf, 0);   /* set RD to 0 */
-   DUMP_NLONG(nhbuf, 0);
-   DUMP_NLONG(nhbuf,

OpenBGPD 8.1 released

2023-07-12 Thread Claudio Jeker

We have released OpenBGPD 8.1, which will be arriving in the
OpenBGPD directory of your local OpenBSD mirror soon.

This release includes the following changes to the previous release:

* Include OpenBSD 7.3 errata 002:
  Avoid fatal errors in bgpd(8) due to incorrect refcounting and
  mishandling of ASPA objects. Fix bgpctl(8) 'show rib in' by renaming
  'invalid' into 'disqualified'.

* Include OpenBSD 7.3 errata 006:
  Incorrect length handling of path attributes in bgpd(8) can lead to a
  session reset.

* Include OpenBSD 7.3 errata 009:
  When tracking nexthops over IPv6 multipath routes, or when receiving
  a NOTIFICATION while reaching an internal limit, bgpd(8) could crash.

  When checking the next hop for IPv6 multipath routes, or when receiving
  a NOTIFICATION while reaching an internal limit, bgpd(8) could crash.

* Add configure options to adjust WWW_USER and wwwrunstatedir.

* Fix 'ext-community * *' matching which also affects filters removing
  all ext-communities.

* Limit the socket buffer size to 64k for all sessions.
  Limiting the buffer size to a reasonable size ensures that not too many
  updates end up queued in the TCP stack.

OpenBGPD-portable is known to compile and run on FreeBSD and the
Linux distributions Alpine, Debian, Fedora, RHEL/CentOS and Ubuntu.
It is our hope that packagers take interest and help adapt OpenBGPD-portable
to more distributions.

We welcome feedback and improvements from the broader community.
Thanks to all of the contributors who helped make this release
possible.

Re: GPROF: sleep_state: disable _mcount() across suspend/resume

2023-07-09 Thread Claudio Jeker

On Sun, Jul 09, 2023 at 12:52:20PM -0500, Scott Cheloha wrote:
> This patch fixes resume/unhibernate on GPROF kernels where kgmon(8)
> has activated kernel profiling.
> 
> I think the problem is that code called from cpu_hatch() does not play
> nicely with _mcount(), so GPROF kernels crash during resume.  I can't
> point you to which code in particular, but keeping all CPUs out of
> _mcount() until the primary CPU has completed resume/unhibernate fixes
> the crash.
> 
> ok?

To be honest, I'm not sure we need something like this. GPROF is already a
special case and people running a GPROF kernel should probably stop the
collection of profile data before suspend/hibernate.
Unless someone wants to gprof suspend/hibernate but then doing this will
result in bad data.
Another option is to abort zzz/ZZZ if kernel profiling is running.

In your diff I would remove these asserts:
KASSERT(CPU_IS_PRIMARY(curcpu()));
The code does not require the primary cpu there and KASSERTs are not free.
 
> Index: sys/lib/libkern/mcount.c
> ===
> RCS file: /cvs/src/sys/lib/libkern/mcount.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 mcount.c
> --- sys/lib/libkern/mcount.c  11 Jan 2022 09:21:34 -  1.14
> +++ sys/lib/libkern/mcount.c  9 Jul 2023 17:49:55 -
> @@ -33,6 +33,32 @@
>  #include 
>  #include 
>  
> +#ifdef _KERNEL
> +#ifdef SUSPEND
> +#include 
> +
> +#include  /* KASSERT */
> +
> +volatile int mcount_disabled;
> +
> +void
> +mcount_disable(void)
> +{
> + KASSERT(CPU_IS_PRIMARY(curcpu()));
> + mcount_disabled = 1;
> + membar_producer();
> +}
> +
> +void
> +mcount_enable(void)
> +{
> + KASSERT(CPU_IS_PRIMARY(curcpu()));
> + mcount_disabled = 0;
> + membar_producer();
> +}
> +#endif /* SUSPEND */
> +#endif /* _KERNEL */
> +
>  /*
>   * mcount is called on entry to each function compiled with the profiling
>   * switch set.  _mcount(), which is declared in a machine-dependent way
> @@ -63,7 +89,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
>*/
>   if (gmoninit == 0)
>   return;
> -
> +#ifdef SUSPEND
> + if (mcount_disabled)
> + return;
> +#endif
>   if ((p = curcpu()->ci_gmon) == NULL)
>   return;
>  #else
> Index: sys/kern/subr_suspend.c
> ===
> RCS file: /cvs/src/sys/kern/subr_suspend.c,v
> retrieving revision 1.15
> diff -u -p -r1.15 subr_suspend.c
> --- sys/kern/subr_suspend.c   2 Jul 2023 19:02:27 -   1.15
> +++ sys/kern/subr_suspend.c   9 Jul 2023 17:49:55 -
> @@ -26,6 +26,9 @@
>  #include 
>  #include 
>  #include 
> +#ifdef GPROF
> +#include 
> +#endif
>  #ifdef HIBERNATE
>  #include 
>  #endif
> @@ -63,6 +66,9 @@ top:
>  
>   if (sleep_showstate(v, sleepmode))
>   return EOPNOTSUPP;
> +#ifdef GPROF
> + mcount_disable();
> +#endif
>  #if NWSDISPLAY > 0
>   wsdisplay_suspend();
>  #endif
> @@ -192,6 +198,9 @@ fail_hiballoc:
>   start_periodic_resettodr();
>  #if NWSDISPLAY > 0
>   wsdisplay_resume();
> +#endif
> +#ifdef GPROF
> + mcount_enable();
>  #endif
>   sys_sync(curproc, NULL, NULL);
>   if (cpu_setperf != NULL)
> Index: sys/sys/gmon.h
> ===
> RCS file: /cvs/src/sys/sys/gmon.h,v
> retrieving revision 1.9
> diff -u -p -r1.9 gmon.h
> --- sys/sys/gmon.h11 Jan 2022 23:59:55 -  1.9
> +++ sys/sys/gmon.h9 Jul 2023 17:49:55 -
> @@ -158,6 +158,10 @@ struct gmonparam {
>  #ifdef _KERNEL
>  extern int gmoninit; /* Is the kernel ready for being profiled? */
>  
> +#ifdef SUSPEND
> +void mcount_disable(void);
> +void mcount_enable(void);
> +#endif
>  #else /* !_KERNEL */
>  
>  #include 
> Index: lib/libc/gmon/mcount.c
> ===
> RCS file: /cvs/src/lib/libc/gmon/mcount.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 mcount.c
> --- lib/libc/gmon/mcount.c11 Jan 2022 09:21:34 -  1.16
> +++ lib/libc/gmon/mcount.c9 Jul 2023 17:49:55 -
> @@ -31,6 +31,32 @@
>  #include 
>  #include 
>  
> +#ifdef _KERNEL
> +#ifdef SUSPEND
> +#include 
> +
> +#include  /* KASSERT */
> +
> +volatile int mcount_disabled;
> +
> +void
> +mcount_disable(void)
> +{
> + KASSERT(CPU_IS_PRIMARY(curcpu()));
> + mcount_disabled = 1;
> + membar_producer();
> +}
> +
> +void
> +mcount_enable(void)
> +{
> + KASSERT(CPU_IS_PRIMARY(curcpu()));
> + mcount_disabled = 0;
> + membar_producer();
> +}
> +#endif /* SUSPEND */
> +#endif /* _KERNEL */
> +
>  /*
>   * mcount is called on entry to each function compiled with the profiling
>   * switch set.  _mcount(), which is declared in a machine-dependent way
> @@ -61,7 +87,10 @@ _MCOUNT_DECL(u_long frompc, u_long selfp
>*/
>   if (gmoninit == 0)
>   return;
> -
> +#ifdef SUSPEND
> + if (mcount_disabled)

Re: make mbstat smaller (was Re: make mstat smaller)

2023-07-09 Thread Claudio Jeker

On Sun, Jul 09, 2023 at 10:25:46AM +0300, YASUOKA Masahiko wrote:
> On Sat, 08 Jul 2023 21:58:30 +0300 (EEST)
> YASUOKA Masahiko  wrote:
> > The diff makes the mbstat be the same size which is actually used.
> > Also revert the previous that the mbstat is located on the stack.
> 
> The userland program also needed to be changed.
> 
> ok?

The code in netstat still has some more hard-coded 256 values:

bool seen[256]; /* "have we seen this type yet?" */

I guess that can be fixed once this is in tree.
The diff is OK claudio@
 
> Index: sys/kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.417
> diff -u -p -r1.417 kern_sysctl.c
> --- sys/kern/kern_sysctl.c7 Jul 2023 16:27:46 -   1.417
> +++ sys/kern/kern_sysctl.c9 Jul 2023 07:22:58 -
> @@ -515,22 +515,20 @@ kern_sysctl(int *name, u_int namelen, vo
>   case KERN_MBSTAT: {
>   extern struct cpumem *mbstat;
>   uint64_t counters[MBSTAT_COUNT];
> - struct mbstat *mbs;
> + struct mbstat mbs;
>   unsigned int i;
> - int ret;
>  
> - mbs = malloc(sizeof(*mbs), M_TEMP, M_WAITOK | M_ZERO);
> + memset(, 0, sizeof(mbs));
>   counters_read(mbstat, counters, MBSTAT_COUNT);
>   for (i = 0; i < MBSTAT_TYPES; i++)
> - mbs->m_mtypes[i] = counters[i];
> + mbs.m_mtypes[i] = counters[i];
>  
> - mbs->m_drops = counters[MBSTAT_DROPS];
> - mbs->m_wait = counters[MBSTAT_WAIT];
> - mbs->m_drain = counters[MBSTAT_DRAIN];
> + mbs.m_drops = counters[MBSTAT_DROPS];
> + mbs.m_wait = counters[MBSTAT_WAIT];
> + mbs.m_drain = counters[MBSTAT_DRAIN];
>  
> - ret = sysctl_rdstruct(oldp, oldlenp, newp, mbs, sizeof(*mbs));
> - free(mbs, M_TEMP, sizeof(*mbs));
> - return (ret);
> + return (sysctl_rdstruct(oldp, oldlenp, newp,
> + , sizeof(mbs)));
>   }
>   case KERN_MSGBUFSIZE:
>   case KERN_CONSBUFSIZE: {
> Index: sys/sys/mbuf.h
> ===
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.260
> diff -u -p -r1.260 mbuf.h
> --- sys/sys/mbuf.h7 Jul 2023 14:17:34 -   1.260
> +++ sys/sys/mbuf.h9 Jul 2023 07:22:58 -
> @@ -363,6 +363,12 @@ u_int mextfree_register(void (*)(caddr_t
>  /* length to m_copy to copy all */
>  #define  M_COPYALL   10
>  
> +#define MBSTAT_TYPES   MT_NTYPES
> +#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
> +#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
> +#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
> +#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
> +
>  /*
>   * Mbuf statistics.
>   * For statistics related to mbuf and cluster allocations, see also the
> @@ -372,14 +378,9 @@ struct mbstat {
>   u_long  m_drops;/* times failed to find space */
>   u_long  m_wait; /* times waited for space */
>   u_long  m_drain;/* times drained protocols for space */
> - u_long  m_mtypes[256];  /* type specific mbuf allocations */
> + u_long  m_mtypes[MBSTAT_COUNT];
> + /* type specific mbuf allocations */
>  };
> -
> -#define MBSTAT_TYPES   MT_NTYPES
> -#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
> -#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
> -#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
> -#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
>  
>  #include 
>  
> Index: usr.bin/netstat/mbuf.c
> ===
> RCS file: /cvs/src/usr.bin/netstat/mbuf.c,v
> retrieving revision 1.44
> diff -u -p -r1.44 mbuf.c
> --- usr.bin/netstat/mbuf.c7 Jul 2023 14:17:35 -   1.44
> +++ usr.bin/netstat/mbuf.c9 Jul 2023 07:22:58 -
> @@ -93,7 +93,7 @@ mbpr(void)
>   struct mbtypes *mp;
>   size_t size;
>  
> - if (nmbtypes != 256) {
> + if (nmbtypes != MBSTAT_COUNT) {
>   fprintf(stderr,
>   "%s: unexpected change to mbstat; check source\n",
>   __progname);
> 
> 
> 
> 

-- 
:wq Claudio

Re: tcp timer wrap around, use 64 bit

2023-07-07 Thread Claudio Jeker

On Fri, Jul 07, 2023 at 11:38:58AM +0300, YASUOKA Masahiko wrote:
> Hi,
> 
> Does using 64 bit for timer in tcpcb require this?

Not sure about this but one comment below.
 
> ok?
> 
> Index: sys/netinet/tcp.h
> ===
> RCS file: /cvs/src/sys/netinet/tcp.h,v
> retrieving revision 1.24
> diff -u -p -r1.24 tcp.h
> --- sys/netinet/tcp.h 19 May 2023 01:04:39 -  1.24
> +++ sys/netinet/tcp.h 7 Jul 2023 08:33:26 -
> @@ -194,9 +194,9 @@ struct tcp_info {
>   uint32_ttcpi_snd_wl2;
>   uint32_ttcpi_snd_max;
>   uint32_ttcpi_ts_recent;
> - uint32_ttcpi_ts_recent_age;
> + uint64_ttcpi_ts_recent_age;
>   uint32_ttcpi_rfbuf_cnt;
> - uint32_ttcpi_rfbuf_ts;
> + uint64_ttcpi_rfbuf_ts;
>   uint32_ttcpi_so_rcv_sb_cc;
>   uint32_ttcpi_so_rcv_sb_hiwat;
>   uint32_ttcpi_so_rcv_sb_lowat;
> Index: usr.bin/tcpbench/tcpbench.c
> ===
> RCS file: /cvs/src/usr.bin/tcpbench/tcpbench.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 tcpbench.c
> --- usr.bin/tcpbench/tcpbench.c   22 May 2023 12:53:04 -  1.69
> +++ usr.bin/tcpbench/tcpbench.c   7 Jul 2023 08:33:26 -
> @@ -51,6 +51,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define DEFAULT_PORT "12345"
>  #define DEFAULT_STATS_INTERVAL 1000 /* ms */
> @@ -411,7 +412,7 @@ tcp_stats_display(unsigned long long tot
>   P(tcpi, rcv_up, "%u")
>   P(tcpi, rcv_wscale, "%hhu")
>   P(tcpi, rfbuf_cnt, "%u")
> - P(tcpi, rfbuf_ts, "%u")
> + P(tcpi, rfbuf_ts, "%" PRIu64)

I don't think we need these ugly PRIu64 here. Just use %llu since in
OpenBSD uint64_t is always an unsigned long long.

>   P(tcpi, rtt, "%u")
>   P(tcpi, rttmin, "%u")
>   P(tcpi, rttvar, "%u")
> @@ -436,7 +437,7 @@ tcp_stats_display(unsigned long long tot
>   P(tcpi, so_snd_sb_lowat, "%u")
>   P(tcpi, so_snd_sb_wat, "%u")
>   P(tcpi, ts_recent, "%u")
> - P(tcpi, ts_recent_age, "%u")
> + P(tcpi, ts_recent_age, "%" PRIu64)
>  #undef S
>  #undef P
>   }

-- 
:wq Claudio

Re: validate vm.conf local prefixes in parser

2023-07-04 Thread Claudio Jeker

On Tue, Jul 04, 2023 at 11:39:19AM -0400, Dave Voutila wrote:
> vmd's doing something close to shotgun parsing of the "local prefix" and
> "local inet6 prefix" settings in vm.conf(5). The parser intermixes ipv4
> and ipv6 parsing even when we know which one is valid in the parsing
> context. This makes me sad.
> 
> Even worse, we're not validating the inputs at time of parsing and
> deferring to vm creation time. This makes me even sadder.
> 
> The diff below:
>  - splits parsing ipv4 and ipv6 into distinct functions
>  - puts the validation into those functions (e.g prefix length, prefix
>has properly zero'd octets)
>  - does *not* muck (yet) with the existing validation sprinkled in
>priv.c or config.c
> 
> I thought about making pulling apart the prefix from prefix length
> parsing, but getaddrinfo(3) appears to not like parsing addresses like
> "192.168.0.0" vs "192.168.0.0/16". (I'm not a network person...maybe I'm
> being dumb here.)
> 
> kn: this addresses some of your feedback on my previous diff from a few
> weeks ago.
> 
> ok?

Most of these issues have already been solved elsewhere, for example in bgpd.
The code there is able to parse most address forms, including shortcuts like
192.168/24. Have a look at bgpd/config.c::host() and host_ip() to see how it
works.


 
> diff refs/heads/master refs/heads/vmd-parse
> commit - 5d90c77abd2d7447f16f88ac9ea9e0485eac9f73
> commit + 228fe48802ec6250e3487aa791daceba4626b03f
> blob - b538d40be1a1e600c1021d95e3fadd310079fa7a
> blob + f5a2507ff5742ea3a62b0112e14d17aa8cbdf99d
> --- usr.sbin/vmd/config.c
> +++ usr.sbin/vmd/config.c
> @@ -49,7 +49,7 @@ config_init_localprefix(struct vmd_config *cfg)
>  {
>   struct sockaddr_in6 *sin6;
> 
> - if (host(VMD_DHCP_PREFIX, >cfg_localprefix) == -1)
> + if (parse_prefix4(VMD_DHCP_PREFIX, >cfg_localprefix, NULL) == -1)
>   return (-1);
> 
>   /* IPv6 is disabled by default */
> @@ -58,7 +58,7 @@ config_init_localprefix(struct vmd_config *cfg)
>   /* Generate random IPv6 prefix only once */
>   if (cfg->cfg_flags & VMD_CFG_AUTOINET6)
>   return (0);
> - if (host(VMD_ULA_PREFIX, >cfg_localprefix6) == -1)
> + if (parse_prefix6(VMD_ULA_PREFIX, >cfg_localprefix6, NULL) == -1)
>   return (-1);
>   /* Randomize the 56 bits "Global ID" and "Subnet ID" */
>   sin6 = ss2sin6(>cfg_localprefix6.ss);
> blob - 09468e3fe2c9f4f9193710c65667132f79a90df3
> blob + 3d030c201db3e8167831846cb1c8f6e3facc40fc
> --- usr.sbin/vmd/parse.y
> +++ usr.sbin/vmd/parse.y
> @@ -190,31 +190,30 @@ main: LOCAL INET6 {
>   }
>   | LOCAL INET6 PREFIX STRING {
>   struct address   h;
> + const char  *err;
> 
> - if (host($4, ) == -1 ||
> - h.ss.ss_family != AF_INET6 ||
> - h.prefixlen > 64 || h.prefixlen < 0) {
> - yyerror("invalid local inet6 prefix: %s", $4);
> - free($4);
> + if (parse_prefix6($4, , )) {
> + yyerror("invalid local inet6 prefix: %s", err);
>   YYERROR;
> + } else {
> + env->vmd_cfg.cfg_flags |= VMD_CFG_INET6;
> + env->vmd_cfg.cfg_flags &= ~VMD_CFG_AUTOINET6;
> + memcpy(>vmd_cfg.cfg_localprefix6, ,
> + sizeof(h));
>   }
> -
> - env->vmd_cfg.cfg_flags |= VMD_CFG_INET6;
> - env->vmd_cfg.cfg_flags &= ~VMD_CFG_AUTOINET6;
> - memcpy(>vmd_cfg.cfg_localprefix6, , sizeof(h));
> + free($4);
>   }
>   | LOCAL PREFIX STRING {
>   struct address   h;
> + const char  *err;
> 
> - if (host($3, ) == -1 ||
> - h.ss.ss_family != AF_INET ||
> - h.prefixlen > 32 || h.prefixlen < 0) {
> - yyerror("invalid local prefix: %s", $3);
> - free($3);
> + if (parse_prefix4($3, , )) {
> + yyerror("invalid local prefix: %s", err);
>   YYERROR;
> - }
> -
> - memcpy(>vmd_cfg.cfg_localprefix, , sizeof(h));
> + } else
> + memcpy(>vmd_cfg.cfg_localprefix, ,
> + sizeof(h));
> + free($3);
>   }
>   | SOCKET OWNER owner_id {
>   env->vmd_ps.ps_csock.cs_uid = $3.uid;
> @@ -1404,42 +1403,119 @@ int
>   return (0);
>  }
> 
> +/*
> + * Parse an ipv4 address and prefix for local interfaces and validate
> + * constraints for vmd networking.
> + */
>  int
> -host(const

Re: ldpd: use new ibuf api

2023-07-03 Thread Claudio Jeker

On Mon, Jun 26, 2023 at 06:14:30PM +0200, Claudio Jeker wrote:
> Adjust ldpd to follow the new ibuf API.
> Diff is simple but as usual I could not test it out so please test.

This is still open. I will commit this later today.
 
> -- 
> :wq Claudio
> 
> Index: hello.c
> ===
> RCS file: /cvs/src/usr.sbin/ldpd/hello.c,v
> retrieving revision 1.58
> diff -u -p -r1.58 hello.c
> --- hello.c   12 Dec 2019 00:10:29 -  1.58
> +++ hello.c   12 Jun 2023 07:27:51 -
> @@ -139,7 +139,7 @@ send_hello(enum hello_type type, struct 
>   return (-1);
>   }
>  
> - send_packet(fd, af, , ia, buf->buf, buf->wpos);
> + send_packet(fd, af, , ia, ibuf_data(buf), ibuf_size(buf));
>   ibuf_free(buf);
>  
>   return (0);
> Index: labelmapping.c
> ===
> RCS file: /cvs/src/usr.sbin/ldpd/labelmapping.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 labelmapping.c
> --- labelmapping.c4 Mar 2017 00:15:35 -   1.68
> +++ labelmapping.c22 Jun 2023 08:49:55 -
> @@ -22,6 +22,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -39,10 +40,8 @@ static void log_msg_mapping(int, uint16
>  static void
>  enqueue_pdu(struct nbr *nbr, struct ibuf *buf, uint16_t size)
>  {
> - struct ldp_hdr  *ldp_hdr;
> -
> - ldp_hdr = ibuf_seek(buf, 0, sizeof(struct ldp_hdr));
> - ldp_hdr->length = htons(size);
> + if (ibuf_set_n16(buf, offsetof(struct ldp_hdr, length), size) == -1)
> + fatal(__func__);
>   evbuf_enqueue(>tcp->wbuf, buf);
>  }
>  
> 

-- 
:wq Claudio

eigrpd more ibuf cleanup

2023-07-03 Thread Claudio Jeker

Same change as done in other daemons, use ibuf_data() and ibuf_size().

-- 
:wq Claudio

Index: packet.c
===
RCS file: /cvs/src/usr.sbin/eigrpd/packet.c,v
retrieving revision 1.21
diff -u -p -r1.21 packet.c
--- packet.c26 Jun 2023 14:07:19 -  1.21
+++ packet.c30 Jun 2023 08:10:37 -
@@ -97,7 +97,7 @@ send_packet_v4(struct iface *iface, stru
memset(, 0, sizeof(msg));
iov[0].iov_base = _hdr;
iov[0].iov_len = sizeof(ip_hdr);
-   iov[1].iov_base = buf->buf;
+   iov[1].iov_base = ibuf_data(buf);
iov[1].iov_len = ibuf_size(buf);
msg.msg_name = 
msg.msg_namelen = sizeof(dst);
@@ -144,7 +144,7 @@ send_packet_v6(struct iface *iface, stru
return (-1);
}
 
-   if (sendto(global.eigrp_socket_v6, buf->buf, buf->wpos, 0,
+   if (sendto(global.eigrp_socket_v6, ibuf_data(buf), ibuf_size(buf), 0,
(struct sockaddr *), sizeof(sa6)) == -1) {
log_warn("%s: error sending packet on interface %s",
__func__, iface->name);

ripd more ibuf cleanup

2023-07-03 Thread Claudio Jeker

Similar to the other daemons use ibuf_data() and ibuf_size() instead of
direct access.

-- 
:wq Claudio

Index: message.c
===
RCS file: /cvs/src/usr.sbin/ripd/message.c,v
retrieving revision 1.16
diff -u -p -r1.16 message.c
--- message.c   8 Mar 2023 04:43:14 -   1.16
+++ message.c   19 Apr 2023 13:13:02 -
@@ -134,7 +134,7 @@ send_triggered_update(struct iface *ifac
ibuf_add(buf, , sizeof(nexthop));
ibuf_add(buf, , sizeof(metric));
 
-   send_packet(iface, buf->buf, buf->wpos, );
+   send_packet(iface, ibuf_data(buf), ibuf_size(buf), );
ibuf_free(buf);
 
return (0);
@@ -210,7 +210,7 @@ send_request(struct packet_head *r_list,
delete_entry(entry->rr);
free(entry);
}
-   send_packet(iface, buf->buf, buf->wpos, );
+   send_packet(iface, ibuf_data(buf), ibuf_size(buf), );
ibuf_free(buf);
}
 
@@ -305,7 +305,7 @@ free:
if (iface->auth_type == AUTH_CRYPT)
auth_add_trailer(buf, iface);
 
-   send_packet(iface, buf->buf, buf->wpos, );
+   send_packet(iface, ibuf_data(buf), ibuf_size(buf), );
ibuf_free(buf);
}

ospf6d ibuf cleanup

2023-07-03 Thread Claudio Jeker

Like ospfd use ibuf_data() consistently, also use ibuf_size() instead of
ibuf->wpos and ibuf_left() to check if there is enough space left.

-- 
:wq Claudio

Index: lsreq.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/lsreq.c,v
retrieving revision 1.14
diff -u -p -r1.14 lsreq.c
--- lsreq.c 8 Mar 2023 04:43:14 -   1.14
+++ lsreq.c 19 Apr 2023 13:13:01 -
@@ -59,8 +59,9 @@ send_ls_req(struct nbr *nbr)
goto fail;
 
/* LSA header(s) */
-   for (le = TAILQ_FIRST(>ls_req_list); le != NULL &&
-   buf->wpos + sizeof(struct ls_req_hdr) < buf->max; le = nle) {
+   for (le = TAILQ_FIRST(>ls_req_list);
+   le != NULL && sizeof(ls_req_hdr) < ibuf_left(buf);
+   le = nle) {
nbr->ls_req = nle = TAILQ_NEXT(le, entry);
ls_req_hdr.zero = 0;
ls_req_hdr.type = le->le_lsa->type;
Index: lsupdate.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/lsupdate.c,v
retrieving revision 1.23
diff -u -p -r1.23 lsupdate.c
--- lsupdate.c  21 Jun 2023 07:45:47 -  1.23
+++ lsupdate.c  30 Jun 2023 08:10:37 -
@@ -194,7 +194,7 @@ add_ls_update(struct ibuf *buf, struct i
size_t  ageoff;
u_int16_t   age;
 
-   if (buf->wpos + len >= buf->max)
+   if (len >= ibuf_left(buf))
return (0);
 
ageoff = ibuf_size(buf);
Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/ospfe.c,v
retrieving revision 1.69
diff -u -p -r1.69 ospfe.c
--- ospfe.c 21 Jun 2023 07:45:47 -  1.69
+++ ospfe.c 30 Jun 2023 08:58:37 -
@@ -954,18 +954,18 @@ orig_rtr_lsa(struct area *area)
lsa_hdr.ls_id = 0;
lsa_hdr.adv_rtr = oeconf->rtr_id.s_addr;
lsa_hdr.seq_num = htonl(INIT_SEQ_NUM);
-   lsa_hdr.len = htons(buf->wpos);
+   lsa_hdr.len = htons(ibuf_size(buf));
lsa_hdr.ls_chksum = 0;  /* updated later */
if (ibuf_set(buf, 0, _hdr, sizeof(lsa_hdr)) == -1)
fatal("orig_rtr_lsa: ibuf_set failed");
 
-   chksum = iso_cksum(buf->buf, buf->wpos, LS_CKSUM_OFFSET);
+   chksum = iso_cksum(ibuf_data(buf), ibuf_size(buf), LS_CKSUM_OFFSET);
if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
fatal("orig_rtr_lsa: ibuf_set_n16 failed");
 
if (self)
imsg_compose_event(iev_rde, IMSG_LS_UPD, self->peerid, 0,
-   -1, buf->buf, buf->wpos);
+   -1, ibuf_data(buf), ibuf_size(buf));
else
log_warnx("orig_rtr_lsa: empty area %s",
inet_ntoa(area->id));
@@ -1018,7 +1018,7 @@ orig_net_lsa(struct iface *iface)
lsa_hdr.ls_id = htonl(iface->ifindex);
lsa_hdr.adv_rtr = oeconf->rtr_id.s_addr;
lsa_hdr.seq_num = htonl(INIT_SEQ_NUM);
-   lsa_hdr.len = htons(buf->wpos);
+   lsa_hdr.len = htons(ibuf_size(buf));
lsa_hdr.ls_chksum = 0;  /* updated later */
if (ibuf_set(buf, 0, _hdr, sizeof(lsa_hdr)) == -1)
fatal("orig_net_lsa: ibuf_set failed");
@@ -1027,12 +1027,12 @@ orig_net_lsa(struct iface *iface)
if (ibuf_set(buf, sizeof(lsa_hdr), _net, sizeof(lsa_net)) == -1)
fatal("orig_net_lsa: ibuf_set failed");
 
-   chksum = iso_cksum(buf->buf, buf->wpos, LS_CKSUM_OFFSET);
+   chksum = iso_cksum(ibuf_data(buf), ibuf_size(buf), LS_CKSUM_OFFSET);
if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
fatal("orig_net_lsa: ibuf_set_n16 failed");
 
imsg_compose_event(iev_rde, IMSG_LS_UPD, iface->self->peerid, 0,
-   -1, buf->buf, buf->wpos);
+   -1, ibuf_data(buf), ibuf_size(buf));
 
ibuf_free(buf);
 }
@@ -1116,17 +1116,17 @@ orig_link_lsa(struct iface *iface)
lsa_hdr.ls_id = htonl(iface->ifindex);
lsa_hdr.adv_rtr = oeconf->rtr_id.s_addr;
lsa_hdr.seq_num = htonl(INIT_SEQ_NUM);
-   lsa_hdr.len = htons(buf->wpos);
+   lsa_hdr.len = htons(ibuf_size(buf));
lsa_hdr.ls_chksum = 0;  /* updated later */
if (ibuf_set(buf, 0, _hdr, sizeof(lsa_hdr)) == -1)
fatal("orig_link_lsa: ibuf_set failed");
 
-   chksum = iso_cksum(buf->buf, buf->wpos, LS_CKSUM_OFFSET);
+   chksum = iso_cksum(ibuf_data(buf), ibuf_size(buf), LS_CKSUM_OFFSET);
if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
fatal("orig_link_lsa: ibuf_set_n16 failed");
 
imsg_compose_event(iev_rde, IMSG_LS_UPD, iface->self->peerid, 0,
-   -1, buf->buf, buf->wpos);
+   -1, ibuf_data(buf), ibuf_size(buf));
 
ibuf_free(buf);
 }
Index: packet.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/packet.c,v
retrieving revision 1.21
diff -u -p -r1.21 packet.c
---

ospfd more ibuf cleanup

2023-07-03 Thread Claudio Jeker

Similar to the relayd diff use ibuf_data instead of ibuf->buf.

-- 
:wq Claudio

Index: auth.c
===
RCS file: /cvs/src/usr.sbin/ospfd/auth.c,v
retrieving revision 1.21
diff -u -p -r1.21 auth.c
--- auth.c  20 Jun 2023 15:19:55 -  1.21
+++ auth.c  30 Jun 2023 08:56:56 -
@@ -154,13 +154,13 @@ auth_gen(struct ibuf *buf, struct iface 
 
switch (iface->auth_type) {
case AUTH_NONE:
-   chksum = in_cksum(buf->buf, ibuf_size(buf));
+   chksum = in_cksum(ibuf_data(buf), ibuf_size(buf));
if (ibuf_set(buf, offsetof(struct ospf_hdr, chksum),
, sizeof(chksum)) == -1)
fatalx("auth_gen: ibuf_set failed");
break;
case AUTH_SIMPLE:
-   chksum = in_cksum(buf->buf, ibuf_size(buf));
+   chksum = in_cksum(ibuf_data(buf), ibuf_size(buf));
if (ibuf_set(buf, offsetof(struct ospf_hdr, chksum),
, sizeof(chksum)) == -1)
fatalx("auth_gen: ibuf_set failed");
@@ -193,7 +193,7 @@ auth_gen(struct ibuf *buf, struct iface 
 
/* calculate MD5 digest */
MD5Init();
-   MD5Update(, buf->buf, ibuf_size(buf));
+   MD5Update(, ibuf_data(buf), ibuf_size(buf));
MD5Update(, digest, MD5_DIGEST_LENGTH);
MD5Final(digest, );
 
Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospfd/ospfe.c,v
retrieving revision 1.112
diff -u -p -r1.112 ospfe.c
--- ospfe.c 20 Jun 2023 15:19:55 -  1.112
+++ ospfe.c 30 Jun 2023 08:55:44 -
@@ -1099,13 +1099,13 @@ orig_rtr_lsa(struct area *area)
if (ibuf_set(buf, 0, _hdr, sizeof(lsa_hdr)) == -1)
fatal("orig_rtr_lsa: ibuf_set failed");
 
-   chksum = iso_cksum(buf->buf, ibuf_size(buf), LS_CKSUM_OFFSET);
+   chksum = iso_cksum(ibuf_data(buf), ibuf_size(buf), LS_CKSUM_OFFSET);
if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
fatal("orig_rtr_lsa: ibuf_set_n16 failed");
 
if (self && num_links)
imsg_compose_event(iev_rde, IMSG_LS_UPD, self->peerid, 0,
-   -1, buf->buf, ibuf_size(buf));
+   -1, ibuf_data(buf), ibuf_size(buf));
else
log_warnx("orig_rtr_lsa: empty area %s",
inet_ntoa(area->id));
@@ -1165,12 +1165,12 @@ orig_net_lsa(struct iface *iface)
if (ibuf_set(buf, 0, _hdr, sizeof(lsa_hdr)) == -1)
fatal("orig_net_lsa: ibuf_set failed");
 
-   chksum = iso_cksum(buf->buf, ibuf_size(buf), LS_CKSUM_OFFSET);
+   chksum = iso_cksum(ibuf_data(buf), ibuf_size(buf), LS_CKSUM_OFFSET);
if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
fatal("orig_net_lsa: ibuf_set_n16 failed");
 
imsg_compose_event(iev_rde, IMSG_LS_UPD, iface->self->peerid, 0,
-   -1, buf->buf, ibuf_size(buf));
+   -1, ibuf_data(buf), ibuf_size(buf));
 
ibuf_free(buf);
 }
Index: packet.c
===
RCS file: /cvs/src/usr.sbin/ospfd/packet.c,v
retrieving revision 1.36
diff -u -p -r1.36 packet.c
--- packet.c3 Nov 2021 21:40:03 -   1.36
+++ packet.c7 Nov 2021 11:14:35 -
@@ -85,7 +85,7 @@ send_packet(struct iface *iface, struct 
bzero(, sizeof(msg));
iov[0].iov_base = _hdr;
iov[0].iov_len = sizeof(ip_hdr);
-   iov[1].iov_base = buf->buf;
+   iov[1].iov_base = ibuf_data(buf);
iov[1].iov_len = ibuf_size(buf);
msg.msg_name = dst;
msg.msg_namelen = sizeof(*dst);

more relayd ibuf cleanup

2023-07-03 Thread Claudio Jeker

This is just some additional ibuf cleanup in relayd.
Mostly use ibuf_data() instead of direct access to ibuf->buf.

-- 
:wq Claudio

Index: check_tcp.c
===
RCS file: /cvs/src/usr.sbin/relayd/check_tcp.c,v
retrieving revision 1.60
diff -u -p -r1.60 check_tcp.c
--- check_tcp.c 21 Jun 2023 07:54:54 -  1.60
+++ check_tcp.c 30 Jun 2023 08:52:36 -
@@ -214,7 +214,7 @@ tcp_send_req(int s, short event, void *a
req = ibuf_data(cte->table->sendbinbuf);
log_debug("%s: table %s sending binary", __func__,
cte->table->conf.name);
-   print_hex(cte->table->sendbinbuf->buf, 0, len);
+   print_hex(req, 0, len);
} else {
len = strlen(cte->table->sendbuf);
req = cte->table->sendbuf;
@@ -323,7 +323,7 @@ check_send_expect(struct ctl_tcp_event *
/*
 * ensure string is nul-terminated.
 */
-   b = strndup(cte->buf->buf, ibuf_size(cte->buf));
+   b = strndup(ibuf_data(cte->buf), ibuf_size(cte->buf));
if (b == NULL)
fatal("out of memory");
if (fnmatch(cte->table->conf.exbuf, b, 0) == 0) {
Index: check_tls.c
===
RCS file: /cvs/src/usr.sbin/relayd/check_tls.c,v
retrieving revision 1.2
diff -u -p -r1.2 check_tls.c
--- check_tls.c 15 Sep 2019 19:23:29 -  1.2
+++ check_tls.c 3 Sep 2021 15:58:41 -
@@ -104,10 +104,10 @@ check_tls_write(int s, short event, void
 
if (cte->table->sendbinbuf != NULL) {
len = ibuf_size(cte->table->sendbinbuf);
-   buf = cte->table->sendbinbuf->buf;
+   buf = ibuf_data(cte->table->sendbinbuf);
log_debug("%s: table %s sending binary", __func__,
cte->table->conf.name);
-   print_hex(cte->table->sendbinbuf->buf, 0, len);
+   print_hex(buf, 0, len);
} else {
len = strlen(cte->table->sendbuf);
buf = cte->table->sendbuf;
Index: parse.y
===
RCS file: /cvs/src/usr.sbin/relayd/parse.y,v
retrieving revision 1.253
diff -u -p -r1.253 parse.y
--- parse.y 15 Oct 2021 15:01:28 -  1.253
+++ parse.y 23 Oct 2021 15:16:33 -
@@ -996,7 +996,7 @@ tablecheck  : ICMP  { table->conf.check 
free($5);
YYERROR;
}
-   memcpy(table->conf.exbinbuf, ibuf->buf,
+   memcpy(table->conf.exbinbuf, ibuf_data(ibuf),
ibuf_size(ibuf));
ibuf_free(ibuf);
free($5);

Re: lo(4) loopback LRO and TSO

2023-07-02 Thread Claudio Jeker

On Sun, Jul 02, 2023 at 02:28:17PM +0200, Alexander Bluhm wrote:
> anyone?

Was not able to test yet but I like the diff.
Right now this is a noop since LRO is not on by default for lo(4).
Because of that OK claudio@
 
> On Fri, Jun 23, 2023 at 06:06:16PM +0200, Alexander Bluhm wrote:
> > Hi,
> > 
> > Claudio@ mentioned the idea to use TSO and LRO on the loopback
> > interface to transfer TCP faster.
> > 
> > I see a performance effect with this diff, but more importantly it
> > gives us more test coverage.  Currently LRO on lo(4) is default
> > off.
> > 
> > Future plan is:
> > - Fix some corner cases for LRO/TSO with TCP path-MTU discovery
> >   and IP forwarding when LRO is enabled.
> > - Enable LRO/TSO for lo(4) and ix(4) per default.
> > - Jan@ commits his ixl(4) TSO diff.
> > 
> > ok for lo(4) LRO/TSO with default off?
> > 
> > bluhm
> > 
> > Index: sys/net/if.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> > retrieving revision 1.700
> > diff -u -p -r1.700 if.c
> > --- sys/net/if.c12 Jun 2023 21:19:54 -  1.700
> > +++ sys/net/if.c23 Jun 2023 15:48:27 -
> > @@ -106,6 +106,9 @@
> >  #ifdef MROUTING
> >  #include 
> >  #endif
> > +#include 
> > +#include 
> > +#include 
> >  
> >  #ifdef INET6
> >  #include 
> > @@ -802,12 +805,29 @@ if_input_local(struct ifnet *ifp, struct
> >  * is now incorrect, will be calculated before sending.
> >  */
> > keepcksum = m->m_pkthdr.csum_flags & (M_IPV4_CSUM_OUT |
> > -   M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT);
> > +   M_TCP_CSUM_OUT | M_UDP_CSUM_OUT | M_ICMP_CSUM_OUT |
> > +   M_TCP_TSO);
> > m_resethdr(m);
> > m->m_flags |= M_LOOP | keepflags;
> > m->m_pkthdr.csum_flags = keepcksum;
> > m->m_pkthdr.ph_ifidx = ifp->if_index;
> > m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
> > +
> > +   if (ISSET(keepcksum, M_TCP_TSO) && m->m_pkthdr.len > ifp->if_mtu) {
> > +   if (ifp->if_mtu > 0 &&
> > +   ((af == AF_INET &&
> > +   ISSET(ifp->if_capabilities, IFCAP_TSOv4)) ||
> > +   (af == AF_INET6 &&
> > +   ISSET(ifp->if_capabilities, IFCAP_TSOv6 {
> > +   tcpstat_inc(tcps_inswlro);
> > +   tcpstat_add(tcps_inpktlro,
> > +   (m->m_pkthdr.len + ifp->if_mtu - 1) / ifp->if_mtu);
> > +   } else {
> > +   tcpstat_inc(tcps_inbadlro);
> > +   m_freem(m);
> > +   return (EPROTONOSUPPORT);
> > +   }
> > +   }
> >  
> > if (ISSET(keepcksum, M_TCP_CSUM_OUT))
> > m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
> > Index: sys/net/if_loop.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_loop.c,v
> > retrieving revision 1.94
> > diff -u -p -r1.94 if_loop.c
> > --- sys/net/if_loop.c   5 Jun 2023 11:35:46 -   1.94
> > +++ sys/net/if_loop.c   23 Jun 2023 15:48:27 -
> > @@ -175,7 +175,8 @@ loop_clone_create(struct if_clone *ifc, 
> > ifp->if_xflags = IFXF_CLONED;
> > ifp->if_capabilities = IFCAP_CSUM_IPv4 |
> > IFCAP_CSUM_TCPv4 | IFCAP_CSUM_UDPv4 |
> > -   IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6;
> > +   IFCAP_CSUM_TCPv6 | IFCAP_CSUM_UDPv6 |
> > +   IFCAP_LRO;
> > ifp->if_rtrequest = lortrequest;
> > ifp->if_ioctl = loioctl;
> > ifp->if_input = loinput;
> > @@ -281,6 +282,10 @@ loioctl(struct ifnet *ifp, u_long cmd, c
> >  
> > switch (cmd) {
> > case SIOCSIFFLAGS:
> > +   if (ISSET(ifp->if_xflags, IFXF_LRO))
> > +   SET(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
> > +   else
> > +   CLR(ifp->if_capabilities, IFCAP_TSOv4 | IFCAP_TSOv6);
> > break;
> >  
> > case SIOCSIFADDR:
> > Index: sys/netinet/tcp_usrreq.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_usrreq.c,v
> > retrieving revision 1.219
> > diff -u -p -r1.219 tcp_usrreq.c
> > --- sys/netinet/tcp_usrreq.c23 May 2023 09:16:16 -  1.219
> > +++ sys/netinet/tcp_usrreq.c23 Jun 2023 15:48:27 -
> > @@ -1340,6 +1340,7 @@ tcp_sysctl_tcpstat(void *oldp, size_t *o
> > ASSIGN(tcps_outhwtso);
> > ASSIGN(tcps_outpkttso);
> > ASSIGN(tcps_outbadtso);
> > +   ASSIGN(tcps_inswlro);
> > ASSIGN(tcps_inhwlro);
> > ASSIGN(tcps_inpktlro);
> > ASSIGN(tcps_inbadlro);
> > Index: sys/netinet/tcp_var.h
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v
> > retrieving revision 1.167
> > diff -u -p -r1.167 tcp_var.h
> > --- sys/netinet/tcp_var.h   23 May 2023 09:16:16 -  1.167
> > +++ sys/netinet/tcp_var.h   23 Jun 2023 15:48:27 -
> > @@ -447,6 +447,7 @@ struct  tcpstat {
> >

Re: bgpd: rewrite code to send UPDATE messages

2023-06-29 Thread Claudio Jeker

On Thu, Jun 29, 2023 at 06:25:01PM +0200, Theo Buehler wrote:
> On Tue, Jun 27, 2023 at 12:10:21PM +0200, Claudio Jeker wrote:
> > Sorry, this diff is a monster, but it kind of turned into an all-or-nothing
> > situation.
> 
> Frankly, it's not that bad.
> 
> > For a long time bgpd used a static 4k buffer plus a len argument to build
> > updates. The result was ok but was also very fragile.
> > With the new ibuf API all of this can be rewritten and the result is IMO
> > already a lot cleaner (mainly because a lot of code duplication could be
> > removed).
> > 
> > Some bits I'm not super happy with are community_writebuf() and
> > pt_writebuf(). Both functions are a bit too complex to my taste but for
> > different reasons. pt_writebuf() needs to be transactional (either all or
> > nothing) since we call pt_writebuf() until there is no more space.
> > community_writebuf() does maybe too much at once.
> 
> It is a big step in the right direction. It's great. There's always time for
> more passes and cleanup. I agree that both these functions are a bit on the
> complex side, but compared to the code they replace...
> 
> > As mentioned this is a major rewrite, I did run this through regress and
> > also on a few personal systems but I'm unable to test all possible cases.
> > Please try this out and report back.
> 
> I need to make more passes over this, but here's a first round of feedback.
> Some small suggestions and an (already existing) leak in up_generate_attr()
> inline.
> 
> (There's the usual bit of "return (foo)" vs "return foo" being mixed in
> the same function. I refrained from making comments.)

I fixed some return (-1) that should be return -1; I prefer return
statements without () for simple things like -1 or 0.
 
> > -- 
> > :wq Claudio
> > 
> > Index: mrt.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
> > retrieving revision 1.114
> > diff -u -p -r1.114 mrt.c
> > --- mrt.c   19 Apr 2023 09:03:00 -  1.114
> > +++ mrt.c   27 Jun 2023 08:33:54 -

> > @@ -678,7 +680,7 @@ mrt_dump_entry_v2_rib(struct rib_entry *
> > }
> > len = ibuf_size(tbuf);
> > DUMP_SHORT(buf, (uint16_t)len);
> 
> I assume the DUMP_* macros will be dealt with in a subsequent step?

I kind of ignored mrt.c apart from fixing just the most obvious issues.
I agree that DUMP_* macros will all be changed.
 
> > -   if (ibuf_add(buf, tbuf->buf, len) == -1) {
> > +   if (ibuf_add_buf(buf, tbuf) == -1) {
> > log_warn("%s: ibuf_add error", __func__);
> 
> ibuf_add_buf error

Fixed, and all others as well in mrt.c.

> > Index: rde_community.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
> > retrieving revision 1.12
> > diff -u -p -r1.12 rde_community.c
> > --- rde_community.c 17 Jun 2023 08:05:48 -  1.12
> > +++ rde_community.c 27 Jun 2023 09:37:35 -
> > @@ -225,10 +225,9 @@ insert_community(struct rde_community *c
> > struct community *new;
> > int newsize = comm->size + 8;
> >  
> > -   if ((new = reallocarray(comm->communities, newsize,
> > -   sizeof(struct community))) == NULL)
> > +   if ((new = recallocarray(comm->communities, comm->size,
> > +   newsize, sizeof(struct community))) == NULL)
> > fatal(__func__);
> > -   memset(new + comm->size, 0, 8 * sizeof(struct community));
> 
> Since you fatal on failure, new doesn't really add anything

I prefer to always use the extra variable.
 
> > comm->communities = new;
> > comm->size = newsize;
> > }
> > +   case ATTR_EXT_COMMUNITIES:
> > +   if (comm->flags & PARTIAL_EXT_COMMUNITIES)
> > +   flags |= ATTR_PARTIAL;
> > +   size = 8;
> > +   t = COMMUNITY_TYPE_EXT;
> > +   break;
> > +   case ATTR_LARGE_COMMUNITIES:
> > +   if (comm->flags & PARTIAL_LARGE_COMMUNITIES)
> > +   flags |= ATTR_PARTIAL;
> > +   size = 12;
> > +   t = COMMUNITY_TYPE_LARGE;
> > +   break;
> > +   default:
> > return -1;
> 
> 
> I wonder if it is worth pulling the flags handling out of the switch by adding
> 
>   int is_partial = PARTIAL_COMMUNITIES | PARTIAL_EXT_COMMUNITIES |
>

Re: rpki-client: fix aspa provider stats

2023-06-29 Thread Claudio Jeker

On Thu, Jun 29, 2023 at 04:16:32PM +0200, Theo Buehler wrote:
> A thinko slipped through review in aspa.c which broke the ASPA counters.
> Use only one counter and call it STYPE_PROVIDERS.
> 
> Is it ok to switch from "both" to "valid" in output-ometric.c for
> consistency with the other products or do we need to maintain compat for
> some parsers?

No, I think renaming is fine. It is just the value of the label that is
changing. Now I would not use "valid" but instead "total" since we use
"total" for roa and aspa as well.

Apart from that the diff is OK claudio
 
> output before and after my diff:
> 
> -rpki_client_objects{type="vap providers",state="both"} 0
> -rpki_client_objects{type="vap providers",state="IPv4 only"} 0
> -rpki_client_objects{type="vap providers",state="IPv6 only"} 0
> +rpki_client_objects{type="vap providers",state="valid"} 4
> 
> Index: aspa.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/aspa.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 aspa.c
> --- aspa.c28 Jun 2023 17:24:20 -  1.20
> +++ aspa.c29 Jun 2023 14:11:46 -
> @@ -369,7 +369,7 @@ aspa_insert_vaps(struct vap_tree *tree, 
>   aspa->providers[i] < v->providers[j]) {
>   /* merge provider from aspa into v */
>   repo_stat_inc(rp, v->talid, RTYPE_ASPA,
> - STYPE_BOTH + aspa->providers[i]);
> + STYPE_PROVIDERS);
>   insert_vap(v, j, >providers[i]);
>   i++;
>   } else if (aspa->providers[i] == v->providers[j])
> Index: extern.h
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/extern.h,v
> retrieving revision 1.187
> diff -u -p -r1.187 extern.h
> --- extern.h  29 Jun 2023 10:28:25 -  1.187
> +++ extern.h  29 Jun 2023 14:11:47 -
> @@ -530,9 +530,7 @@ enum stype {
>   STYPE_TOTAL,
>   STYPE_UNIQUE,
>   STYPE_DEC_UNIQUE,
> - STYPE_BOTH,
> - STYPE_ONLY_IPV4,
> - STYPE_ONLY_IPV6,
> + STYPE_PROVIDERS,
>  };
>  
>  struct repo;
> @@ -562,8 +560,6 @@ struct repotalstats {
>   uint32_t vaps; /* total number of Validated ASPA Payloads */
>   uint32_t vaps_uniqs; /* total number of unique VAPs */
>   uint32_t vaps_pas; /* total number of providers */
> - uint32_t vaps_pas4; /* total number of IPv4 only providers */
> - uint32_t vaps_pas6; /* total number of IPv6 only providers */
>   uint32_t vrps; /* total number of Validated ROA Payloads */
>   uint32_t vrps_uniqs; /* number of unique vrps */
>  };
> Index: main.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
> retrieving revision 1.243
> diff -u -p -r1.243 main.c
> --- main.c23 Jun 2023 11:36:24 -  1.243
> +++ main.c29 Jun 2023 14:11:47 -
> @@ -750,8 +750,6 @@ sum_stats(const struct repo *rp, const s
>   out->vaps += in->vaps;
>   out->vaps_uniqs += in->vaps_uniqs;
>   out->vaps_pas += in->vaps_pas;
> - out->vaps_pas4 += in->vaps_pas4;
> - out->vaps_pas6 += in->vaps_pas6;
>  }
>  
>  static void
> Index: output-ometric.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/output-ometric.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 output-ometric.c
> --- output-ometric.c  5 Jun 2023 14:19:13 -   1.4
> +++ output-ometric.c  29 Jun 2023 14:11:47 -
> @@ -83,11 +83,7 @@ set_common_stats(const struct repotalsta
>   ometric_set_int_with_labels(metric, in->vaps_uniqs,
>   OKV("type", "state"), OKV("vap", "unique"), ol);
>   ometric_set_int_with_labels(metric, in->vaps_pas,
> - OKV("type", "state"), OKV("vap providers", "both"), ol);
> - ometric_set_int_with_labels(metric, in->vaps_pas4,
> - OKV("type", "state"), OKV("vap providers", "IPv4 only"), ol);
> - ometric_set_int_with_labels(metric, in->vaps_pas6,
> - OKV("type", "state"), OKV("vap providers", "IPv6 only"), ol);
> + OKV("type", "state"), OKV("vap providers", "valid"), ol);
>  }
>  
>  static void
> Index: repo.c
> ===
> RCS file: /cvs/src/usr.sbin/rpki-client/repo.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 repo.c
> --- repo.c29 Jun 2023 14:09:42 -  1.49
> +++ repo.c29 Jun 2023 14:11:47 -
> @@ -1473,14 +1473,8 @@ repo_stat_inc(struct repo *rp, int talid
>   case STYPE_DEC_UNIQUE:
>   rp->stats[talid].vaps_uniqs--;
>   break;
> - case STYPE_BOTH:
> + case STYPE_PROVIDERS:
>   rp->stats[talid].vaps_pas++;
> - break;
> - case STYPE_ONLY_IPV4:
> -

relayd: fix route handling for IPv6

2023-06-29 Thread Claudio Jeker

Once again struct sockaddr_in6 causes 64bit systems to cry. This time in
relayd. You cannot statically set up a route message and think it will
work. All our routing daemons switched to iov for building the route
message out of various components. This diff does the same for relayd.
With this it is possible to use router blocks with IPv6 addrs.

Btw. this does not work with link local addressing but I do not care
about that dumpster fire.
-- 
:wq Claudio

Index: pfe_route.c
===
RCS file: /cvs/src/usr.sbin/relayd/pfe_route.c,v
retrieving revision 1.12
diff -u -p -r1.12 pfe_route.c
--- pfe_route.c 28 May 2017 10:39:15 -  1.12
+++ pfe_route.c 29 Jun 2023 12:55:59 -
@@ -19,12 +19,14 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 #include 
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -32,24 +34,6 @@
 
 #include "relayd.h"
 
-struct relay_rtmsg {
-   struct rt_msghdrrm_hdr;
-   union {
-   struct {
-   struct sockaddr_in  rm_dst;
-   struct sockaddr_in  rm_gateway;
-   struct sockaddr_in  rm_netmask;
-   struct sockaddr_rtlabel rm_label;
-   }u4;
-   struct {
-   struct sockaddr_in6 rm_dst;
-   struct sockaddr_in6 rm_gateway;
-   struct sockaddr_in6 rm_netmask;
-   struct sockaddr_rtlabel rm_label;
-   }u6;
-   }rm_u;
-};
-
 void
 init_routes(struct relayd *env)
 {
@@ -103,110 +87,97 @@ sync_routes(struct relayd *env, struct r
}
 }
 
+static void
+pfe_apply_prefixlen(struct sockaddr_storage *ss, int af, int len)
+{
+   int q, r, off;
+   uint8_t *b = (uint8_t *)ss;
+
+q = len >> 3;
+   r = len & 7;
+
+   bzero(ss, sizeof(*ss));
+   ss->ss_family = af;
+   switch (af) {
+   case AF_INET:
+   ss->ss_len = sizeof(struct sockaddr_in);
+   off = offsetof(struct sockaddr_in, sin_addr);
+   break;
+   case AF_INET6:
+   ss->ss_len = sizeof(struct sockaddr_in6);
+   off = offsetof(struct sockaddr_in6, sin6_addr);
+   break;
+   default:
+   fatal("%s: invalid address family", __func__);
+   }
+   if (q > 0)
+   memset(b + off, 0xff, q);
+   if (r > 0)
+   b[off + q] = (0xff00 >> r) & 0xff;
+}
+
+#define ROUNDUP(a) \
+   ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+
 int
 pfe_route(struct relayd *env, struct ctl_netroute *crt)
 {
-   struct relay_rtmsg   rm;
-   struct sockaddr_rtlabel  sr;
-   struct sockaddr_storage *gw;
-   struct sockaddr_in  *s4;
-   struct sockaddr_in6 *s6;
-   size_t   len = 0;
+   struct iovec iov[5];
+   struct rt_msghdr hdr;
+   struct sockaddr_storage  dst, gw, mask, label;
+   struct sockaddr_rtlabel *sr = (struct sockaddr_rtlabel *)
+   int  iovcnt = 0;
char*gwname;
-   int  i = 0;
 
-   gw = >host.ss;
-   gwname = crt->host.name;
+   bzero(, sizeof(hdr));
+   hdr.rtm_msglen = sizeof(hdr);
+   hdr.rtm_version = RTM_VERSION;
+   hdr.rtm_type = HOST_ISUP(crt->up) ? RTM_ADD : RTM_DELETE;
+   hdr.rtm_flags = RTF_STATIC | RTF_GATEWAY | RTF_MPATH;
+   hdr.rtm_seq = env->sc_rtseq++;
+   hdr.rtm_addrs = RTA_DST | RTA_GATEWAY | RTA_NETMASK;
+   hdr.rtm_tableid = crt->rt.rtable;
+   hdr.rtm_priority = crt->host.priority;
 
-   bzero(, sizeof(rm));
-   bzero(, sizeof(sr));
+   iov[iovcnt].iov_base = 
+   iov[iovcnt++].iov_len = sizeof(hdr);
+
+   dst = crt->nr.ss;
+   gw = crt->host.ss;
+   gwname = crt->host.name;
+   pfe_apply_prefixlen(, dst.ss_family, crt->nr.prefixlen);
 
-   rm.rm_hdr.rtm_msglen = len;
-   rm.rm_hdr.rtm_version = RTM_VERSION;
-   rm.rm_hdr.rtm_type = HOST_ISUP(crt->up) ? RTM_ADD : RTM_DELETE;
-   rm.rm_hdr.rtm_flags = RTF_STATIC | RTF_GATEWAY | RTF_MPATH;
-   rm.rm_hdr.rtm_seq = env->sc_rtseq++;
-   rm.rm_hdr.rtm_addrs = RTA_DST | RTA_GATEWAY;
-   rm.rm_hdr.rtm_tableid = crt->rt.rtable;
-   rm.rm_hdr.rtm_priority = crt->host.priority;
+   iov[iovcnt].iov_base = 
+   iov[iovcnt++].iov_len = ROUNDUP(dst.ss_len);
+   hdr.rtm_msglen += ROUNDUP(dst.ss_len);
+
+   iov[iovcnt].iov_base = 
+   iov[iovcnt++].iov_len = ROUNDUP(gw.ss_len);
+   hdr.rtm_msglen += ROUNDUP(gw.ss_len);
+
+   iov[iovcnt].iov_base = 
+   iov[iovcnt++].iov_len = ROUNDUP(mask.ss_len);
+

Re: rpki-client: retire log.c

2023-06-29 Thread Claudio Jeker

On Thu, Jun 29, 2023 at 09:12:06AM +, Job Snijders wrote:
> On Thu, Jun 29, 2023 at 09:30:19AM +0200, Theo Buehler wrote:
> > I wrote versions of this diff several times in the past but never sent
> > it out. A question by claudio encouraged me...
> > 
> > cryptowarnx() and cryptoerrx() fail at showing openssl error stacks
> > in a pleasant way as no amount of lipstick can make this pig pretty.
> > I don't think these stacks should be shown to the user and they are
> > not a real debugging aid either as I don't recall that this ever made
> > things any easier for me.
> > 
> > This mechanically replaces cryptowarnx() with warnx() and cryptoerrx()
> > with either errx(1, ...) or err(1, ...) depending on whether the only
> > possible error is allocation failure as that might give a useful hint.
> > 
> > Regress will need a trivial diff and I'll send a PR to portable once
> > this is in.
> 
> OK job@
 
and OK claudio@
-- 
:wq Claudio

Re: btrace(8) allow to store kstack in a map

2023-06-27 Thread Claudio Jeker

On Mon, Jun 26, 2023 at 08:58:47PM +, Klemens Nanni wrote:
> On Mon, Jun 26, 2023 at 10:52:20PM +0200, Claudio Jeker wrote:
> > count() is strange since it only works on maps (at least from what I
> > figured out). I need to double check how min() and max() work. Since the
> > usage also seems non-intuitive.
> > 
> > I find the documentation of bt(5) rather weak. So more is needed for sure.
> 
> Agreed, I'm fine with your diff as-is and improving docs later.
 
Was this an OK for all of the diff?

Btw. I think this is a better version of the bt.5 change. I moved the
map-specific functions into their own section.

-- 
:wq Claudio

Index: bt.5
===
RCS file: /cvs/src/usr.sbin/btrace/bt.5,v
retrieving revision 1.14
diff -u -p -r1.14 bt.5
--- bt.531 Mar 2022 17:27:29 -  1.14
+++ bt.527 Jun 2023 13:25:54 -
@@ -111,8 +111,8 @@ Full name of the probe.
 Return value of the traced syscall.
 .It Va tid
 Thread ID of the current thread.
-.\".It Va ustack
-.\"Userland stack of the current thread.
+.It Va ustack
+Userland stack of the current thread.
 .El
 .Pp
 Functions:
@@ -141,10 +141,6 @@ and
 with buckets of
 .Va step
 size.
-.It Fn max
-Returns the maximum recorded value.
-.It Fn min
-Returns the minimum recorded value.
 .It Fn print "@map"
 Print all pairs from
 .Va @map .
@@ -162,8 +158,6 @@ Return the string from argument
 truncated to
 .Va index
 characters (up to 64, the default) including a guaranteed NUL-terminator.
-.It Fn sum
-Returns the sum of all recorded values.
 .It Fn time timefmt
 Print timestamps using
 .Xr strftime 3 .
@@ -171,6 +165,25 @@ Print timestamps using
 Set all values from
 .Va @map
 to 0.
+.El
+.Pp
+Operation on maps:
+.Pp
+The following functions only work on a specific map entry.
+.Bl -tag -width "count"
+.It "@map[key]" = Fn count
+Increase the stored value for
+.Va key
+by one.
+.It "@map[key]" = Fn max "value"
+Store the maximum recorded value for
+.Va key .
+.It "@map[key]" = Fn min "value"
+Store the minimum recorded value for
+.Va key .
+.It "@map[key]" = Fn sum "value"
+Store the sum of all recorded values for
+.Va key .
 .El
 .Sh SEE ALSO
 .Xr awk 1 ,

Re: Introduce M_IFGROUP type of memory allocation

2023-06-27 Thread Claudio Jeker

On Tue, Jun 27, 2023 at 11:09:32AM +, Klemens Nanni wrote:
> On Tue, Jun 27, 2023 at 01:32:37PM +0300, Vitaliy Makkoveev wrote:
> > M_TEMP seems unreasonable for interface groups data allocations.
> 
> After claudio pointed out the wrong type, I thought of the same name,
> no other malloc(9) type fits.
> 
> FWIW OK kn, but please wait for other to chime in.

Also OK claudio@; the abuse of M_TEMP must stop.
 
> > 
> > Don't forget to recompile systat(1) and vmstat(8) with new sys/malloc.h.
> > 
> > Index: sys/net/if.c
> > ===
> > RCS file: /cvs/src/sys/net/if.c,v
> > retrieving revision 1.700
> > diff -u -p -r1.700 if.c
> > --- sys/net/if.c12 Jun 2023 21:19:54 -  1.700
> > +++ sys/net/if.c27 Jun 2023 10:15:12 -
> > @@ -2784,7 +2784,7 @@ if_creategroup(const char *groupname)
> >  {
> > struct ifg_group*ifg;
> >  
> > -   if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
> > +   if ((ifg = malloc(sizeof(*ifg), M_IFGROUP, M_NOWAIT)) == NULL)
> > return (NULL);
> >  
> > strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
> > @@ -2819,11 +2819,11 @@ if_addgroup(struct ifnet *ifp, const cha
> > if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
> > return (EEXIST);
> >  
> > -   if ((ifgl = malloc(sizeof(*ifgl), M_TEMP, M_NOWAIT)) == NULL)
> > +   if ((ifgl = malloc(sizeof(*ifgl), M_IFGROUP, M_NOWAIT)) == NULL)
> > return (ENOMEM);
> >  
> > -   if ((ifgm = malloc(sizeof(*ifgm), M_TEMP, M_NOWAIT)) == NULL) {
> > -   free(ifgl, M_TEMP, sizeof(*ifgl));
> > +   if ((ifgm = malloc(sizeof(*ifgm), M_IFGROUP, M_NOWAIT)) == NULL) {
> > +   free(ifgl, M_IFGROUP, sizeof(*ifgl));
> > return (ENOMEM);
> > }
> >  
> > @@ -2834,8 +2834,8 @@ if_addgroup(struct ifnet *ifp, const cha
> > if (ifg == NULL) {
> > ifg = if_creategroup(groupname);
> > if (ifg == NULL) {
> > -   free(ifgl, M_TEMP, sizeof(*ifgl));
> > -   free(ifgm, M_TEMP, sizeof(*ifgm));
> > +   free(ifgl, M_IFGROUP, sizeof(*ifgl));
> > +   free(ifgm, M_IFGROUP, sizeof(*ifgm));
> > return (ENOMEM);
> > }
> > } else
> > @@ -2878,7 +2878,7 @@ if_delgroup(struct ifnet *ifp, const cha
> >  
> > if (ifgm != NULL) {
> > TAILQ_REMOVE(>ifgl_group->ifg_members, ifgm, ifgm_next);
> > -   free(ifgm, M_TEMP, sizeof(*ifgm));
> > +   free(ifgm, M_IFGROUP, sizeof(*ifgm));
> > }
> >  
> >  #if NPF > 0
> > @@ -2891,10 +2891,10 @@ if_delgroup(struct ifnet *ifp, const cha
> >  #if NPF > 0
> > pfi_detach_ifgroup(ifgl->ifgl_group);
> >  #endif
> > -   free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
> > +   free(ifgl->ifgl_group, M_IFGROUP, sizeof(*ifgl->ifgl_group));
> > }
> >  
> > -   free(ifgl, M_TEMP, sizeof(*ifgl));
> > +   free(ifgl, M_IFGROUP, sizeof(*ifgl));
> >  
> > return (0);
> >  }
> > Index: sys/sys/malloc.h
> > ===
> > RCS file: /cvs/src/sys/sys/malloc.h,v
> > retrieving revision 1.122
> > diff -u -p -r1.122 malloc.h
> > --- sys/sys/malloc.h3 Feb 2022 17:18:22 -   1.122
> > +++ sys/sys/malloc.h27 Jun 2023 10:15:13 -
> > @@ -72,7 +72,7 @@
> >  /* 7 - free */
> >  /* 8 - free */
> >  #defineM_IFADDR9   /* interface address */
> > -/* 10 - free */
> > +#define M_IFGROUP  10  /* interface group */
> >  #defineM_SYSCTL11  /* sysctl buffers (persistent storage) 
> > */
> >  #defineM_COUNTERS  12  /* per CPU counters */
> >  /* 13 - free */
> > @@ -190,7 +190,7 @@
> > NULL, \
> > NULL, \
> > "ifaddr",   /* 9 M_IFADDR */ \
> > -   NULL, \
> > +   "ifgroup",  /* 10 M_IFGROUP */ \
> > "sysctl",   /* 11 M_SYSCTL */ \
> > "counters", /* 12 M_COUNTERS */ \
> > NULL, \
> > 
> 

-- 
:wq Claudio

bgpd: rewrite code to send UPDATE messages

2023-06-27 Thread Claudio Jeker

Sorry, this diff is a monster, but it kind of turned into an all-or-nothing
situation.

For a long time bgpd used a static 4k buffer plus a len argument to build
updates. The result was ok but was also very fragile.
With the new ibuf API all of this can be rewritten and the result is IMO
already a lot cleaner (mainly because a lot of code duplication could be
removed).

Some bits I'm not super happy with are community_writebuf() and
pt_writebuf(). Both functions are a bit too complex to my taste but for
different reasons. pt_writebuf() needs to be transactional (either all or
nothing) since we call pt_writebuf() until there is no more space.
community_writebuf() does maybe too much at once.

As mentioned this is a major rewrite, I did run this through regress and
also on a few personal systems but I'm unable to test all possible cases.
Please try this out and report back.
-- 
:wq Claudio

Index: mrt.c
===
RCS file: /cvs/src/usr.sbin/bgpd/mrt.c,v
retrieving revision 1.114
diff -u -p -r1.114 mrt.c
--- mrt.c   19 Apr 2023 09:03:00 -  1.114
+++ mrt.c   27 Jun 2023 08:33:54 -
@@ -309,7 +309,9 @@ mrt_attr_dump(struct ibuf *buf, struct r
return (-1);
 
/* communities */
-   if (community_writebuf(buf, c) == -1)
+   if (community_writebuf(c, ATTR_COMMUNITIES, 0, buf) == -1 ||
+   community_writebuf(c, ATTR_EXT_COMMUNITIES, 0, buf) == -1 ||
+   community_writebuf(c, ATTR_LARGE_COMMUNITIES, 0, buf) == -1)
return (-1);
 
/* dump all other path attributes without modification */
@@ -502,7 +504,7 @@ mrt_dump_entry_mp(struct mrt *mrt, struc
goto fail;
}
 
-   if (pt_writebuf(h2buf, p->pt) == -1) {
+   if (pt_writebuf(h2buf, p->pt, 0, 0, 0) == -1) {
log_warnx("%s: pt_writebuf error", __func__);
goto fail;
}
@@ -678,7 +680,7 @@ mrt_dump_entry_v2_rib(struct rib_entry *
}
len = ibuf_size(tbuf);
DUMP_SHORT(buf, (uint16_t)len);
-   if (ibuf_add(buf, tbuf->buf, len) == -1) {
+   if (ibuf_add_buf(buf, tbuf) == -1) {
log_warn("%s: ibuf_add error", __func__);
ibuf_free(tbuf);
goto fail;
@@ -731,7 +733,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
break;
}
 
-   if (pt_writebuf(pbuf, re->prefix) == -1) {
+   if (pt_writebuf(pbuf, re->prefix, 0, 0, 0) == -1) {
log_warnx("%s: pt_writebuf error", __func__);
goto fail;
}
@@ -748,7 +750,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
goto fail;
 
DUMP_LONG(hbuf, snum);
-   if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
+   if (ibuf_add_buf(hbuf, pbuf) == -1) {
log_warn("%s: ibuf_add error", __func__);
goto fail;
}
@@ -767,7 +769,7 @@ mrt_dump_entry_v2(struct mrt *mrt, struc
goto fail;
 
DUMP_LONG(hbuf, snum);
-   if (ibuf_add(hbuf, pbuf->buf, ibuf_size(pbuf)) == -1) {
+   if (ibuf_add_buf(hbuf, pbuf) == -1) {
log_warn("%s: ibuf_add error", __func__);
goto fail;
}
@@ -833,8 +835,8 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct 
}
 
off = ibuf_size(buf);
-   if (ibuf_reserve(buf, sizeof(nump)) == NULL) {
-   log_warn("%s: ibuf_reserve error", __func__);
+   if (ibuf_add_zero(buf, sizeof(nump)) == -1) {
+   log_warn("%s: ibuf_add_zero error", __func__);
goto fail;
}
arg.nump = 0;
@@ -843,8 +845,10 @@ mrt_dump_v2_hdr(struct mrt *mrt, struct 
if (arg.nump == -1)
goto fail;
 
-   nump = htons(arg.nump);
-   memcpy(ibuf_seek(buf, off, sizeof(nump)), , sizeof(nump));
+   if (ibuf_set_n16(buf, off, arg.nump) == -1) {
+   log_warn("%s: ibuf_set_n16 error", __func__);
+   goto fail;
+   }
 
len = ibuf_size(buf);
if (mrt_dump_hdr_rde(, MSG_TABLE_DUMP_V2,
@@ -1099,14 +1103,8 @@ mrt_write(struct mrt *mrt)
 void
 mrt_clean(struct mrt *mrt)
 {
-   struct ibuf *b;
-
close(mrt->wbuf.fd);
-   while ((b = TAILQ_FIRST(>wbuf.bufs))) {
-   TAILQ_REMOVE(>wbuf.bufs, b, entry);
-   ibuf_free(b);
-   }
-   mrt->wbuf.queued = 0;
+   msgbuf_clear(>wbuf);
 }
 
 static struct imsgbuf  *mrt_imsgbuf[2];
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.606
diff -u -p -r1.606 rde.c
--- rde.c   12 Jun 2023 12:48:07 -  1.606
+++ rde.c   27 Jun 2023 08:33:54 -
@@ -86,8 +86,7 @@ static void

Re: btrace(8) allow to store kstack in a map

2023-06-26 Thread Claudio Jeker

On Mon, Jun 26, 2023 at 04:53:45PM +, Klemens Nanni wrote:
> On Mon, Jun 26, 2023 at 05:24:53PM +0200, Claudio Jeker wrote:
> > I created this simple btrace script to help find malloc(9) leaks but
> > it did not work. First step was adding kstack support to the map
> > implementation. But then it still did not work because btrace did not
> > enable the kstack reporting for the probe. This turned out to be an
> > issue with the if () condition I added to filter out uninteresting events.
> > The problem is that the statements in the if block are not scanned for
> > possible extra probe arguments. The below diff fixes all of that.
> > 
> > Here is the btrace script:
> > tracepoint:uvm:malloc {
> > if (arg0 == 127 && arg2 <= 64) {
> > @mem[arg1] = kstack
> > }
> > }
> > tracepoint:uvm:free {
> > if (arg0 == 127 && arg2 <= 64) {
> > delete(@mem[arg1])
> > }
> > }
> > END {
> > printf("Possible memory leaks\n");
> > print(@mem)
> > }
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: bt.5
> > ===
> > RCS file: /cvs/src/usr.sbin/btrace/bt.5,v
> > retrieving revision 1.14
> > diff -u -p -r1.14 bt.5
> > --- bt.531 Mar 2022 17:27:29 -  1.14
> > +++ bt.526 Jun 2023 15:16:25 -
> > @@ -120,6 +120,11 @@ Functions:
> >  .It Fn clear "@map"
> >  Delete all (key, value) pairs from
> >  .Va @map .
> > +.It "@map[key]" = Fn count
> > +Increment the value of
> > +.Va key
> > +in
> > +.Va @map .
> 
> Would it make more sense to document count() like min() and max()?
> Then it doesn't look like a special case.
> 
> Looks like min/max functions can be used in map key assignments as well,
> although I'm not super familiar with btrace internals yet.

count() is strange since it only works on maps (at least from what I
figured out). I need to double check how min() and max() work, since their
usage also seems non-intuitive.

I find the documentation of bt(5) rather weak. So more is needed for sure.
 
> >  .It Fn delete "@map[key]"
> >  Delete the pair indexed by
> >  .Va key
> > Index: btrace.c
> > ===
> > RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v
> > retrieving revision 1.70
> > diff -u -p -r1.70 btrace.c
> > --- btrace.c12 May 2023 14:14:16 -  1.70
> > +++ btrace.c26 Jun 2023 15:12:18 -
> > @@ -450,6 +450,37 @@ rules_do(int fd)
> > }
> >  }
> >  
> > +static uint64_t
> > +rules_action_scan(struct bt_stmt *bs)
> > +{
> > +   struct bt_arg *ba;
> > +   uint64_t evtflags = 0;
> > +
> > +   while (bs != NULL) {
> > +   SLIST_FOREACH(ba, >bs_args, ba_next)
> > +   evtflags |= ba2dtflags(ba);
> > +
> > +   /* Also check the value for map/hist insertion */
> > +   switch (bs->bs_act) {
> > +   case B_AC_BUCKETIZE:
> > +   case B_AC_INSERT:
> > +   ba = (struct bt_arg *)bs->bs_var;
> > +   evtflags |= ba2dtflags(ba);
> > +   break;
> > +   case B_AC_TEST:
> > +   evtflags |= rules_action_scan(
> > +   (struct bt_stmt *)bs->bs_var);
> > +   break;
> > +   default:
> > +   break;
> > +   }
> > +
> > +   bs = SLIST_NEXT(bs, bs_next);
> > +   }
> > +
> > +   return evtflags;
> > +}
> > +
> >  void
> >  rules_setup(int fd)
> >  {
> > @@ -474,21 +505,7 @@ rules_setup(int fd)
> > evtflags |= ba2dtflags(ba);
> > }
> >  
> > -   SLIST_FOREACH(bs, >br_action, bs_next) {
> > -   SLIST_FOREACH(ba, >bs_args, ba_next)
> > -   evtflags |= ba2dtflags(ba);
> > -
> > -   /* Also check the value for map/hist insertion */
> > -   switch (bs->bs_act) {
> > -   case B_AC_BUCKETIZE:
> > -   case B_AC_INSERT:
> > -   ba = (struct bt_arg *)bs->bs_var;
> > -   evtflags |= ba2dtflags(ba);
> > -   break;
> > -   default:
> > -   break;
> > -

ldpd: use new ibuf api

2023-06-26 Thread Claudio Jeker

Adjust ldpd to follow the new ibuf API.
Diff is simple but as usual I could not test it out so please test.

-- 
:wq Claudio

Index: hello.c
===
RCS file: /cvs/src/usr.sbin/ldpd/hello.c,v
retrieving revision 1.58
diff -u -p -r1.58 hello.c
--- hello.c 12 Dec 2019 00:10:29 -  1.58
+++ hello.c 12 Jun 2023 07:27:51 -
@@ -139,7 +139,7 @@ send_hello(enum hello_type type, struct 
return (-1);
}
 
-   send_packet(fd, af, , ia, buf->buf, buf->wpos);
+   send_packet(fd, af, , ia, ibuf_data(buf), ibuf_size(buf));
ibuf_free(buf);
 
return (0);
Index: labelmapping.c
===
RCS file: /cvs/src/usr.sbin/ldpd/labelmapping.c,v
retrieving revision 1.68
diff -u -p -r1.68 labelmapping.c
--- labelmapping.c  4 Mar 2017 00:15:35 -   1.68
+++ labelmapping.c  22 Jun 2023 08:49:55 -
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -39,10 +40,8 @@ static void   log_msg_mapping(int, uint16
 static void
 enqueue_pdu(struct nbr *nbr, struct ibuf *buf, uint16_t size)
 {
-   struct ldp_hdr  *ldp_hdr;
-
-   ldp_hdr = ibuf_seek(buf, 0, sizeof(struct ldp_hdr));
-   ldp_hdr->length = htons(size);
+   if (ibuf_set_n16(buf, offsetof(struct ldp_hdr, length), size) == -1)
+   fatal(__func__);
evbuf_enqueue(>tcp->wbuf, buf);
 }

btrace(8) allow to store kstack in a map

2023-06-26 Thread Claudio Jeker

I created this simple btrace script to help find malloc(9) leaks but
it did not work. First step was adding kstack support to the map
implementation. But then it still did not work because btrace did not
enable the kstack reporting for the probe. This turned out to be an
issue with the if () condition I added to filter out uninteresting events.
The problem is that the statements in the if block are not scanned for
possible extra probe arguments. The below diff fixes all of that.

Here is the btrace script:
tracepoint:uvm:malloc {
if (arg0 == 127 && arg2 <= 64) {
@mem[arg1] = kstack
}
}
tracepoint:uvm:free {
if (arg0 == 127 && arg2 <= 64) {
delete(@mem[arg1])
}
}
END {
printf("Possible memory leaks\n");
print(@mem)
}

-- 
:wq Claudio

Index: bt.5
===
RCS file: /cvs/src/usr.sbin/btrace/bt.5,v
retrieving revision 1.14
diff -u -p -r1.14 bt.5
--- bt.531 Mar 2022 17:27:29 -  1.14
+++ bt.526 Jun 2023 15:16:25 -
@@ -120,6 +120,11 @@ Functions:
 .It Fn clear "@map"
 Delete all (key, value) pairs from
 .Va @map .
+.It "@map[key]" = Fn count
+Increment the value of
+.Va key
+in
+.Va @map .
 .It Fn delete "@map[key]"
 Delete the pair indexed by
 .Va key
Index: btrace.c
===
RCS file: /cvs/src/usr.sbin/btrace/btrace.c,v
retrieving revision 1.70
diff -u -p -r1.70 btrace.c
--- btrace.c12 May 2023 14:14:16 -  1.70
+++ btrace.c26 Jun 2023 15:12:18 -
@@ -450,6 +450,37 @@ rules_do(int fd)
}
 }
 
+static uint64_t
+rules_action_scan(struct bt_stmt *bs)
+{
+   struct bt_arg *ba;
+   uint64_t evtflags = 0;
+
+   while (bs != NULL) {
+   SLIST_FOREACH(ba, >bs_args, ba_next)
+   evtflags |= ba2dtflags(ba);
+
+   /* Also check the value for map/hist insertion */
+   switch (bs->bs_act) {
+   case B_AC_BUCKETIZE:
+   case B_AC_INSERT:
+   ba = (struct bt_arg *)bs->bs_var;
+   evtflags |= ba2dtflags(ba);
+   break;
+   case B_AC_TEST:
+   evtflags |= rules_action_scan(
+   (struct bt_stmt *)bs->bs_var);
+   break;
+   default:
+   break;
+   }
+
+   bs = SLIST_NEXT(bs, bs_next);
+   }
+
+   return evtflags;
+}
+
 void
 rules_setup(int fd)
 {
@@ -474,21 +505,7 @@ rules_setup(int fd)
evtflags |= ba2dtflags(ba);
}
 
-   SLIST_FOREACH(bs, >br_action, bs_next) {
-   SLIST_FOREACH(ba, >bs_args, ba_next)
-   evtflags |= ba2dtflags(ba);
-
-   /* Also check the value for map/hist insertion */
-   switch (bs->bs_act) {
-   case B_AC_BUCKETIZE:
-   case B_AC_INSERT:
-   ba = (struct bt_arg *)bs->bs_var;
-   evtflags |= ba2dtflags(ba);
-   break;
-   default:
-   break;
-   }
-   }
+   evtflags |= rules_action_scan(SLIST_FIRST(>br_action));
 
SLIST_FOREACH(bp, >br_probes, bp_next) {
debug("parsed probe '%s'", debug_probe_name(bp));
@@ -1685,10 +1702,17 @@ ba2dtflags(struct bt_arg *ba)
 long
 bacmp(struct bt_arg *a, struct bt_arg *b)
 {
-   assert(a->ba_type == b->ba_type);
-   assert(a->ba_type == B_AT_LONG);
+   if (a->ba_type != b->ba_type)
+   return a->ba_type - b->ba_type;
 
-   return ba2long(a, NULL) - ba2long(b, NULL);
+   switch (a->ba_type) {
+   case B_AT_LONG:
+   return ba2long(a, NULL) - ba2long(b, NULL);
+   case B_AT_STR:
+   return strcmp(ba2str(a, NULL), ba2str(b, NULL));
+   default:
+   errx(1, "no compare support for type %d", a->ba_type);
+   }
 }
 
 __dead void
Index: map.c
===
RCS file: /cvs/src/usr.sbin/btrace/map.c,v
retrieving revision 1.20
diff -u -p -r1.20 map.c
--- map.c   30 Apr 2022 01:29:05 -  1.20
+++ map.c   24 Jun 2023 10:27:59 -
@@ -176,6 +176,11 @@ map_insert(struct map *map, const char *
val += ba2long(bval->ba_value, dtev);
mep->mval->ba_value = (void *)val;
break;
+   case B_AT_BI_KSTACK:
+   case B_AT_BI_USTACK:
+   free(mep->mval);
+   mep->mval = ba_new(ba2str(bval, dtev), B_AT_STR);
+   break;
default:
errx(1, "no insert support for type %d", bval->ba_type);
}

eigrpd: use new ibuf API

2023-06-26 Thread Claudio Jeker

This diff changes some parts of eigrpd to use the new ibuf API.
There is one ibuf_seek() left in packet.c since the changes done to the
header there are not as trivial as in any other daemon. So that part I
left out for now. The suggested changes on the other hand are simple.

Again if someone uses eigrpd please test.
-- 
:wq Claudio

Index: packet.c
===
RCS file: /cvs/src/usr.sbin/eigrpd/packet.c,v
retrieving revision 1.20
diff -u -p -r1.20 packet.c
--- packet.c19 Jan 2021 11:49:26 -  1.20
+++ packet.c26 Jun 2023 10:21:00 -
@@ -172,12 +172,11 @@ send_packet(struct eigrp_iface *ei, stru
eigrp_hdr->ack_num = htonl(nbr->recv_seq);
rtp_ack_stop_timer(nbr);
}
-   if (flags) {
-   eigrp_hdr->flags = ntohl(eigrp_hdr->flags) | flags;
-   eigrp_hdr->flags = htonl(eigrp_hdr->flags);
-   }
+   if (flags)
+   eigrp_hdr->flags |= htonl(flags);
+   
eigrp_hdr->chksum = 0;
-   eigrp_hdr->chksum = in_cksum(buf->buf, ibuf_size(buf));
+   eigrp_hdr->chksum = in_cksum(ibuf_data(buf), ibuf_size(buf));
 
/* log packet being sent */
if (eigrp_hdr->opcode != EIGRP_OPC_HELLO) {
Index: tlv.c
===
RCS file: /cvs/src/usr.sbin/eigrpd/tlv.c,v
retrieving revision 1.16
diff -u -p -r1.16 tlv.c
--- tlv.c   3 Nov 2021 13:48:46 -   1.16
+++ tlv.c   16 Jun 2023 10:24:17 -
@@ -19,6 +19,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #include 
@@ -51,13 +52,14 @@ gen_parameter_tlv(struct ibuf *buf, stru
 int
 gen_sequence_tlv(struct ibuf *buf, struct seq_addr_head *seq_addr_list)
 {
-   struct tlv   tlv, *tlvp;
+   struct tlv   tlv;
struct seq_addr_entry   *sa;
uint8_t  alen;
uint16_t len = TLV_HDR_LEN;
-   size_t   original_size = ibuf_size(buf);
+   size_t   off;
 
tlv.type = htons(TLV_TYPE_SEQ);
+   off = ibuf_size(buf) + offsetof(struct tlv, length);
if (ibuf_add(buf, , sizeof(tlv))) {
log_warn("%s: ibuf_add failed", __func__);
return (-1);
@@ -85,9 +87,8 @@ gen_sequence_tlv(struct ibuf *buf, struc
}
 
/* adjust tlv length */
-   if ((tlvp = ibuf_seek(buf, original_size, sizeof(*tlvp))) == NULL)
-fatalx("gen_sequence_tlv: buf_seek failed");
-   tlvp->length = htons(len);
+   if (ibuf_set_n16(buf, off, len) == -1)
+fatalx("gen_sequence_tlv: buf_set_n16 failed");
 
return (0);
 }
@@ -158,13 +159,13 @@ len_route_tlv(struct rinfo *ri)
 int
 gen_route_tlv(struct ibuf *buf, struct rinfo *ri)
 {
-   struct tlv   tlv, *tlvp;
+   struct tlv   tlv;
struct in_addr   addr;
struct classic_metricmetric;
struct classic_emetric   emetric;
uint16_t tlvlen;
uint8_t  pflen;
-   size_t   original_size = ibuf_size(buf);
+   size_t   off;
 
switch (ri->af) {
case AF_INET:
@@ -189,6 +190,7 @@ gen_route_tlv(struct ibuf *buf, struct r
}
tlv.type = htons(tlv.type);
 
+   off = ibuf_size(buf) + offsetof(struct tlv, length);
if (ibuf_add(buf, , sizeof(tlv)))
return (-1);
tlvlen = TLV_HDR_LEN;
@@ -251,9 +253,8 @@ gen_route_tlv(struct ibuf *buf, struct r
tlvlen += sizeof(pflen) + pflen;
 
/* adjust tlv length */
-   if ((tlvp = ibuf_seek(buf, original_size, sizeof(*tlvp))) == NULL)
-fatalx("gen_route_tlv: buf_seek failed");
-   tlvp->length = htons(tlvlen);
+   if (ibuf_set_n16(buf, off, tlvlen) == -1)
+fatalx("gen_route_tlv: buf_set_n16 failed");
 
return (0);
 }

ldapd better bufferevent error callback

2023-06-26 Thread Claudio Jeker

Noticed this since an ldapd ran out of memory and the error printed by
the callback is just not helpful. So let's try to improve this.

I think errno is not clobbered so log_warn() should work and then also add
an indication if it is read or write.
-- 
:wq Claudio

Index: conn.c
===
RCS file: /cvs/src/usr.sbin/ldapd/conn.c,v
retrieving revision 1.20
diff -u -p -r1.20 conn.c
--- conn.c  30 Apr 2023 23:49:14 -  1.20
+++ conn.c  26 Jun 2023 09:07:53 -
@@ -242,7 +242,8 @@ conn_err(struct bufferevent *bev, short 
else if ((why & EVBUFFER_TIMEOUT) == EVBUFFER_TIMEOUT)
log_debug("timeout on connection %d", conn->fd);
else
-   log_warnx("error 0x%02X on connection %d", why, conn->fd);
+   log_warn("%s error on connection %d",
+   why & EVBUFFER_WRITE ? "write" : "read", conn->fd);
 
conn_close(conn);
 }

dvmrpd refactor and ibuf API updates

2023-06-22 Thread Claudio Jeker

Here is a diff to refactor and update dvmrpd to use the new ibuf API.
Instead of doing the checksum calculations in various places, do it in
send_packet() as a central place.

Could people using dvmrpd test this diff? Thanks.
-- 
:wq Claudio

Index: ask_nbrs2.c
===
RCS file: /cvs/src/usr.sbin/dvmrpd/ask_nbrs2.c,v
retrieving revision 1.5
diff -u -p -r1.5 ask_nbrs2.c
--- ask_nbrs2.c 7 Dec 2015 19:17:18 -   1.5
+++ ask_nbrs2.c 15 Jun 2023 07:43:01 -
@@ -36,7 +36,6 @@ send_ask_nbrs2(struct iface *iface, stru
 {
struct sockaddr_in   dst;
struct ibuf *buf;
-   struct dvmrp_hdr*dvmrp_hdr;
int  ret = 0;
 
log_debug("send_ask_nbrs2: interface %s addr %s",
@@ -56,11 +55,7 @@ send_ask_nbrs2(struct iface *iface, stru
dst.sin_len = sizeof(struct sockaddr_in);
dst.sin_addr.s_addr = addr.s_addr;
 
-   /* update chksum */
-   dvmrp_hdr = ibuf_seek(buf, 0, sizeof(*dvmrp_hdr));
-   dvmrp_hdr->chksum = in_cksum(buf->buf, buf->wpos);
-
-   ret = send_packet(iface, buf->buf, buf->wpos, );
+   ret = send_packet(iface, buf, );
ibuf_free(buf);
return (ret);
 fail:
Index: dvmrpe.h
===
RCS file: /cvs/src/usr.sbin/dvmrpd/dvmrpe.h,v
retrieving revision 1.7
diff -u -p -r1.7 dvmrpe.h
--- dvmrpe.h19 Jan 2021 12:23:30 -  1.7
+++ dvmrpe.h15 Jun 2023 07:43:01 -
@@ -183,7 +183,8 @@ struct ctl_nbr  *nbr_to_ctl(struct nbr *)
 
 /* packet.c */
 int gen_dvmrp_hdr(struct ibuf *, struct iface *, u_int8_t);
-int send_packet(struct iface *, void *, size_t, struct sockaddr_in 
*);
+int send_packet(struct iface *, struct ibuf *,
+struct sockaddr_in *);
 voidrecv_packet(int, short, void *);
 
 /* probe.c */
Index: graft.c
===
RCS file: /cvs/src/usr.sbin/dvmrpd/graft.c,v
retrieving revision 1.5
diff -u -p -r1.5 graft.c
--- graft.c 7 Dec 2015 19:17:18 -   1.5
+++ graft.c 15 Jun 2023 07:43:01 -
@@ -36,7 +36,6 @@ send_graft(struct iface *iface, struct i
 {
struct sockaddr_in   dst;
struct ibuf *buf;
-   struct dvmrp_hdr*dvmrp_hdr;
int  ret = 0;
 
log_debug("send_graft: interface %s addr %s",
@@ -56,11 +55,7 @@ send_graft(struct iface *iface, struct i
dst.sin_len = sizeof(struct sockaddr_in);
dst.sin_addr.s_addr = addr.s_addr;
 
-   /* update chksum */
-   dvmrp_hdr = ibuf_seek(buf, 0, sizeof(*dvmrp_hdr));
-   dvmrp_hdr->chksum = in_cksum(buf->buf, buf->wpos);
-
-   ret = send_packet(iface, buf->buf, buf->wpos, );
+   ret = send_packet(iface, buf, );
ibuf_free(buf);
return (ret);
 fail:
Index: graft_ack.c
===
RCS file: /cvs/src/usr.sbin/dvmrpd/graft_ack.c,v
retrieving revision 1.5
diff -u -p -r1.5 graft_ack.c
--- graft_ack.c 7 Dec 2015 19:17:18 -   1.5
+++ graft_ack.c 15 Jun 2023 07:43:01 -
@@ -36,7 +36,6 @@ send_graft_ack(struct iface *iface, stru
 {
struct sockaddr_in   dst;
struct ibuf *buf;
-   struct dvmrp_hdr*dvmrp_hdr;
int  ret = 0;
 
log_debug("send_graft_ack: interface %s addr %s",
@@ -56,11 +55,7 @@ send_graft_ack(struct iface *iface, stru
dst.sin_len = sizeof(struct sockaddr_in);
dst.sin_addr.s_addr = addr.s_addr;
 
-   /* update chksum */
-   dvmrp_hdr = ibuf_seek(buf, 0, sizeof(*dvmrp_hdr));
-   dvmrp_hdr->chksum = in_cksum(buf->buf, buf->wpos);
-
-   ret = send_packet(iface, buf->buf, buf->wpos, );
+   ret = send_packet(iface, buf, );
ibuf_free(buf);
return (ret);
 fail:
Index: igmp.c
===
RCS file: /cvs/src/usr.sbin/dvmrpd/igmp.c,v
retrieving revision 1.4
diff -u -p -r1.4 igmp.c
--- igmp.c  7 Dec 2015 19:14:49 -   1.4
+++ igmp.c  15 Jun 2023 07:43:01 -
@@ -73,9 +73,6 @@ send_igmp_query(struct iface *iface, str
igmp_hdr.max_resp_time = iface->last_member_query_interval;
}
 
-   /* update chksum */
-   igmp_hdr.chksum = in_cksum(_hdr, sizeof(igmp_hdr));
-
ibuf_add(buf, _hdr, sizeof(igmp_hdr));
 
/* set destination address */
@@ -83,7 +80,7 @@ send_igmp_query(struct iface *iface, str
dst.sin_len = sizeof(struct sockaddr_in);
inet_aton(AllSystems, _addr);
 
-   ret = send_packet(iface, buf->buf, buf->wpos, );
+   ret = send_packet(iface, buf, );
ibuf_free(buf);
return (ret);
 }
Index: nbrs2.c
===
RCS file: /cvs/src/usr.sbin/dvmrpd/nbrs2.c,v

Re: update ripd to use new ibuf API

2023-06-22 Thread Claudio Jeker

On Thu, Jun 22, 2023 at 11:15:31AM +0200, Theo Buehler wrote:
> On Thu, Jun 22, 2023 at 10:26:29AM +0200, Claudio Jeker wrote:
> > Another ibuf_seek() -> ibuf_set_n16() conversions.
> > While there also switch to ibuf_data() and ibuf_size().
> 
> Reads fine. ok. a test would indeed be nice.

I did a quick test myself and configured three ripd with auth-type crypt.
It seems to work as well as before.

-- 
:wq Claudio

update ripd to use new ibuf API

2023-06-22 Thread Claudio Jeker

More ibuf_seek() -> ibuf_set_n16() conversions.
While there also switch to ibuf_data() and ibuf_size().

Does anyone still use ripd and can test this?
-- 
:wq Claudio

Index: auth.c
===
RCS file: /cvs/src/usr.sbin/ripd/auth.c,v
retrieving revision 1.12
diff -u -p -r1.12 auth.c
--- auth.c  19 Dec 2019 16:47:14 -  1.12
+++ auth.c  16 Jun 2023 10:29:23 -
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -236,14 +237,14 @@ auth_add_trailer(struct ibuf *buf, struc
MD5_CTX  hash;
u_int8_t digest[MD5_DIGEST_LENGTH];
struct auth_md  *md;
-   struct md5_auth *a;
-   int  pos;
+   size_t   pos;
 
-   pos = sizeof(struct rip_hdr) + sizeof(struct rip_auth);
+   pos = sizeof(struct rip_hdr) + sizeof(struct rip_auth) +
+   offsetof(struct md5_auth, auth_offset);
 
/* add offset to header */
-   a = ibuf_seek(buf, pos, sizeof(*a));
-   a->auth_offset = htons(buf->wpos);
+   if (ibuf_set_n16(buf, pos, ibuf_size(buf)) == -1)
+   return (-1);
 
/* insert plaintext key */
if ((md = md_list_find(>auth_md_list,
@@ -259,7 +260,7 @@ auth_add_trailer(struct ibuf *buf, struc
 
/* calculate MD5 digest */
MD5Init();
-   MD5Update(, buf->buf, buf->wpos);
+   MD5Update(, ibuf_data(buf), ibuf_size(buf));
MD5Update(, digest, MD5_DIGEST_LENGTH);
MD5Final(digest, );

Re: [s...@spacehopper.org: ospf6d fib reload [Re: bgpd fix for possible crash in SE]]

2023-06-21 Thread Claudio Jeker

On Tue, Jun 20, 2023 at 05:31:34PM +0100, Stuart Henderson wrote:
> This hasn't blown up yet... any interest?

Some comments on the kroute.c changes below. Everything else is fine.
 
> - Forwarded message from Stuart Henderson  -
> 
> From: Stuart Henderson 
> Date: Fri, 26 May 2023 14:40:45 +0100
> To: tech@openbsd.org
> Subject: ospf6d fib reload [Re: bgpd fix for possible crash in SE]
> Mail-Followup-To: tech@openbsd.org
> 
> On 2023/05/26 13:52, Stuart Henderson wrote:
> > I think my main issues come around LS_REFRESH_TIME intervals, when
> > there's loads of churn and "ospf6d: ospf engine" can be busy for
> > minutes at a time (not always, but very often). Don't know if that rings
> > any bells for anyone... (I am now reminded that RTM_DESYNC isn't handled
> > by ospf6d which probably doesn't help matters).
> 
> Here's a first attempt at porting the fib reload/desync diffs from
> ospfd to ospf6d ... Not sure if it's good yet, but it didn't immediately
> crash and burn when I ran "ospf6ctl fib reload", at least.
> 

...

> Index: ospf6d/kroute.c
> ===
> RCS file: /cvs/src/usr.sbin/ospf6d/kroute.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 kroute.c
> --- ospf6d/kroute.c   8 Mar 2023 04:43:14 -   1.67
> +++ ospf6d/kroute.c   26 May 2023 13:37:55 -
> @@ -45,16 +45,22 @@ struct {
>   u_int32_t   rtseq;
>   pid_t   pid;
>   int fib_sync;
> + int fib_serial;
>   u_int8_tfib_prio;
>   int fd;
> - struct eventev;
> + struct eventev, reload;

Please put reload on its own line.

>   u_int   rdomain;
> +#define KR_RELOAD_IDLE 0
> +#define KR_RELOAD_FETCH1
> +#define KR_RELOAD_HOLD 2
> + int reload_state;
>  } kr_state;
>  
>  struct kroute_node {
>   RB_ENTRY(kroute_node)entry;
>   struct kroute_node  *next;
>   struct krouter;
> + int  serial;
>  };
>  
>  void kr_redist_remove(struct kroute_node *, struct kroute_node *);
> @@ -90,7 +96,10 @@ void   if_announce(void *);
>  int  send_rtmsg(int, int, struct kroute *);
>  int  dispatch_rtmsg(void);
>  int  fetchtable(void);
> -int  rtmsg_process(char *, size_t); 
> +int  refetchtable(void);
> +int  rtmsg_process(char *, size_t);
> +void kr_fib_reload_timer(int, short, void *);
> +void kr_fib_reload_arm_timer(int);
>  
>  RB_HEAD(kroute_tree, kroute_node)krt;
>  RB_PROTOTYPE(kroute_tree, kroute_node, entry, kroute_compare)
> @@ -165,6 +174,9 @@ kr_init(int fs, u_int rdomain, int redis
>   kr_dispatch_msg, NULL);
>   event_add(_state.ev, NULL);
>  
> + kr_state.reload_state = KR_RELOAD_IDLE;
> + evtimer_set(_state.reload, kr_fib_reload_timer, NULL);
> +
>   return (0);
>  }
>  
> @@ -374,6 +386,62 @@ kr_fib_decouple(void)
>  }
>  
>  void
> +kr_fib_reload_timer(int fd, short event, void *bula)
> +{
> + if (kr_state.reload_state == KR_RELOAD_FETCH) {
> + kr_fib_reload();
> + kr_state.reload_state = KR_RELOAD_HOLD;
> + kr_fib_reload_arm_timer(KR_RELOAD_HOLD_TIMER);
> + } else {
> + kr_state.reload_state = KR_RELOAD_IDLE;
> + }
> +}
> +
> +void
> +kr_fib_reload_arm_timer(int delay)
> +{
> + struct timeval  tv;
> +
> + timerclear();
> + tv.tv_sec = delay / 1000;
> + tv.tv_usec = (delay % 1000) * 1000;
> +
> + if (evtimer_add(_state.reload, ) == -1)
> + fatal("add_reload_timer");
> +}
> +
> +void
> +kr_fib_reload(void)
> +{
> + struct kroute_node  *krn, *kr, *kn;
> +

Maybe include the:

log_info("reloading interface list and routing table");

line from ospfd/kroute.c here as well.

> + kr_state.fib_serial++;
> +
> + if (fetchifs(0) != 0 || fetchtable() != 0)
> + return;
> +
> + for (kr = RB_MIN(kroute_tree, ); kr != NULL; kr = krn) {
> + krn = RB_NEXT(kroute_tree, , kr);
> +
> + do {
> + kn = kr->next;
> +
> + if (kr->serial != kr_state.fib_serial) {
> +
> + if (kr->r.priority == RTP_OSPF) {

Here you should use kr_state.fib_prio instead of RTP_OSPF.

> + kr->serial = kr_state.fib_serial;
> + if (send_rtmsg(kr_state.fd,
> + RTM_ADD, >r) != 0)
> + break;
> + } else
> + kroute_remove(kr);
> + }
> +
> + } while ((kr = kn) != NULL);
> + }
> +}
> +
> +void
>  kr_fib_update_prio(u_int8_t fib_prio)
>  {
>   struct kroute_node  *kr;
> @@

Re: ospfd use new ibuf functions

2023-06-20 Thread Claudio Jeker

On Tue, Jun 20, 2023 at 02:47:41PM +0200, Claudio Jeker wrote:
> This diff updates ospfd to use the new ibuf API.
> 
> It mainly removes the use of ibuf_seek() and replaces these calls with
> ibuf_set().
> 
> Regress still passes with this diff in.

Here is the same diff for ospf6d.
-- 
:wq Claudio

Index: database.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/database.c,v
retrieving revision 1.22
diff -u -p -r1.22 database.c
--- database.c  8 Mar 2023 04:43:14 -   1.22
+++ database.c  16 Jun 2023 10:27:04 -
@@ -51,7 +51,7 @@ send_db_description(struct nbr *nbr)
goto fail;
 
/* reserve space for database description header */
-   if (ibuf_reserve(buf, sizeof(dd_hdr)) == NULL)
+   if (ibuf_add_zero(buf, sizeof(dd_hdr)) == -1)
goto fail;
 
switch (nbr->state) {
@@ -134,8 +134,9 @@ send_db_description(struct nbr *nbr)
dd_hdr.bits = bits;
dd_hdr.dd_seq_num = htonl(nbr->dd_seq_num);
 
-   memcpy(ibuf_seek(buf, sizeof(struct ospf_hdr), sizeof(dd_hdr)),
-   _hdr, sizeof(dd_hdr));
+   if (ibuf_set(buf, sizeof(struct ospf_hdr), _hdr,
+   sizeof(dd_hdr)) == -1)
+   goto fail;
 
/* calculate checksum */
if (upd_ospf_hdr(buf, nbr->iface))
Index: lsupdate.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/lsupdate.c,v
retrieving revision 1.22
diff -u -p -r1.22 lsupdate.c
--- lsupdate.c  8 Mar 2023 04:43:14 -   1.22
+++ lsupdate.c  16 Jun 2023 10:27:15 -
@@ -177,7 +177,7 @@ prepare_ls_update(struct iface *iface, i
goto fail;
 
/* reserve space for number of lsa field */
-   if (ibuf_reserve(buf, sizeof(u_int32_t)) == NULL)
+   if (ibuf_add_zero(buf, sizeof(u_int32_t)) == -1)
goto fail;
 
return (buf);
@@ -208,8 +208,10 @@ add_ls_update(struct ibuf *buf, struct i
age = ntohs(age);
if ((age += older + iface->transmit_delay) >= MAX_AGE)
age = MAX_AGE;
-   age = htons(age);
-   memcpy(ibuf_seek(buf, ageoff, sizeof(age)), , sizeof(age));
+   if (ibuf_set_n16(buf, ageoff, age) == -1) {
+   log_warn("add_ls_update");
+   return (0);
+   }
 
return (1);
 }
@@ -218,9 +220,8 @@ int
 send_ls_update(struct ibuf *buf, struct iface *iface, struct in6_addr addr,
 u_int32_t nlsa)
 {
-   nlsa = htonl(nlsa);
-   memcpy(ibuf_seek(buf, sizeof(struct ospf_hdr), sizeof(nlsa)),
-   , sizeof(nlsa));
+   if (ibuf_set_n32(buf, sizeof(struct ospf_hdr), nlsa) == -1)
+   goto fail;
/* calculate checksum */
if (upd_ospf_hdr(buf, iface))
goto fail;
Index: ospfe.c
===
RCS file: /cvs/src/usr.sbin/ospf6d/ospfe.c,v
retrieving revision 1.68
diff -u -p -r1.68 ospfe.c
--- ospfe.c 8 Mar 2023 04:43:14 -   1.68
+++ ospfe.c 20 Jun 2023 16:25:52 -
@@ -780,11 +780,11 @@ orig_rtr_lsa(struct area *area)
fatal("orig_rtr_lsa");
 
/* reserve space for LSA header and LSA Router header */
-   if (ibuf_reserve(buf, sizeof(lsa_hdr)) == NULL)
-   fatal("orig_rtr_lsa: ibuf_reserve failed");
+   if (ibuf_add_zero(buf, sizeof(lsa_hdr)) == -1)
+   fatal("orig_rtr_lsa: ibuf_add_zero failed");
 
-   if (ibuf_reserve(buf, sizeof(lsa_rtr)) == NULL)
-   fatal("orig_rtr_lsa: ibuf_reserve failed");
+   if (ibuf_add_zero(buf, sizeof(lsa_rtr)) == -1)
+   fatal("orig_rtr_lsa: ibuf_add_zero failed");
 
/* links */
LIST_FOREACH(iface, >iface_list, entry) {
@@ -944,8 +944,8 @@ orig_rtr_lsa(struct area *area)
LSA_24_SETLO(lsa_rtr.opts, area_ospf_options(area));
LSA_24_SETHI(lsa_rtr.opts, flags);
lsa_rtr.opts = htonl(lsa_rtr.opts);
-   memcpy(ibuf_seek(buf, sizeof(lsa_hdr), sizeof(lsa_rtr)),
-   _rtr, sizeof(lsa_rtr));
+   if (ibuf_set(buf, sizeof(lsa_hdr), _rtr, sizeof(lsa_rtr)) == -1)
+   fatal("orig_rtr_lsa: ibuf_set failed");
 
/* LSA header */
lsa_hdr.age = htons(DEFAULT_AGE);
@@ -956,11 +956,12 @@ orig_rtr_lsa(struct area *area)
lsa_hdr.seq_num = htonl(INIT_SEQ_NUM);
lsa_hdr.len = htons(buf->wpos);
lsa_hdr.ls_chksum = 0;  /* updated later */
-   memcpy(ibuf_seek(buf, 0, sizeof(lsa_hdr)), &lsa_hdr, sizeof(lsa_hdr));
+   if (ibuf_set(buf, 0, &lsa_hdr, sizeof(lsa_hdr)) == -1)
+   fatal("orig_rtr_lsa: ibuf_set failed");
 
-   chksum = htons(iso_cksum(buf->buf, buf->wpos, LS_CKSUM_OFFSET));
-   memcpy(ibuf_seek(buf, LS_CKSUM_OFFSET, sizeof(chksum)),
-   &chksum, sizeof(chksum));
+   chksum = iso_cks

Re: more relayd cleanup

2023-06-20 Thread Claudio Jeker

On Tue, Jun 20, 2023 at 03:35:11PM +0200, Theo Buehler wrote:
> On Tue, Jun 20, 2023 at 02:17:06PM +0200, Claudio Jeker wrote:
> > Ok, this went overboard. I just wanted to clean up a bit more in
> > check_tcp.c but noticed check_send_expect and CHECK_BINSEND_EXPECT.
> > 
> > > This code is not very consistent in the different ways the strings are
> > encoded. Especially check_send_expect() is a bit of a mess because of
> > that.
> > 
> > > While there I noticed string2binary() and decided to write it in a simpler
> > way (copying code over from rpki-client).
> > 
> > All in all I think this diff is improving the situation a little bit.
> 
> Write "I will not modify Reyk's daemons more than I need to" 100 times.
> 
> Then it's ok.
> 

I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daemons more than I need to
I will not modify Reyk's daem

Re: uvm_meter: improve periodic execution logic for uvm_loadav()

2023-06-20 Thread Claudio Jeker

On Tue, Jun 20, 2023 at 10:25:02AM -0500, Scott Cheloha wrote:
> On Tue, Jun 20, 2023 at 11:47:10AM +0200, Claudio Jeker wrote:
> > On Mon, Jun 19, 2023 at 04:45:03PM -0500, Scott Cheloha wrote:
> > 
> > [...]
> > 
> > > Index: uvm/uvm_meter.c
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
> > > retrieving revision 1.42
> > > diff -u -p -r1.42 uvm_meter.c
> > > --- uvm/uvm_meter.c   28 Dec 2020 14:01:23 -  1.42
> > > +++ uvm/uvm_meter.c   19 Jun 2023 21:35:22 -
> > > @@ -42,6 +42,7 @@
> > >  #include 
> > >  #include 
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > @@ -65,6 +66,9 @@
> > >  int maxslp = MAXSLP; /* patchable ... */
> > >  struct loadavg averunnable;
> > >  
> > > +/* Update load averages every five seconds. */
> > > +#define UVM_METER_INTVL  5
> > > +
> > >  /*
> > >   * constants for averages over 1, 5, and 15 minutes when sampling at
> > >   * 5 second intervals.
> > > @@ -78,17 +82,29 @@ static fixpt_t cexp[3] = {
> > >  
> > >  
> > >  static void uvm_loadav(struct loadavg *);
> > > +void uvm_meter(void *);
> > >  void uvm_total(struct vmtotal *);
> > >  void uvmexp_read(struct uvmexp *);
> > >  
> > > +void
> > > +uvm_meter_start(void)
> > > +{
> > > > + static struct timeout to = TIMEOUT_INITIALIZER(uvm_meter, &to);
> > > > +
> > > > + uvm_meter(&to);
> > > +}
> > > +
> > >  /*
> > >   * uvm_meter: calculate load average and wake up the swapper (if needed)
> > >   */
> > >  void
> > > -uvm_meter(void)
> > > +uvm_meter(void *arg)
> > >  {
> > > - if ((gettime() % 5) == 0)
> > > > - uvm_loadav(&averunnable);
> > > + struct timeout *to = arg;
> > > +
> > > + timeout_add_sec(to, UVM_METER_INTVL);
> > > +
> > > > + uvm_loadav(&averunnable);
> > > >   if (proc0.p_slptime > (maxslp / 2))
> > > >   wakeup(&proc0);
> > >  }
> > 
> > Why add uvm_meter_start() using a static global value and then pass that
> > > value around? This code could just be:
> > 
> > struct timeout uvm_meter_to = TIMEOUT_INITIALIZER(uvm_meter, NULL);
> > 
> > void
> > uvm_meter(void *arg)
> > {
> > > timeout_add_sec(&uvm_meter_to, UVM_METER_INTVL);
> > > uvm_loadav(&averunnable);
> > }
> > 
> > and then just call uvm_meter() once in scheduler_start().
> > > I don't understand why all this extra indirection is needed; it does not
> > > make the code better.
> > 
> > > Apart from that and the fact that the proc0 wakeup can go I'm OK with this
> > diff.
> 
> I like that better.  I'll commit the attached tomorrow unless I hear
> otherwise.
> 
> Index: share/man/man9/uvm_init.9
> ===
> RCS file: /cvs/src/share/man/man9/uvm_init.9,v
> retrieving revision 1.5
> diff -u -p -r1.5 uvm_init.9
> --- share/man/man9/uvm_init.9 21 May 2023 05:11:38 -  1.5
> +++ share/man/man9/uvm_init.9 20 Jun 2023 15:20:59 -
> @@ -168,7 +168,7 @@ argument is ignored.
>  .Ft void
>  .Fn uvm_kernacc "caddr_t addr" "size_t len" "int rw"
>  .Ft void
> -.Fn uvm_meter
> +.Fn uvm_meter "void *"
>  .Ft int
>  .Fn uvm_sysctl "int *name" "u_int namelen" "void *oldp" "size_t *oldlenp" 
> "void *newp " "size_t newlen" "struct proc *p"
>  .Ft int
> @@ -212,7 +212,7 @@ access, in the kernel address space.
>  .Pp
>  The
>  .Fn uvm_meter
> -function calculates the load average and wakes up the swapper if necessary.
> +function periodically recomputes the load average.
>  .Pp
>  The
>  .Fn uvm_sysctl
> Index: sys/kern/sched_bsd.c
> ===
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.74
> diff -u -p -r1.74 sched_bsd.c
> --- sys/kern/sched_bsd.c  4 Feb 2023 19:33:03 -   1.74
> +++ sys/kern/sched_bsd.c  20 Jun 2023 15:20:59 -
> @@ -234,7 +234,6 @@ schedcpu(void *arg)
>   }
>   SCHED_UNLOCK(s);
>   }
> - uvm_meter();
>   wakeup();
>   timeout_add_sec(to, 1);
>  }
> @@ -669,6 +668,7 @@ scheduler_start(void)
>  
>   rrticks_init = hz / 10;
>   sche

open_memstream cleanup

2023-06-20 Thread Claudio Jeker

In open_memstream() the code does a bzero() of the new memory even though
recallocarray() is used, which already does this.

In open_wmemstream() the code does the same but is still using
reallocarray(). So adjust that code to be like open_memstream().

-- 
:wq Claudio

Index: open_memstream.c
===
RCS file: /cvs/src/lib/libc/stdio/open_memstream.c,v
retrieving revision 1.8
diff -u -p -r1.8 open_memstream.c
--- open_memstream.c2 May 2019 08:30:10 -   1.8
+++ open_memstream.c8 Jun 2023 12:21:50 -
@@ -53,7 +53,6 @@ memstream_write(void *v, const char *b, 
p = recallocarray(st->string, st->size, sz, 1);
if (!p)
return (-1);
-   bzero(p + st->size, sz - st->size);
*st->pbuf = st->string = p;
st->size = sz;
}
Index: open_wmemstream.c
===
RCS file: /cvs/src/lib/libc/stdio/open_wmemstream.c,v
retrieving revision 1.8
diff -u -p -r1.8 open_wmemstream.c
--- open_wmemstream.c   12 Sep 2015 16:23:14 -  1.8
+++ open_wmemstream.c   15 Jun 2023 14:54:42 -
@@ -52,10 +52,9 @@ wmemstream_write(void *v, const char *b,
 
if (sz < end + 1)
sz = end + 1;
-   p = reallocarray(st->string, sz, sizeof(wchar_t));
+   p = recallocarray(st->string, st->size, sz, sizeof(wchar_t));
if (!p)
return (-1);
-   bzero(p + st->size, (sz - st->size) * sizeof(wchar_t));
*st->pbuf = st->string = p;
st->size = sz;
}

Re: ospfd use new ibuf functions

2023-06-20 Thread Claudio Jeker

On Tue, Jun 20, 2023 at 03:46:23PM +0200, Theo Buehler wrote:
> On Tue, Jun 20, 2023 at 02:47:41PM +0200, Claudio Jeker wrote:
> > This diff updates ospfd to use the new ibuf API.
> > 
> > It mainly removes the use of ibuf_seek() and replaces these calls with
> > ibuf_set().
> > 
> > Regress still passes with this diff in.
> 
> There's a function vs fatal mismatch in orig_rtr_lsa
> 
> > +   if (ibuf_set_n16(buf, LS_CKSUM_OFFSET, chksum) == -1)
> > +   fatal("orig_rtr_lsa: ibuf_set failed");
> 
> not sure if that's deliberate. Similarly in orig_net_lsa.

It is not deliberate. I will fix them before commit.
 
> ok

-- 
:wq Claudio

rpki-client use new ibuf API

2023-06-20 Thread Claudio Jeker

Use the ibuf_fd_*() API for file descriptor passing and also ibuf_set()
instead of ibuf_seek().

-- 
:wq Claudio

Index: http.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/http.c,v
retrieving revision 1.76
diff -u -p -r1.76 http.c
--- http.c  12 Jun 2023 15:27:52 -  1.76
+++ http.c  12 Jun 2023 16:03:16 -
@@ -2150,7 +2150,7 @@ proc_http(char *bind_addr, int fd)
io_read_str(b, );
 
/* queue up new requests */
-   http_req_new(id, uri, mod, 0, b->fd);
+   http_req_new(id, uri, mod, 0, ibuf_fd_get(b));
ibuf_free(b);
}
}
Index: io.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/io.c,v
retrieving revision 1.22
diff -u -p -r1.22 io.c
--- io.c14 Dec 2022 15:19:16 -  1.22
+++ io.c16 Jun 2023 14:37:42 -
@@ -41,7 +41,7 @@ io_new_buffer(void)
 
if ((b = ibuf_dynamic(64, INT32_MAX)) == NULL)
err(1, NULL);
-   ibuf_reserve(b, sizeof(size_t));/* can not fail */
+   ibuf_add_zero(b, sizeof(size_t));   /* can not fail */
return b;
 }
 
@@ -88,7 +88,7 @@ io_close_buffer(struct msgbuf *msgbuf, s
size_t len;
 
len = ibuf_size(b) - sizeof(len);
-   memcpy(ibuf_seek(b, 0, sizeof(len)), &len, sizeof(len));
+   ibuf_set(b, 0, &len, sizeof(len));
ibuf_close(msgbuf, b);
 }
 
@@ -280,7 +280,7 @@ io_buf_recvfd(int fd, struct ibuf **ib)
for (i = 0; i < j; i++) {
f = ((int *)CMSG_DATA(cmsg))[i];
if (i == 0)
-   b->fd = f;
+   ibuf_fd_set(b, f);
else
close(f);
}
Index: main.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/main.c,v
retrieving revision 1.241
diff -u -p -r1.241 main.c
--- main.c  30 May 2023 16:02:28 -  1.241
+++ main.c  12 Jun 2023 07:09:58 -
@@ -341,7 +341,7 @@ http_fetch(unsigned int id, const char *
io_str_buffer(b, uri);
io_str_buffer(b, last_mod);
/* pass file as fd */
-   b->fd = fd;
+   ibuf_fd_set(b, fd);
io_close_buffer(, b);
 }
 
@@ -362,7 +362,7 @@ rrdp_http_fetch(unsigned int id, const c
b = io_new_buffer();
io_simple_buffer(b, , sizeof(type));
io_simple_buffer(b, , sizeof(id));
-   b->fd = pi[0];
+   ibuf_fd_set(b, pi[0]);
io_close_buffer(, b);
 
http_fetch(id, uri, last_mod, pi[1]);
Index: rrdp.c
===
RCS file: /cvs/src/usr.sbin/rpki-client/rrdp.c,v
retrieving revision 1.30
diff -u -p -r1.30 rrdp.c
--- rrdp.c  3 May 2023 07:51:08 -   1.30
+++ rrdp.c  16 Jun 2023 15:06:58 -
@@ -431,20 +431,20 @@ rrdp_input_handler(int fd)
io_read_str(b, _id);
io_read_buf(b, , sizeof(serial));
io_read_str(b, _mod);
-   if (b->fd != -1)
+   if (ibuf_fd_avail(b))
errx(1, "received unexpected fd");
 
rrdp_new(id, local, notify, session_id, serial, last_mod);
break;
case RRDP_HTTP_INI:
-   if (b->fd == -1)
-   errx(1, "expected fd not received");
s = rrdp_get(id);
if (s == NULL)
errx(1, "http ini, rrdp session %u does not exist", id);
if (s->state != RRDP_STATE_WAIT)
errx(1, "%s: bad internal state", s->local);
-   s->infd = b->fd;
+   s->infd = ibuf_fd_get(b);
+   if (s->infd == -1)
+   errx(1, "expected fd not received");
s->state = RRDP_STATE_PARSE;
if (s->aborted) {
rrdp_abort_req(s);
@@ -454,7 +454,7 @@ rrdp_input_handler(int fd)
case RRDP_HTTP_FIN:
io_read_buf(b, , sizeof(res));
io_read_str(b, _mod);
-   if (b->fd != -1)
+   if (ibuf_fd_avail(b))
errx(1, "received unexpected fd");
 
s = rrdp_get(id);
@@ -472,7 +472,7 @@ rrdp_input_handler(int fd)
s = rrdp_get(id);
if (s == NULL)
errx(1, "file, rrdp session %u does not exist", id);;
-   if (b->fd != -1)
+   if (ibuf_fd_avail(b))
errx(1, "received unexpected fd");
io_read_buf(b, , sizeof(ok));
if (ok != 1)
@@ -482,7 +482,7 @@

ospfd use new ibuf functions

2023-06-20 Thread Claudio Jeker

This diff updates ospfd to use the new ibuf API.

It mainly removes the use of ibuf_seek() and replaces these calls with
ibuf_set().

Regress still passes with this diff in.
-- 
:wq Claudio

Index: auth.c
===
RCS file: /cvs/src/usr.sbin/ospfd/auth.c,v
retrieving revision 1.20
diff -u -p -r1.20 auth.c
--- auth.c  5 May 2015 01:26:37 -   1.20
+++ auth.c  16 Jun 2023 10:25:48 -
@@ -141,35 +141,44 @@ auth_gen(struct ibuf *buf, struct iface 
 {
MD5_CTX  hash;
u_int8_t digest[MD5_DIGEST_LENGTH];
-   struct ospf_hdr *ospf_hdr;
+   struct crypt crypt;
struct auth_md  *md;
-
-   if ((ospf_hdr = ibuf_seek(buf, 0, sizeof(*ospf_hdr))) == NULL)
-   fatalx("auth_gen: buf_seek failed");
+   u_int16_tchksum;
 
/* update length */
if (ibuf_size(buf) > USHRT_MAX)
fatalx("auth_gen: resulting ospf packet too big");
-   ospf_hdr->len = htons(ibuf_size(buf));
-   /* clear auth_key field */
-   bzero(ospf_hdr->auth_key.simple, sizeof(ospf_hdr->auth_key.simple));
+   if (ibuf_set_n16(buf, offsetof(struct ospf_hdr, len),
+   ibuf_size(buf)) == -1)
+   fatalx("auth_gen: ibuf_set_n16 failed");
 
switch (iface->auth_type) {
case AUTH_NONE:
-   ospf_hdr->chksum = in_cksum(buf->buf, ibuf_size(buf));
+   chksum = in_cksum(buf->buf, ibuf_size(buf));
+   if (ibuf_set(buf, offsetof(struct ospf_hdr, chksum),
+   &chksum, sizeof(chksum)) == -1)
+   fatalx("auth_gen: ibuf_set failed");
break;
case AUTH_SIMPLE:
-   ospf_hdr->chksum = in_cksum(buf->buf, ibuf_size(buf));
+   chksum = in_cksum(buf->buf, ibuf_size(buf));
+   if (ibuf_set(buf, offsetof(struct ospf_hdr, chksum),
+   &chksum, sizeof(chksum)) == -1)
+   fatalx("auth_gen: ibuf_set failed");
 
-   strncpy(ospf_hdr->auth_key.simple, iface->auth_key,
-   sizeof(ospf_hdr->auth_key.simple));
+   if (ibuf_set(buf, offsetof(struct ospf_hdr, auth_key),
+   iface->auth_key, strlen(iface->auth_key)) == -1)
+   fatalx("auth_gen: ibuf_set failed");
break;
case AUTH_CRYPT:
-   ospf_hdr->chksum = 0;
-   ospf_hdr->auth_key.crypt.keyid = iface->auth_keyid;
-   ospf_hdr->auth_key.crypt.seq_num = htonl(iface->crypt_seq_num);
-   ospf_hdr->auth_key.crypt.len = MD5_DIGEST_LENGTH;
+   bzero(&crypt, sizeof(crypt));
+   crypt.keyid = iface->auth_keyid;
+   crypt.seq_num = htonl(iface->crypt_seq_num);
+   crypt.len = MD5_DIGEST_LENGTH;
iface->crypt_seq_num++;
+
+   if (ibuf_set(buf, offsetof(struct ospf_hdr, auth_key),
+   &crypt, sizeof(crypt)) == -1)
+   fatalx("auth_gen: ibuf_set failed");
 
/* insert plaintext key */
if ((md = md_list_find(&iface->auth_md_list,
Index: database.c
===
RCS file: /cvs/src/usr.sbin/ospfd/database.c,v
retrieving revision 1.36
diff -u -p -r1.36 database.c
--- database.c  8 Mar 2023 04:43:14 -   1.36
+++ database.c  16 Jun 2023 10:26:00 -
@@ -53,7 +53,7 @@ send_db_description(struct nbr *nbr)
goto fail;
 
/* reserve space for database description header */
-   if (ibuf_reserve(buf, sizeof(dd_hdr)) == NULL)
+   if (ibuf_add_zero(buf, sizeof(dd_hdr)) == -1)
goto fail;
 
switch (nbr->state) {
@@ -140,8 +140,9 @@ send_db_description(struct nbr *nbr)
dd_hdr.bits = bits;
dd_hdr.dd_seq_num = htonl(nbr->dd_seq_num);
 
-   memcpy(ibuf_seek(buf, sizeof(struct ospf_hdr), sizeof(dd_hdr)),
-   &dd_hdr, sizeof(dd_hdr));
+   if (ibuf_set(buf, sizeof(struct ospf_hdr), &dd_hdr,
+   sizeof(dd_hdr)) == -1)
+   goto fail;
 
/* update authentication and calculate checksum */
if (auth_gen(buf, nbr->iface))
Index: lsupdate.c
===
RCS file: /cvs/src/usr.sbin/ospfd/lsupdate.c,v
retrieving revision 1.51
diff -u -p -r1.51 lsupdate.c
--- lsupdate.c  8 Mar 2023 04:43:14 -   1.51
+++ lsupdate.c  16 Jun 2023 10:26:17 -
@@ -158,7 +158,7 @@ prepare_ls_update(struct iface *iface)
goto fail;
 
/* reserve space for number of lsa field */
-   if (ibuf_reserve(buf, sizeof(u_int32_t)) == NULL)
+   if (ibuf_add_zero(buf, sizeof(u_int32_t)) == -1)
goto fail;
 
return (buf);
@@ -194,8 +194,10 @@ add_ls_update(struct ibuf *buf, struct i
age = ntohs(age);
if ((age += older + iface->transmit_delay) >= MAX_AGE)

more relayd cleanup

2023-06-20 Thread Claudio Jeker

Ok, this went overboard. I just wanted to clean up a bit more in
check_tcp.c but noticed check_send_expect and CHECK_BINSEND_EXPECT.

This code is not very consistent in the different ways the strings are
encoded. Especially check_send_expect() is a bit of a mess because of
that.

While there I noticed string2binary() and decided to write it in a simpler
way (copying code over from rpki-client).

All in all I think this diff is improving the situation a little bit.
-- 
:wq Claudio

Index: check_tcp.c
===
RCS file: /cvs/src/usr.sbin/relayd/check_tcp.c,v
retrieving revision 1.59
diff -u -p -r1.59 check_tcp.c
--- check_tcp.c 20 Jun 2023 09:54:57 -  1.59
+++ check_tcp.c 20 Jun 2023 10:55:12 -
@@ -183,10 +183,6 @@ tcp_host_up(struct ctl_tcp_event *cte)
return;
}
 
-   if (cte->table->sendbuf != NULL && cte->table->sendbinbuf == NULL) {
-   cte->req = cte->table->sendbuf;
-   } else if (cte->table->sendbinbuf != NULL)
-   cte->req = cte->table->sendbinbuf->buf;
if (cte->table->sendbuf != NULL || cte->table->sendbinbuf != NULL) {
event_again(&cte->ev, cte->s, EV_TIMEOUT|EV_WRITE, tcp_send_req,
&cte->tv_start, &cte->table->conf.timeout, cte);
@@ -203,6 +199,7 @@ void
 tcp_send_req(int s, short event, void *arg)
 {
struct ctl_tcp_event*cte = arg;
+   char*req;
int  bs;
int  len;
 
@@ -214,14 +211,17 @@ tcp_send_req(int s, short event, void *a
 
if (cte->table->sendbinbuf != NULL) {
len = ibuf_size(cte->table->sendbinbuf);
+   req = ibuf_data(cte->table->sendbinbuf);
log_debug("%s: table %s sending binary", __func__,
cte->table->conf.name);
print_hex(cte->table->sendbinbuf->buf, 0, len);
-   } else
-   len = strlen(cte->req);
+   } else {
+   len = strlen(cte->table->sendbuf);
+   req = cte->table->sendbuf;
+   }
 
do {
-   bs = write(s, cte->req, len);
+   bs = write(s, req, len);
if (bs == -1) {
if (errno == EAGAIN || errno == EINTR)
goto retry;
@@ -230,7 +230,7 @@ tcp_send_req(int s, short event, void *a
hce_notify_done(cte->host, HCE_TCP_WRITE_FAIL);
return;
}
-   cte->req += bs;
+   req += bs;
len -= bs;
} while (len > 0);
 
@@ -302,20 +302,22 @@ check_send_expect(struct ctl_tcp_event *
u_char  *b;
 
if (cte->table->conf.check == CHECK_BINSEND_EXPECT) {
+   size_t   exlen;
+
+   exlen = strlen(cte->table->conf.exbuf) / 2;
log_debug("%s: table %s expecting binary",
__func__, cte->table->conf.name);
-   print_hex(cte->table->conf.exbinbuf, 0,
-   strlen(cte->table->conf.exbuf) / 2);
+   print_hex(cte->table->conf.exbinbuf, 0, exlen);
 
-   if (memcmp(cte->table->conf.exbinbuf, cte->buf->buf,
-   strlen(cte->table->conf.exbuf) / 2) == 0) {
+   if (ibuf_size(cte->buf) >= exlen && memcmp(ibuf_data(cte->buf),
+   cte->table->conf.exbinbuf, exlen) == 0) {
cte->host->he = HCE_SEND_EXPECT_OK;
cte->host->up = HOST_UP;
return (0);
-   } else {
+   } else if (ibuf_size(cte->buf) >= exlen) {
log_debug("%s: table %s received mismatching binary",
__func__, cte->table->conf.name);
-   print_hex(cte->buf->buf, 0, ibuf_size(cte->buf));
+   print_hex(ibuf_data(cte->buf), 0, ibuf_size(cte->buf));
}
} else if (cte->table->conf.check == CHECK_SEND_EXPECT) {
/*
@@ -353,7 +355,7 @@ check_http_code(struct ctl_tcp_event *ct
if (ibuf_add_zero(cte->buf, 1) == -1)
fatal("out of memory");
 
-   head = cte->buf->buf;
+   head = ibuf_data(cte->buf);
host = cte->host;
host->he = HCE_HTTP_CODE_ERROR;
host->code = 0;
@@ -404,7 +406,7 @@ check_http_digest(struct ctl_tcp_event *
if (ibuf_add_zero(cte->buf, 1) == -1)
fatal("out of memory");
 
-   head = cte->buf->buf;
+   head = ibuf_data(cte->buf);
host = cte->host;
host->he = HCE_HTTP_DIGEST_ERROR;
 
Index: relayd.h
===
RCS file: /cvs/src/usr.sbin/relayd/relayd.h,v
retrieving revision 1.269
diff -u -p -r1.269 relayd.h
--- relayd.h31 Aug 2022 16:17:18 -  1.269
+++ relayd.h20 Jun 2023 10:47:36 -
@@ -176,7 +176,6 @@ struct

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 2249 matches

Mail list logo