Re: sendsyslog kernel buffer

2021-03-08 Thread Alexander Bluhm
I hope to have addressed all issues.

On Sun, Mar 07, 2021 at 11:50:24AM +, Visa Hankala wrote:
> This copyin() can also result in copying the buffer from userspace twice.
> This might not be a problem with log data though.

Is double copyin a problem?  I think error != EFAULT should catch
all cases.

On Mon, Mar 08, 2021 at 01:37:51AM +0300, Vitaliy Makkoveev wrote:
> I wonder they were not buffered. But does it make sense to drop the
> most recent messages?

The oldest message should be kept.  In my experience you get most
information why logging started to fail from the first message.
After that there are subsequent errors.

new diff, ok?

bluhm

Index: sys/kern/subr_log.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.72
diff -u -p -r1.72 subr_log.c
--- sys/kern/subr_log.c 8 Feb 2021 08:18:45 -   1.72
+++ sys/kern/subr_log.c 8 Mar 2021 19:55:52 -
@@ -454,6 +454,149 @@ logioctl(dev_t dev, u_long com, caddr_t 
return (0);
 }
 
+/*
+ * If syslogd is not running, temporarily store a limited amount of messages
+ * in kernel.  After log stash is full, drop messages and count them.  When
+ * syslogd is available again, next log message will flush the stashed
+ * messages and insert a message with drop count.  Calls to malloc(9) and
+ * copyin(9) may sleep, protect data structures with rwlock.
+ */
+
+#define LOGSTASH_SIZE  100
+struct logstash_message {
+   char*lgs_buffer;
+   size_t   lgs_size;
+} logstash_messages[LOGSTASH_SIZE];
+
+struct logstash_message *logstash_in = _messages[0];
+struct logstash_message *logstash_out = _messages[0];
+
+struct rwlock logstash_rwlock = RWLOCK_INITIALIZER("logstash");
+
+intlogstash_dropped, logstash_error, logstash_pid;
+
+intlogstash_insert(const char *, size_t, int, pid_t);
+void   logstash_remove(void);
+intlogstash_sendsyslog(struct proc *);
+
+static inline int
+logstash_full(void)
+{
+   rw_assert_anylock(_rwlock);
+
+   return logstash_out->lgs_buffer != NULL &&
+   logstash_in == logstash_out;
+}
+
+static inline void
+logstash_increment(struct logstash_message **msg)
+{
+   rw_assert_wrlock(_rwlock);
+
+   KASSERT((*msg) >= _messages[0]);
+   KASSERT((*msg) < _messages[LOGSTASH_SIZE]);
+   if ((*msg) == _messages[LOGSTASH_SIZE - 1])
+   (*msg) = _messages[0];
+   else
+   (*msg)++;
+}
+
+int
+logstash_insert(const char *buf, size_t nbyte, int logerror, pid_t pid)
+{
+   int error;
+
+   rw_enter_write(_rwlock);
+
+   if (logstash_full()) {
+   if (logstash_dropped == 0) {
+   logstash_error = logerror;
+   logstash_pid = pid;
+   }
+   logstash_dropped++;
+
+   rw_exit(_rwlock);
+   return (0);
+   }
+
+   logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
+   error = copyin(buf, logstash_in->lgs_buffer, nbyte);
+   if (error) {
+   free(logstash_in->lgs_buffer, M_LOG, nbyte);
+   logstash_in->lgs_buffer = NULL;
+
+   rw_exit(_rwlock);
+   return (error);
+   }
+   logstash_in->lgs_size = nbyte;
+   logstash_increment(_in);
+
+   rw_exit(_rwlock);
+   return (0);
+}
+
+void
+logstash_remove(void)
+{
+   rw_assert_wrlock(_rwlock);
+
+   KASSERT(logstash_out->lgs_buffer != NULL);
+   free(logstash_out->lgs_buffer, M_LOG, logstash_out->lgs_size);
+   logstash_out->lgs_buffer = NULL;
+   logstash_increment(_out);
+
+   /* Insert dropped message in sequence where messages were dropped. */
+   if (logstash_dropped) {
+   size_t l, nbyte;
+   char buf[80];
+
+   l = snprintf(buf, sizeof(buf),
+   "<%d>sendsyslog: dropped %d message%s, error %d, pid %d",
+   LOG_KERN|LOG_WARNING, logstash_dropped,
+   logstash_dropped == 1 ? "" : "s",
+   logstash_error, logstash_pid);
+   logstash_dropped = 0;
+   logstash_error = 0;
+   logstash_pid = 0;
+
+   /* Cannot fail, we have just freed a slot. */
+   KASSERT(!logstash_full());
+   nbyte = ulmin(l, sizeof(buf) - 1);
+   logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
+   memcpy(logstash_in->lgs_buffer, buf, nbyte);
+   logstash_in->lgs_size = nbyte;
+   logstash_increment(_in);
+   }
+}
+
+int
+logstash_sendsyslog(struct proc *p)
+{
+   int error;
+
+   rw_enter_write(_rwlock);
+
+   while (logstash_out->lgs_buffer != NULL) {
+   error = dosendsyslog(p, logstash_out->lgs_buffer,
+   logstash_out->lgs_size, 0, UIO_SYSSPACE);
+   if (error) {
+   rw_exit(_rwlock);
+   

sendsyslog kernel buffer

2021-03-06 Thread Alexander Bluhm
Hi

Early daemons like dhcpleased, slaacd, unwind, resolvd are started
before syslogd.  This results in ugly sendsyslog: dropped 1 message
logs and the real message is lost.

Changing the start order of syslogd and network daemons is not
feasible.  A possible solution is a temporary buffer for log messages
in kernel.

ok?

bluhm

Index: sys/kern/subr_log.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.72
diff -u -p -r1.72 subr_log.c
--- sys/kern/subr_log.c 8 Feb 2021 08:18:45 -   1.72
+++ sys/kern/subr_log.c 5 Mar 2021 21:58:50 -
@@ -454,6 +454,146 @@ logioctl(dev_t dev, u_long com, caddr_t 
return (0);
 }
 
+/*
+ * If syslogd is not running, temporarily store a limited amount of messages
+ * in kernel.  After log stash is full, drop messages and count them.  When
+ * syslogd is available again, next log message will flush the stashed
+ * messages and insert a message with drop count.  Calls to malloc(9) and 
+ * copyin(9) may sleep, protect data structures with rwlock.
+ */
+
+#define LOGSTASH_SIZE  100
+struct logstash_messages {
+   char*lgs_buffer;
+   size_t   lgs_size;
+} logstash_messages[LOGSTASH_SIZE];
+
+struct logstash_messages *logstash_in = _messages[0];
+struct logstash_messages *logstash_out = _messages[0];
+
+struct rwlock logstash_rwlock = RWLOCK_INITIALIZER("logstash");
+
+intlogstash_dropped, logstash_error, logstash_pid;
+
+void   logstash_insert(const char *, size_t, int, pid_t);
+void   logstash_remove(void);
+intlogstash_sendsyslog(struct proc *);
+
+static inline int
+logstash_empty(void)
+{
+   rw_assert_anylock(_rwlock);
+
+   return logstash_out->lgs_buffer == NULL;
+}
+
+static inline int
+logstash_full(void)
+{
+   rw_assert_anylock(_rwlock);
+
+   return logstash_out->lgs_buffer != NULL &&
+   logstash_in == logstash_out;
+}
+
+static inline void
+logstash_increment(struct logstash_messages **msg)
+{
+   rw_assert_wrlock(_rwlock);
+
+   KASSERT((*msg) >= _messages[0]);
+   KASSERT((*msg) < _messages[LOGSTASH_SIZE]);
+   (*msg)++;
+   if ((*msg) == _messages[LOGSTASH_SIZE])
+   (*msg) = _messages[0];
+}
+
+void
+logstash_insert(const char *buf, size_t nbyte, int error, pid_t pid)
+{
+   rw_enter_write(_rwlock);
+
+   if (logstash_full()) {
+   if (logstash_dropped == 0) {
+   logstash_error = error;
+   logstash_pid = pid;
+   }
+   logstash_dropped++;
+
+   rw_exit(_rwlock);
+   return;
+   }
+
+   logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
+   copyin(buf, logstash_in->lgs_buffer, nbyte);
+   logstash_in->lgs_size = nbyte;
+   logstash_increment(_in);
+
+   rw_exit(_rwlock);
+}
+
+void
+logstash_remove(void)
+{
+   rw_assert_wrlock(_rwlock);
+
+   KASSERT(!logstash_empty());
+   free(logstash_out->lgs_buffer, M_LOG, logstash_out->lgs_size);
+   logstash_out->lgs_buffer = NULL;
+   logstash_increment(_out);
+
+   /* Insert dropped message in sequence where messages were dropped. */
+   if (logstash_dropped) {
+   size_t l, nbyte;
+   char buf[80];
+
+   l = snprintf(buf, sizeof(buf),
+   "<%d>sendsyslog: dropped %d message%s, error %d, pid %d",
+   LOG_KERN|LOG_WARNING, logstash_dropped,
+   logstash_dropped == 1 ? "" : "s",
+   logstash_error, logstash_pid);
+   logstash_dropped = 0;
+   logstash_error = 0;
+   logstash_pid = 0;
+
+   /* Cannot fail, we have just freed a slot. */
+   KASSERT(!logstash_full());
+   nbyte = ulmin(l, sizeof(buf) - 1);
+   logstash_in->lgs_buffer = malloc(nbyte, M_LOG, M_WAITOK);
+   memcpy(logstash_in->lgs_buffer, buf, nbyte);
+   logstash_in->lgs_size = nbyte;
+   logstash_increment(_in);
+   }
+}
+
+int
+logstash_sendsyslog(struct proc *p)
+{
+   int error;
+
+   rw_enter_write(_rwlock);
+
+   while (logstash_out->lgs_buffer != NULL) {
+   error = dosendsyslog(p, logstash_out->lgs_buffer,
+   logstash_out->lgs_size, 0, UIO_SYSSPACE);
+   if (error) {
+   rw_exit(_rwlock);
+   return (error);
+   }
+   logstash_remove();
+   }
+
+   rw_exit(_rwlock);
+   return (0);
+}
+
+/*
+ * Send syslog(3) message from userland to socketpair(2) created by syslogd(8).
+ * Store message in kernel log stash for later if syslogd(8) is not available
+ * or sending fails.  Send to console if LOG_CONS is set and syslogd(8) socket
+ * does not exist.
+ */
+
 int
 sys_sendsyslog(struct proc *p, void *v, register_t *retval)
 {
@@ -462,32 +602,18 @@ 

ip_fragment ip6_fragment

2021-02-26 Thread Alexander Bluhm
Hi,

It always bothered me that ip_fragment() and ip6_fragment() behave
slightly differently.  Unify them and use an mlist to simplify the
fragment list.

- The functions ip_fragment() and ip6_fragment() always consume the mbuf.
- They free the mbuf and mbuf list in case of an error.
- They care about the counter.
- Adjust the code a bit to make v4 and v6 look similar.
- Maybe there was an mbuf leak when pf_route6() called pf_refragment6()
  and it failed.  Now the mbuf is always freed by ip6_fragment().

ok?

bluhm

Index: net/if_bridge.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.352
diff -u -p -r1.352 if_bridge.c
--- net/if_bridge.c 25 Feb 2021 02:48:21 -  1.352
+++ net/if_bridge.c 26 Feb 2021 10:41:57 -
@@ -1853,7 +1853,7 @@ bridge_fragment(struct ifnet *brifp, str
 struct mbuf *m)
 {
struct llc llc;
-   struct mbuf *m0;
+   struct mbuf_list ml;
int error = 0;
int hassnap = 0;
u_int16_t etype;
@@ -1911,40 +1911,32 @@ bridge_fragment(struct ifnet *brifp, str
return;
}
 
-   error = ip_fragment(m, ifp, ifp->if_mtu);
-   if (error) {
-   m = NULL;
-   goto dropit;
-   }
+   error = ip_fragment(m, , ifp, ifp->if_mtu);
+   if (error)
+   return;
 
-   for (; m; m = m0) {
-   m0 = m->m_nextpkt;
-   m->m_nextpkt = NULL;
-   if (error == 0) {
-   if (hassnap) {
-   M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
-   if (m == NULL) {
-   error = ENOBUFS;
-   continue;
-   }
-   bcopy(, mtod(m, caddr_t),
-   LLC_SNAPFRAMELEN);
-   }
-   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
+   while ((m = ml_dequeue()) != NULL) {
+   if (hassnap) {
+   M_PREPEND(m, LLC_SNAPFRAMELEN, M_DONTWAIT);
if (m == NULL) {
error = ENOBUFS;
-   continue;
+   break;
}
-   bcopy(eh, mtod(m, caddr_t), sizeof(*eh));
-   error = bridge_ifenqueue(brifp, ifp, m);
-   if (error) {
-   continue;
-   }
-   } else
-   m_freem(m);
+   bcopy(, mtod(m, caddr_t), LLC_SNAPFRAMELEN);
+   }
+   M_PREPEND(m, sizeof(*eh), M_DONTWAIT);
+   if (m == NULL) {
+   error = ENOBUFS;
+   break;
+   }
+   bcopy(eh, mtod(m, caddr_t), sizeof(*eh));
+   error = bridge_ifenqueue(brifp, ifp, m);
+   if (error)
+   break;
}
-
-   if (error == 0)
+   if (error)
+   ml_purge();
+   else
ipstat_inc(ips_fragmented);
 
return;
Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1112
diff -u -p -r1.1112 pf.c
--- net/pf.c23 Feb 2021 11:43:40 -  1.1112
+++ net/pf.c26 Feb 2021 10:41:57 -
@@ -5969,7 +5969,8 @@ pf_rtlabel_match(struct pf_addr *addr, s
 void
 pf_route(struct pf_pdesc *pd, struct pf_state *s)
 {
-   struct mbuf *m0, *m1;
+   struct mbuf *m0;
+   struct mbuf_list ml;
struct sockaddr_in  *dst, sin;
struct rtentry  *rt = NULL;
struct ip   *ip;
@@ -6078,23 +6079,18 @@ pf_route(struct pf_pdesc *pd, struct pf_
goto bad;
}
 
-   m1 = m0;
-   error = ip_fragment(m0, ifp, ifp->if_mtu);
-   if (error) {
-   m0 = NULL;
-   goto bad;
-   }
+   error = ip_fragment(m0, , ifp, ifp->if_mtu);
+   if (error)
+   goto done;
 
-   for (m0 = m1; m0; m0 = m1) {
-   m1 = m0->m_nextpkt;
-   m0->m_nextpkt = NULL;
-   if (error == 0)
-   error = ifp->if_output(ifp, m0, sintosa(dst), rt);
-   else
-   m_freem(m0);
+   while ((m0 = ml_dequeue()) != NULL) {
+   error = ifp->if_output(ifp, m0, sintosa(dst), rt);
+   if (error)
+   break;
}
-
-   if (error == 0)
+   if (error)
+   ml_purge();
+   else
ipstat_inc(ips_fragmented);
 
 done:
Index: net/pf_norm.c
===
RCS file: 

Re: have m_copydata use a void * instead of caddr_t

2021-02-24 Thread Alexander Bluhm
On Wed, Feb 24, 2021 at 04:27:03PM +1000, David Gwynne wrote:
> it's a start though.  cocci and i came up with this to push in after.

Less casting is better.  OK bluhm@

> Index: arch/armv7/sunxi/sxie.c
> ===
> RCS file: /cvs/src/sys/arch/armv7/sunxi/sxie.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 sxie.c
> --- arch/armv7/sunxi/sxie.c   10 Jul 2020 13:26:36 -  1.29
> +++ arch/armv7/sunxi/sxie.c   24 Feb 2021 06:19:13 -
> @@ -524,7 +524,7 @@ sxie_start(struct ifnet *ifp)
>   SXIWRITE4(sc, SXIE_TXPKTLEN0 + (fifo * 4), m->m_pkthdr.len);
>  
>   /* copy the actual packet to fifo XXX through 'align buffer' */
> - m_copydata(m, 0, m->m_pkthdr.len, (caddr_t)td);
> + m_copydata(m, 0, m->m_pkthdr.len, td);
>   bus_space_write_multi_4(sc->sc_iot, sc->sc_ioh,
>   SXIE_TXIO0,
>   (uint32_t *)td, SXIE_ROUNDUP(m->m_pkthdr.len, 4) >> 2);
> Index: arch/octeon/dev/octcrypto.c
> ===
> RCS file: /cvs/src/sys/arch/octeon/dev/octcrypto.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 octcrypto.c
> --- arch/octeon/dev/octcrypto.c   10 Mar 2019 14:20:44 -  1.3
> +++ arch/octeon/dev/octcrypto.c   24 Feb 2021 06:19:13 -
> @@ -739,7 +739,7 @@ octcrypto_authenc_gmac(struct cryptop *c
>   } else {
>   if (crp->crp_flags & CRYPTO_F_IMBUF)
>   m_copydata((struct mbuf *)crp->crp_buf,
> - crde->crd_inject, ivlen, (uint8_t *)iv);
> + crde->crd_inject, ivlen, iv);
>   else
>   cuio_copydata((struct uio *)crp->crp_buf,
>   crde->crd_inject, ivlen, (uint8_t *)iv);
> @@ -1035,10 +1035,8 @@ octcrypto_authenc_hmac(struct cryptop *c
>   memcpy(iv, crde->crd_iv, ivlen);
>   } else {
>   if (crp->crp_flags & CRYPTO_F_IMBUF)
> - m_copydata(
> - (struct mbuf *)crp->crp_buf,
> - crde->crd_inject, ivlen,
> - (uint8_t *)iv);
> + m_copydata((struct mbuf *)crp->crp_buf,
> + crde->crd_inject, ivlen, iv);
>   else
>   cuio_copydata(
>   (struct uio *)crp->crp_buf,
> Index: dev/ic/acx.c
> ===
> RCS file: /cvs/src/sys/dev/ic/acx.c,v
> retrieving revision 1.124
> diff -u -p -r1.124 acx.c
> --- dev/ic/acx.c  10 Jul 2020 13:26:37 -  1.124
> +++ dev/ic/acx.c  24 Feb 2021 06:19:13 -
> @@ -2373,7 +2373,7 @@ acx_set_probe_resp_tmplt(struct acx_soft
>   IEEE80211_ADDR_COPY(wh->i_addr3, ni->ni_bssid);
>   *(u_int16_t *)wh->i_seq = 0;
>  
> - m_copydata(m, 0, m->m_pkthdr.len, (caddr_t));
> + m_copydata(m, 0, m->m_pkthdr.len, );
>   len = m->m_pkthdr.len + sizeof(resp.size);
>   m_freem(m); 
>  
> @@ -2427,7 +2427,7 @@ acx_set_beacon_tmplt(struct acx_softc *s
>   return (1);
>   }
>  
> - m_copydata(m, 0, off, (caddr_t));
> + m_copydata(m, 0, off, );
>   len = off + sizeof(beacon.size);
>  
>   if (acx_set_tmplt(sc, ACXCMD_TMPLT_BEACON, , len) != 0) {
> @@ -2442,7 +2442,7 @@ acx_set_beacon_tmplt(struct acx_softc *s
>   return (0);
>   }
>  
> - m_copydata(m, off, len, (caddr_t));
> + m_copydata(m, off, len, );
>   len += sizeof(beacon.size);
>   m_freem(m);
>  
> Index: dev/ic/an.c
> ===
> RCS file: /cvs/src/sys/dev/ic/an.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 an.c
> --- dev/ic/an.c   8 Dec 2020 04:37:27 -   1.77
> +++ dev/ic/an.c   24 Feb 2021 06:19:13 -
> @@ -781,7 +781,7 @@ an_mwrite_bap(struct an_softc *sc, int i
>   len = min(m->m_len, totlen);
>  
>   if ((mtod(m, u_long) & 0x1) || (len & 0x1)) {
> - m_copydata(m, 0, totlen, (caddr_t)>sc_buf.sc_txbuf);
> + m_copydata(m, 0, totlen, >sc_buf.sc_txbuf);
>   cnt = (totlen + 1) / 2;
>   an_swap16((u_int16_t *)>sc_buf.sc_txbuf, cnt); 
>   CSR_WRITE_MULTI_STREAM_2(sc, AN_DATA0,
> @@ -1126,7 +1126,7 @@ an_start(struct ifnet *ifp)
>   if (ic->ic_flags & IEEE80211_F_WEPON)
>   wh->i_fc[1] |= IEEE80211_FC1_WEP;
>   m_copydata(m, 0, sizeof(struct ieee80211_frame),
> - (caddr_t)_whdr);
> + _whdr);
>  

Re: have m_copydata use a void * instead of caddr_t

2021-02-23 Thread Alexander Bluhm
On Tue, Feb 23, 2021 at 07:31:30PM +1000, David Gwynne wrote:
> i'm not a fan of having to cast to caddr_t when we have modern
> inventions like void *s we can take advantage of.

Should you remove all the (caddr_t) casts in the callers then?
Without that step this diff does not provide more consistency.

bluhm

> ok?
> 
> Index: share/man/man9/mbuf.9
> ===
> RCS file: /cvs/src/share/man/man9/mbuf.9,v
> retrieving revision 1.120
> diff -u -p -r1.120 mbuf.9
> --- share/man/man9/mbuf.9 12 Dec 2020 11:48:52 -  1.120
> +++ share/man/man9/mbuf.9 23 Feb 2021 09:29:55 -
> @@ -116,7 +116,7 @@
>  .Ft void
>  .Fn m_reclaim "void"
>  .Ft void
> -.Fn m_copydata "struct mbuf *m" "int off" "int len" "caddr_t cp"
> +.Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
>  .Ft void
>  .Fn m_cat "struct mbuf *m" "struct mbuf *n"
>  .Ft struct mbuf *
> @@ -673,7 +673,7 @@ is a
>  pointer, no action occurs.
>  .It Fn m_reclaim "void"
>  Ask protocols to free unused memory space.
> -.It Fn m_copydata "struct mbuf *m" "int off" "int len" "caddr_t cp"
> +.It Fn m_copydata "struct mbuf *m" "int off" "int len" "void *cp"
>  Copy data from the mbuf chain pointed to by
>  .Fa m
>  starting at
> Index: sys/sys/mbuf.h
> ===
> RCS file: /cvs/src/sys/sys/mbuf.h,v
> retrieving revision 1.251
> diff -u -p -r1.251 mbuf.h
> --- sys/sys/mbuf.h12 Dec 2020 11:49:02 -  1.251
> +++ sys/sys/mbuf.h23 Feb 2021 09:29:55 -
> @@ -435,7 +435,7 @@ int   m_copyback(struct mbuf *, int, int, 
>  struct mbuf *m_freem(struct mbuf *);
>  void m_purge(struct mbuf *);
>  void m_reclaim(void *, int);
> -void m_copydata(struct mbuf *, int, int, caddr_t);
> +void m_copydata(struct mbuf *, int, int, void *);
>  void m_cat(struct mbuf *, struct mbuf *);
>  struct mbuf *m_devget(char *, int, int);
>  int  m_apply(struct mbuf *, int, int,
> Index: sys/kern/uipc_mbuf.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> retrieving revision 1.277
> diff -u -p -r1.277 uipc_mbuf.c
> --- sys/kern/uipc_mbuf.c  13 Jan 2021 12:38:36 -  1.277
> +++ sys/kern/uipc_mbuf.c  23 Feb 2021 09:29:55 -
> @@ -711,8 +711,9 @@ nospace:
>   * continuing for "len" bytes, into the indicated buffer.
>   */
>  void
> -m_copydata(struct mbuf *m, int off, int len, caddr_t cp)
> +m_copydata(struct mbuf *m, int off, int len, void *p)
>  {
> + caddr_t cp = p;
>   unsigned count;
>  
>   if (off < 0)



mbuf leak ip_insertoptions

2021-02-22 Thread Alexander Bluhm
Hi,

ip_insertoptions() may prepend a mbuf.  In this case "goto bad" has
to free the new chain.  Currently we leak the new mbuf in front of
the old chain.  NetBSD has fixed this bug here:


revision 1.33
date: 1996-10-11 18:19:08 +;  author: is;  state: Exp;  lines: +2 -2;
Fix a mbuf leak in ip_output().

Scenario: If ip_insertoptions() prepends a new mbuf to the chain, the
bad: label's m_freem(m0) still would free only the original mbuf chain
if the transmission failed for, e.g., no route to host; resulting in
one lost mbuf per failed packet. (The original posting included a
demonstration program).

Original report of this bug was by jin...@isl.rdc.toshiba.co.jp
(JINMEI Tatuya) on comp.bugs.4bsd.


Free m instead of m0 in the bad case.  This allows to simplify a
bunch of goto done.

ok?

bluhm

Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.365
diff -u -p -r1.365 ip_output.c
--- netinet/ip_output.c 10 Feb 2021 18:28:06 -  1.365
+++ netinet/ip_output.c 22 Feb 2021 15:48:38 -
@@ -111,9 +111,6 @@ ip_output(struct mbuf *m0, struct mbuf *
 #if NPF > 0
u_int orig_rtableid;
 #endif
-#ifdef MROUTING
-   int rv;
-#endif
 
NET_ASSERT_LOCKED();
 
@@ -250,8 +247,7 @@ reroute:
/* Should silently drop packet */
if (error == -EINVAL)
error = 0;
-   m_freem(m);
-   goto done;
+   goto bad;
}
if (tdb != NULL) {
/*
@@ -348,13 +344,13 @@ reroute:
 */
if (ipmforwarding && ip_mrouter[ifp->if_rdomain] &&
(flags & IP_FORWARDING) == 0) {
+   int rv;
+
KERNEL_LOCK();
rv = ip_mforward(m, ifp);
KERNEL_UNLOCK();
-   if (rv != 0) {
-   m_freem(m);
-   goto done;
-   }
+   if (rv != 0)
+   goto bad;
}
}
 #endif
@@ -366,10 +362,8 @@ reroute:
 * loop back a copy if this host actually belongs to the
 * destination group on the loopback interface.
 */
-   if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0) {
-   m_freem(m);
-   goto done;
-   }
+   if (ip->ip_ttl == 0 || (ifp->if_flags & IFF_LOOPBACK) != 0)
+   goto bad;
 
goto sendit;
}
@@ -427,8 +421,7 @@ sendit:
if (pf_test(AF_INET, (flags & IP_FORWARDING) ? PF_FWD : PF_OUT,
ifp, ) != PF_PASS) {
error = EACCES;
-   m_freem(m);
-   goto done;
+   goto bad;
}
if (m == NULL)
goto done;
@@ -453,8 +446,7 @@ sendit:
if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&
(m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) == NULL)) {
error = EHOSTUNREACH;
-   m_freem(m);
-   goto done;
+   goto bad;
}
 #endif
 
@@ -534,7 +526,7 @@ done:
if_put(ifp);
return (error);
 bad:
-   m_freem(m0);
+   m_freem(m);
goto done;
 }
 



Re: Possible null deref on pf.c

2021-02-12 Thread Alexander Bluhm
On Fri, Feb 12, 2021 at 01:11:24PM +0100, Claudio Jeker wrote:
> On Fri, Feb 12, 2021 at 12:03:49PM +, Ricardo Mestre wrote:
> > This was reported on CID 1501718, ifp starts as NULL and then might be 
> > deref'ed.


> This code is strange, the scope for the IPv6 address needs to be pulled
> out of s (pf_state) somehow. Also is the state using embedded or
> not-embedded scope addresses?

I was already discussing the issue with dlg@

We both think that the code is not necessary.  The address comes
from pf configuration.  pf does not work correctly with IPv6
link-local anyway.  I think the only way to fix pf with link-local
is to embed the scope for all addresses within pf.

Current code is broken, embedding here cannot work, pf link-local
needs rework, removing the code makes rework easier.

ok?

bluhm

Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1108
diff -u -p -r1.1108 pf.c
--- net/pf.c4 Feb 2021 00:55:41 -   1.1108
+++ net/pf.c12 Feb 2021 12:06:47 -
@@ -6156,8 +6156,6 @@ pf_route6(struct pf_pdesc *pd, struct pf
dst->sin6_addr = s->rt_addr.v6;
rtableid = m0->m_pkthdr.ph_rtableid;
 
-   if (IN6_IS_SCOPE_EMBED(>sin6_addr))
-   dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
if (!rtisvalid(rt)) {
if (s->rt != PF_DUPTO) {



Re: route sockets: simplify route_attach() error path

2021-02-11 Thread Alexander Bluhm
On Wed, Feb 10, 2021 at 10:51:52PM +0300, Vitaliy Makkoveev wrote:
> Do soreserve() before `rop' allocation. It doesn't require protocol
> control block be attached to socket. Also we always call `pr_attach' in
> thread context so we always have `curproc'. 

While I found one pr_attach() from TCP input context, route_attach()
is only called from process context.

> ok?

OK bluhm@

> Index: sys/net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.304
> diff -u -p -r1.304 rtsock.c
> --- sys/net/rtsock.c  7 Nov 2020 09:51:40 -   1.304
> +++ sys/net/rtsock.c  10 Feb 2021 19:43:52 -
> @@ -301,6 +301,9 @@ route_attach(struct socket *so, int prot
>   struct rtpcb*rop;
>   int  error;
>  
> + error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
> + if (error)
> + return (error);
>   /*
>* use the rawcb but allocate a rtpcb, this
>* code does not care about the additional fields
> @@ -311,15 +314,6 @@ route_attach(struct socket *so, int prot
>   /* Init the timeout structure */
>   timeout_set(>rop_timeout, rtm_senddesync_timer, so);
>   refcnt_init(>rop_refcnt);
> -
> - if (curproc == NULL)
> - error = EACCES;
> - else
> - error = soreserve(so, ROUTESNDQ, ROUTERCVQ);
> - if (error) {
> - pool_put(_pool, rop);
> - return (error);
> - }
>  
>   rop->rop_socket = so;
>   rop->rop_proto = proto;



Re: isakmpd link dynamically

2021-02-11 Thread Alexander Bluhm
On Wed, Feb 10, 2021 at 04:16:10PM -0700, Theo de Raadt wrote:
> When I re-ordered rc in Slovenia many years ago, I got it right.

NFS /usr over IPsec cannot work.  Without IPsec it is fine.
1. mount -s /usr >/dev/null 2>&1
2. start_daemon syslogd ldattach pflogd nsd unbound ntpd
3. start_daemon iscsid isakmpd iked sasyncd ldapd npppd

> So, I think we should move these two daemons.

What do you mean with "move"?
- install them in /usr/sbin -> inconsistent with usr.sbin source tree
- recommit in /usr/src/usr.sbin -> we loose history
- move the history to /cvs/src/usr.sbin -> someone said no more cvs surgery

I would like to commit my static linking diff as is.
Moving looks more complex to me.

bluhm



isakmpd link dynamically

2021-02-10 Thread Alexander Bluhm
Hi,

Every time we ship a libcrypto erratum, we have to relink isakmpd.
I think that isakmpd and iked are in /sbin due to a historic mistake.
Probably it is for people who mount /usr via NFS over IPsec.

Moving isakmpd to /usr/sbin is hard, linking dynamically is easy.
Lines stolen from iked.

Is there any reason to include bsd.subdir.mk?

ok?

bluhm

Index: sbin/isakmpd/Makefile
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/isakmpd/Makefile,v
retrieving revision 1.88
diff -u -p -r1.88 Makefile
--- sbin/isakmpd/Makefile   10 Jul 2017 21:30:37 -  1.88
+++ sbin/isakmpd/Makefile   10 Feb 2021 17:21:44 -
@@ -87,4 +87,6 @@ generated: ${GENERATED}
 BUILDFIRST = ${GENERATED}
 
 .include 
-.include 
+
+# Don't compile isakmpd as static binary by default
+LDSTATIC=



interface group name validation

2021-02-09 Thread Alexander Bluhm
Hi,

Next try to fix syzkaller crash
https://syzkaller.appspot.com/bug?id=54e16dc5bce6929e14b42e2f1379f1c18f62be43

Interface group names must fit into IFNAMSIZ and be unique.  But
the kernel makes the unique check before truncating with strlcpy().
So there can be two interface groups with the same name.  The kif
is created by a name lookup.  The truncated names are equal so there
is only one kif owned by both groups.  When both groups are destroyed,
the single kif is removed twice from the RB tree.

- Check length of group name before doing the unique check.
- The empty group name was allowed.  That does not make much sense.
  Does anyone use the empty interface group?
- Use the same check in kernel and ifconfig userland.
- ifconfig -group does not need name sanitation.  The kernel will
  just report that it does not exist.

ok?

bluhm

Index: sys/net/if.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.627
diff -u -p -r1.627 if.c
--- sys/net/if.c8 Feb 2021 12:30:10 -   1.627
+++ sys/net/if.c9 Feb 2021 20:47:34 -
@@ -2621,9 +2621,11 @@ if_addgroup(struct ifnet *ifp, const cha
struct ifg_list *ifgl;
struct ifg_group*ifg = NULL;
struct ifg_member   *ifgm;
+   size_t   namelen;
 
-   if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
-   groupname[strlen(groupname) - 1] <= '9')
+   namelen = strlen(groupname);
+   if (namelen == 0 || namelen >= IFNAMSIZ ||
+   (groupname[namelen - 1] >= '0' && groupname[namelen - 1] <= '9'))
return (EINVAL);
 
TAILQ_FOREACH(ifgl, >if_groups, ifgl_next)
Index: sbin/ifconfig/ifconfig.c
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.432
diff -u -p -r1.432 ifconfig.c
--- sbin/ifconfig/ifconfig.c16 Jan 2021 17:44:29 -  1.432
+++ sbin/ifconfig/ifconfig.c9 Feb 2021 21:02:50 -
@@ -1634,16 +1634,20 @@ void
 setifgroup(const char *group_name, int dummy)
 {
struct ifgroupreq ifgr;
+   size_t namelen;
 
memset(, 0, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, ifname, IFNAMSIZ);
 
-   if (group_name[0] &&
-   isdigit((unsigned char)group_name[strlen(group_name) - 1]))
+   namelen = strlen(group_name);
+   if (namelen == 0)
+   errx(1, "setifgroup: group name empty");
+   if (namelen >= IFNAMSIZ)
+   errx(1, "setifgroup: group name too long");
+   if (isdigit((unsigned char)group_name[namelen - 1]))
errx(1, "setifgroup: group names may not end in a digit");
 
-   if (strlcpy(ifgr.ifgr_group, group_name, IFNAMSIZ) >= IFNAMSIZ)
-   errx(1, "setifgroup: group name too long");
+   strlcpy(ifgr.ifgr_group, group_name, IFNAMSIZ);
if (ioctl(sock, SIOCAIFGROUP, (caddr_t)) == -1) {
if (errno != EEXIST)
err(1," SIOCAIFGROUP");
@@ -1658,10 +1662,6 @@ unsetifgroup(const char *group_name, int
 
memset(, 0, sizeof(ifgr));
strlcpy(ifgr.ifgr_name, ifname, IFNAMSIZ);
-
-   if (group_name[0] &&
-   isdigit((unsigned char)group_name[strlen(group_name) - 1]))
-   errx(1, "unsetifgroup: group names may not end in a digit");
 
if (strlcpy(ifgr.ifgr_group, group_name, IFNAMSIZ) >= IFNAMSIZ)
errx(1, "unsetifgroup: group name too long");



Re: PF_UNIX sockets unlocking

2021-02-09 Thread Alexander Bluhm
On Tue, Feb 09, 2021 at 09:14:44PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Feb 09, 2021 at 05:20:33PM +0100, Alexander Bluhm wrote:
> > > +extern struct rwlock unp_lock;
> > 
> > Could you put this declaration into a header file?
> 
> I see no such sense to do this. `unp_lock' is not system wide populated
> like netlock, so sys/systm.h is not the place for. Also sys/unpcb.h
> contains only internal UNIX sockets declarations and not included by
> kern/uipc_socket2.c. The same case for hypothetical `rop_lock' for
> PF_ROUTE sockets. 

Declarations in multiple C files have the risk of getting inconsistent.
If you change something in one file and have the declaration in a
header, the compiler produces an error.  If you have an extern in
another C file, this disables the safety belt.

Feel free to commit as is, then it is easier to find a suitable
header file.

bluhm



Re: PF_UNIX sockets unlocking

2021-02-09 Thread Alexander Bluhm
On Thu, Feb 04, 2021 at 03:07:44PM +0300, Vitaliy Makkoveev wrote:
> I hope someone else will try it and gives positive feedback which allow
> to push it forward.

OK bluhm@

> +extern struct rwlock unp_lock;

Could you put this declaration into a header file?



Re: diff: tcp ack improvement

2021-02-08 Thread Alexander Bluhm
On Mon, Feb 08, 2021 at 07:03:59PM +0100, Jan Klemkow wrote:
> On Mon, Feb 08, 2021 at 03:42:54PM +0100, Alexander Bluhm wrote:
> > On Wed, Feb 03, 2021 at 11:20:04AM +0100, Claudio Jeker wrote:
> > > Just commit it. OK claudio@
> > > If people see problems we can back it out again.
> > 
> > This has huge impact on TCP performance.
> > 
> > http://bluhm.genua.de/perform/results/2021-02-07T00%3A01%3A40Z/perform.html
> > 
> > For a single TCP connection between two OpenBSD boxes, throughput
> > drops by 77% from 3.1 GBit/sec to 710 MBit/sec.  But with 100
> > parallel connections the overall throughput increases by 5%.
> 
> For single connections our kernel is limited to send out 4 max TCP
> segments.  I don't see that, because I just measured with 10 and 30
> streams in parallel.
> 
> FreeBSD disabled it 20 years ago.
> https://github.com/freebsd/freebsd-src/commit/d912c694ee00de5ea0f46743295a0fc603cab562

TCP_MAXBURST was added together with SACK in rev 1.12 of tcp_output.c
to our code base.


revision 1.12
date: 1998/11/17 19:23:02;  author: provos;  state: Exp;  lines: +239 -14;
NewReno, SACK and FACK support for TCP, adapted from code for BSDI
by Hari Balakrishnan (h...@lcs.mit.edu), Tom Henderson (t...@cs.berkeley.edu)
and Venkat Padmanabhan (padma...@cs.berkeley.edu) as part of the
Daedalus research group at the University of California,
(http://daedalus.cs.berkeley.edu). [I was able to do this on time spent
at the Center for Information Technology Integration (citi.umich.edu)]


> I would suggest to remove the whole feature.

Sending 4 segments per call to tcp_output() cannot scale.  Bandwidth
increases, window size grows, but segment size has been 1500 for decades.

With this diff on top of jan's delayed ACK behavior I get 4.1 GBit/sec
over a single TCP connection using tcpbench -S100.  Before both
changes it was only 3.0.

I recommend removing TCP_MAXBURST like FreeBSD did.

bluhm

> Index: tcp.h
> ===
> RCS file: /cvs/src/sys/netinet/tcp.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 tcp.h
> --- tcp.h 10 Jul 2019 18:45:31 -  1.21
> +++ tcp.h 8 Feb 2021 17:52:38 -
> @@ -105,8 +105,6 @@ struct tcphdr {
>  #define  TCP_MAX_SACK3   /* Max # SACKs sent in any segment */
>  #define  TCP_SACKHOLE_LIMIT 128  /* Max # SACK holes per connection */
>  
> -#define  TCP_MAXBURST4   /* Max # packets after leaving Fast 
> Rxmit */
> -
>  /*
>   * Default maximum segment size for TCP.
>   * With an IP MSS of 576, this is 536,
> Index: tcp_output.c
> ===
> RCS file: /cvs/src/sys/netinet/tcp_output.c,v
> retrieving revision 1.129
> diff -u -p -r1.129 tcp_output.c
> --- tcp_output.c  25 Jan 2021 03:40:46 -  1.129
> +++ tcp_output.c  8 Feb 2021 17:53:07 -
> @@ -203,7 +203,6 @@ tcp_output(struct tcpcb *tp)
>   int idle, sendalot = 0;
>   int i, sack_rxmit = 0;
>   struct sackhole *p;
> - int maxburst = TCP_MAXBURST;
>  #ifdef TCP_SIGNATURE
>   unsigned int sigoff;
>  #endif /* TCP_SIGNATURE */
> @@ -1120,7 +1119,7 @@ out:
>   tp->last_ack_sent = tp->rcv_nxt;
>   tp->t_flags &= ~TF_ACKNOW;
>   TCP_TIMER_DISARM(tp, TCPT_DELACK);
> - if (sendalot && --maxburst)
> + if (sendalot)
>   goto again;
>   return (0);
>  }



Re: diff: tcp ack improvement

2021-02-08 Thread Alexander Bluhm
On Wed, Feb 03, 2021 at 11:20:04AM +0100, Claudio Jeker wrote:
> Just commit it. OK claudio@
> If people see problems we can back it out again.

This has huge impact on TCP performance.

http://bluhm.genua.de/perform/results/2021-02-07T00%3A01%3A40Z/perform.html

For a single TCP connection between two OpenBSD boxes, throughput
drops by 77% from 3.1 GBit/sec to 710 MBit/sec.  But with 100
parallel connections the overall throughput increases by 5%.

Sending from Linux to OpenBSD increases by 72% from 3.5 GBit/sec
to 6.0 GBit/sec.

Socket splicing from Linux to Linux via OpenBSD with 10 parallel
TCP connections increases by 25% from 1.8 GBit/sec
to 2.3 GBit/sec.

It seems that sending fewer ACK packets improves performance if the
machine is limited by the CPU.  But the TCP stack of OpenBSD is
sending 77% slower if it does not receive enough ACKs.
This has no impact if we are measuring the combined throughput of
many parallel connections.  The Linux packet sending algorithm looks
unaffected by our more delayed acks.

I think 77% slower between two OpenBSDs is not acceptable.
Do others see that, too?

bluhm



Re: ifg_refcnt atomic operation

2021-02-06 Thread Alexander Bluhm
On Sat, Feb 06, 2021 at 04:44:08PM +0100, Alexander Bluhm wrote:
> Or should we go with a self crafted ++ -- refcounting?

This would look like this, also fine with me.  kasserts are also in
refcnt_... API.

ok?

bluhm

Index: net/if.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.626
diff -u -p -r1.626 if.c
--- net/if.c1 Feb 2021 07:43:33 -   1.626
+++ net/if.c6 Feb 2021 16:26:51 -
@@ -2601,7 +2601,7 @@ if_creategroup(const char *groupname)
return (NULL);
 
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
-   ifg->ifg_refcnt = 0;
+   ifg->ifg_refcnt = 1;
ifg->ifg_carp_demoted = 0;
TAILQ_INIT(>ifg_members);
 #if NPF > 0
@@ -2642,13 +2642,18 @@ if_addgroup(struct ifnet *ifp, const cha
if (!strcmp(ifg->ifg_group, groupname))
break;
 
-   if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
+   if (ifg == NULL)
+   ifg = if_creategroup(groupname);
+   else
+   ifg->ifg_refcnt++;
+   KASSERT(ifg->ifg_refcnt != 0);
+
+   if (ifg == NULL) {
free(ifgl, M_TEMP, sizeof(*ifgl));
free(ifgm, M_TEMP, sizeof(*ifgm));
return (ENOMEM);
}
 
-   ifg->ifg_refcnt++;
ifgl->ifgl_group = ifg;
ifgm->ifgm_ifp = ifp;
 
@@ -2692,6 +2697,7 @@ if_delgroup(struct ifnet *ifp, const cha
pfi_group_change(groupname);
 #endif
 
+   KASSERT(ifgl->ifgl_group->ifg_refcnt != 0);
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(_head, ifgl->ifgl_group, ifg_next);
 #if NPF > 0
Index: netinet/ip_carp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.351
diff -u -p -r1.351 ip_carp.c
--- netinet/ip_carp.c   21 Jan 2021 13:18:07 -  1.351
+++ netinet/ip_carp.c   6 Feb 2021 14:45:34 -
@@ -786,10 +786,7 @@ carp_sysctl(int *name, u_int namelen, vo
 void
 carpattach(int n)
 {
-   struct ifg_group*ifg;
-
-   if ((ifg = if_creategroup("carp")) != NULL)
-   ifg->ifg_refcnt++;  /* keep around even if empty */
+   if_creategroup("carp");  /* keep around even if empty */
if_clone_attach(_cloner);
carpcounters = counters_alloc(carps_ncounters);
 }



Re: ifg_refcnt atomic operation

2021-02-06 Thread Alexander Bluhm
On Sat, Feb 06, 2021 at 05:58:35PM +0300, Vitaliy Makkoveev wrote:
> I'm not sure it should be atomic. It seems groups require their own
> lock and this lock should be held while we perform if_addgroup() and
> if_delgroup().

I also think that atomic refcounting is not needed here.  But it
does no harm as adding interface groups is not performance critical.

Question is if we want to use refcnt_... API even if it does more
than required.  Or should we go with a self crafted ++ -- refcounting?

I think the API provides a nicer interface.

bluhm

> However if_creategroup() should set `ifg_refcnt' to 1
> and carp(4) should not touch groups internals.
> 
> > 
> > Index: net/if.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
> > retrieving revision 1.626
> > diff -u -p -r1.626 if.c
> > --- net/if.c1 Feb 2021 07:43:33 -   1.626
> > +++ net/if.c6 Feb 2021 12:16:20 -
> > @@ -2601,7 +2601,7 @@ if_creategroup(const char *groupname)
> > return (NULL);
> > 
> > strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
> > -   ifg->ifg_refcnt = 0;
> > +   refcnt_init(>ifg_refcnt);
> > ifg->ifg_carp_demoted = 0;
> > TAILQ_INIT(>ifg_members);
> > #if NPF > 0
> > @@ -2642,13 +2642,17 @@ if_addgroup(struct ifnet *ifp, const cha
> > if (!strcmp(ifg->ifg_group, groupname))
> > break;
> > 
> > -   if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
> > +   if (ifg == NULL)
> > +   ifg = if_creategroup(groupname);
> > +   else
> > +   refcnt_take(>ifg_refcnt);
> > +
> > +   if (ifg == NULL) {
> > free(ifgl, M_TEMP, sizeof(*ifgl));
> > free(ifgm, M_TEMP, sizeof(*ifgm));
> > return (ENOMEM);
> > }
> > 
> > -   ifg->ifg_refcnt++;
> > ifgl->ifgl_group = ifg;
> > ifgm->ifgm_ifp = ifp;
> > 
> > @@ -2692,7 +2696,7 @@ if_delgroup(struct ifnet *ifp, const cha
> > pfi_group_change(groupname);
> > #endif
> > 
> > -   if (--ifgl->ifgl_group->ifg_refcnt == 0) {
> > +   if (refcnt_rele(>ifgl_group->ifg_refcnt)) {
> > TAILQ_REMOVE(_head, ifgl->ifgl_group, ifg_next);
> > #if NPF > 0
> > pfi_detach_ifgroup(ifgl->ifgl_group);
> > Index: net/if_var.h
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
> > retrieving revision 1.112
> > diff -u -p -r1.112 if_var.h
> > --- net/if_var.h29 Jul 2020 12:09:31 -  1.112
> > +++ net/if_var.h6 Feb 2021 12:11:35 -
> > @@ -263,7 +263,7 @@ struct ifmaddr {
> > 
> > struct ifg_group {
> > char ifg_group[IFNAMSIZ];
> > -   u_intifg_refcnt;
> > +   struct refcntifg_refcnt;
> > caddr_t  ifg_pf_kif;
> > int  ifg_carp_demoted;
> > TAILQ_HEAD(, ifg_member) ifg_members;
> > Index: netinet/ip_carp.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_carp.c,v
> > retrieving revision 1.351
> > diff -u -p -r1.351 ip_carp.c
> > --- netinet/ip_carp.c   21 Jan 2021 13:18:07 -  1.351
> > +++ netinet/ip_carp.c   6 Feb 2021 12:11:35 -
> > @@ -786,10 +786,7 @@ carp_sysctl(int *name, u_int namelen, vo
> > void
> > carpattach(int n)
> > {
> > -   struct ifg_group*ifg;
> > -
> > -   if ((ifg = if_creategroup("carp")) != NULL)
> > -   ifg->ifg_refcnt++;  /* keep around even if empty */
> > +   if_creategroup("carp");  /* keep around even if empty */
> > if_clone_attach(_cloner);
> > carpcounters = counters_alloc(carps_ncounters);
> > }
> > 
> 



Re: broadcast simplex checksum

2021-02-06 Thread Alexander Bluhm
On Sat, Feb 06, 2021 at 08:26:35PM +1300, richard.n.proc...@gmail.com wrote:
> I'm ok with your latest diff as-is. I prefer a slightly different 
> direction, see below, but not enough to object. 

I have committed my diff as is.  It is better if you express your
arguments yourself in the comment when you change the condition.

I would review such a diff benevolently.

bluhm



Re: ifg_refcnt atomic operation

2021-02-06 Thread Alexander Bluhm
On Sat, Feb 06, 2021 at 05:04:20PM +1000, David Gwynne wrote:
> refcnt_init starts counting at 1, while the existing code starts at 0. Do
> the crashes stop because we never fully release all the references and
> never free it now?

You are absolutely right.  I was too optimistic.

Correct diff is below.  It does not fix anything.  Only advantage
is that carp does not access interface group internals.

bluhm

Index: net/if.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.626
diff -u -p -r1.626 if.c
--- net/if.c1 Feb 2021 07:43:33 -   1.626
+++ net/if.c6 Feb 2021 12:16:20 -
@@ -2601,7 +2601,7 @@ if_creategroup(const char *groupname)
return (NULL);
 
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
-   ifg->ifg_refcnt = 0;
+   refcnt_init(>ifg_refcnt);
ifg->ifg_carp_demoted = 0;
TAILQ_INIT(>ifg_members);
 #if NPF > 0
@@ -2642,13 +2642,17 @@ if_addgroup(struct ifnet *ifp, const cha
if (!strcmp(ifg->ifg_group, groupname))
break;
 
-   if (ifg == NULL && (ifg = if_creategroup(groupname)) == NULL) {
+   if (ifg == NULL)
+   ifg = if_creategroup(groupname);
+   else
+   refcnt_take(>ifg_refcnt);
+
+   if (ifg == NULL) {
free(ifgl, M_TEMP, sizeof(*ifgl));
free(ifgm, M_TEMP, sizeof(*ifgm));
return (ENOMEM);
}
 
-   ifg->ifg_refcnt++;
ifgl->ifgl_group = ifg;
ifgm->ifgm_ifp = ifp;
 
@@ -2692,7 +2696,7 @@ if_delgroup(struct ifnet *ifp, const cha
pfi_group_change(groupname);
 #endif
 
-   if (--ifgl->ifgl_group->ifg_refcnt == 0) {
+   if (refcnt_rele(>ifgl_group->ifg_refcnt)) {
TAILQ_REMOVE(_head, ifgl->ifgl_group, ifg_next);
 #if NPF > 0
pfi_detach_ifgroup(ifgl->ifgl_group);
Index: net/if_var.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_var.h,v
retrieving revision 1.112
diff -u -p -r1.112 if_var.h
--- net/if_var.h29 Jul 2020 12:09:31 -  1.112
+++ net/if_var.h6 Feb 2021 12:11:35 -
@@ -263,7 +263,7 @@ struct ifmaddr {
 
 struct ifg_group {
char ifg_group[IFNAMSIZ];
-   u_intifg_refcnt;
+   struct refcntifg_refcnt;
caddr_t  ifg_pf_kif;
int  ifg_carp_demoted;
TAILQ_HEAD(, ifg_member) ifg_members;
Index: netinet/ip_carp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.351
diff -u -p -r1.351 ip_carp.c
--- netinet/ip_carp.c   21 Jan 2021 13:18:07 -  1.351
+++ netinet/ip_carp.c   6 Feb 2021 12:11:35 -
@@ -786,10 +786,7 @@ carp_sysctl(int *name, u_int namelen, vo
 void
 carpattach(int n)
 {
-   struct ifg_group*ifg;
-
-   if ((ifg = if_creategroup("carp")) != NULL)
-   ifg->ifg_refcnt++;  /* keep around even if empty */
+   if_creategroup("carp");  /* keep around even if empty */
if_clone_attach(_cloner);
carpcounters = counters_alloc(carps_ncounters);
 }



ifg_refcnt atomic operation

2021-02-05 Thread Alexander Bluhm
Hi,

When I replace the ++ and -- of ifg_refcnt with an atomic operation,
it fixes this syzkaller panic.

https://syzkaller.appspot.com/bug?id=54e16dc5bce6929e14b42e2f1379f1c18f62be43

Without the fix "syz-execprog -repeat=0 -procs=8 repro-pfi.syz"
crashes my vmm in a few seconds.  With the diff I cannot reproduce
for several minutes.

ok?

bluhm

Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.626
diff -u -p -r1.626 if.c
--- net/if.c1 Feb 2021 07:43:33 -   1.626
+++ net/if.c6 Feb 2021 00:37:50 -
@@ -2601,7 +2601,7 @@ if_creategroup(const char *groupname)
return (NULL);
 
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
-   ifg->ifg_refcnt = 0;
+   refcnt_init(>ifg_refcnt);
ifg->ifg_carp_demoted = 0;
TAILQ_INIT(>ifg_members);
 #if NPF > 0
@@ -2648,7 +2648,7 @@ if_addgroup(struct ifnet *ifp, const cha
return (ENOMEM);
}
 
-   ifg->ifg_refcnt++;
+   refcnt_take(>ifg_refcnt);
ifgl->ifgl_group = ifg;
ifgm->ifgm_ifp = ifp;
 
@@ -2692,7 +2692,7 @@ if_delgroup(struct ifnet *ifp, const cha
pfi_group_change(groupname);
 #endif
 
-   if (--ifgl->ifgl_group->ifg_refcnt == 0) {
+   if (refcnt_rele(>ifgl_group->ifg_refcnt)) {
TAILQ_REMOVE(_head, ifgl->ifgl_group, ifg_next);
 #if NPF > 0
pfi_detach_ifgroup(ifgl->ifgl_group);
Index: net/if_var.h
===
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.112
diff -u -p -r1.112 if_var.h
--- net/if_var.h29 Jul 2020 12:09:31 -  1.112
+++ net/if_var.h6 Feb 2021 00:38:23 -
@@ -263,7 +263,7 @@ struct ifmaddr {
 
 struct ifg_group {
char ifg_group[IFNAMSIZ];
-   u_intifg_refcnt;
+   struct refcntifg_refcnt;
caddr_t  ifg_pf_kif;
int  ifg_carp_demoted;
TAILQ_HEAD(, ifg_member) ifg_members;
Index: netinet/ip_carp.c
===
RCS file: /cvs/src/sys/netinet/ip_carp.c,v
retrieving revision 1.351
diff -u -p -r1.351 ip_carp.c
--- netinet/ip_carp.c   21 Jan 2021 13:18:07 -  1.351
+++ netinet/ip_carp.c   6 Feb 2021 00:39:14 -
@@ -789,7 +789,7 @@ carpattach(int n)
struct ifg_group*ifg;
 
if ((ifg = if_creategroup("carp")) != NULL)
-   ifg->ifg_refcnt++;  /* keep around even if empty */
+   refcnt_take(>ifg_refcnt);  /* keep around even if empty */
if_clone_attach(_cloner);
carpcounters = counters_alloc(carps_ncounters);
 }



Re: broadcast simplex checksum

2021-02-05 Thread Alexander Bluhm
On Mon, Feb 01, 2021 at 02:04:51AM +0100, Alexander Bluhm wrote:
> On Mon, Feb 01, 2021 at 08:08:56AM +1300, Richard Procter wrote:
> > - Might the rule disabling checksum offload for broadcasts on IFF_SIMPLEX
> >   interfaces be weakened to disable checksum offload for all broadcast
> >   packets instead?
> > - what motivates the new '!m->m_pkthdr.pf.routed' term?

I think the best way to answer your questions, is to add a comment
to both if conditions.

ok?

bluhm

Index: net/if_ethersubr.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.268
diff -u -p -r1.268 if_ethersubr.c
--- net/if_ethersubr.c  4 Jan 2021 21:21:41 -   1.268
+++ net/if_ethersubr.c  5 Feb 2021 09:40:46 -
@@ -227,7 +227,11 @@ ether_resolve(struct ifnet *ifp, struct 
return (error);
eh->ether_type = htons(ETHERTYPE_IP);
 
-   /* If broadcasting on a simplex interface, loopback a copy */
+   /*
+* If broadcasting on a simplex interface, loopback a copy.
+* The checksum must be calculated in software.  Keep the
+* contition in sync with in_ifcap_cksum().
+*/
if (ISSET(m->m_flags, M_BCAST) &&
ISSET(ifp->if_flags, IFF_SIMPLEX) &&
!m->m_pkthdr.pf.routed) {
Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.363
diff -u -p -r1.363 ip_output.c
--- netinet/ip_output.c 2 Feb 2021 17:47:42 -   1.363
+++ netinet/ip_output.c 5 Feb 2021 09:38:09 -
@@ -79,6 +79,7 @@ void ip_mloopback(struct ifnet *, struct
 static __inline u_int16_t __attribute__((__unused__))
 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
 void in_delayed_cksum(struct mbuf *);
+int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 
 #ifdef IPSEC
 struct tdb *
@@ -458,8 +459,7 @@ sendit:
 */
if (ntohs(ip->ip_len) <= mtu) {
ip->ip_sum = 0;
-   if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -719,9 +719,7 @@ ip_fragment(struct mbuf *m, struct ifnet
m->m_pkthdr.ph_ifidx = 0;
mhip->ip_off = htons((u_int16_t)mhip->ip_off);
mhip->ip_sum = 0;
-   if ((ifp != NULL) &&
-   (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -740,9 +738,7 @@ ip_fragment(struct mbuf *m, struct ifnet
ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
ip->ip_off |= htons(IP_MF);
ip->ip_sum = 0;
-   if ((ifp != NULL) &&
-   (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -1855,15 +1851,15 @@ in_proto_cksum_out(struct mbuf *m, struc
}
 
if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
-   if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
-   ip->ip_hl != 5 || ifp->if_bridgeidx != 0) {
+   if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) ||
+   ip->ip_hl != 5) {
tcpstat_inc(tcps_outswcsum);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
}
} else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
-   if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
-   ip->ip_hl != 5 || ifp->if_bridgeidx != 0) {
+   if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) ||
+   ip->ip_hl != 5) {
udpstat_inc(udps_outswcsum);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */
@@ -1872,4 +1868,23 @@ in_proto_cksum_out(struct mbuf *m, struc
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */
}
+}
+
+int
+in_ifcap_cks

Re: reference trpt(8) in getsockopt(2)

2021-02-04 Thread Alexander Bluhm
On Thu, Feb 04, 2021 at 12:34:22PM +0100, Claudio Jeker wrote:
> Also should we export the tcp_debug buffer via sysctl so that
> trpt can run without kern.allowkmem?

I have set kern.allowkmem on my development and testing machines.
But of course a sysctl that always works would make this often
forgotten feature more popular.

bluhm



reference trpt(8) in getsockopt(2)

2021-02-04 Thread Alexander Bluhm
Hi,

I always forget the name of trpt(8).  It should be referenced in the
SO_DEBUG section of getsockopt(2).

ok?

bluhm

Index: lib/libc/sys/getsockopt.2
===
RCS file: /data/mirror/openbsd/cvs/src/lib/libc/sys/getsockopt.2,v
retrieving revision 1.56
diff -u -p -r1.56 getsockopt.2
--- lib/libc/sys/getsockopt.2   22 Jul 2019 15:34:07 -  1.56
+++ lib/libc/sys/getsockopt.2   4 Feb 2021 10:52:16 -
@@ -182,6 +182,8 @@ get the protocol of the socket (get only
 .Pp
 .Dv SO_DEBUG
 enables debugging in the underlying protocol modules.
+Transliterate the protocol trace with
+.Xr trpt 8 .
 .Dv SO_REUSEADDR
 indicates that the rules used in validating addresses supplied in a
 .Xr bind 2



tcpbench -D

2021-02-04 Thread Alexander Bluhm
Hi,

I would like to analyse tcpbench(1) TCP connections.  So I copied
the nc -D socket debug option.

ok?

bluhm

Index: usr.bin/tcpbench/tcpbench.1
===
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/tcpbench/tcpbench.1,v
retrieving revision 1.28
diff -u -p -r1.28 tcpbench.1
--- usr.bin/tcpbench/tcpbench.1 4 May 2020 12:13:09 -   1.28
+++ usr.bin/tcpbench/tcpbench.1 3 Feb 2021 22:52:07 -
@@ -24,7 +24,7 @@
 .Nm
 .Fl l
 .Nm
-.Op Fl 46RUuv
+.Op Fl 46DRUuv
 .Op Fl B Ar buf
 .Op Fl b Ar sourceaddr
 .Op Fl k Ar kvars
@@ -39,7 +39,7 @@
 .Nm
 .Bk -words
 .Fl s
-.Op Fl 46Uuv
+.Op Fl 46DUuv
 .Op Fl B Ar buf
 .Op Fl k Ar kvars
 .Op Fl p Ar port
@@ -111,6 +111,8 @@ stream.
 .It Fl b Ar sourceaddr
 Specify the IP address to send the packets from,
 which is useful on machines with multiple interfaces.
+.It Fl D
+Enable debugging on the socket.
 .It Fl k Ar kvars
 Specify one or more kernel variables to monitor; multiple variables must be
 separated with commas.
Index: usr.bin/tcpbench/tcpbench.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/tcpbench/tcpbench.c,v
retrieving revision 1.63
diff -u -p -r1.63 tcpbench.c
--- usr.bin/tcpbench/tcpbench.c 4 May 2020 12:13:09 -   1.63
+++ usr.bin/tcpbench/tcpbench.c 4 Feb 2021 10:37:42 -
@@ -65,6 +65,7 @@
 
 /* Our tcpbench globals */
 struct {
+   int   Dflag;/* Socket debug */
int   Sflag;/* Socket buffer size */
u_int rflag;/* Report rate (ms) */
int   sflag;/* True if server */
@@ -198,10 +199,10 @@ usage(void)
 {
fprintf(stderr,
"usage: tcpbench -l\n"
-   "   tcpbench [-46RUuv] [-B buf] [-b sourceaddr] [-k kvars] [-n 
connections]\n"
+   "   tcpbench [-46DRUuv] [-B buf] [-b sourceaddr] [-k kvars] [-n 
connections]\n"
"[-p port] [-r interval] [-S space] [-T 
toskeyword]\n"
"[-t secs] [-V rtable] hostname\n"
-   "   tcpbench -s [-46Uuv] [-B buf] [-k kvars] [-p port] [-r 
interval]\n"
+   "   tcpbench -s [-46DUuv] [-B buf] [-k kvars] [-p port] [-r 
interval]\n"
"[-S space] [-T toskeyword] [-V rtable] 
[hostname]\n");
exit(1);
 }
@@ -857,6 +858,11 @@ server_init(struct addrinfo *aitop)
warn("socket");
continue;
}
+   if (ptb->Dflag) {
+   if (setsockopt(sock, SOL_SOCKET, SO_DEBUG,
+   >Dflag, sizeof(ptb->Dflag)))
+   err(1, "setsockopt SO_DEBUG");
+   }
if (ptb->Tflag != -1 && ai->ai_family == AF_INET) {
if (setsockopt(sock, IPPROTO_IP, IP_TOS,
>Tflag, sizeof(ptb->Tflag)))
@@ -970,6 +976,11 @@ client_init(struct addrinfo *aitop, int 
warn("socket");
continue;
}
+   if (ptb->Dflag) {
+   if (setsockopt(sock, SOL_SOCKET, SO_DEBUG,
+   >Dflag, sizeof(ptb->Dflag)))
+   err(1, "setsockopt SO_DEBUG");
+   }
if (aib != NULL) {
saddr_ntop(aib->ai_addr, aib->ai_addrlen,
tmp, sizeof(tmp));
@@ -1138,6 +1149,7 @@ main(int argc, char **argv)
setvbuf(stdout, NULL, _IOLBF, 0);
ptb = 
ptb->dummybuf_len = 0;
+   ptb->Dflag = 0;
ptb->Sflag = ptb->sflag = ptb->vflag = ptb->Rflag = ptb->Uflag = 0;
ptb->kvmh  = NULL;
ptb->kvars = NULL;
@@ -1147,7 +1159,8 @@ main(int argc, char **argv)
aib = NULL;
secs = 0;
 
-   while ((ch = getopt(argc, argv, "46b:B:hlk:n:p:Rr:sS:t:T:uUvV:")) != 
-1) {
+   while ((ch = getopt(argc, argv, "46b:B:Dhlk:n:p:Rr:sS:t:T:uUvV:"))
+   != -1) {
switch (ch) {
case '4':
family = PF_INET;
@@ -1157,6 +1170,9 @@ main(int argc, char **argv)
break;
case 'b':
srcbind = optarg;
+   break;
+   case 'D':
+   ptb->Dflag = 1;
break;
case 'l':
list_kvars();



Re: Remove obsolete vnode opv declarations

2021-02-01 Thread Alexander Bluhm
On Mon, Feb 01, 2021 at 02:14:24PM +, Visa Hankala wrote:
> This removes obsolete vnode operation vector declarations from
> header . The functions were removed in r1.28 of vfs_init.c.
> 
> OK?

OK bluhm@

> Index: sys/systm.h
> ===
> RCS file: src/sys/sys/systm.h,v
> retrieving revision 1.150
> diff -u -p -r1.150 systm.h
> --- sys/systm.h   27 Dec 2020 11:38:35 -  1.150
> +++ sys/systm.h   1 Feb 2021 14:06:50 -
> @@ -152,11 +152,6 @@ int  enoioctl(void);
>  int  enxio(void);
>  int  eopnotsupp(void *);
>  
> -struct vnodeopv_desc;
> -void vfs_opv_init_explicit(struct vnodeopv_desc *);
> -void vfs_opv_init_default(struct vnodeopv_desc *);
> -void vfs_op_init(void);
> -
>  int  seltrue(dev_t dev, int which, struct proc *);
>  int  selfalse(dev_t dev, int which, struct proc *);
>  void *hashinit(int, int, int, u_long *);



Re: broadcast simplex checksum

2021-01-31 Thread Alexander Bluhm
On Mon, Feb 01, 2021 at 08:08:56AM +1300, Richard Procter wrote:
> - Might the rule disabling checksum offload for broadcasts on IFF_SIMPLEX 
>   interfaces be weakened to disable checksum offload for all broadcast 
>   packets instead?

I just copied the condition from ether_resolve():

/* If broadcasting on a simplex interface, loopback a copy */
if (ISSET(m->m_flags, M_BCAST) &&
ISSET(ifp->if_flags, IFF_SIMPLEX) &&
!m->m_pkthdr.pf.routed) {
struct mbuf *mcopy;

/* XXX Should we input an unencrypted IPsec packet? */
mcopy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
if (mcopy != NULL)
if_input_local(ifp, mcopy, af);
}

The bug is triggered when the packet is processed by if_input_local().

> This simplifies the logic, and shouldn't impact performance as

Yes, performance does not matter.  I just wanted to show that the
code is necessary for simplex interfaces.

> (I wonder if IFF_SIMPLEX is a relic of another age and deserves to be removed
> at some point;

Then remove the IFF_SIMPLEX in ether_resolve() and in in_ifcap_cksum()
simultaneously.  Currently I think it makes sense in both places.

> - what motivates the new '!m->m_pkthdr.pf.routed' term?

Just copied from ether_resolve().  It looks strange; I don't know
why it is there.  I can leave it out in my check if you think this
is clearer.

bluhm



IPsec IPv6 Path MTU discovery

2021-01-29 Thread Alexander Bluhm
Hi,

This fixes path MTU discovery for ESP tunneled in IPv6.  In IPv6
we always want short TCP segments or fragments encapsulated in ESP
instead of fragmented ESP packets.

ok?

bluhm

Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.361
diff -u -p -r1.361 ip_output.c
--- netinet/ip_output.c 16 Jan 2021 07:58:12 -  1.361
+++ netinet/ip_output.c 29 Jan 2021 16:05:32 -
@@ -625,6 +625,9 @@ ip_output_ipsec_send(struct tdb *tdb, st
m_freem(m);
return EMSGSIZE;
}
+   /* propagate IP_DF for v4-over-v6 */
+   if (ip_mtudisc && ip->ip_off & htons(IP_DF))
+   SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
 
/*
 * Clear these -- they'll be set in the recursive invocation
Index: netinet6/ip6_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.249
diff -u -p -r1.249 ip6_output.c
--- netinet6/ip6_output.c   11 Jan 2021 13:28:54 -  1.249
+++ netinet6/ip6_output.c   28 Jan 2021 18:53:24 -
@@ -681,6 +681,10 @@ reroute:
else
dontfrag = 0;
if (dontfrag && tlen > ifp->if_mtu) {   /* case 2-b */
+#ifdef IPSEC
+   if (ip_mtudisc)
+   ipsec_adjust_mtu(m, mtu);
+#endif
error = EMSGSIZE;
goto bad;
}
@@ -2854,6 +2858,9 @@ ip6_output_ipsec_send(struct tdb *tdb, s
m_freem(m);
return EMSGSIZE;
}
+   /* propagate don't fragment for v6-over-v6 */
+   if (ip_mtudisc)
+   SET(m->m_pkthdr.csum_flags, M_IPV6_DF_OUT);
 
/*
 * Clear these -- they'll be set in the recursive invocation



Re: have pf_route bail out if it resolves a route with RTF_LOCAL set

2021-01-29 Thread Alexander Bluhm
On Fri, Jan 29, 2021 at 10:53:09AM +1000, David Gwynne wrote:
> > Are you sure that it does not break any use case?  I have seen so
> > much strange stuff.  What is the advantage?
> 
> The current behaviour is lucky at best, and quirky at worst. Usually I
> would agree with you that breaking stuff isn't great, even if it's
> wrong, but while I'm changing how route-to etc works I think it's
> a good chance to clean up some of these edge cases.

I have been developing products based on pf edge cases for 15
years.  I don't know which dragons are in our codebase.  This should
not prevent improvements in OpenBSD.  I am just asking not to remove
anything just because we currently don't know, how it can be used.

Changing syntax like address@interface can easily be adapted.  Slight
semantic changes may cause debugging sessions on productive customer
systems.  And then we might need a quick new solution for a previously
existing feature.  So please be careful.

bluhm



Re: systat(1): improve parsing of delay value

2021-01-28 Thread Alexander Bluhm
On Thu, Jan 28, 2021 at 09:06:51PM +0100, Martijn van Duren wrote:
> Thanks for checking. Should be fixed below.

OK bluhm@

> Index: main.c
> ===
> RCS file: /cvs/src/usr.bin/systat/main.c,v
> retrieving revision 1.72
> diff -u -p -r1.72 main.c
> --- main.c12 Jan 2020 20:51:08 -  1.72
> +++ main.c28 Jan 2021 20:05:30 -
> @@ -40,9 +40,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -73,6 +75,7 @@ charuloadbuf[TIMEPOS];
>  
>  int  ucount(void);
>  void usage(void);
> +double strtodnum(const char *, double, double, const char **);
>  
>  /* command prompt */
>  
> @@ -323,9 +326,14 @@ void
>  cmd_delay(const char *buf)
>  {
>   double del;
> - del = atof(buf);
> + const char *errstr;
>  
> - if (del > 0) {
> + if (buf[0] == '\0')
> + return;
> + del = strtodnum(buf, 0, UINT32_MAX / 100, );
> + if (errstr != NULL)
> + error("s: \"%s\": delay value is %s", buf, errstr);
> + else {
>   udelay = (useconds_t)(del * 100);
>   gotsig_alarm = 1;
>   naptime = del;
> @@ -414,6 +422,48 @@ gethz(void)
>   hz = cinf.hz;
>  }
>  
> +#define  INVALID 1
> +#define  TOOSMALL2
> +#define  TOOLARGE3
> +
> +double
> +strtodnum(const char *nptr, double minval, double maxval, const char 
> **errstrp)
> +{
> + double d = 0;
> + int error = 0;
> + char *ep;
> + struct errval {
> + const char *errstr;
> + int err;
> + } ev[4] = {
> + { NULL, 0 },
> + { "invalid",EINVAL },
> + { "too small",  ERANGE },
> + { "too large",  ERANGE },
> + };
> +
> + ev[0].err = errno;
> + errno = 0;
> + if (minval > maxval) {
> + error = INVALID;
> + } else {
> + d = strtod(nptr, );
> + if (nptr == ep || *ep != '\0')
> + error = INVALID;
> + else if ((d == -HUGE_VAL && errno == ERANGE) || d < minval)
> + error = TOOSMALL;
> + else if ((d == HUGE_VAL && errno == ERANGE) || d > maxval)
> + error = TOOLARGE;
> + }
> + if (errstrp != NULL)
> + *errstrp = ev[error].errstr;
> + errno = ev[error].err;
> + if (error)
> + d = 0;
> +
> + return (d);
> +}
> +
>  int
>  main(int argc, char *argv[])
>  {
> @@ -421,7 +471,7 @@ main(int argc, char *argv[])
>   const char *errstr;
>   extern char *optarg;
>   extern int optind;
> - double delay = 5;
> + double delay = 5, del;
>  
>   char *viewstr = NULL;
>  
> @@ -475,9 +525,11 @@ main(int argc, char *argv[])
>   nflag = 1;
>   break;
>   case 's':
> - delay = atof(optarg);
> - if (delay <= 0)
> - delay = 5;
> + delay = strtodnum(optarg, 0, UINT32_MAX / 100,
> + );
> + if (errstr != NULL)
> + errx(1, "-s \"%s\": delay value is %s", optarg,
> + errstr);
>   break;
>   case 'w':
>   rawwidth = strtonum(optarg, 1, MAX_LINE_BUF-1, );
> @@ -497,16 +549,16 @@ main(int argc, char *argv[])
>   argv += optind;
>  
>   if (argc == 1) {
> - double del = atof(argv[0]);
> - if (del == 0)
> + del = strtodnum(argv[0], 0, UINT32_MAX / 100, );
> + if (errstr != NULL)
>   viewstr = argv[0];
>   else
>   delay = del;
>   } else if (argc == 2) {
>   viewstr = argv[0];
> - delay = atof(argv[1]);
> - if (delay <= 0)
> - delay = 5;
> + delay = strtodnum(argv[1], 0, UINT32_MAX / 100, );
> + if (errstr != NULL)
> + errx(1, "\"%s\": delay value is %s", argv[1], errstr);
>   }
>  
>   udelay = (useconds_t)(delay * 100.0);
> 



Re: pf: route-to IPs, not interfaces

2021-01-28 Thread Alexander Bluhm
On Thu, Jan 28, 2021 at 10:54:30PM +1000, David Gwynne wrote:
> this is the diff from the "pf route-to issues" thread, but on its own.

I think we should make progress and commit something.

>   the caveat is that route-to becomes tied to pass rules that create
>   state, like rdr-to and nat-to.

Maybe we should mention that in the man page.  But let's discuss
that separately.

>   that's a separate change for broader discussion.

Yes.  No more topics on top of uncommitted diffs.

> ok?

OK bluhm@

> Index: sbin/pfctl/parse.y
> ===
> RCS file: /cvs/src/sbin/pfctl/parse.y,v
> retrieving revision 1.708
> diff -u -p -r1.708 parse.y
> --- sbin/pfctl/parse.y12 Jan 2021 00:10:34 -  1.708
> +++ sbin/pfctl/parse.y28 Jan 2021 11:45:58 -
> @@ -276,6 +276,7 @@ struct filter_opts {
>   struct redirspec nat;
>   struct redirspec rdr;
>   struct redirspec rroute;
> + u_int8_t rt;
>  
>   /* scrub opts */
>   int  nodf;
> @@ -284,15 +285,6 @@ struct filter_opts {
>   int  randomid;
>   int  max_mss;
>  
> - /* route opts */
> - struct {
> - struct node_host*host;
> - u_int8_t rt;
> - u_int8_t pool_opts;
> - sa_family_t  af;
> - struct pf_poolhashkey   *key;
> - }route;
> -
>   struct {
>   u_int32_t   limit;
>   u_int32_t   seconds;
> @@ -372,7 +364,7 @@ void   expand_label(char *, size_t, cons
>   struct node_port *, u_int8_t);
>  int   expand_divertspec(struct pf_rule *, struct divertspec *);
>  int   collapse_redirspec(struct pf_pool *, struct pf_rule *,
> - struct redirspec *rs, u_int8_t);
> + struct redirspec *rs, int);
>  int   apply_redirspec(struct pf_pool *, struct pf_rule *,
>   struct redirspec *, int, struct node_port *);
>  void  expand_rule(struct pf_rule *, int, struct node_if *,
> @@ -518,7 +510,6 @@ int   parseport(char *, struct range *r, i
>  %typeipspec xhost host dynaddr host_list
>  %typetable_host_list tablespec
>  %typeredir_host_list redirspec
> -%typeroute_host route_host_list routespec
>  %type  os xos os_list
>  %typeportspec port_list port_item
>  %type uids uid_list uid_item
> @@ -975,7 +966,7 @@ anchorrule: ANCHOR anchorname dir quick
>   YYERROR;
>   }
>  
> - if ($9.route.rt) {
> + if ($9.rt) {
>   yyerror("cannot specify route handling "
>   "on anchors");
>   YYERROR;
> @@ -1843,37 +1834,13 @@ pfrule: action dir logquick interface 
>   decide_address_family($7.src.host, );
>   decide_address_family($7.dst.host, );
>  
> - if ($8.route.rt) {
> - if (!r.direction) {
> + if ($8.rt) {
> + if ($8.rt != PF_DUPTO && !r.direction) {
>   yyerror("direction must be explicit "
>   "with rules that specify routing");
>   YYERROR;
>   }
> - r.rt = $8.route.rt;
> - r.route.opts = $8.route.pool_opts;
> - if ($8.route.key != NULL)
> - memcpy(, $8.route.key,
> - sizeof(struct pf_poolhashkey));
> - }
> - if (r.rt) {
> - decide_address_family($8.route.host, );
> - if ((r.route.opts & PF_POOL_TYPEMASK) ==
> - PF_POOL_NONE && ($8.route.host->next != 
> NULL ||
> - $8.route.host->addr.type == PF_ADDR_TABLE ||
> - DYNIF_MULTIADDR($8.route.host->addr)))
> - r.route.opts |= PF_POOL_ROUNDROBIN;
> - if ($8.route.host->next != NULL) {
> - if (!PF_POOL_DYNTYPE(r.route.opts)) {
> - yyerror("address pool option "
> - "not supported by type");
> - YYERROR;
> - }
> 

Re: have pf_route bail out if it resolves a route with RTF_LOCAL set

2021-01-28 Thread Alexander Bluhm
On Thu, Jan 28, 2021 at 09:57:33AM +1000, David Gwynne wrote:
> calling if_output with a route to a local IP is confusing, and I'm not
> sure it makes sense anyway.
> 
> this treats an RTF_LOCAL route like an invalid route and drops the
> packet.
> 
> ok?

Are you sure that it does not break any use case?  I have seen so
much strange stuff.  What is the advantage?

bluhm

> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1104
> diff -u -p -r1.1104 pf.c
> --- pf.c  27 Jan 2021 23:53:35 -  1.1104
> +++ pf.c  27 Jan 2021 23:55:49 -
> @@ -6054,7 +6054,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   }
>  
>   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
> - if (!rtisvalid(rt)) {
> + if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_LOCAL)) {
>   if (r->rt != PF_DUPTO) {
>   pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
>   0, pd->af, s->rule.ptr, pd->rdomain);
> @@ -6213,7 +6213,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   if (IN6_IS_SCOPE_EMBED(>sin6_addr))
>   dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
>   rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
> - if (!rtisvalid(rt)) {
> + if (!rtisvalid(rt) || ISSET(rt->rt_flags, RTF_LOCAL)) {
>   if (r->rt != PF_DUPTO) {
>   pf_send_icmp(m0, ICMP6_DST_UNREACH,
>   ICMP6_DST_UNREACH_NOROUTE, 0,



Re: if pf_route{,6} route isn't valid, generate an icmp error

2021-01-27 Thread Alexander Bluhm
On Wed, Jan 27, 2021 at 04:41:01PM +1000, David Gwynne wrote:
> at the moment if the route is invalid, we drop the packet. this
> generates an icmp error.
> 
> ok?

OK bluhm@

> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1103
> diff -u -p -r1.1103 pf.c
> --- pf.c  27 Jan 2021 04:46:21 -  1.1103
> +++ pf.c  27 Jan 2021 06:38:12 -
> @@ -6055,6 +6055,10 @@ pf_route(struct pf_pdesc *pd, struct pf_
>  
>   rt = rtalloc(sintosa(dst), RT_RESOLVE, rtableid);
>   if (!rtisvalid(rt)) {
> + if (r->rt != PF_DUPTO) {
> + pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
> + 0, pd->af, s->rule.ptr, pd->rdomain);
> + }
>   ipstat_inc(ips_noroute);
>   goto bad;
>   }
> @@ -6210,6 +6214,11 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index);
>   rt = rtalloc(sin6tosa(dst), RT_RESOLVE, rtableid);
>   if (!rtisvalid(rt)) {
> + if (r->rt != PF_DUPTO) {
> + pf_send_icmp(m0, ICMP6_DST_UNREACH,
> + ICMP6_DST_UNREACH_NOROUTE, 0,
> + pd->af, s->rule.ptr, pd->rdomain);
> + }
>   ip6stat_inc(ip6s_noroute);
>   goto bad;
>   }



Re: don't run dup-to generated packets through pf_test in pf_route{,6}

2021-01-26 Thread Alexander Bluhm
On Wed, Jan 27, 2021 at 11:31:27AM +1000, David Gwynne wrote:
> this was discussed as part of the big route-to issues thread. i think
> it's easy to break out and handle separately now.
> 
> the diff does what the subject line says. it seems to work as expected
> for me. i don't see weird state issues anymore when i dup my ssh session
> out over a tunnel interface.
> 
> sasha suggested setting PF_TAG_GENERATED on the duplicated packet, but i
> didn't set it in this diff. the reason is that i can't see
> PF_TAG_GENERATED get cleared anywhere. this means that if you dup-to a
> host over a tunnel (eg, gif, gre, etc), the encapsulated packet still
> has that tag, which means pf doesn't run against the encapsulated
> packet.
> 
> ok?

OK bluhm@

> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1101
> diff -u -p -r1.1101 pf.c
> --- pf.c  19 Jan 2021 22:22:23 -  1.1101
> +++ pf.c  27 Jan 2021 01:21:24 -
> @@ -6039,7 +6041,7 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   if (ifp == NULL)
>   goto bad;
>  
> - if (pd->kif->pfik_ifp != ifp) {
> + if (r->rt != PF_DUPTO && pd->kif->pfik_ifp != ifp) {
>   if (pf_test(AF_INET, PF_OUT, ifp, ) != PF_PASS)
>   goto bad;
>   else if (m0 == NULL)
> @@ -6194,7 +6195,7 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   if (ifp == NULL)
>   goto bad;
>  
> - if (pd->kif->pfik_ifp != ifp) {
> + if (r->rt != PF_DUPTO && pd->kif->pfik_ifp != ifp) {
>   if (pf_test(AF_INET6, PF_OUT, ifp, ) != PF_PASS)
>   goto bad;
>   else if (m0 == NULL)



Re: tiny pf_route{,6} tweak

2021-01-26 Thread Alexander Bluhm
On Wed, Jan 27, 2021 at 11:14:51AM +1000, David Gwynne wrote:
> On Wed, Jan 27, 2021 at 11:13:12AM +1000, David Gwynne wrote:
> > when pf_route (and pf_route6) are supposed to handle forwarding the
> > packet (ie, for route-to or reply-to rules), they take the mbuf
> > away from the calling code path. this is done by clearing the mbuf
> > pointer in the pf_pdesc struct. it doesn't do this for dup-to rules
> > though.
> > 
> > at the moment pf_route clears that pointer on the way out, but it could
> > take the mbuf away up front in the same place that it already checks if
> > it's a dup-to rule or not.
> > 
> > it's a small change. i've bumped up the number of lines of context so
> > it's easier to read too.
> > 
> > ok?
> 
> sigh. here's the diff with the extra context.

Usually you want all information including the mbuf in pd.
Here it does not matter, pd->m is not accessed.

I see no advantage in changing, so I would leave it as it is.
If others think new code is better, I have no objections.

bluhm

> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1101
> diff -u -p -U8 -r1.1101 pf.c
> --- pf.c  19 Jan 2021 22:22:23 -  1.1101
> +++ pf.c  27 Jan 2021 01:10:52 -
> @@ -5983,16 +5983,17 @@ pf_route(struct pf_pdesc *pd, struct pf_
>  
>   if (r->rt == PF_DUPTO) {
>   if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
>   return;
>   } else {
>   if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
>   return;
>   m0 = pd->m;
> + pd->m = NULL;
>   }
>  
>   if (m0->m_len < sizeof(struct ip)) {
>   DPFPRINTF(LOG_ERR,
>   "%s: m0->m_len < sizeof(struct ip)", __func__);
>   goto bad;
>   }
>  
> @@ -6103,18 +6104,16 @@ pf_route(struct pf_pdesc *pd, struct pf_
>   else
>   m_freem(m0);
>   }
>  
>   if (error == 0)
>   ipstat_inc(ips_fragmented);
>  
>  done:
> - if (r->rt != PF_DUPTO)
> - pd->m = NULL;
>   rtfree(rt);
>   return;
>  
>  bad:
>   m_freem(m0);
>   goto done;
>  }
>  
> @@ -6141,16 +6140,17 @@ pf_route6(struct pf_pdesc *pd, struct pf
>  
>   if (r->rt == PF_DUPTO) {
>   if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
>   return;
>   } else {
>   if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
>   return;
>   m0 = pd->m;
> + pd->m = NULL;
>   }
>  
>   if (m0->m_len < sizeof(struct ip6_hdr)) {
>   DPFPRINTF(LOG_ERR,
>   "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
>   goto bad;
>   }
>   ip6 = mtod(m0, struct ip6_hdr *);
> @@ -6232,18 +6232,16 @@ pf_route6(struct pf_pdesc *pd, struct pf
>   ip6stat_inc(ip6s_cantfrag);
>   if (r->rt != PF_DUPTO)
>   pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
>   ifp->if_mtu, pd->af, r, pd->rdomain);
>   goto bad;
>   }
>  
>  done:
> - if (r->rt != PF_DUPTO)
> - pd->m = NULL;
>   rtfree(rt);
>   return;
>  
>  bad:
>   m_freem(m0);
>   goto done;
>  }
>  #endif /* INET6 */



Re: systat(1): improve parsing of delay value

2021-01-26 Thread Alexander Bluhm
On Mon, Jan 25, 2021 at 11:17:04AM +0100, Martijn van Duren wrote:
>   if (argc == 1) {
> - double del = atof(argv[0]);
> - if (del == 0)
> + delay = strtodnum(argv[0], 0, UINT32_MAX / 100, );
> + if (errstr != NULL)
>   viewstr = argv[0];
> - else
> - delay = del;

You need the else.  delay should only be changed if parsing was successful.

>   } else if (argc == 2) {
>   viewstr = argv[0];
> - delay = atof(argv[1]);
> - if (delay <= 0)
> - delay = 5;
> + delay = strtodnum(optarg, 0, UINT32_MAX / 100, );

This should be argv[1] instead of optarg.

> + if (errstr != NULL)
> + errx(1, "-s \"%s\": delay value is %s", optarg, errstr);
>   }

The -s in the error message is wrong.  Here delay was passed as argument.

bluhm



Re: pf route-to issues

2021-01-26 Thread Alexander Bluhm
On Tue, Jan 26, 2021 at 10:39:30AM +1000, David Gwynne wrote:
> > But what about dup-to?  The packet is duplicated for both directions.
> > I guess the main use case for dup-to is implementing a monitor port.
> > There you have to pass packets stateless, otherwise it would not
> > work anyway.  The strange semantics is not related to this diff.
> 
> are you saying i should skip pf_test for all dup-to generated packets?

I am not sure.

When we have an in dup-to rule, the incoming packets in request
direction are dupped and tested with the out ruleset.  The reply
packets for this state are also dupped, but not tested when they
leave the dup interface.

This is inconsistent and cannot work statefully.  Stateful filtering
with dupped packets does not make sense anyway.  The only working
config is "pass out on dup-interface no state".

Do we think this rule should be required?

1. No packet should leave an interface without a rule.

if (pd->dir == PF_IN || s->rt == PF_DUPTO) {
if (pf_test(AF_INET, PF_OUT, ifp, ) != PF_PASS)

2. The config says we want a monitor port.  We risk that the
   original packet and the dupped packet match the same rule.
   Stateful filtering cannot work, we do not expect reply packets
   for the dups.

if (pd->dir == PF_IN && s->rt != PF_DUPTO) {
if (pf_test(AF_INET, PF_OUT, ifp, ) != PF_PASS)

3. Some sort of problem was there before, but different.  Don't
   address it now.

Maybe 2 has less impact for the users and is easy to understand.
We should document that in the man page.

> > We are reaching a state where this diff can go in.  I just started
> > a regress run with it.  OK bluhm@
> 
> hopefully i fixed the pfctl error messages up so the regress tests arent
> too unhappy.

pf forward and pf fragment tests pass.  They include route-to and
reply-to rules.  I have no test for dup-to.  Regress pfctl fails,
but I think dlg@ has a diff for that.

bluhm



Re: pf route-to issues

2021-01-25 Thread Alexander Bluhm
On Fri, Jan 22, 2021 at 06:07:59PM +1000, David Gwynne wrote:
> --- sys/conf/GENERIC  30 Sep 2020 14:51:17 -  1.273
> +++ sys/conf/GENERIC  22 Jan 2021 07:33:30 -
> @@ -82,6 +82,7 @@ pseudo-device   msts1   # MSTS line discipl
>  pseudo-deviceendrun  1   # EndRun line discipline
>  pseudo-devicevnd 4   # vnode disk devices
>  pseudo-deviceksyms   1   # kernel symbols device
> +pseudo-devicekstat
>  #pseudo-device   dt  # Dynamic Tracer
>  
>  # clonable devices

This is an unrelated chunk.

> +pf_route(struct pf_pdesc *pd, struct pf_state *s)
...
> + if (pd->dir == PF_IN) {
>   if (pf_test(AF_INET, PF_OUT, ifp, ) != PF_PASS)

Yes, this is the correct logic.  When the packet comes in, pf
overrides forwarding, tests the out rules, and sends it.  For
outgoing packets on out route-to rules we have already tested the
rules.  It also works for reply-to the other way around.

But what about dup-to?  The packet is duplicated for both directions.
I guess the main use case for dup-to is implementing a monitor port.
There you have to pass packets stateless, otherwise it would not
work anyway.  The strange semantics is not related to this diff.

We are reaching a state where this diff can go in.  I just started
a regress run with it.  OK bluhm@



Re: [External] : Re: pf route-to issues

2021-01-25 Thread Alexander Bluhm
Hi,

Some personal thoughts.  I am happy when pf route-to gets simpler.
Especially I have never understood what this address@interface
syntax is used for.

I cannot estimate what configuration is used by our customers in
many installations.  Simple syntax change address@interface ->
address of next hop should be no problem.  Slight semantic changes
have to be dealt with.  Current packet flow is complicated and may
be inspired by old NAT behavior.  As long as it becomes more sane and
easier to understand, we should change it.

But I don't like artificial restrictions.  We don't know all use
cases.  reply-to and route-to could be used for both in and out
rules.  I have used them for strange divert-to on bridge setups.
It should stay that way.

It would be nice to keep state-less route-to.  I have found a special
case with that in the code of our product.  But it looks like dead
code, so I would not object to remove state-less route-to for now.

bluhm



IPv6 IPsec path MTU discovery

2021-01-20 Thread Alexander Bluhm
Hi,

This part of the IPv6 IPsec path MTU discovery is for the case where
the router is between the tunnel endpoints.  Basically it handles
ICMP6 packets for ESP.  Originally this diff came from markus@.

ok?

bluhm

Index: netinet/ip_ipsp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.196
diff -u -p -r1.196 ip_ipsp.h
--- netinet/ip_ipsp.h   5 Nov 2020 19:28:28 -   1.196
+++ netinet/ip_ipsp.h   20 Jan 2021 16:47:58 -
@@ -610,6 +610,7 @@ voidesp4_ctlinput(int, struct sockaddr 
 
 #ifdef INET6
 intesp6_input(struct mbuf **, int *, int, int);
+void   esp6_ctlinput(int, struct sockaddr *, u_int, void *);
 #endif /* INET6 */
 
 /* XF_IPCOMP */
Index: netinet/ipsec_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.173
diff -u -p -r1.173 ipsec_input.c
--- netinet/ipsec_input.c   1 Sep 2020 01:53:34 -   1.173
+++ netinet/ipsec_input.c   20 Jan 2021 18:04:00 -
@@ -66,6 +66,7 @@
 #ifdef INET6
 #include 
 #include 
+#include 
 #include 
 #include 
 #endif /* INET6 */
@@ -82,8 +83,6 @@
 
 #include "bpfilter.h"
 
-void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
-
 #ifdef ENCDEBUG
 #define DPRINTF(x) if (encdebug) printf x
 #else
@@ -148,6 +147,9 @@ int esp_sysctl_espstat(void *, size_t *,
 int ah_sysctl_ahstat(void *, size_t *, void *);
 int ipcomp_sysctl_ipcompstat(void *, size_t *, void *);
 int ipsec_sysctl_ipsecstat(void *, size_t *, void *);
+void ipsec_set_mtu(struct tdb *, u_int32_t, const char *);
+void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
+void ipsec6_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
 
 void
 ipsec_init(void)
@@ -946,18 +948,42 @@ ipcomp4_input(struct mbuf **mp, int *off
 }
 
 void
+ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu, const char *msg)
+{
+   ssize_t adjust;
+   uint64_t timeout;
+
+   timeout = gettime() + ip_mtudisc_timeout;
+   /* Walk the chain backwards to the first tdb */
+   NET_ASSERT_LOCKED();
+   for (; tdbp; tdbp = tdbp->tdb_inext) {
+   if (tdbp->tdb_flags & TDBF_INVALID ||
+   (adjust = ipsec_hdrsz(tdbp)) == -1)
+   return;
+
+   mtu -= adjust;
+
+   /* Store adjusted MTU in tdb */
+   tdbp->tdb_mtu = mtu;
+   tdbp->tdb_mtutimeout = timeout;
+   DPRINTF(("%s: %s: spi %08x mtu %d adjust %ld timeout %llu\n",
+   __func__, msg, ntohl(tdbp->tdb_spi), tdbp->tdb_mtu, adjust,
+   timeout));
+   }
+}
+
+void
 ipsec_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
 void *v, int proto)
 {
struct ip *ip = v;
 
-   if (cmd == PRC_MSGSIZE && ip && ip_mtudisc && ip->ip_v == 4) {
+   if (cmd == PRC_MSGSIZE && ip_mtudisc && ip && ip->ip_v == 4) {
struct tdb *tdbp;
struct sockaddr_in dst;
struct icmp *icp;
int hlen = ip->ip_hl << 2;
u_int32_t spi, mtu;
-   ssize_t adjust;
 
/* Find the right MTU. */
icp = (struct icmp *)((caddr_t) ip -
@@ -971,7 +997,7 @@ ipsec_common_ctlinput(u_int rdomain, int
if (mtu < 296)
return;
 
-   memset(, 0, sizeof(struct sockaddr_in));
+   memset(, 0, sizeof(dst));
dst.sin_family = AF_INET;
dst.sin_len = sizeof(struct sockaddr_in);
dst.sin_addr.s_addr = ip->ip_dst.s_addr;
@@ -980,28 +1006,72 @@ ipsec_common_ctlinput(u_int rdomain, int
 
tdbp = gettdb_rev(rdomain, spi, (union sockaddr_union *),
proto);
-   if (tdbp == NULL || tdbp->tdb_flags & TDBF_INVALID)
-   return;
+   if (tdbp != NULL && (tdbp->tdb_flags & TDBF_INVALID) == 0)
+   ipsec_set_mtu(tdbp, mtu, __func__);
+   }
+}
 
-   /* Walk the chain backwards to the first tdb */
-   NET_ASSERT_LOCKED();
-   for (; tdbp; tdbp = tdbp->tdb_inext) {
-   if (tdbp->tdb_flags & TDBF_INVALID ||
-   (adjust = ipsec_hdrsz(tdbp)) == -1)
-   return;
+#ifdef INET6
+void
+ipsec6_common_ctlinput(u_int rdomain, int cmd, struct sockaddr *sa,
+void *v, int proto)
+{
+   struct ip6ctlparam *ip6cp = v;
+
+   if (cmd == PRC_MSGSIZE && ip_mtudisc && ip6cp && ip6cp->ip6c_icmp6) {
+   struct tdb *tdbp;
+   struct sockaddr_in6 dst;
+   struct icmp6_hdr *icmp6;
+   struct mbuf *m;
+   u_int32_t spi, mtu;
+   int off;
+
+   /* Find the right MTU. */
+   icmp6 = 

broadcast simplex checksum

2021-01-19 Thread Alexander Bluhm
Hi,

Simplex interfaces reinject broadcast packets back into the IP
stack.  As this is a software feature, no hardware checksumming
occurs.  So local broadcast packets are dropped with wrong checksum
if the underlying hardware supports checksumming.

Do software checksumming in ip_output() if the copy of a broadcast
packet will be delivered locally.  Put the logic into a separate
in_ifcap_cksum() function.

Found by regress/sys/kern/sosplice/loop which fails on some machines.

ok?

bluhm

Index: netinet/ip_output.c
===
RCS file: /mount/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.361
diff -u -p -r1.361 ip_output.c
--- netinet/ip_output.c 16 Jan 2021 07:58:12 -  1.361
+++ netinet/ip_output.c 20 Jan 2021 00:27:12 -
@@ -79,6 +79,7 @@ void ip_mloopback(struct ifnet *, struct
 static __inline u_int16_t __attribute__((__unused__))
 in_cksum_phdr(u_int32_t, u_int32_t, u_int32_t);
 void in_delayed_cksum(struct mbuf *);
+int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 
 #ifdef IPSEC
 struct tdb *
@@ -458,8 +459,7 @@ sendit:
 */
if (ntohs(ip->ip_len) <= mtu) {
ip->ip_sum = 0;
-   if ((ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -716,9 +716,7 @@ ip_fragment(struct mbuf *m, struct ifnet
m->m_pkthdr.ph_ifidx = 0;
mhip->ip_off = htons((u_int16_t)mhip->ip_off);
mhip->ip_sum = 0;
-   if ((ifp != NULL) &&
-   (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -737,9 +735,7 @@ ip_fragment(struct mbuf *m, struct ifnet
ip->ip_len = htons((u_int16_t)m->m_pkthdr.len);
ip->ip_off |= htons(IP_MF);
ip->ip_sum = 0;
-   if ((ifp != NULL) &&
-   (ifp->if_capabilities & IFCAP_CSUM_IPv4) &&
-   (ifp->if_bridgeidx == 0))
+   if (in_ifcap_cksum(m, ifp, IFCAP_CSUM_IPv4))
m->m_pkthdr.csum_flags |= M_IPV4_CSUM_OUT;
else {
ipstat_inc(ips_outswcsum);
@@ -1849,15 +1845,15 @@ in_proto_cksum_out(struct mbuf *m, struc
}
 
if (m->m_pkthdr.csum_flags & M_TCP_CSUM_OUT) {
-   if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_TCPv4) ||
-   ip->ip_hl != 5 || ifp->if_bridgeidx != 0) {
+   if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_TCPv4) ||
+   ip->ip_hl != 5) {
tcpstat_inc(tcps_outswcsum);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_TCP_CSUM_OUT; /* Clear */
}
} else if (m->m_pkthdr.csum_flags & M_UDP_CSUM_OUT) {
-   if (!ifp || !(ifp->if_capabilities & IFCAP_CSUM_UDPv4) ||
-   ip->ip_hl != 5 || ifp->if_bridgeidx != 0) {
+   if (!in_ifcap_cksum(m, ifp, IFCAP_CSUM_UDPv4) ||
+   ip->ip_hl != 5) {
udpstat_inc(udps_outswcsum);
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_UDP_CSUM_OUT; /* Clear */
@@ -1866,4 +1862,19 @@ in_proto_cksum_out(struct mbuf *m, struc
in_delayed_cksum(m);
m->m_pkthdr.csum_flags &= ~M_ICMP_CSUM_OUT; /* Clear */
}
+}
+
+int
+in_ifcap_cksum(struct mbuf *m, struct ifnet *ifp, int ifcap)
+{
+   if ((ifp == NULL) ||
+   !ISSET(ifp->if_capabilities, ifcap) ||
+   (ifp->if_bridgeidx != 0))
+   return (0);
+   /* Simplex interface sends packet back without hardware cksum. */
+   if (ISSET(m->m_flags, M_BCAST) &&
+   ISSET(ifp->if_flags, IFF_SIMPLEX) &&
+   !m->m_pkthdr.pf.routed)
+   return (0);
+   return (1);
 }



tcpdump pflog af and rewritten addresses

2021-01-18 Thread Alexander Bluhm
Hi,

tcpdump pflog with addresses rewritten by rdr-to, nat-to, or af-to
is broken.

1. Fix address family of the packet in af-to rules:

before:
19:26:37.620926 169.254.0.14 > 169.254.0.14: icmp: echo request
19:26:37.620946 bad-ip6-version 4
19:26:37.620963 fc00::23 > fc00::24: icmp6: echo request
19:26:37.620977 bad-ip-version 6

after:
19:26:29.606966 169.254.0.14 > 169.254.0.14: icmp: echo request
19:26:29.606990 169.254.0.14 > 169.254.0.14: icmp: echo request
19:26:29.607006 fc00::23 > fc00::24: icmp6: echo request
19:26:29.607019 fc00::24 > fc00::23: icmp6: type-#0

The type-#0 is still buggy, but it is a step in the right direction.

2. Print the addresses that were rewritten if called with -ev:

This is rdr-to.  Note that "orig src" is the modified address.

before:
19:32:34.843807 rule 2.regress.19/(match) [uid 0, pid 37810] pass out on lo11: 
[orig src 169.254.0.22:59443, dst 169.254.0.12:9] 169.254.0.12.42793 > 
169.254.0.12.9: [bad udp cksum 0900! -> 152f] udp 4 (ttl 64, id 11544, len 32, 
bad ip cksum c! -> f9a0)

after:
19:32:06.794193 rule 2.regress.19/(match) [uid 0, pid 37810] pass out on lo11: 
[rewritten: src 169.254.0.22:52093, dst 169.254.0.12:9] 169.254.0.12.1885 > 
169.254.0.12.9: [bad udp cksum 27aa! -> e1ce] udp 4 (ttl 64, id 5110, len 32, 
bad ip cksum c! -> 12c3)

With af-to the old code confuses the address family:

before:
19:33:45.731267 rule 2.regress.22/(match) [uid 0, pid 37810] pass in on lo11: 
[orig src 252.0.0.0:64597, dst 252.0.0.0:9] [|ip6]

after:
19:34:05.388153 rule 2.regress.22/(match) [uid 0, pid 37810] pass in on lo11: 
[rewritten: src fc00::23:65141, dst fc00::24:9] 169.254.0.14.10027 > 
169.254.0.14.9: [udp sum ok] udp 4 (ttl 64, id 27481, len 32)

Basically the kernel uses the information from the packet description
and fills it into the fields in the pflog header.  While doing this,
it is trivial to figure out whether the packet has been rewritten.

ok?

bluhm

Index: sys/net/if_pflog.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pflog.c,v
retrieving revision 1.94
diff -u -p -r1.94 if_pflog.c
--- sys/net/if_pflog.c  13 Jan 2021 09:13:30 -  1.94
+++ sys/net/if_pflog.c  18 Jan 2021 18:16:04 -
@@ -231,11 +231,18 @@ pflog_packet(struct pf_pdesc *pd, u_int8
hdr.rule_uid = rm->cuid;
hdr.rule_pid = rm->cpid;
hdr.dir = pd->dir;
+   hdr.af = pd->af;
 
+   if (pd->af != pd->naf ||
+   pf_addr_compare(pd->src, >nsaddr, pd->naf) != 0 ||
+   pf_addr_compare(pd->dst, >ndaddr, pd->naf) != 0 ||
+   pd->osport != pd->nsport ||
+   pd->odport != pd->ndport) {
+   hdr.rewritten = 1;
+   }
+   hdr.naf = pd->naf;
pf_addrcpy(, >nsaddr, pd->naf);
pf_addrcpy(, >ndaddr, pd->naf);
-   hdr.af = pd->af;
-   hdr.naf = pd->naf;
hdr.sport = pd->nsport;
hdr.dport = pd->ndport;
 
Index: usr.sbin/tcpdump/print-pflog.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/tcpdump/print-pflog.c,v
retrieving revision 1.32
diff -u -p -r1.32 print-pflog.c
--- usr.sbin/tcpdump/print-pflog.c  22 Oct 2018 16:12:45 -  1.32
+++ usr.sbin/tcpdump/print-pflog.c  18 Jan 2021 17:41:05 -
@@ -64,7 +64,6 @@ pflog_if_print(u_char *user, const struc
const struct ip *ip;
const struct ip6_hdr *ip6;
const struct pfloghdr *hdr;
-   u_int8_t af;
 
ts_print(>ts);
 
@@ -153,34 +152,40 @@ pflog_if_print(u_char *user, const struc
if (vflag && hdr->rewritten) {
char buf[48];
 
-   if (inet_ntop(hdr->af, >saddr.v4, buf,
+   printf("[rewritten: ");
+   if (inet_ntop(hdr->naf, >saddr, buf,
sizeof(buf)) == NULL)
-   printf("[orig src ?, ");
+   printf("src ?");
else
-   printf("[orig src %s:%u, ", buf,
-   ntohs(hdr->sport));
-   if (inet_ntop(hdr->af, >daddr.v4, buf,
+   printf("src %s:%u", buf, ntohs(hdr->sport));
+   printf(", ");
+   if (inet_ntop(hdr->naf, >daddr, buf,
sizeof(buf)) == NULL)
-   printf("dst ?] ");
+   printf("dst ?");
else
-   printf("dst %s:%u] ", buf,
-   ntohs(hdr->dport));
+   printf("dst %s:%u", buf, ntohs(hdr->dport));
+   printf("] ");
}
}
-   af = hdr->naf;
length -= hdrlen;
-   if (af == AF_INET) {
+   switch(hdr->af) {
+   case AF_INET:
ip = (struct 

pflog remove translation

2021-01-18 Thread Alexander Bluhm
Hi,

pflog(4) tries to log the translated packet with rdr-to, nat-to,
and af-to applied.  Therefore it creates a mbuf chain on the stack
with a partial copy.  This might have been a good idea for plain
IPv4 10 years ago.  But now the concept fails miserably due to:

- IP options
- extension header
- NAT46 af-to
- fragmented mbuf chains

Even the plain IPv4 case does not work.  Usually the length checks
in pf_setup_pdesc() reject the faked mbuf on the stack and we goto
copy.  Some special cases call pf_translate() and it fails miserably.
syzkaller has found such a case.

https://syzkaller.appspot.com/bug?id=9415f8d0a6f176a629daaa910c431498f5e8aa99

After trying to understand this code for a week, I came to the
conclusion that it is broken beyond repair.  The funny part is,
when I remove pflog_mtap(), the pflog output in the cases tested
by regress/sys/net/pflog stays the same.

Remove this undead code to log the packet as it is.

ok?

bluhm

Index: net/if_pflog.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pflog.c,v
retrieving revision 1.94
diff -u -p -r1.94 if_pflog.c
--- net/if_pflog.c  13 Jan 2021 09:13:30 -  1.94
+++ net/if_pflog.c  18 Jan 2021 11:44:22 -
@@ -81,7 +81,6 @@ int   pflogoutput(struct ifnet *, struct m
 intpflogioctl(struct ifnet *, u_long, caddr_t);
 intpflog_clone_create(struct if_clone *, int);
 intpflog_clone_destroy(struct ifnet *);
-void   pflog_mtap(caddr_t, struct pfloghdr *, struct mbuf *);
 struct pflog_softc *pflog_getif(int);
 
 struct if_clonepflog_cloner =
@@ -242,144 +241,8 @@ pflog_packet(struct pf_pdesc *pd, u_int8
ifn->if_opackets++;
ifn->if_obytes += pd->m->m_pkthdr.len;
 
-   pflog_mtap(if_bpf, , pd->m);
+   bpf_mtap_hdr(if_bpf, , sizeof(hdr), pd->m, BPF_DIRECTION_OUT);
 #endif
 
return (0);
-}
-
-void
-pflog_mtap(caddr_t if_bpf, struct pfloghdr *pfloghdr, struct mbuf *m)
-{
-   struct mbuf  mhdr;
-   struct m_hdr mptr;
-   struct mbuf *mp;
-   u_char  *mdst;
-   int  afto, hlen, off;
-
-   struct pf_pdesc  pd;
-   struct pf_addr   osaddr, odaddr;
-   u_int16_tosport = 0, odport = 0;
-   u_int8_t proto = 0;
-
-   afto = pfloghdr->af != pfloghdr->naf;
-
-   /*
-* temporary mbuf will hold an ip/ip6 header and 8 bytes
-* of the protocol header
-*/
-   m_inithdr();
-   mhdr.m_len = 0; /* XXX not done in m_inithdr() */
-
-#ifdef INET6
-   /* offset for a new header */
-   if (afto && pfloghdr->af == AF_INET)
-   mhdr.m_data += sizeof(struct ip6_hdr) -
-   sizeof(struct ip);
-#endif /* INET6 */
-
-   mdst = mtod(, char *);
-   switch (pfloghdr->af) {
-   case AF_INET: {
-   struct ip   *h;
-
-   if (m->m_pkthdr.len < sizeof(*h))
-   goto copy;
-   m_copydata(m, 0, sizeof(*h), mdst);
-   h = (struct ip *)mdst;
-   hlen = h->ip_hl << 2;
-   if (hlen > sizeof(*h) && (m->m_pkthdr.len >= hlen))
-   m_copydata(m, sizeof(*h), hlen - sizeof(*h),
-   mdst + sizeof(*h));
-   break;
-   }
-#ifdef INET6
-   case AF_INET6: {
-   struct ip6_hdr  *h;
-
-   if (m->m_pkthdr.len < sizeof(*h))
-   goto copy;
-   hlen = sizeof(struct ip6_hdr);
-   m_copydata(m, 0, hlen, mdst);
-   h = (struct ip6_hdr *)mdst;
-   proto = h->ip6_nxt;
-   break;
-   }
-#endif /* INET6 */
-   default:
-   /* shouldn't happen ever :-) */
-   goto copy;
-   }
-
-   if (m->m_pkthdr.len < hlen + 8 && proto != IPPROTO_NONE)
-   goto copy;
-   else if (proto != IPPROTO_NONE) {
-   /* copy 8 bytes of the protocol header */
-   m_copydata(m, hlen, 8, mdst + hlen);
-   hlen += 8;
-   }
-
-   mhdr.m_len = hlen;
-   mhdr.m_pkthdr.len = hlen;
-
-   /* create a chain mhdr -> mptr, mptr->m_data = (m->m_data+hlen) */
-   mp = m_getptr(m, hlen, );
-   if (mp != NULL) {
-   mptr.mh_flags = 0;
-   mptr.mh_data = mp->m_data + off;
-   mptr.mh_len = mp->m_len - off;
-   mptr.mh_next = mp->m_next;
-
-   mhdr.m_next = (struct mbuf *)
-   }
-
-   /*
-* Rewrite addresses if needed. Reason pointer must be NULL to avoid
-* counting the packet here again.
-*/
-   if (pf_setup_pdesc(, pfloghdr->af, pfloghdr->dir, NULL,
-   , NULL) != PF_PASS)
-   goto copy;
-   pd.naf = pfloghdr->naf;
-
-   pf_addrcpy(, pd.src, pd.af);
-   pf_addrcpy(, 

Re: Add if_mreqn support to IP_MULTICAST_IF

2021-01-15 Thread Alexander Bluhm
On Fri, Jan 15, 2021 at 03:02:37PM +0100, Claudio Jeker wrote:
> On Fri, Jan 15, 2021 at 02:53:17PM +0100, Claudio Jeker wrote:
> > I forgot to add ip_mreqn support to IP_MULTICAST_IF and so the
> > IP_ADD_MEMBERSHIP change is not fixing all the issues I have.
> > 
> > Linux supports calling IP_MULTICAST_IF with a struct in_addr, a struct
> > ip_mreq, or a struct ip_mreqn. FreeBSD only does the first and last.
> > I followed the Linux way because doing that was not that hard. In the end
> > only the imr_ifindex field and the imr_address field need to be checked
> > and if the imr_ifindex is 0 then just use the old code. If the imr_ifindex
> > is set then use this for interface index and break early.
> > 
> > Any opinions about this?
> 
> This is the corresponding diff for ospfd.

With both diffs, kernel and ospfd, regress/usr.sbin/ospfd passes.

OK bluhm@ for both

> Additionally this initializes the imr_address field. It is not used but we
> should not send stack garbage to the kernel.
> 
> -- 
> :wq Claudio
> 
> Index: interface.c
> ===
> RCS file: /cvs/src/usr.sbin/ospfd/interface.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 interface.c
> --- interface.c   12 Jan 2021 09:11:09 -  1.85
> +++ interface.c   15 Jan 2021 14:00:39 -
> @@ -734,6 +734,7 @@ if_join_group(struct iface *iface, struc
>   return (0);
>  
>   mreq.imr_multiaddr.s_addr = addr->s_addr;
> + mreq.imr_address.s_addr = 0;
>   mreq.imr_ifindex = iface->ifindex;
>  
>   if (setsockopt(iface->fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
> @@ -782,6 +783,7 @@ if_leave_group(struct iface *iface, stru
>   }
>  
>   mreq.imr_multiaddr.s_addr = addr->s_addr;
> + mreq.imr_address.s_addr = 0;
>   mreq.imr_ifindex = iface->ifindex;
>  
>   if (setsockopt(iface->fd, IPPROTO_IP, IP_DROP_MEMBERSHIP,
> @@ -808,11 +810,15 @@ if_leave_group(struct iface *iface, stru
>  int
>  if_set_mcast(struct iface *iface)
>  {
> + struct ip_mreqn  mreq;
> +
>   switch (iface->type) {
>   case IF_TYPE_POINTOPOINT:
>   case IF_TYPE_BROADCAST:
> + memset(, 0, sizeof(mreq));
> + mreq.imr_ifindex = iface->ifindex;
>   if (setsockopt(iface->fd, IPPROTO_IP, IP_MULTICAST_IF,
> - >addr.s_addr, sizeof(iface->addr.s_addr)) == -1) {
> + , sizeof(mreq)) == -1) {
>   log_warn("if_set_mcast: error setting "
>   "IP_MULTICAST_IF, interface %s", iface->name);
>   return (-1);



Re: tell pfctl(8) route-to and reply-to accept next-hop only

2021-01-15 Thread Alexander Bluhm
On Tue, Jan 12, 2021 at 08:45:22PM +0100, Alexandr Nedvedicky wrote:
> I think bluhm@ and dlg@ have committed part of that change already.

I have only committed a refactoring change.  Next step in kernel
would be to remove the check in pf_find_state() and see what happens.

I was waiting for dlg@ to do it, but maybe he waited for me.

Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1098
diff -u -p -r1.1098 pf.c
--- net/pf.c14 Jan 2021 09:44:33 -  1.1098
+++ net/pf.c15 Jan 2021 16:46:42 -
@@ -1122,12 +1122,6 @@ pf_find_state(struct pf_pdesc *pd, struc
}
 
*state = s;
-   if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
-   ((s->rule.ptr->rt == PF_ROUTETO &&
-   s->rule.ptr->direction == PF_OUT) ||
-   (s->rule.ptr->rt == PF_REPLYTO &&
-   s->rule.ptr->direction == PF_IN)))
-   return (PF_PASS);
 
return (PF_MATCH);
 }

> the proposed diff updates pfctl(8) so parser will do 'a right thing',

Does it work without the kernel changes from dlg@ ?

> the diff also breaks existing regression tests. We can update
> them once, we will agree on proposed diff.

I have adapted my regress pf.conf and regress/sys/net/pf_forward
fails in the route-to test.  It worked with dlg@'s diff.  So your
standalone pfctl change does not seem to be sufficient.

bluhm

> 8<---8<---8<--8<
> diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
> index 2b3e62b1a7e..536aec3286b 100644
> --- a/sbin/pfctl/parse.y
> +++ b/sbin/pfctl/parse.y
> @@ -3745,23 +3745,13 @@ pool_opt  : BITMASK   {
>   ;
>  
>  route_host   : STRING{
> - /* try to find @if0 address specs */
> - if (strrchr($1, '@') != NULL) {
> - if (($$ = host($1, pf->opts)) == NULL)  {
> - yyerror("invalid host for route spec");
> - YYERROR;
> - }
> + if (($$ = next_hop($1, pf->opts)) == NULL)  {
> + /* error. "any" is handled elsewhere */
>   free($1);
> - } else {
> - $$ = calloc(1, sizeof(struct node_host));
> - if ($$ == NULL)
> - err(1, "route_host: calloc");
> - $$->ifname = $1;
> - $$->addr.type = PF_ADDR_NONE;
> - set_ipmask($$, 128);
> - $$->next = NULL;
> - $$->tail = $$;
> + yyerror("could not parse host specification");
> + YYERROR;
>   }
> + free($1);
>   }
>   | STRING '/' STRING {
>   char*buf;
> @@ -3769,7 +3759,7 @@ route_host  : STRING{
>   if (asprintf(, "%s/%s", $1, $3) == -1)
>   err(1, "host: asprintf");
>   free($1);
> - if (($$ = host(buf, pf->opts)) == NULL) {
> + if (($$ = next_hop(buf, pf->opts)) == NULL) {
>   /* error. "any" is handled elsewhere */
>   free(buf);
>   yyerror("could not parse host specification");
> @@ -3795,33 +3785,6 @@ route_host : STRING{
>   $$->next = NULL;
>   $$->tail = $$;
>   }
> - | dynaddr '/' NUMBER{
> - struct node_host*n;
> -
> - if ($3 < 0 || $3 > 128) {
> - yyerror("bit number too big");
> - YYERROR;
> - }
> - $$ = $1;
> - for (n = $1; n != NULL; n = n->next)
> - set_ipmask(n, $3);
> - }
> - | '(' STRING host ')'   {
> - struct node_host*n;
> -
> - $$ = $3;
> - /* XXX check masks, only full mask should be allowed */
> - for (n = $3; n != NULL; n = n->next) {
> - if ($$->ifname) {
> - yyerror("cannot specify interface twice 
> "
> - "in route spec");
> - YYERROR;
> - }
> - if (($$->ifname = strdup($2)) == NULL)
> -  

Re: pf af-to sysctl forwarding

2021-01-15 Thread Alexander Bluhm
On Fri, Jan 15, 2021 at 03:24:43PM +0100, Klemens Nanni wrote:
> Existing routers doing NAT64 for IPv6-only networks will require
> `net.inet.ip.forwarding=1' for NAT64 to work.

Actually you will need both of them.

When sending "IPv6 -> pf-router -> IPv4" you need ip forwarding as
pf translates the packet and then it is forwarded.

But you also want IPv4 packets from the internet return to your
local IPv6 network.  For that ip6 forwarding is needed.

> I'd say we should make that clear with a current.html entry.

I will do that.

> Either way, I think that diff makes sense.

My argument is, that with ip forwarding = 0 no forwarded IPv4
packet should leave your box.  ip6 forwarding should prevent
IPv6 packets.

Currently pf af-to forwards packets regardless of the sysctl settings.
This feels wrong.

bluhm



sysctl ip.forwarding 2

2021-01-15 Thread Alexander Bluhm
Hi,

As documented in sysctl(2) net.inet.ip.forwarding can be 2.

netinet/ip_output.c:448
if (ipsec_in_use && (flags & IP_FORWARDING) && (ipforwarding == 2) &&

Current input validation prevents this.  
# sysctl net.inet.ip.forwarding=2
sysctl: net.inet.ip.forwarding: Invalid argument

Also change bool check to integer comparison consistently.
ip6_forwarding misses the feature, but that is a different story.

ok?

bluhm

Index: netinet/ip_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.353
diff -u -p -r1.353 ip_input.c
--- netinet/ip_input.c  11 Jan 2021 13:28:53 -  1.353
+++ netinet/ip_input.c  15 Jan 2021 12:45:41 -
@@ -115,7 +115,7 @@ const struct sysctl_bounded_args ipctl_v
 #ifdef MROUTING
{ IPCTL_MRTPROTO, _mrtproto, 1, 0 },
 #endif
-   { IPCTL_FORWARDING, , 0, 1 },
+   { IPCTL_FORWARDING, , 0, 2 },
{ IPCTL_SENDREDIRECTS, , 0, 1 },
{ IPCTL_DEFTTL, _defttl, 0, 255 },
{ IPCTL_DIRECTEDBCAST, _directedbcast, 0, 1 },
@@ -1251,7 +1251,7 @@ ip_dooptions(struct mbuf *m, struct ifne
}
}
KERNEL_UNLOCK();
-   if (forward && ipforwarding) {
+   if (forward && ipforwarding > 0) {
ip_forward(m, ifp, NULL, 1);
return (1);
}



pf af-to sysctl forwarding

2021-01-15 Thread Alexander Bluhm
Hi,

sysctl net.inet.ip.forwarding is checked before ip_input() passes
the packet to ip_forward().  But with an af-to rule, pf(4) calls
ip_forward() directly.  I think we should check the sysctl also in
pf to get consistent behaviour.

ok?

bluhm

Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1098
diff -u -p -r1.1098 pf.c
--- net/pf.c14 Jan 2021 09:44:33 -  1.1098
+++ net/pf.c15 Jan 2021 11:08:31 -
@@ -7259,20 +7259,32 @@ done:
pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
switch (pd.naf) {
case AF_INET:
-   if (pd.dir == PF_IN)
+   if (pd.dir == PF_IN) {
+   if (ipforwarding == 0) {
+   ipstat_inc(ips_cantforward);
+   action = PF_DROP;
+   break;
+   }
ip_forward(pd.m, ifp, NULL, 1);
-   else
+   } else
ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
break;
case AF_INET6:
-   if (pd.dir == PF_IN)
+   if (pd.dir == PF_IN) {
+   if (ip6_forwarding == 0) {
+   ip6stat_inc(ip6s_cantforward);
+   action = PF_DROP;
+   break;
+   }
ip6_forward(pd.m, NULL, 1);
-   else
+   } else
ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
break;
}
-   pd.m = NULL;
-   action = PF_PASS;
+   if (action != PF_DROP) {
+   pd.m = NULL;
+   action = PF_PASS;
+   }
break;
 #endif /* INET6 */
case PF_DROP:



pf log user and group

2021-01-11 Thread Alexander Bluhm
Hi,

Sometimes an uid is logged in pflog(4) although the logopt of the
rule does not specify it.  Check the option again for the log rule
in case another rule has triggered a socket lookup.  Remove logopt
group, it is not documented and cannot work as struct pfloghdr does
not contain a gid.  Rename PF_LOG_SOCKET_LOOKUP to PF_LOG_USER to
express what it does.  The lookup involved is only an implementation
detail.

ok?

bluhm

Index: sys/net/if_pflog.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pflog.c,v
retrieving revision 1.91
diff -u -p -r1.91 if_pflog.c
--- sys/net/if_pflog.c  28 Aug 2020 12:01:48 -  1.91
+++ sys/net/if_pflog.c  11 Jan 2021 14:44:55 -
@@ -253,9 +253,9 @@ pflog_packet(struct pf_pdesc *pd, u_int8
strlcpy(hdr.ruleset, ruleset->anchor->name,
sizeof(hdr.ruleset));
}
-   if (trigger->log & PF_LOG_SOCKET_LOOKUP && !pd->lookup.done)
+   if (trigger->log & PF_LOG_USER && !pd->lookup.done)
pd->lookup.done = pf_socket_lookup(pd);
-   if (pd->lookup.done > 0) {
+   if (trigger->log & PF_LOG_USER && pd->lookup.done > 0) {
hdr.uid = pd->lookup.uid;
hdr.pid = pd->lookup.pid;
} else {
Index: sys/net/pfvar.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pfvar.h,v
retrieving revision 1.497
diff -u -p -r1.497 pfvar.h
--- sys/net/pfvar.h 14 Oct 2020 19:22:14 -  1.497
+++ sys/net/pfvar.h 11 Jan 2021 14:46:54 -
@@ -156,7 +156,7 @@ enum{ PF_ADDR_ADDRMASK, PF_ADDR_NOROUTE
 
 #definePF_LOG  0x01
 #definePF_LOG_ALL  0x02
-#definePF_LOG_SOCKET_LOOKUP0x04
+#definePF_LOG_USER 0x04
 #definePF_LOG_FORCE0x08
 #definePF_LOG_MATCHES  0x10
 
Index: sbin/pfctl/parse.y
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.707
diff -u -p -r1.707 parse.y
--- sbin/pfctl/parse.y  16 Dec 2020 18:01:16 -  1.707
+++ sbin/pfctl/parse.y  11 Jan 2021 14:44:46 -
@@ -2409,8 +2409,7 @@ logopts   : logopt{ $$ = 
$1; }
 
 logopt : ALL   { $$.log = PF_LOG_ALL; $$.logif = 0; }
| MATCHES   { $$.log = PF_LOG_MATCHES; $$.logif = 0; }
-   | USER  { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; }
-   | GROUP { $$.log = PF_LOG_SOCKET_LOOKUP; $$.logif = 0; }
+   | USER  { $$.log = PF_LOG_USER; $$.logif = 0; }
| TO string {
const char  *errstr;
u_inti;
Index: sbin/pfctl/pfctl_parser.c
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/pfctl/pfctl_parser.c,v
retrieving revision 1.344
diff -u -p -r1.344 pfctl_parser.c
--- sbin/pfctl/pfctl_parser.c   29 Dec 2020 19:50:28 -  1.344
+++ sbin/pfctl/pfctl_parser.c   11 Jan 2021 14:44:26 -
@@ -795,7 +795,7 @@ print_rule(struct pf_rule *r, const char
printf("%sall", count++ ? ", " : "");
if (r->log & PF_LOG_MATCHES)
printf("%smatches", count++ ? ", " : "");
-   if (r->log & PF_LOG_SOCKET_LOOKUP)
+   if (r->log & PF_LOG_USER)
printf("%suser", count++ ? ", " : "");
if (r->logif)
printf("%sto pflog%u", count++ ? ", " : "",



Re: pf route-to issues

2021-01-08 Thread Alexander Bluhm
On Tue, Jan 05, 2021 at 10:05:39PM +1000, David Gwynne wrote:
> If the idea is to avoid running most of pf_test again if route-to is
> applied during ip_output, I think this tweaked diff is simpler. Is there
> a valid use case for running some of pf_test again after route-to is
> applied?

I found the original commit that introduced this strange check.


revision 1.294
date: 2003/01/02 01:56:56;  author: dhartmei;  state: Exp;  lines: +27 -49;
When route-to/reply-to is used in combination with address translation,
pf_test() may be called twice for the same packet. In this case, make
sure the translation is only applied in the second call. This solves
the problem with state insert failures where the second pf_test() call
tried to insert another state entry after the first call's translation.
ok henning@, mcbride@, thanks to Joe Nall for additional testing.


I have tested your diffs in my setup, they all pass.  I have not
tested the scenario mentioned in the commit message.  Note that the
address translation implementation in 2003 was different from what
we have now.  And sasha@'s analysis shows that the current code is
wrong in other use cases.

The check in pf_find_state() seems to be unrelated to the call to
pf_test() in pf_route().  I have to rethink it separately.

How can we figure out what happens when we remove the check?  It
may harm some cases and benefit others or make no sense at all.  My
regression test, which tests each feature individually, is not
affected.

The only way to find out is to commit it.  It reduces complexity that
no one understands.

OK bluhm@ to remove the check

Please leave the "if (pd->kif->pfik_ifp != ifp)" around pf_test()
in pf_route() as it is for now.



Re: pf route-to issues

2021-01-04 Thread Alexander Bluhm
On Mon, Jan 04, 2021 at 11:21:50PM +1000, David Gwynne wrote:
> this chunk pops out as a standalone change.
> 
> having pf_find_state() return PF_PASS here means the callers short
> circuit and let the packet go through without running it through
> a lot of the state handling, which includes things like protocol state
> updates, nat, scrubbing, some pflog handling, and most importantly,
> later calls to pf_route().

pf_route() calls pf_test() again with a different interface.

The idea of this code is, that the interface which is passed to
pf_test() from ip_output() is wrong.  The call to pf_set_rt_ifp()
changes it in the state.

In the pf_test() call from ip_output() we skip the tests.  We know
they will happen in pf_test() called from pf_route().  Without this
chunk we would do state handling twice with different interfaces.

Is that analysis correct?

bluhm

> Index: pf.c
> ===
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1097
> diff -u -p -r1.1097 pf.c
> --- pf.c  4 Jan 2021 12:48:27 -   1.1097
> +++ pf.c  4 Jan 2021 13:08:26 -
> @@ -1122,12 +1122,6 @@ pf_find_state(struct pf_pdesc *pd, struc
>   }
>  
>   *state = s;
> - if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
> - ((s->rule.ptr->rt == PF_ROUTETO &&
> - s->rule.ptr->direction == PF_OUT) ||
> - (s->rule.ptr->rt == PF_REPLYTO &&
> - s->rule.ptr->direction == PF_IN)))
> - return (PF_PASS);
>  
>   return (PF_MATCH);
>  }



Re: pf route-to issues

2021-01-04 Thread Alexander Bluhm
On Mon, Jan 04, 2021 at 04:32:45PM +0100, Alexandr Nedvedicky wrote:
> so either rt_kif must stay for a while, or your new diff (rebased on top 
> of
> stuff committed already) must be expanded by the nit pick I've sent.

The diff I sent contains this bit.  I still think the merge bug is
on your side.

> to put it clear: I'm concerned the diff posted here:
>   https://marc.info/?l=openbsd-tech&m=160976516119388&w=2
> is not complete and should not be committed as is.

It compiles, I recreated the diff and attached it.

> > > -   s->rt_kif = NULL;
> > > if (!r->rt)
> > > return (0);

My diff removes the kif here ...

> > > -   if (rv == 0) {
> > > -   s->rt_kif = r->route.kif;
> > > +   if (rv == 0)
> > > s->natrule.ptr = r;
> > > -   }

... and the {}.

Anyway, it should not be committed without the userland part.
(and not without compiling it :-)

bluhm

Index: net/if_pfsync.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.280
diff -u -p -r1.280 if_pfsync.c
--- net/if_pfsync.c 4 Jan 2021 12:48:27 -   1.280
+++ net/if_pfsync.c 4 Jan 2021 15:57:17 -
@@ -613,6 +613,7 @@ pfsync_state_import(struct pfsync_state 
 
/* copy to state */
st->rt_addr = sp->rt_addr;
+   st->rt = sp->rt;
st->creation = getuptime() - ntohl(sp->creation);
st->expire = getuptime();
if (ntohl(sp->expire)) {
@@ -643,7 +644,6 @@ pfsync_state_import(struct pfsync_state 
 
st->rule.ptr = r;
st->anchor.ptr = NULL;
-   st->rt_kif = NULL;
 
st->pfsync_time = getuptime();
st->sync_state = PFSYNC_S_NONE;
@@ -1857,7 +1857,7 @@ pfsync_undefer(struct pfsync_deferral *p
if (drop)
m_freem(pd->pd_m);
else {
-   if (st->rule.ptr->rt == PF_ROUTETO) {
+   if (st->rt == PF_ROUTETO) {
if (pf_setup_pdesc(, st->key[PF_SK_WIRE]->af,
st->direction, st->kif, pd->pd_m, NULL) !=
PF_PASS) {
@@ -1866,11 +1866,11 @@ pfsync_undefer(struct pfsync_deferral *p
}
switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
-   pf_route(, st->rule.ptr, st);
+   pf_route(, st);
break;
 #ifdef INET6
case AF_INET6:
-   pf_route6(, st->rule.ptr, st);
+   pf_route6(, st);
break;
 #endif /* INET6 */
default:
Index: net/pf.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
retrieving revision 1.1097
diff -u -p -r1.1097 pf.c
--- net/pf.c4 Jan 2021 12:48:27 -   1.1097
+++ net/pf.c4 Jan 2021 15:57:17 -
@@ -1122,12 +1122,6 @@ pf_find_state(struct pf_pdesc *pd, struc
}
 
*state = s;
-   if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
-   ((s->rule.ptr->rt == PF_ROUTETO &&
-   s->rule.ptr->direction == PF_OUT) ||
-   (s->rule.ptr->rt == PF_REPLYTO &&
-   s->rule.ptr->direction == PF_IN)))
-   return (PF_PASS);
 
return (PF_MATCH);
 }
@@ -1186,6 +1180,7 @@ pf_state_export(struct pfsync_state *sp,
 
/* copy from state */
strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+   sp->rt = st->rt;
sp->rt_addr = st->rt_addr;
sp->creation = htonl(getuptime() - st->creation);
expire = pf_state_expires(st);
@@ -3433,16 +3428,13 @@ pf_set_rt_ifp(struct pf_state *s, struct
struct pf_rule *r = s->rule.ptr;
int rv;
 
-   s->rt_kif = NULL;
-   if (!r->rt)
+   if (r->rt == PF_NOPFROUTE)
return (0);
 
rv = pf_map_addr(af, r, saddr, >rt_addr, NULL, sns, 
>route, PF_SN_ROUTE);
-   if (rv == 0) {
-   s->rt_kif = r->route.kif;
-   s->natrule.ptr = r;
-   }
+   if (rv == 0)
+   s->rt = r->rt;
 
return (rv);
 }
@@ -5973,15 +5965,13 @@ pf_rtlabel_match(struct pf_addr *addr, s
 
 /* pf_route() may change pd->m, adjust local copies after calling */
 void
-pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s)
+pf_route(struct pf_pdesc *pd, struct pf_state *s)
 {
struct mbuf *m0, *m1;
struct sockaddr_in  *dst, sin;
struct rtentry  *rt = NULL;
struct ip   *ip;
struct ifnet*ifp = NULL;
-   struct pf_addr   naddr;
-   struct pf_src_node  *sns[PF_SN_MAX];
int  error = 0;
unsigned int rtableid;
 
@@ 

Re: pf route-to issues

2021-01-04 Thread Alexander Bluhm
On Mon, Jan 04, 2021 at 03:26:15PM +0100, Alexandr Nedvedicky wrote:
> you refactoring diff requires a minor finishing touch to keep the
> stuff compiling:

Did I commit something that does not compile?  I just made cvs
update on another machine.  There it worked.

The rt_kif in pf_state still exists.  The diff below should not
be necessary.  Maybe you forgot to clean pfvar.h.

bluhm

> 8<---8<---8<---8<---8<8<
> diff --git a/sys/net/pf.c b/sys/net/pf.c
> index b8766df1686..3f9f5b13add 100644
> --- a/sys/net/pf.c
> +++ b/sys/net/pf.c
> @@ -3428,16 +3428,13 @@ pf_set_rt_ifp(struct pf_state *s, struct pf_addr 
> *saddr, sa_family_t af,
> struct pf_rule *r = s->rule.ptr;
> int rv;
>  
> -   s->rt_kif = NULL;
> if (!r->rt)
> return (0);
>  
> rv = pf_map_addr(af, r, saddr, >rt_addr, NULL, sns, 
> >route, PF_SN_ROUTE);
> -   if (rv == 0) {
> -   s->rt_kif = r->route.kif;
> +   if (rv == 0)
> s->natrule.ptr = r;
> -   }
>  
> return (rv);
>  }
> 8<---8<---8<---8<---8<8<
> 
> 
> thanks and
> regards
> sashan



Re: pf route-to issues

2021-01-04 Thread Alexander Bluhm
On Mon, Jan 04, 2021 at 11:46:16AM +0100, Alexandr Nedvedicky wrote:
> > let's put this in and then i'll have a look. ok by me.
> bluhm's diff is fine with me.

Refactoring is committed, here is the remaining kernel diff after merge.

bluhm

Index: net/if_pfsync.c
===
RCS file: /cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.280
diff -u -p -r1.280 if_pfsync.c
--- net/if_pfsync.c 4 Jan 2021 12:48:27 -   1.280
+++ net/if_pfsync.c 4 Jan 2021 12:52:01 -
@@ -613,6 +613,7 @@ pfsync_state_import(struct pfsync_state 
 
/* copy to state */
st->rt_addr = sp->rt_addr;
+   st->rt = sp->rt;
st->creation = getuptime() - ntohl(sp->creation);
st->expire = getuptime();
if (ntohl(sp->expire)) {
@@ -643,7 +644,6 @@ pfsync_state_import(struct pfsync_state 
 
st->rule.ptr = r;
st->anchor.ptr = NULL;
-   st->rt_kif = NULL;
 
st->pfsync_time = getuptime();
st->sync_state = PFSYNC_S_NONE;
@@ -1857,7 +1857,7 @@ pfsync_undefer(struct pfsync_deferral *p
if (drop)
m_freem(pd->pd_m);
else {
-   if (st->rule.ptr->rt == PF_ROUTETO) {
+   if (st->rt == PF_ROUTETO) {
if (pf_setup_pdesc(, st->key[PF_SK_WIRE]->af,
st->direction, st->kif, pd->pd_m, NULL) !=
PF_PASS) {
@@ -1866,11 +1866,11 @@ pfsync_undefer(struct pfsync_deferral *p
}
switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
-   pf_route(, st->rule.ptr, st);
+   pf_route(, st);
break;
 #ifdef INET6
case AF_INET6:
-   pf_route6(, st->rule.ptr, st);
+   pf_route6(, st);
break;
 #endif /* INET6 */
default:
Index: net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1097
diff -u -p -r1.1097 pf.c
--- net/pf.c4 Jan 2021 12:48:27 -   1.1097
+++ net/pf.c4 Jan 2021 12:52:02 -
@@ -1122,12 +1122,6 @@ pf_find_state(struct pf_pdesc *pd, struc
}
 
*state = s;
-   if (pd->dir == PF_OUT && s->rt_kif != NULL && s->rt_kif != pd->kif &&
-   ((s->rule.ptr->rt == PF_ROUTETO &&
-   s->rule.ptr->direction == PF_OUT) ||
-   (s->rule.ptr->rt == PF_REPLYTO &&
-   s->rule.ptr->direction == PF_IN)))
-   return (PF_PASS);
 
return (PF_MATCH);
 }
@@ -1186,6 +1180,7 @@ pf_state_export(struct pfsync_state *sp,
 
/* copy from state */
strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
+   sp->rt = st->rt;
sp->rt_addr = st->rt_addr;
sp->creation = htonl(getuptime() - st->creation);
expire = pf_state_expires(st);
@@ -3433,16 +3428,13 @@ pf_set_rt_ifp(struct pf_state *s, struct
struct pf_rule *r = s->rule.ptr;
int rv;
 
-   s->rt_kif = NULL;
-   if (!r->rt)
+   if (r->rt == PF_NOPFROUTE)
return (0);
 
rv = pf_map_addr(af, r, saddr, >rt_addr, NULL, sns, 
>route, PF_SN_ROUTE);
-   if (rv == 0) {
-   s->rt_kif = r->route.kif;
-   s->natrule.ptr = r;
-   }
+   if (rv == 0)
+   s->rt = r->rt;
 
return (rv);
 }
@@ -5973,15 +5965,13 @@ pf_rtlabel_match(struct pf_addr *addr, s
 
 /* pf_route() may change pd->m, adjust local copies after calling */
 void
-pf_route(struct pf_pdesc *pd, struct pf_rule *r, struct pf_state *s)
+pf_route(struct pf_pdesc *pd, struct pf_state *s)
 {
struct mbuf *m0, *m1;
struct sockaddr_in  *dst, sin;
struct rtentry  *rt = NULL;
struct ip   *ip;
struct ifnet*ifp = NULL;
-   struct pf_addr   naddr;
-   struct pf_src_node  *sns[PF_SN_MAX];
int  error = 0;
unsigned int rtableid;
 
@@ -5991,11 +5981,11 @@ pf_route(struct pf_pdesc *pd, struct pf_
return;
}
 
-   if (r->rt == PF_DUPTO) {
+   if (s->rt == PF_DUPTO) {
if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
return;
} else {
-   if ((r->rt == PF_REPLYTO) == (r->direction == pd->dir))
+   if ((s->rt == PF_REPLYTO) == (s->direction == pd->dir))
return;
m0 = pd->m;
}
@@ -6008,44 +5998,31 @@ pf_route(struct pf_pdesc *pd, struct pf_
 
ip = mtod(m0, struct ip *);
 
-   memset(, 0, sizeof(sin));
-   dst = 
-   dst->sin_family = AF_INET;
-   dst->sin_len = sizeof(*dst);
-   

Re: convert i386 fix_f00f() uvm_km_zalloc

2021-01-03 Thread Alexander Bluhm
On Mon, Jan 04, 2021 at 10:00:25AM +1000, Jonathan Matthew wrote:
> I don't have a real 586, but I can tell qemu to pretend to be one,
> which at least executes this code.

You can run regress/sys/arch/i386/f00f/ .

> Using kd_waitok here seems suspect, because if we're out of memory
> this early I can't see anything else freeing any up, but
> uvm_km_zalloc() will also sleep rather than return failure.
> Should this use kd_nowait and panic if the allocation fails instead?

Calling malloc(9) with M_WAITOK during boot is the correct way.  It
will always succeed or panic in malloc() if it tries to sleep during
boot.

Although km_alloc() does not have this check, I would also call it
with kd_waitok.  I don't think we will trigger sleeping during boot.
But if there is concern, better put a similar check into km_alloc()
instead of checks in every caller.

> ok?

OK bluhm@

> Index: arch/i386/i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.642
> diff -u -p -u -p -r1.642 machdep.c
> --- arch/i386/i386/machdep.c  28 Dec 2020 14:02:07 -  1.642
> +++ arch/i386/i386/machdep.c  3 Jan 2021 23:01:34 -
> @@ -3100,7 +3100,7 @@ fix_f00f(void)
>   void *p;
>  
>   /* Allocate two new pages */
> - va = uvm_km_zalloc(kernel_map, NBPG*2);
> + va = (vaddr_t)km_alloc(NBPG*2, _any, _zero, _waitok);
>   p = (void *)(va + NBPG - 7*sizeof(*idt));
>  
>   /* Copy over old IDT */



Re: pf route-to issues

2021-01-03 Thread Alexander Bluhm
On Sun, Jan 03, 2021 at 06:56:20PM +0100, Alexander Bluhm wrote:
> I am currently running a full regress to find more fallout.

These regress tests fail:

sys/net/pf_forward
sys/net/pf_fragment
sbin/pfctl

The first two are easy to fix.  That means my tests using route-to
work fine with your diff.  Just remove the @interface as below.

pfctl tests pfail8 and pf13 use very strange routespec syntax.  You
might want to take a look at what that meant before and what should
be valid now.

bluhm

Index: regress/sys/net/pf_forward/pf.conf
===
RCS file: /mount/openbsd/cvs/src/regress/sys/net/pf_forward/pf.conf,v
retrieving revision 1.5
diff -u -p -r1.5 pf.conf
--- regress/sys/net/pf_forward/pf.conf  11 Jan 2018 03:23:16 -  1.5
+++ regress/sys/net/pf_forward/pf.conf  3 Jan 2021 23:26:54 -
@@ -17,22 +17,22 @@ pass out inet6  
 pass in  to $AF_IN6/64 af-to inet  from $PF_OUT  to $ECO_IN/24   tag af
 pass out inettagged af
 
-pass in  to $RTT_IN/24  route-to $RT_IN@$PF_IFOUT  tag rttin
-pass out   tagged rttin
-pass in  to $RTT_IN6/64 route-to $RT_IN6@$PF_IFOUT tag rttin
-pass out   tagged rttin
+pass in  to $RTT_IN/24  route-to $RT_IN  tag rttin
+pass out tagged rttin
+pass in  to $RTT_IN6/64 route-to $RT_IN6 tag rttin
+pass out tagged rttin
 
-pass in  to $RTT_OUT/24 tag rttout
-pass out route-to $RT_IN@$PF_IFOUT  tagged rttout
-pass in  to $RTT_OUT6/64tag rttout
-pass out route-to $RT_IN6@$PF_IFOUT tagged rttout
+pass in  to $RTT_OUT/24   tag rttout
+pass out route-to $RT_IN  tagged rttout
+pass in  to $RTT_OUT6/64  tag rttout
+pass out route-to $RT_IN6 tagged rttout
 
-pass in  from $RPT_IN/24  reply-to $SRC_OUT@$PF_IFIN  tag rptin
-pass out  tagged rptin
-pass in  from $RPT_IN6/64 reply-to $SRC_OUT6@$PF_IFIN tag rptin
-pass out  tagged rptin
+pass in  from $RPT_IN/24  reply-to $SRC_OUT  tag rptin
+pass out tagged rptin
+pass in  from $RPT_IN6/64 reply-to $SRC_OUT6 tag rptin
+pass out tagged rptin
 
-pass in  from $RPT_OUT/24  tag rptout
-pass out   reply-to $SRC_OUT@$PF_IFIN  tagged rptout
-pass in  from $RPT_OUT6/64 tag rptout
-pass out   reply-to $SRC_OUT6@$PF_IFIN tagged rptout
+pass in  from $RPT_OUT/24 tag rptout
+pass out   reply-to $SRC_OUT  tagged rptout
+pass in  from $RPT_OUT6/64tag rptout
+pass out   reply-to $SRC_OUT6 tagged rptout
Index: regress/sys/net/pf_fragment/pf.conf
===
RCS file: /mount/openbsd/cvs/src/regress/sys/net/pf_fragment/pf.conf,v
retrieving revision 1.5
diff -u -p -r1.5 pf.conf
--- regress/sys/net/pf_fragment/pf.conf 7 Jun 2017 20:09:07 -   1.5
+++ regress/sys/net/pf_fragment/pf.conf 3 Jan 2021 23:28:07 -
@@ -10,7 +10,7 @@ pass outnat-to $PF_OUT  
 pass in  to $RDR_IN6/64 rdr-to $ECO_IN6 allow-opts tag rdr
 pass outnat-to $PF_OUT6 allow-opts tagged rdr
 
-pass in  to $RTT_IN/24 allow-opts tag rtt
-pass outroute-to $RT_IN@$PF_IFOUT  allow-opts tagged rtt
-pass in  to $RTT_IN6/64allow-opts tag rtt
-pass outroute-to $RT_IN6@$PF_IFOUT allow-opts tagged rtt
+pass in  to $RTT_IN/24   allow-opts tag rtt
+pass outroute-to $RT_IN  allow-opts tagged rtt
+pass in  to $RTT_IN6/64  allow-opts tag rtt
+pass outroute-to $RT_IN6 allow-opts tagged rtt



Re: pf route-to issues

2021-01-03 Thread Alexander Bluhm
On Sun, Jan 03, 2021 at 02:00:00PM +1000, David Gwynne wrote:
> On Tue, Oct 20, 2020 at 09:27:09AM +1000, David Gwynne wrote:
> We've been running this diff in production for the last couple of
> months, and it's been solid for us so far. Ignoring the fixes for
> crashes, I personally find it a lot more usable than the current
> route-to rules too.
> 
> Can I commit it?

The diff is quite large and does multiple things at a time.

In general I also did not understand why I have to say em0@10.0.0.1
for routing and it took me a while to figure out what to put into
pf.conf.  I use this syntax in /usr/src/regress/sys/net/pf_forward/pf.conf.
This has to be fixed after this goes in.  I will care about regress
as this test is quite complex and needs several machines to set up.
I am currently running a full regress to find more fallout.

I do not use pfsync, so I cannot say what the consequences of the
change are in this area.  Also I don't know why pf-route interfaces
were designed in such a strange way.

From a user perspective it is not clear, why route-to should not
work together with no-state.  So we should either fix it or document
it and add a check in the parser.  Is fixing hard?

Are we losing any other features apart from this strange arp reuse
you described in your mail?

There is some refactoring in your diff.  I split it to make
review easier.  I think this should go in first.  Note that the
pf_state variable is called st in if_pfsync.c.  Can we be consistent
here?  Is the pfsync_state properly aligned?  During import it comes
from an mbuf.

Is there anything else that can be split out easily?

bluhm

Index: net/if_pfsync.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_pfsync.c,v
retrieving revision 1.279
diff -u -p -r1.279 if_pfsync.c
--- net/if_pfsync.c 12 Dec 2020 11:49:02 -  1.279
+++ net/if_pfsync.c 3 Jan 2021 17:16:55 -
@@ -612,7 +612,7 @@ pfsync_state_import(struct pfsync_state 
st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
 
/* copy to state */
-   bcopy(>rt_addr, >rt_addr, sizeof(st->rt_addr));
+   st->rt_addr = sp->rt_addr;
st->creation = getuptime() - ntohl(sp->creation);
st->expire = getuptime();
if (ntohl(sp->expire)) {
@@ -1843,6 +1843,7 @@ pfsync_undefer(struct pfsync_deferral *p
 {
struct pfsync_softc *sc = pfsyncif;
struct pf_pdesc pdesc;
+   struct pf_state *st = pd->pd_st;
 
NET_ASSERT_LOCKED();
 
@@ -1852,35 +1853,32 @@ pfsync_undefer(struct pfsync_deferral *p
TAILQ_REMOVE(>sc_deferrals, pd, pd_entry);
sc->sc_deferred--;
 
-   CLR(pd->pd_st->state_flags, PFSTATE_ACK);
+   CLR(st->state_flags, PFSTATE_ACK);
if (drop)
m_freem(pd->pd_m);
else {
-   if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) {
-   if (pf_setup_pdesc(,
-   pd->pd_st->key[PF_SK_WIRE]->af,
-   pd->pd_st->direction, pd->pd_st->rt_kif,
-   pd->pd_m, NULL) != PF_PASS) {
+   if (st->rule.ptr->rt == PF_ROUTETO) {
+   if (pf_setup_pdesc(, st->key[PF_SK_WIRE]->af,
+   st->direction, st->kif, pd->pd_m, NULL) !=
+   PF_PASS) {
m_freem(pd->pd_m);
goto out;
}
-   switch (pd->pd_st->key[PF_SK_WIRE]->af) {
+   switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
-   pf_route(,
-   pd->pd_st->rule.ptr, pd->pd_st);
+   pf_route(, st->rule.ptr, st);
break;
 #ifdef INET6
case AF_INET6:
-   pf_route6(,
-   pd->pd_st->rule.ptr, pd->pd_st);
+   pf_route6(, st->rule.ptr, st);
break;
 #endif /* INET6 */
default:
-   unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
+   unhandled_af(st->key[PF_SK_WIRE]->af);
}
pd->pd_m = pdesc.m;
} else {
-   switch (pd->pd_st->key[PF_SK_WIRE]->af) {
+   switch (st->key[PF_SK_WIRE]->af) {
case AF_INET:
ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL,
0);
@@ -1892,12 +1890,12 @@ pfsync_undefer(struct pfsync_deferral *p
break;
 #endif /* INET6 */
default:
-   unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af);
+   

Re: uvm_fault: amap & anon locking

2021-01-01 Thread Alexander Bluhm
On Wed, Dec 30, 2020 at 11:19:41AM -0300, Martin Pieuchot wrote:
> This has been extensively tested as part of the unlocking diff I sent to
> many developers.  However, I'd appreciate if you could test again because
> this diff doesn't include WITNESS and do not unlock the fault handler.

Passed regress on i386 and amd64.

bluhm

> Index: kern/init_main.c
> ===
> RCS file: /cvs/src/sys/kern/init_main.c,v
> retrieving revision 1.303
> diff -u -p -r1.303 init_main.c
> --- kern/init_main.c  28 Dec 2020 14:01:23 -  1.303
> +++ kern/init_main.c  29 Dec 2020 14:13:52 -
> @@ -232,6 +232,7 @@ main(void *framep)
>   KERNEL_LOCK_INIT();
>   SCHED_LOCK_INIT();
>  
> + rw_obj_init();
>   uvm_init();
>   disk_init();/* must come before autoconfiguration */
>   tty_init(); /* initialise tty's */
> Index: kern/kern_rwlock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
> retrieving revision 1.45
> diff -u -p -r1.45 kern_rwlock.c
> --- kern/kern_rwlock.c2 Mar 2020 17:07:49 -   1.45
> +++ kern/kern_rwlock.c30 Dec 2020 14:03:00 -
> @@ -19,6 +19,7 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -487,4 +488,124 @@ int
>  rrw_status(struct rrwlock *rrwl)
>  {
>   return (rw_status(>rrwl_lock));
> +}
> +
> +/*-
> + * Copyright (c) 2008 The NetBSD Foundation, Inc.
> + * All rights reserved.
> + *
> + * This code is derived from software contributed to The NetBSD Foundation
> + * by Andrew Doran.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *notice, this list of conditions and the following disclaimer in the
> + *documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
> + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
> LIMITED
> + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> + * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
> + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> + * POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#define  RWLOCK_OBJ_MAGIC0x5aa3c85d
> +struct rwlock_obj {
> + struct rwlock   ro_lock;
> + u_int   ro_magic;
> + u_int   ro_refcnt;
> +};
> +
> +
> +struct pool rwlock_obj_pool;
> +
> +/*
> + * rw_obj_init:
> + *
> + *   Initialize the mutex object store.
> + */
> +void
> +rw_obj_init(void)
> +{
> + pool_init(_obj_pool, sizeof(struct rwlock_obj), 0, IPL_NONE,
> + PR_WAITOK | PR_RWLOCK, "rwobjpl", NULL);
> +}
> +
> +/*
> + * rw_obj_alloc:
> + *
> + *   Allocate a single lock object.
> + */
> +void
> +_rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags,
> +struct lock_type *type)
> +{
> + struct rwlock_obj *mo;
> +
> + mo = pool_get(_obj_pool, PR_WAITOK);
> + mo->ro_magic = RWLOCK_OBJ_MAGIC;
> + _rw_init_flags(>ro_lock, name, flags, type);
> + mo->ro_refcnt = 1;
> +
> + *lock = >ro_lock;
> +}
> +
> +/*
> + * rw_obj_hold:
> + *
> + *   Add a single reference to a lock object.  A reference to the object
> + *   must already be held, and must be held across this call.
> + */
> +
> +void
> +rw_obj_hold(struct rwlock *lock)
> +{
> + struct rwlock_obj *mo = (struct rwlock_obj *)lock;
> +
> + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
> + "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
> +  __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
> + KASSERTMSG(mo->ro_refcnt > 0,
> + "%s: lock %p: mo->ro_refcnt (%#x) == 0",
> +  __func__, mo, mo->ro_refcnt);
> +
> + atomic_inc_int(>ro_refcnt);
> +}
> +
> +/*
> + * rw_obj_free:
> + *
> + *   Drop a reference from a lock object.  If the last reference is being
> + *   dropped, free the object and return true.  Otherwise, return false.
> + */
> +int
> +rw_obj_free(struct rwlock *lock)
> +{
> + struct rwlock_obj *mo = (struct rwlock_obj *)lock;
> +
> + KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
> + "%s: lock 

Re: Thread local data setup and destruct

2020-12-31 Thread Alexander Bluhm
On Tue, Dec 29, 2020 at 04:07:19PM +0100, Otto Moerbeek wrote:
> This works better, checking the flags does not work if the thread is
> already on the road to destruction.

This diff survived a full regress run on amd64.

bluhm

> Index: asr/asr.c
> ===
> RCS file: /cvs/src/lib/libc/asr/asr.c,v
> retrieving revision 1.64
> diff -u -p -r1.64 asr.c
> --- asr/asr.c 6 Jul 2020 13:33:05 -   1.64
> +++ asr/asr.c 29 Dec 2020 15:05:45 -
> @@ -117,7 +117,7 @@ _asr_resolver_done(void *arg)
>   _asr_ctx_unref(ac);
>   return;
>   } else {
> - priv = _THREAD_PRIVATE(_asr, _asr, &_asr);
> + priv = _THREAD_PRIVATE_DT(_asr, _asr, NULL, &_asr);
>   if (*priv == NULL)
>   return;
>   asr = *priv;
> @@ -128,6 +128,23 @@ _asr_resolver_done(void *arg)
>   free(asr);
>  }
>  
> +static void
> +_asr_resolver_done_tp(void *arg)
> +{
> + char buf[100];
> + int len;
> + struct asr **priv = arg;
> + struct asr *asr;
> +
> + if (*priv == NULL)
> + return;
> + asr = *priv;
> +
> + _asr_ctx_unref(asr->a_ctx);
> + free(asr);
> + free(priv);
> +}
> +
>  void *
>  asr_resolver_from_string(const char *str)
>  {
> @@ -349,7 +366,8 @@ _asr_use_resolver(void *arg)
>   }
>   else {
>   DPRINT("using thread-local resolver\n");
> - priv = _THREAD_PRIVATE(_asr, _asr, &_asr);
> + priv = _THREAD_PRIVATE_DT(_asr, _asr, _asr_resolver_done_tp,
> + &_asr);
>   if (*priv == NULL) {
>   DPRINT("setting up thread-local resolver\n");
>   *priv = _asr_resolver();
> Index: include/thread_private.h
> ===
> RCS file: /cvs/src/lib/libc/include/thread_private.h,v
> retrieving revision 1.35
> diff -u -p -r1.35 thread_private.h
> --- include/thread_private.h  13 Feb 2019 13:22:14 -  1.35
> +++ include/thread_private.h  29 Dec 2020 15:05:45 -
> @@ -98,7 +98,8 @@ struct thread_callbacks {
>   void(*tc_mutex_destroy)(void **);
>   void(*tc_tag_lock)(void **);
>   void(*tc_tag_unlock)(void **);
> - void*(*tc_tag_storage)(void **, void *, size_t, void *);
> + void*(*tc_tag_storage)(void **, void *, size_t, void (*)(void *),
> +void *);
>   __pid_t (*tc_fork)(void);
>   __pid_t (*tc_vfork)(void);
>   void(*tc_thread_release)(struct pthread *);
> @@ -142,6 +143,7 @@ __END_HIDDEN_DECLS
>  #define _THREAD_PRIVATE_MUTEX_LOCK(name) do {} while (0)
>  #define _THREAD_PRIVATE_MUTEX_UNLOCK(name)   do {} while (0)
>  #define _THREAD_PRIVATE(keyname, storage, error) &(storage)
> +#define _THREAD_PRIVATE_DT(keyname, storage, dt, error)  &(storage)
>  #define _MUTEX_LOCK(mutex)   do {} while (0)
>  #define _MUTEX_UNLOCK(mutex) do {} while (0)
>  #define _MUTEX_DESTROY(mutex)do {} while (0)
> @@ -168,7 +170,12 @@ __END_HIDDEN_DECLS
>  #define _THREAD_PRIVATE(keyname, storage, error) \
>   (_thread_cb.tc_tag_storage == NULL ? &(storage) :   \
>   _thread_cb.tc_tag_storage(&(__THREAD_NAME(keyname)),\
> - &(storage), sizeof(storage), error))
> + &(storage), sizeof(storage), NULL, (error)))
> +
> +#define _THREAD_PRIVATE_DT(keyname, storage, dt, error)  
> \
> + (_thread_cb.tc_tag_storage == NULL ? &(storage) :   \
> + _thread_cb.tc_tag_storage(&(__THREAD_NAME(keyname)),\
> + &(storage), sizeof(storage), (dt), (error)))
>  
>  /*
>   * Macros used in libc to access mutexes.
> Index: thread/rthread_cb.h
> ===
> RCS file: /cvs/src/lib/libc/thread/rthread_cb.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 rthread_cb.h
> --- thread/rthread_cb.h   5 Sep 2017 02:40:54 -   1.2
> +++ thread/rthread_cb.h   29 Dec 2020 15:05:45 -
> @@ -35,5 +35,5 @@ void_thread_mutex_unlock(void **);
>  void _thread_mutex_destroy(void **);
>  void _thread_tag_lock(void **);
>  void _thread_tag_unlock(void **);
> -void *_thread_tag_storage(void **, void *, size_t, void *);
> +void *_thread_tag_storage(void **, void *, size_t, void (*)(void*), void *);
>  __END_HIDDEN_DECLS
> Index: thread/rthread_libc.c
> ===
> RCS file: /cvs/src/lib/libc/thread/rthread_libc.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 rthread_libc.c
> --- thread/rthread_libc.c 10 Jan 2019 18:45:33 -  1.3
> +++ thread/rthread_libc.c 29 Dec 2020 15:05:45 -
> @@ -5,6 +5,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 

Re: IPsec IPv6 PMTU

2020-12-27 Thread Alexander Bluhm
On Thu, Dec 24, 2020 at 10:54:59PM +0100, Alexander Bluhm wrote:
> It also makes v4 and v6 code look similar.  If you want, I can
> split this for easier review.

This is the part of the diff that creates a path MTU host route for
IPv6.  Basically the code is copied from IPv4 and adapted.  Some
things are changed in v4 to make it look similar.

- ip6_forward increases the noroute error counter, I think that
  should be done in ip_forward, too.
- Pass more specific sockaddr_in6 to icmp6_mtudisc_clone().
  Or should we use a sockaddr for both v4 and v6?
- IPv6 may also use reject routes for PMTU clones.
- To pass a route_in6 to ip6_output_ipsec_send() introduce one
  in ip6_forward().  That is the same as what IPv4 does.  Note
  that dst and sin6 switch roles.
- Copy comments from ip_output_ipsec_send() to ip6_output_ipsec_send()
  to make code similar.
- Implement dynamic IPv6 IPsec PMTU routes.

ok?

bluhm

Index: netinet/icmp6.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/icmp6.h,v
retrieving revision 1.50
diff -u -p -r1.50 icmp6.h
--- netinet/icmp6.h 28 Oct 2020 17:27:35 -  1.50
+++ netinet/icmp6.h 27 Dec 2020 15:16:56 -
@@ -599,6 +599,7 @@ void icmp6_prepare(struct mbuf *);
 voidicmp6_redirect_input(struct mbuf *, int);
 voidicmp6_redirect_output(struct mbuf *, struct rtentry *);
 int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
+struct rtentry *icmp6_mtudisc_clone(struct sockaddr_in6 *, u_int, int);
 
 struct ip6ctlparam;
 void   icmp6_mtudisc_update(struct ip6ctlparam *, int);
Index: netinet/ip_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.352
diff -u -p -r1.352 ip_input.c
--- netinet/ip_input.c  16 Nov 2020 06:44:38 -  1.352
+++ netinet/ip_input.c  27 Dec 2020 15:16:56 -
@@ -1418,8 +1418,8 @@ ip_forward(struct mbuf *m, struct ifnet 
goto freecopy;
}
 
+   memset(, 0, sizeof(ro));
sin = satosin(_dst);
-   memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = ip->ip_dst;
@@ -1429,6 +1429,7 @@ ip_forward(struct mbuf *m, struct ifnet 
rt = rtalloc_mpath(sintosa(sin), >ip_src.s_addr,
m->m_pkthdr.ph_rtableid);
if (rt == NULL) {
+   ipstat_inc(ips_noroute);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
return;
}
Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.358
diff -u -p -r1.358 ip_output.c
--- netinet/ip_output.c 20 Dec 2020 21:15:47 -  1.358
+++ netinet/ip_output.c 27 Dec 2020 15:16:56 -
@@ -612,7 +612,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned));
if (rt != NULL) {
rt->rt_mtu = tdb->tdb_mtu;
-   if (ro && ro->ro_rt != NULL) {
+   if (ro != NULL && ro->ro_rt != NULL) {
rtfree(ro->ro_rt);
ro->ro_rt = rtalloc(>ro_dst, RT_RESOLVE,
m->m_pkthdr.ph_rtableid);
Index: netinet6/icmp6.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/icmp6.c,v
retrieving revision 1.233
diff -u -p -r1.233 icmp6.c
--- netinet6/icmp6.c28 Oct 2020 17:27:35 -  1.233
+++ netinet6/icmp6.c27 Dec 2020 15:16:56 -
@@ -138,7 +138,6 @@ int icmp6_ratelimit(const struct in6_add
 const char *icmp6_redirect_diag(struct in6_addr *, struct in6_addr *,
struct in6_addr *);
 inticmp6_notify_error(struct mbuf *, int, int, int);
-struct rtentry *icmp6_mtudisc_clone(struct sockaddr *, u_int);
 void   icmp6_mtudisc_timeout(struct rtentry *, struct rttimer *);
 void   icmp6_redirect_timeout(struct rtentry *, struct rttimer *);
 
@@ -1015,7 +1014,7 @@ icmp6_mtudisc_update(struct ip6ctlparam 
sin6.sin6_scope_id = in6_addr2scopeid(m->m_pkthdr.ph_ifidx,
_addr);
 
-   rt = icmp6_mtudisc_clone(sin6tosa(), m->m_pkthdr.ph_rtableid);
+   rt = icmp6_mtudisc_clone(, m->m_pkthdr.ph_rtableid, 0);
 
if (rt != NULL && ISSET(rt->rt_flags, RTF_HOST) &&
!(rt->rt_locks & RTV_MTU) &&
@@ -1784,15 +1783,18 @@ icmp6_ratelimit(const struct in6_addr *d
 }
 
 struct rtentry *
-icmp6_mtudisc_clone(struct sockaddr *dst, u_int rtableid)
+icmp6_mtudisc_clone(struct sockaddr_in6 *dst, u_int

IPsec IPv6 PMTU

2020-12-24 Thread Alexander Bluhm
Hi,

This diff makes path MTU discovery work for IPv6 IPsec ESP over
IPv4 tunnel.  Basically it ports code from v4 to v6.

It also makes v4 and v6 code look similar.  If you want, I can
split this for easier review.

ok?

bluhm

Index: netinet/icmp6.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/icmp6.h,v
retrieving revision 1.50
diff -u -p -r1.50 icmp6.h
--- netinet/icmp6.h 28 Oct 2020 17:27:35 -  1.50
+++ netinet/icmp6.h 22 Dec 2020 17:05:39 -
@@ -599,6 +599,7 @@ void icmp6_prepare(struct mbuf *);
 voidicmp6_redirect_input(struct mbuf *, int);
 voidicmp6_redirect_output(struct mbuf *, struct rtentry *);
 int icmp6_sysctl(int *, u_int, void *, size_t *, void *, size_t);
+struct rtentry *icmp6_mtudisc_clone(struct sockaddr_in6 *, u_int, int);
 
 struct ip6ctlparam;
 void   icmp6_mtudisc_update(struct ip6ctlparam *, int);
Index: netinet/ip_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.352
diff -u -p -r1.352 ip_input.c
--- netinet/ip_input.c  16 Nov 2020 06:44:38 -  1.352
+++ netinet/ip_input.c  22 Dec 2020 17:05:39 -
@@ -1418,8 +1418,8 @@ ip_forward(struct mbuf *m, struct ifnet 
goto freecopy;
}
 
+   memset(, 0, sizeof(ro));
sin = satosin(_dst);
-   memset(sin, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
sin->sin_addr = ip->ip_dst;
@@ -1429,6 +1429,7 @@ ip_forward(struct mbuf *m, struct ifnet 
rt = rtalloc_mpath(sintosa(sin), >ip_src.s_addr,
m->m_pkthdr.ph_rtableid);
if (rt == NULL) {
+   ipstat_inc(ips_noroute);
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
return;
}
Index: netinet/ip_ipsp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.196
diff -u -p -r1.196 ip_ipsp.h
--- netinet/ip_ipsp.h   5 Nov 2020 19:28:28 -   1.196
+++ netinet/ip_ipsp.h   22 Dec 2020 17:05:39 -
@@ -610,6 +610,7 @@ voidesp4_ctlinput(int, struct sockaddr 
 
 #ifdef INET6
 intesp6_input(struct mbuf **, int *, int, int);
+void   esp6_ctlinput(int, struct sockaddr *, u_int, void *);
 #endif /* INET6 */
 
 /* XF_IPCOMP */
Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.358
diff -u -p -r1.358 ip_output.c
--- netinet/ip_output.c 20 Dec 2020 21:15:47 -  1.358
+++ netinet/ip_output.c 22 Dec 2020 17:05:39 -
@@ -612,7 +612,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
ntohl(tdb->tdb_spi), tdb->tdb_mtu, rt, rt_mtucloned));
if (rt != NULL) {
rt->rt_mtu = tdb->tdb_mtu;
-   if (ro && ro->ro_rt != NULL) {
+   if (ro != NULL && ro->ro_rt != NULL) {
rtfree(ro->ro_rt);
ro->ro_rt = rtalloc(>ro_dst, RT_RESOLVE,
m->m_pkthdr.ph_rtableid);
Index: netinet/ipsec_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ipsec_input.c,v
retrieving revision 1.173
diff -u -p -r1.173 ipsec_input.c
--- netinet/ipsec_input.c   1 Sep 2020 01:53:34 -   1.173
+++ netinet/ipsec_input.c   24 Dec 2020 19:22:09 -
@@ -66,6 +66,7 @@
 #ifdef INET6
 #include 
 #include 
+#include 
 #include 
 #include 
 #endif /* INET6 */
@@ -83,6 +84,7 @@
 #include "bpfilter.h"
 
 void ipsec_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
+void ipsec6_common_ctlinput(u_int, int, struct sockaddr *, void *, int);
 
 #ifdef ENCDEBUG
 #define DPRINTF(x) if (encdebug) printf x
@@ -946,18 +948,43 @@ ipcomp4_input(struct mbuf **mp, int *off
 }
 
 void
+ipsec_set_mtu(struct tdb *tdbp, u_int32_t mtu, uint64_t timeout,
+const char *msg)
+{
+   ssize_t adjust;
+
+   if (timeout == 0)
+   timeout = ip_mtudisc_timeout;
+   /* Walk the chain backwards to the first tdb */
+   NET_ASSERT_LOCKED();
+   for (; tdbp; tdbp = tdbp->tdb_inext) {
+   if (tdbp->tdb_flags & TDBF_INVALID ||
+   (adjust = ipsec_hdrsz(tdbp)) == -1)
+   return;
+
+   mtu -= adjust;
+
+   /* Store adjusted MTU in tdb */
+   tdbp->tdb_mtu = mtu;
+   tdbp->tdb_mtutimeout = gettime() + timeout;
+   DPRINTF(("%s: %s: spi %08x mtu %d adjust %ld timeout %llu\n",
+   __func__, msg, ntohl(tdbp->tdb_spi), 

Re: netstat - proto ip record

2020-12-23 Thread Alexander Bluhm
On Wed, Dec 16, 2020 at 05:24:50PM +0100, Claudio Jeker wrote:
> On Wed, Dec 16, 2020 at 03:54:04PM +, Stuart Henderson wrote:
> > On 2020/12/16 16:43, Salvatore Cuzzilla wrote:
> > > Hi folks,
> > > 
> > > is there any process associated with this netstat record?
> > > btw, what's the meaning of the state field with value '17'?
> > > 
> > > ToTo@obsd ~ $ doas netstat -an -f inet
> > > Active Internet connections (including servers)
> > > Proto   Recv-Q Send-Q  Local Address  Foreign Address
> > > (state)
> > > ip   0  0  *.**.*17
> > 
> > Are kernel and userland in sync?
> 
> This is a SOCK_RAW socket using protocol 17 (UDP). AFAIK this is dhclient.
> You can see this also with fstat.
> root dhclient   750245* internet dgram udp *:0

Should we print a specific headline in netstat to avoid such questions?

Proto   Recv-Q Send-Q  Local Address  Foreign AddressIP-Proto
ip   0  0  *.**.*17

Proto   Recv-Q Send-Q  Local Address  Foreign AddressTCP-State
tcp  0  0  192.168.2.138.3513052.10.128.80.443   ESTABLISHED

Proto   Recv-Q Send-Q  Local Address  Foreign Address   
udp  0  0  192.168.2.138.31411162.159.200.1.123 

ok?

bluhm

Index: inet.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.168
diff -u -p -r1.168 inet.c
--- inet.c  15 Jan 2020 14:02:37 -  1.168
+++ inet.c  23 Dec 2020 15:08:45 -
@@ -327,9 +327,10 @@ netdomainpr(struct kinfo_file *kf, int p
if (Bflag && istcp)
printf("%-6.6s %-6.6s %-6.6s ",
"Recv-W", "Send-W", "Cgst-W");
-   printf(" %-*.*s %-*.*s %s\n",
+   printf(" %-*.*s %-*.*s%s\n",
addrlen, addrlen, "Local Address",
-   addrlen, addrlen, "Foreign Address", "(state)");
+   addrlen, addrlen, "Foreign Address",
+   istcp ? " TCP-State" : type == SOCK_RAW ? " IP-Proto" : "");
}
 
if (Aflag)



IPv6 pf_test EACCES

2020-12-21 Thread Alexander Bluhm
Hi,

A while ago we decided to pass EACCES to userland if pf blocks a
packet.  IPv6 still has the old EHOSTUNREACH code.

Use the same errno for dropped IPv6 packets as in IPv4.

ok?

bluhm

Index: netinet6/ip6_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_output.c,v
retrieving revision 1.247
diff -u -p -r1.247 ip6_output.c
--- netinet6/ip6_output.c   17 Jul 2020 15:21:36 -  1.247
+++ netinet6/ip6_output.c   21 Dec 2020 22:27:24 -
@@ -616,7 +616,7 @@ reroute:
 
 #if NPF > 0
if (pf_test(AF_INET6, PF_OUT, ifp, ) != PF_PASS) {
-   error = EHOSTUNREACH;
+   error = EACCES;
m_freem(m);
goto done;
}
@@ -2773,7 +2773,7 @@ ip6_output_ipsec_send(struct tdb *tdb, s
if ((encif = enc_getif(tdb->tdb_rdomain, tdb->tdb_tap)) == NULL ||
pf_test(AF_INET6, fwd ? PF_FWD : PF_OUT, encif, ) != PF_PASS) {
m_freem(m);
-   return EHOSTUNREACH;
+   return EACCES;
}
if (m == NULL)
return 0;



IPsec PMTU and reject route

2020-12-19 Thread Alexander Bluhm
Hi,

In revision 1.87 of ip_icmp.c claudio@ added ignoring reject routes
to icmp_mtudisc_clone().  Otherwise TCP would clone these routes
for PMTU discovery.  They will not work, even after dynamic routing
has found a better route than the reject route.

With IPsec the use case is different.  First you need a route, but
then the flow handles the packet without routing.  Usually this
route should be a reject route to avoid sending unencrypted traffic
if the flow is missing.  But IPsec needs this route for PMTU
discovery, which currently does not work.

So accept reject and blackhole routes for IPsec PMTU discovery.

ok?

bluhm

Index: netinet/ip_icmp.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.183
diff -u -p -r1.183 ip_icmp.c
--- netinet/ip_icmp.c   22 Aug 2020 17:55:54 -  1.183
+++ netinet/ip_icmp.c   18 Dec 2020 16:59:25 -
@@ -928,7 +928,7 @@ icmp_sysctl_icmpstat(void *oldp, size_t 
 }
 
 struct rtentry *
-icmp_mtudisc_clone(struct in_addr dst, u_int rtableid)
+icmp_mtudisc_clone(struct in_addr dst, u_int rtableid, int ipsec)
 {
struct sockaddr_in sin;
struct rtentry *rt;
@@ -942,7 +942,10 @@ icmp_mtudisc_clone(struct in_addr dst, u
rt = rtalloc(sintosa(), RT_RESOLVE, rtableid);
 
/* Check if the route is actually usable */
-   if (!rtisvalid(rt) || (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
+   if (!rtisvalid(rt))
+   goto bad;
+   /* IPsec needs the route only for PMTU, it can use reject for that */
+   if (!ipsec && (rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)))
goto bad;
 
/*
@@ -1000,7 +1003,7 @@ icmp_mtudisc(struct icmp *icp, u_int rta
struct ifnet *ifp;
u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
 
-   rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid);
+   rt = icmp_mtudisc_clone(icp->icmp_ip.ip_dst, rtableid, 0);
if (rt == NULL)
return;
 
Index: netinet/ip_icmp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_icmp.h,v
retrieving revision 1.31
diff -u -p -r1.31 ip_icmp.h
--- netinet/ip_icmp.h   5 Nov 2018 21:50:39 -   1.31
+++ netinet/ip_icmp.h   18 Dec 2020 16:59:25 -
@@ -239,7 +239,7 @@ int icmp_reflect(struct mbuf *, struct m
 void   icmp_send(struct mbuf *, struct mbuf *);
 inticmp_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 struct rtentry *
-   icmp_mtudisc_clone(struct in_addr, u_int);
+   icmp_mtudisc_clone(struct in_addr, u_int, int);
 void   icmp_mtudisc(struct icmp *, u_int);
 inticmp_do_exthdr(struct mbuf *, u_int16_t, u_int8_t, void *, size_t);
 #endif /* _KERNEL */
Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.357
diff -u -p -r1.357 ip_output.c
--- netinet/ip_output.c 24 Jun 2020 22:03:43 -  1.357
+++ netinet/ip_output.c 18 Dec 2020 16:59:25 -
@@ -605,7 +605,7 @@ ip_output_ipsec_send(struct tdb *tdb, st
rt = NULL;
else if (rt == NULL || (rt->rt_flags & RTF_HOST) == 0) {
rt = icmp_mtudisc_clone(ip->ip_dst,
-   m->m_pkthdr.ph_rtableid);
+   m->m_pkthdr.ph_rtableid, 1);
rt_mtucloned = 1;
}
DPRINTF(("%s: spi %08x mtu %d rt %p cloned %d\n", __func__,
Index: netinet/tcp_timer.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.67
diff -u -p -r1.67 tcp_timer.c
--- netinet/tcp_timer.c 11 Jun 2018 07:40:26 -  1.67
+++ netinet/tcp_timer.c 18 Dec 2020 16:59:25 -
@@ -292,7 +292,7 @@ tcp_timer_rexmt(void *arg)
 #endif
case PF_INET:
rt = icmp_mtudisc_clone(inp->inp_faddr,
-   inp->inp_rtableid);
+   inp->inp_rtableid, 0);
break;
}
if (rt != NULL) {



Re: converting uvm_km_valloc to km_alloc

2020-12-18 Thread Alexander Bluhm
On Fri, Dec 18, 2020 at 10:36:28AM +1000, Jonathan Matthew wrote:
> Here are a couple of relatively easy ones, applying changes from r1.86 of
> amd64's acpi_machdep.c to i386 and arm64.  I've tested i386 but it turns out I
> don't have any arm64 machines with acpi.

A machine like this?  Something special to test?  Runs fine with
your diff.

bluhm

OpenBSD 6.8-current (GENERIC.MP) #0: Fri Dec 18 11:01:32 CET 2020
r...@ot11.obsd-lab.genua.de:/usr/src/sys/arch/arm64/compile/GENERIC.MP
real mem  = 136874385408 (130533MB)
avail mem = 132543135744 (126402MB)
random: good seed from bootblocks
mainbus0 at root: ACPI
psci0 at mainbus0: PSCI 1.1, SMCCC 65535.65535
cpu0 at mainbus0 mpidr 0: Applied Micro X-Gene r3p2
cpu0: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu0: 256KB 64b/line 32-way L2 cache
cpu0: CRC32,SHA2,SHA1,AES+PMULL
cpu1 at mainbus0 mpidr 1: Applied Micro X-Gene r3p2
cpu1: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu1: 256KB 64b/line 32-way L2 cache
cpu1: CRC32,SHA2,SHA1,AES+PMULL
cpu2 at mainbus0 mpidr 100: Applied Micro X-Gene r3p2
cpu2: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu2: 256KB 64b/line 32-way L2 cache
cpu2: CRC32,SHA2,SHA1,AES+PMULL
cpu3 at mainbus0 mpidr 101: Applied Micro X-Gene r3p2
cpu3: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu3: 256KB 64b/line 32-way L2 cache
cpu3: CRC32,SHA2,SHA1,AES+PMULL
cpu4 at mainbus0 mpidr 200: Applied Micro X-Gene r3p2
cpu4: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu4: 256KB 64b/line 32-way L2 cache
cpu4: CRC32,SHA2,SHA1,AES+PMULL
cpu5 at mainbus0 mpidr 201: Applied Micro X-Gene r3p2
cpu5: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu5: 256KB 64b/line 32-way L2 cache
cpu5: CRC32,SHA2,SHA1,AES+PMULL
cpu6 at mainbus0 mpidr 300: Applied Micro X-Gene r3p2
cpu6: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu6: 256KB 64b/line 32-way L2 cache
cpu6: CRC32,SHA2,SHA1,AES+PMULL
cpu7 at mainbus0 mpidr 301: Applied Micro X-Gene r3p2
cpu7: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu7: 256KB 64b/line 32-way L2 cache
cpu7: CRC32,SHA2,SHA1,AES+PMULL
cpu8 at mainbus0 mpidr 400: Applied Micro X-Gene r3p2
cpu8: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu8: 256KB 64b/line 32-way L2 cache
cpu8: CRC32,SHA2,SHA1,AES+PMULL
cpu9 at mainbus0 mpidr 401: Applied Micro X-Gene r3p2
cpu9: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu9: 256KB 64b/line 32-way L2 cache
cpu9: CRC32,SHA2,SHA1,AES+PMULL
cpu10 at mainbus0 mpidr 500: Applied Micro X-Gene r3p2
cpu10: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu10: 256KB 64b/line 32-way L2 cache
cpu10: CRC32,SHA2,SHA1,AES+PMULL
cpu11 at mainbus0 mpidr 501: Applied Micro X-Gene r3p2
cpu11: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu11: 256KB 64b/line 32-way L2 cache
cpu11: CRC32,SHA2,SHA1,AES+PMULL
cpu12 at mainbus0 mpidr 600: Applied Micro X-Gene r3p2
cpu12: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu12: 256KB 64b/line 32-way L2 cache
cpu12: CRC32,SHA2,SHA1,AES+PMULL
cpu13 at mainbus0 mpidr 601: Applied Micro X-Gene r3p2
cpu13: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu13: 256KB 64b/line 32-way L2 cache
cpu13: CRC32,SHA2,SHA1,AES+PMULL
cpu14 at mainbus0 mpidr 700: Applied Micro X-Gene r3p2
cpu14: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu14: 256KB 64b/line 32-way L2 cache
cpu14: CRC32,SHA2,SHA1,AES+PMULL
cpu15 at mainbus0 mpidr 701: Applied Micro X-Gene r3p2
cpu15: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu15: 256KB 64b/line 32-way L2 cache
cpu15: CRC32,SHA2,SHA1,AES+PMULL
cpu16 at mainbus0 mpidr 800: Applied Micro X-Gene r3p2
cpu16: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu16: 256KB 64b/line 32-way L2 cache
cpu16: CRC32,SHA2,SHA1,AES+PMULL
cpu17 at mainbus0 mpidr 801: Applied Micro X-Gene r3p2
cpu17: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu17: 256KB 64b/line 32-way L2 cache
cpu17: CRC32,SHA2,SHA1,AES+PMULL
cpu18 at mainbus0 mpidr 900: Applied Micro X-Gene r3p2
cpu18: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu18: 256KB 64b/line 32-way L2 cache
cpu18: CRC32,SHA2,SHA1,AES+PMULL
cpu19 at mainbus0 mpidr 901: Applied Micro X-Gene r3p2
cpu19: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu19: 256KB 64b/line 32-way L2 cache
cpu19: CRC32,SHA2,SHA1,AES+PMULL
cpu20 at mainbus0 mpidr a00: Applied Micro X-Gene r3p2
cpu20: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu20: 256KB 64b/line 32-way L2 cache
cpu20: CRC32,SHA2,SHA1,AES+PMULL
cpu21 at mainbus0 mpidr a01: Applied Micro X-Gene r3p2
cpu21: 32KB 64b/line 8-way L1 PIPT I-cache, 32KB 64b/line 8-way L1 D-cache
cpu21: 256KB 64b/line 

amd64 pmap pv_entry SLIST

2020-12-17 Thread Alexander Bluhm
Hi,

Can we convert the pv_entry list in amd64 pmap into an SLIST?
I think the code with macros is easier to read.

ok?

bluhm

Index: arch/amd64//amd64/pmap.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/pmap.c,v
retrieving revision 1.141
diff -u -p -r1.141 pmap.c
--- arch/amd64//amd64/pmap.c16 Dec 2020 21:11:35 -  1.141
+++ arch/amd64//amd64/pmap.c16 Dec 2020 21:13:40 -
@@ -321,9 +321,9 @@ void pmap_remove_ept(struct pmap *, vadd
 void pmap_do_remove_ept(struct pmap *, vaddr_t);
 int pmap_enter_ept(struct pmap *, vaddr_t, paddr_t, vm_prot_t);
 int pmap_remove_pte(struct pmap *, struct vm_page *, pt_entry_t *,
-vaddr_t, int, struct pv_entry **);
+vaddr_t, int, struct pvlist *);
 void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t,
-vaddr_t, vaddr_t, int, struct pv_entry **);
+vaddr_t, vaddr_t, int, struct pvlist *);
 #define PMAP_REMOVE_ALL0   /* remove all mappings */
 #define PMAP_REMOVE_SKIPWIRED  1   /* skip wired mappings */
 
@@ -1029,8 +1029,7 @@ pmap_enter_pv(struct vm_page *pg, struct
pve->pv_va = va;
pve->pv_ptp = ptp;  /* NULL for kernel pmap */
mtx_enter(>mdpage.pv_mtx);
-   pve->pv_next = pg->mdpage.pv_list;  /* add to ... */
-   pg->mdpage.pv_list = pve;   /* ... list */
+   SLIST_INSERT_HEAD(>mdpage.pv_list, pve, pv_next);
mtx_leave(>mdpage.pv_mtx);
 }
 
@@ -1044,16 +1043,19 @@ pmap_enter_pv(struct vm_page *pg, struct
 struct pv_entry *
 pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va)
 {
-   struct pv_entry *pve, **prevptr;
+   struct pv_entry *pve, *prev;
 
mtx_enter(>mdpage.pv_mtx);
-   prevptr = >mdpage.pv_list;
-   while ((pve = *prevptr) != NULL) {
+   prev = NULL;
+   SLIST_FOREACH(pve, >mdpage.pv_list, pv_next) {
if (pve->pv_pmap == pmap && pve->pv_va == va) { /* match? */
-   *prevptr = pve->pv_next;/* remove it! */
+   if (prev == NULL)
+   SLIST_REMOVE_HEAD(>mdpage.pv_list, pv_next);
+   else
+   SLIST_REMOVE_AFTER(prev, pv_next);
break;
}
-   prevptr = >pv_next;/* previous pointer */
+   prev = pve; /* previous pointer */
}
mtx_leave(>mdpage.pv_mtx);
return(pve);/* return removed pve */
@@ -1583,7 +1585,7 @@ pmap_copy_page(struct vm_page *srcpg, st
 
 void
 pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva,
-vaddr_t startva, vaddr_t endva, int flags, struct pv_entry **free_pvs)
+vaddr_t startva, vaddr_t endva, int flags, struct pvlist *free_pvs)
 {
struct pv_entry *pve;
pt_entry_t *pte = (pt_entry_t *) ptpva;
@@ -1643,10 +1645,8 @@ pmap_remove_ptes(struct pmap *pmap, stru
/* sync R/M bits */
pmap_sync_flags_pte(pg, opte);
pve = pmap_remove_pv(pg, pmap, startva);
-   if (pve != NULL) {
-   pve->pv_next = *free_pvs;
-   *free_pvs = pve;
-   }
+   if (pve != NULL)
+   SLIST_INSERT_HEAD(free_pvs, pve, pv_next);
 
/* end of "for" loop: time for next pte */
}
@@ -1663,7 +1663,7 @@ pmap_remove_ptes(struct pmap *pmap, stru
 
 int
 pmap_remove_pte(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte,
-vaddr_t va, int flags, struct pv_entry **free_pvs)
+vaddr_t va, int flags, struct pvlist *free_pvs)
 {
struct pv_entry *pve;
struct vm_page *pg;
@@ -1708,10 +1708,8 @@ pmap_remove_pte(struct pmap *pmap, struc
/* sync R/M bits */
pmap_sync_flags_pte(pg, opte);
pve = pmap_remove_pv(pg, pmap, va);
-   if (pve != NULL) {
-   pve->pv_next = *free_pvs;
-   *free_pvs = pve;
-   }
+   if (pve != NULL)
+   SLIST_INSERT_HEAD(free_pvs, pve, pv_next);
 
return 1;
 }
@@ -1746,7 +1744,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_
vaddr_t blkendva;
struct vm_page *ptp;
struct pv_entry *pve;
-   struct pv_entry *free_pvs = NULL;
+   struct pvlist free_pvs = SLIST_HEAD_INITIALIZER(pvlist);
vaddr_t va;
int shootall = 0, shootself;
struct pg_to_free empty_ptps;
@@ -1864,8 +1862,8 @@ pmap_do_remove(struct pmap *pmap, vaddr_
pmap_tlb_shootwait();
 
 cleanup:
-   while ((pve = free_pvs) != NULL) {
-   free_pvs = pve->pv_next;
+   while ((pve = SLIST_FIRST(_pvs)) != NULL) {
+   SLIST_REMOVE_HEAD(_pvs, pv_next);
pool_put(_pv_pool, pve);
}
 
@@ -1898,7 +1896,7 @@ pmap_page_remove(struct vm_page *pg)

Re: regress print target name

2020-12-17 Thread Alexander Bluhm
On Wed, Dec 16, 2020 at 04:42:59PM +0100, Alexander Bluhm wrote:
> When debugging tests, it is useful to see the target name and which
> output belongs to it.

A small addition:

Run setup_once targets in a separate block with headline before
all other targets.

ok?

bluhm

Index: share/mk/bsd.regress.mk
===
RCS file: /data/mirror/openbsd/cvs/src/share/mk/bsd.regress.mk,v
retrieving revision 1.22
diff -u -p -r1.22 bsd.regress.mk
--- share/mk/bsd.regress.mk 16 Dec 2020 16:53:24 -  1.22
+++ share/mk/bsd.regress.mk 17 Dec 2020 00:56:08 -
@@ -75,13 +75,16 @@ ${REGRESS_TARGETS}: ${REGRESS_SETUP}
 CLEANFILES+=${REGRESS_SETUP_ONCE:S/^/stamp-/}
 ${REGRESS_TARGETS}: ${REGRESS_SETUP_ONCE:S/^/stamp-/}
 ${REGRESS_SETUP_ONCE:S/^/stamp-/}: .SILENT
+   echo ' ${@:S/^stamp-//} '
${MAKE} -C ${.CURDIR} ${@:S/^stamp-//}
date >$@
+   echo
 .endif
 
 regress: .SILENT
 .if !empty(REGRESS_SETUP_ONCE)
rm -f ${REGRESS_SETUP_ONCE:S/^/stamp-/}
+   ${MAKE} -C ${.CURDIR} ${REGRESS_SETUP_ONCE:S/^/stamp-/}
 .endif
 .for RT in ${REGRESS_TARGETS}
echo ' ${RT} '



regress print target name

2020-12-16 Thread Alexander Bluhm
Hi,

When debugging tests, it is useful to see the target name and which
output belongs to it.  A lot of my tests have echo lines, but I
think this is better done in the framework.  Then all tests behave
similar.  I would remove the echos from the Makefiles afterwards.

ok?

bluhm

Index: share/mk/bsd.regress.mk
===
RCS file: /data/mirror/openbsd/cvs/src/share/mk/bsd.regress.mk,v
retrieving revision 1.21
diff -u -p -r1.21 bsd.regress.mk
--- share/mk/bsd.regress.mk 17 Jun 2019 17:20:24 -  1.21
+++ share/mk/bsd.regress.mk 16 Dec 2020 15:33:04 -
@@ -84,6 +84,7 @@ regress: .SILENT
rm -f ${REGRESS_SETUP_ONCE:S/^/stamp-/}
 .endif
 .for RT in ${REGRESS_TARGETS}
+   echo ' ${RT} '
 .  if ${REGRESS_SKIP_TARGETS:M${RT}}
echo -n "SKIP " ${_REGRESS_OUT}
echo SKIPPED
@@ -106,9 +107,12 @@ regress: .SILENT
fi
 .  endif
echo ${_REGRESS_NAME}/${RT:S/^run-regress-//} ${_REGRESS_OUT}
+   echo
 .endfor
 .for RT in ${REGRESS_CLEANUP}
+   echo ' ${RT} '
${MAKE} -C ${.CURDIR} ${RT}
+   echo
 .endfor
rm -f ${REGRESS_SETUP_ONCE:S/^/stamp-/}
 



amd64 pmap panic messages

2020-12-16 Thread Alexander Bluhm
Hi,

during all my pmap crashes, I sometimes get this strange address.

panic: pmap_remove_pte: unmanaged page marked PG_PVLIST, va = 0x5d155753000, pa 
= 0xfdfdfdfdfd000

I think we should not clear bits in panic messages.  Debugging
with the full picture is easier.  While there make the panics
more consistent.

ok?

bluhm

Index: arch/amd64//amd64/pmap.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/arch/amd64/amd64/pmap.c,v
retrieving revision 1.140
diff -u -p -r1.140 pmap.c
--- arch/amd64//amd64/pmap.c26 Oct 2020 18:35:41 -  1.140
+++ arch/amd64//amd64/pmap.c16 Dec 2020 12:49:04 -
@@ -1626,17 +1626,18 @@ pmap_remove_ptes(struct pmap *pmap, stru
if ((opte & PG_PVLIST) == 0) {
 #ifdef DIAGNOSTIC
if (pg != NULL)
-   panic("%s: managed page without PG_PVLIST "
- "for 0x%lx", __func__, startva);
+   panic("%s: managed page without PG_PVLIST: "
+   "va 0x%lx, opte 0x%llx", __func__,
+   startva, opte);
 #endif
continue;
}
 
 #ifdef DIAGNOSTIC
if (pg == NULL)
-   panic("%s: unmanaged page marked PG_PVLIST, "
- "va = 0x%lx, pa = 0x%lx", __func__,
- startva, (u_long)(opte & PG_FRAME));
+   panic("%s: unmanaged page marked PG_PVLIST: "
+   "va 0x%lx, opte 0x%llx", __func__,
+   startva, opte);
 #endif
 
/* sync R/M bits */
@@ -1692,16 +1693,16 @@ pmap_remove_pte(struct pmap *pmap, struc
if ((opte & PG_PVLIST) == 0) {
 #ifdef DIAGNOSTIC
if (pg != NULL)
-   panic("%s: managed page without PG_PVLIST for 0x%lx",
- __func__, va);
+   panic("%s: managed page without PG_PVLIST: "
+   "va 0x%lx, opte 0x%llx", __func__, va, opte);
 #endif
return 1;
}
 
 #ifdef DIAGNOSTIC
if (pg == NULL)
-   panic("%s: unmanaged page marked PG_PVLIST, va = 0x%lx, "
- "pa = 0x%lx", __func__, va, (u_long)(opte & PG_FRAME));
+   panic("%s: unmanaged page marked PG_PVLIST: "
+   "va 0x%lx, opte 0x%llx", __func__, va, opte);
 #endif
 
/* sync R/M bits */
@@ -2724,18 +2725,19 @@ pmap_enter(struct pmap *pmap, vaddr_t va
pg = PHYS_TO_VM_PAGE(pa);
 #ifdef DIAGNOSTIC
if (pg == NULL)
-   panic("%s: same pa PG_PVLIST "
- "mapping with unmanaged page "
- "pa = 0x%lx (0x%lx)", __func__,
- pa, atop(pa));
+   panic("%s: same pa, PG_PVLIST "
+   "mapping with unmanaged page: "
+   "va 0x%lx, opte 0x%llx, pa 0x%lx",
+   __func__, va, opte, pa);
 #endif
pmap_sync_flags_pte(pg, opte);
} else {
 #ifdef DIAGNOSTIC
if (PHYS_TO_VM_PAGE(pa) != NULL)
-   panic("%s: same pa, managed "
-   "page, no PG_VLIST pa: 0x%lx",
-   __func__, pa);
+   panic("%s: same pa, no PG_PVLIST "
+   "mapping with managed page: "
+   "va 0x%lx, opte 0x%llx, pa 0x%lx",
+   __func__, va, opte, pa);
 #endif
}
goto enter_now;
@@ -2755,8 +2757,8 @@ pmap_enter(struct pmap *pmap, vaddr_t va
 #ifdef DIAGNOSTIC
if (pg == NULL)
panic("%s: PG_PVLIST mapping with unmanaged "
- "page pa = 0x%lx (0x%lx)",
- __func__, pa, atop(pa));
+   "page: va 0x%lx, opte 0x%llx, pa 0x%lx",
+   __func__, va, opte, pa);
 #endif
pmap_sync_flags_pte(pg, opte);
opve = pmap_remove_pv(pg, pmap, va);



Re: Kernel panic with i386 on latest snapshot

2020-12-15 Thread Alexander Bluhm
On Tue, Dec 15, 2020 at 06:57:03PM +0100, Mark Kettenis wrote:
> Does the diff below fix this?

I can reproduce the panic and your diff fixes it.

Usually my regress machines do not trigger it as I do not install
firmware.  fw_update and reboot makes it crash.

bluhm

OpenBSD 6.8-current (GENERIC.MP) #0: Tue Dec 15 20:18:20 CET 2020
r...@ot2.obsd-lab.genua.de:/usr/src/sys/arch/i386/compile/GENERIC.MP
real mem  = 2146910208 (2047MB)
avail mem = 2091409408 (1994MB)
panic: kernel diagnostic assertion "_kernel_lock_held()" failed: file 
"/usr/src/sys/uvm/uvm_km.c", line 246
Stopped at  db_enter+0x4:   popl%ebp
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
db_enter(d0e47604,d110ae68,d0e46ec4,d0e47fec,d0e47fec) at db_enter+0x4
panic(d0bc79e5,d0c2da24,d0c3a8c4,d0c522a9,f6) at panic+0xd3
__assert(d0c2da24,d0c522a9,f6,d0c3a8c4,d0e46ec4) at __assert+0x19
uvm_km_pgremove(d0e2da74,2552c000,2552e000) at uvm_km_pgremove+0x119
uvm_unmap_kill_entry(d0e47fec,d0e46ec4) at uvm_unmap_kill_entry+0x92
uvm_unmap_remove(d0e47fec,f552c000,f552e000,d110af10,0,1) at uvm_unmap_remove+0
x1cb
uvm_unmap(d0e47fec,f552c000,f552e000) at uvm_unmap+0x53
uvm_km_free(d0e47fec,f552c000,2000,2000) at uvm_km_free+0x25
cpu_ucode_setup(f092c000,f080,efff9000,150a416,1108000) at cpu_ucode_setup+
0xeb
cpu_startup(150a416,1108000,1117000,110b000,0) at cpu_startup+0x14a
main(0,0,0,0,0) at main+0x6b
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{0}> boot reboot
rebooting...

OpenBSD 6.8-current (GENERIC.MP) #1: Tue Dec 15 20:34:36 CET 2020
r...@ot2.obsd-lab.genua.de:/usr/src/sys/arch/i386/compile/GENERIC.MP
real mem  = 2146910208 (2047MB)
avail mem = 2091417600 (1994MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: date 10/20/04, BIOS32 rev. 0 @ 0xfdb30, SMBIOS rev. 2.3 @ 
0xf0640 (63 entries)
bios0: vendor American Megatrends Inc. version "0700xx" date 10/20/2004
bios0: Supermicro. X5DL8
acpi0 at bios0: ACPI 1.0
acpi0: sleep states S0 S1 S4 S5
acpi0: tables DSDT FACP APIC
acpi0: wakeup devices SLPB(S1) NRTH(S5) PS2M(S1) PS2K(S1) UAR1(S1) UAR2(S1) 
USB_(S1) PCI1(S5) PCI2(S5) PCI3(S5) PCI4(S5)
acpitimer0 at acpi0: 3579545 Hz, 32 bits
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Xeon(TM) CPU 3.06GHz ("GenuineIntel" 686-class) 3.07 GHz, 
0f-02-05
cpu0: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,CNXT-ID,xTPR,PERF,MELTDOWN
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 133MHz
cpu1 at mainbus0: apid 6 (application processor)
cpu1: Intel(R) Xeon(TM) CPU 3.06GHz ("GenuineIntel" 686-class) 3.07 GHz, 
0f-02-05
cpu1: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,CNXT-ID,xTPR,PERF,MELTDOWN
cpu2 at mainbus0: apid 1 (application processor)
cpu2: Intel(R) Xeon(TM) CPU 3.06GHz ("GenuineIntel" 686-class) 3.07 GHz, 
0f-02-05
cpu2: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,CNXT-ID,xTPR,PERF,MELTDOWN
cpu3 at mainbus0: apid 7 (application processor)
cpu3: Intel(R) Xeon(TM) CPU 3.06GHz ("GenuineIntel" 686-class) 3.07 GHz, 
0f-02-05
cpu3: 
FPU,V86,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,CNXT-ID,xTPR,PERF,MELTDOWN
ioapic0 at mainbus0: apid 8 pa 0xfec0, version 11, 16 pins
ioapic1 at mainbus0: apid 9 pa 0xfec01000, version 11, 16 pins
ioapic2 at mainbus0: apid 10 pa 0xfec02000, version 11, 16 pins
acpiprt0 at acpi0: bus 0 (NRTH)
acpiprt1 at acpi0: bus 1 (PCI1)
acpiprt2 at acpi0: bus 2 (PCI2)
acpiprt3 at acpi0: bus 4 (PCI3)
acpiprt4 at acpi0: bus 5 (PCI4)
acpibtn0 at acpi0: SLPB
"PNP0A03" at acpi0 not configured
acpicmos0 at acpi0
"PNP0A03" at acpi0 not configured
"PNP0A03" at acpi0 not configured
"PNP0A03" at acpi0 not configured
"PNP0A03" at acpi0 not configured
acpicpu0 at acpi0: C1(@1 halt!)
acpicpu1 at acpi0: C1(@1 halt!)
acpicpu2 at acpi0: C1(@1 halt!)
acpicpu3 at acpi0: C1(@1 halt!)
bios0: ROM list: 0xc/0x8000 0xc8000/0x1000 0xc9000/0x1800 0xca800/0x1800
pci0 at mainbus0 bus 0: configuration mode 1 (bios)
pchb0 at pci0 dev 0 function 0 "ServerWorks CNB20-HE Host" rev 0x33
pchb1 at pci0 dev 0 function 1 "ServerWorks CNB20-HE Host" rev 0x00
pci1 at pchb1 bus 1
em0 at pci1 dev 2 function 0 "Intel 82546GB" rev 0x03: apic 9 int 10, address 
00:1b:21:55:eb:f4
em1 at pci1 dev 2 function 1 "Intel 82546GB" rev 0x03: apic 9 int 11, address 
00:1b:21:55:eb:f5
bge0 at pci1 dev 3 function 0 "Broadcom BCM5703X" rev 0x02, BCM5702/5703 A2 
(0x1002): apic 9 int 15, address 00:30:48:53:90:95
brgphy0 at bge0 phy 1: BCM5703 10/100/1000baseT PHY, 

Re: diff: replace useless use of MCLGETL with MCLGET

2020-12-14 Thread Alexander Bluhm
On Sat, Dec 12, 2020 at 02:05:48PM +0100, Jan Klemkow wrote:
> Thus, this diff removes '(void)' from the MCLGET macro
> -#define MCLGET(m, how) (void) m_clget((m), (how), MCLBYTES)
> +#define MCLGET(m, how) m_clget((m), (how), MCLBYTES)

The MCLGET API is to add a cluster to an existing mbuf.  Returning
the mbuf changes that.  I don't think a macro cleanup justifies an
API change.

bluhm



Re: diff: cleanup type handling

2020-12-12 Thread Alexander Bluhm
On Sat, Dec 12, 2020 at 02:25:03PM +0100, Jan Klemkow wrote:
> The type of the local variable hash in pf_map_addr() has right length
> but the wrong type.  This diff uses the correct type and removes the
> useless casts.  Both functions uses hash as pf_addr, so no cast is
> needed.
> 
> OK?

OK bluhm@

> Index: net/pf_lb.c
> ===
> RCS file: /cvs/src/sys/net/pf_lb.c,v
> retrieving revision 1.67
> diff -u -p -r1.67 pf_lb.c
> --- net/pf_lb.c   29 Jul 2020 02:32:13 -  1.67
> +++ net/pf_lb.c   12 Dec 2020 13:06:49 -
> @@ -349,7 +349,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
>  struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node 
> **sns,
>  struct pf_pool *rpool, enum pf_sn_types type)
>  {
> - unsigned charhash[16];
> + struct pf_addr   hash;
>   struct pf_addr   faddr;
>   struct pf_addr  *raddr = >addr.v.a.addr;
>   struct pf_addr  *rmask = >addr.v.a.mask;
> @@ -460,8 +460,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
>   }
>   break;
>   case PF_POOL_SRCHASH:
> - hashidx =
> - pf_hash(saddr, (struct pf_addr *), >key, af);
> + hashidx = pf_hash(saddr, , >key, af);
>  
>   if (rpool->addr.type == PF_ADDR_TABLE ||
>   rpool->addr.type == PF_ADDR_DYNIFTL) {
> @@ -483,8 +482,7 @@ pf_map_addr(sa_family_t af, struct pf_ru
>   return (1);
>   pf_addrcpy(naddr, >counter, af);
>   } else {
> - pf_poolmask(naddr, raddr, rmask,
> - (struct pf_addr *), af);
> + pf_poolmask(naddr, raddr, rmask, , af);
>   }
>   break;
>   case PF_POOL_ROUNDROBIN:



Re: PF synproxy should act on inbound packets only

2020-12-04 Thread Alexander Bluhm
On Fri, Dec 04, 2020 at 01:08:53AM +0100, Alexandr Nedvedicky wrote:
> below is updated diff. The new diff also updates pf.conf(5) manpage.

OK bluhm@

A note for the man page.

> @@ -2126,6 +2126,9 @@ will not work if
>  .Xr pf 4
>  operates on a
>  .Xr bridge 4 .
> +Also
> +.Cm synproxy state
> +option acts on inbound packets only.

The synproxy rules are the subject of the previous sentence.  I
would not repeat synproxy state in one paragraph.  What about

Also they act on incoming SYN packets only.



Re: PF synproxy should act on inbound packets only

2020-12-03 Thread Alexander Bluhm
On Wed, Dec 02, 2020 at 12:43:28AM +0100, Alexandr Nedvedicky wrote:
> the fix is to apply synproxy action on inbound packets only. Diff below
> does that exactly. Furthermore it also makes pfctl(8) to emit warning,
> when synproxy is being used in outbound/unbound rule:

Sounds reasonable.

> lumpy$ echo 'pass proto tcp from any to any port = 80 synproxy state' 
> |./pfctl -nf -  
> warning (stdin:1): synproxy acts on inbound packets only
> synproxy action is ignored for outbound packets

Just a style nit.  Other errors do not put stdin:1 in brackets.  One
line per error.  In pf.conf the rule direction matters.  What about

stdin:1 warning: synproxy used for inbound rules only, ignored for outbound

> OK?

OK bluhm@

> 8<---8<---8<--8<
> diff --git a/sbin/pfctl/parse.y b/sbin/pfctl/parse.y
> index f06171158cb..d052b5b9a0e 100644
> --- a/sbin/pfctl/parse.y
> +++ b/sbin/pfctl/parse.y
> @@ -4042,6 +4042,13 @@ rule_consistent(struct pf_rule *r)
>   "synproxy state or modulate state");
>   problems++;
>   }
> +
> + if ((r->keep_state == PF_STATE_SYNPROXY) && (r->direction != PF_IN))
> + fprintf(stderr, "warning (%s:%d): "
> + "synproxy acts on inbound packets only\n"
> + "synproxy action is ignored for outbound packets\n",
> + file->name, yylval.lineno);
> +
>   if ((r->nat.addr.type != PF_ADDR_NONE ||
>   r->rdr.addr.type != PF_ADDR_NONE) &&
>   r->action != PF_MATCH && !r->keep_state) {
> diff --git a/sys/net/pf.c b/sys/net/pf.c
> index 823fdc22133..986ee57bff9 100644
> --- a/sys/net/pf.c
> +++ b/sys/net/pf.c
> @@ -4161,7 +4161,7 @@ pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, 
> struct pf_rule *a,
>   s->tag = tag;
>   }
>   if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
> - TH_SYN && r->keep_state == PF_STATE_SYNPROXY) {
> + TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
>   int rtid = pd->rdomain;
>   if (act->rtableid >= 0)
>   rtid = act->rtableid;



Re: Fix ix(4) link status

2020-11-10 Thread Alexander Bluhm
On Mon, Oct 12, 2020 at 11:20:50AM +0200, Gerhard Roth wrote:
> ix(4) relies on link-state change interrupts the update the link state
> via if_link_state_change(). However, after ixgbe_stop() all interrupts
> for the device are disabled and there won't be any IXGBE_EICR_LSC
> interrupt.
> 
> Simple solution: manually update link state after ixgbe_stop().

route monitor shows that there was a "link: no carrier" routing
message missing.  Your diff fixes this during ifconfig down.  Tested
with
ix0 at pci3 dev 0 function 0 "Intel 82598AF" rev 0x01, msix, 4 queues, address 
00:1b:21:0d:db:8f

OK bluhm@

> --- sys/dev/pci/if_ix.c   2020/07/18 07:18:22 1.172
> +++ sys/dev/pci/if_ix.c   2020/10/12 09:13:59
> @@ -1606,6 +1606,8 @@ ixgbe_stop(void *arg)
>   /* Should we really clear all structures on stop? */
>   ixgbe_free_transmit_structures(sc);
>   ixgbe_free_receive_structures(sc);
> +
> + ixgbe_update_link_status(sc);
>  }
>  
>  



Re: accton(8) requires a reboot after being enabled

2020-11-03 Thread Alexander Bluhm
On Fri, Oct 30, 2020 at 09:59:09AM -0600, Theo de Raadt wrote:
> 1 - historically it requires a file to be pre-created.  In the rc scripts,
> this is a touch.  That grabs the umask and ownership of root's run of
> /etc/rc.
> 2 - could we do better, in some way?

We could do the same as we do with other logfiles.  Create them
during installation like syslog log files.  User can simply enable
accounting.  Nothing changes for existing installations.

bluhm

Index: distrib/sets/lists/etc/mi
===
RCS file: /data/mirror/openbsd/cvs/src/distrib/sets/lists/etc/mi,v
retrieving revision 1.218
diff -u -p -r1.218 mi
--- distrib/sets/lists/etc/mi   2 Dec 2019 02:45:18 -   1.218
+++ distrib/sets/lists/etc/mi   3 Nov 2020 16:55:31 -
@@ -50,6 +50,7 @@
 ./root/.login
 ./root/.profile
 ./root/.ssh/authorized_keys
+./var/account/acct
 ./var/crash/minfree
 ./var/cron/at.deny
 ./var/cron/cron.deny



Re: net.inet.ip.forwarding=0 vs lo(4)

2020-10-20 Thread Alexander Bluhm
On Tue, Oct 20, 2020 at 10:14:13AM +1000, David Gwynne wrote:
> such a diff looks like this. it adds a "global" flag that you can set on
> interfaces.

Making addresses on loopback interfaces globally accessible is
against the idea of the strong host model.  Current behavior is a
consequence when we switched the model.

Setting it per interface is pretty complex.  I cannot see the
consequences of your diff, but with my experience with IPsec, pf
divert, rdr-to, carp, trunk I assume it will break some setups.
This area is rather complicated.  Do you really need an "ifconfig lo0
global" feature?

The idea of setting the weak host model with ip forward was that
it will fit most users by default.  If you think a separate switch
has an advantage, we could go that way.  Other BSD do that according
to https://en.wikipedia.org/wiki/Host_model .

But I prefer the idea of setting the host model with forward.
Usually you want it weak on a router and strong on a host.  That
means fewer knobs and less confusion.

Why can't you set ip forward to 1 ?
Could pf divert-to be a solution to your setup?

bluhm

> Index: sbin/ifconfig/ifconfig.c
> ===
> RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
> retrieving revision 1.429
> diff -u -p -r1.429 ifconfig.c
> --- sbin/ifconfig/ifconfig.c  7 Oct 2020 14:38:54 -   1.429
> +++ sbin/ifconfig/ifconfig.c  20 Oct 2020 00:12:06 -
> @@ -468,6 +468,8 @@ const struct  cmd {
>   { "-autoconfprivacy",   IFXF_INET6_NOPRIVACY,   0,  setifxflags },
>   { "soii",   -IFXF_INET6_NOSOII, 0,  setifxflags },
>   { "-soii",  IFXF_INET6_NOSOII,  0,  setifxflags },
> + { "global", IFXF_GLOBAL,0,  setifxflags },
> + { "-global",-IFXF_GLOBAL,   0,  setifxflags },
>  #ifndef SMALL
>   { "hwfeatures", NEXTARG0,   0,  printifhwfeatures },
>   { "metric", NEXTARG,0,  setifmetric },
> @@ -675,7 +677,7 @@ const struct  cmd {
>   "\7RUNNING\10NOARP\11PROMISC\12ALLMULTI\13OACTIVE\14SIMPLEX"\
>   "\15LINK0\16LINK1\17LINK2\20MULTICAST"  \
>   "\23INET6_NOPRIVACY\24MPLS\25WOL\26AUTOCONF6\27INET6_NOSOII"\
> - "\30AUTOCONF4"
> + "\30AUTOCONF4" "\031GLOBAL"
>  
>  int  getinfo(struct ifreq *, int);
>  void getsock(int);
> Index: sys/netinet/ip_input.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_input.c,v
> retrieving revision 1.351
> diff -u -p -r1.351 ip_input.c
> --- sys/netinet/ip_input.c22 Aug 2020 17:55:30 -  1.351
> +++ sys/netinet/ip_input.c20 Oct 2020 00:12:06 -
> @@ -753,29 +753,42 @@ in_ouraddr(struct mbuf *m, struct ifnet 
>   break;
>   }
>   }
> - } else if (ipforwarding == 0 && rt->rt_ifidx != ifp->if_index &&
> - !((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_type == IFT_ENC) ||
> - (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST))) {
> - /* received on wrong interface. */
> -#if NCARP > 0
> - struct ifnet *out_if;
> + } else if (ipforwarding == 0 && !ip_laddr(ifp, m, rt)) {
> + ipstat_inc(ips_wrongif);
> + match = 2;
> + }
> +
> + return (match);
> +}
>  
> +int
> +ip_laddr(struct ifnet *ifp, struct mbuf *m, struct rtentry *rt)
> +{
> + struct ifnet *rtifp;
> + int match = 0;
> +
> + if (rt->rt_ifidx == ifp->if_index ||
> + ifp->if_type == IFT_ENC ||
> + ISSET(ifp->if_flags, IFF_LOOPBACK) ||
> + ISSET(m->m_pkthdr.pf.flags, PF_TAG_TRANSLATE_LOCALHOST))
> + return (1);
> +
> + /* received on a different interface. */
> + rtifp = if_get(rt->rt_ifidx);
> + if (rtifp != NULL) {
> + if (ISSET(rtifp->if_xflags, IFXF_GLOBAL))
> + match = 1;
> +#if NCARP > 0
>   /*
>* Virtual IPs on carp interfaces need to be checked also
>* against the parent interface and other carp interfaces
>* sharing the same parent.
>*/
> - out_if = if_get(rt->rt_ifidx);
> - if (!(out_if && carp_strict_addr_chk(out_if, ifp))) {
> - ipstat_inc(ips_wrongif);
> - match = 2;
> - }
> - if_put(out_if);
> -#else
> - ipstat_inc(ips_wrongif);
> - match = 2;
> + else if (carp_strict_addr_chk(rtifp, ifp))
> + match = 1;
>  #endif
>   }
> + if_put(rtifp);
>  
>   return (match);
>  }
> Index: sys/netinet/ip_var.h
> ===
> RCS file: /cvs/src/sys/netinet/ip_var.h,v
> retrieving revision 1.86
> diff -u -p -r1.86 ip_var.h
> --- sys/netinet/ip_var.h  8 Dec 2019 11:08:22 -   1.86
> +++ 

syslogd listen keep alive

2020-09-14 Thread Alexander Bluhm
Hi,

A while ago dhill@ pointed out that syslogd TCP sockets will stay
open forever if a client aborts the connection silently.  As syslogd
does not write anything into incoming connections, it will not
recognize failure and the socket will stay forever.

Setting TCP keep alive on the listen socket will prevent that.  Note
that outgoing connections don't need it as syslogd will write data
into them.

After keep alive timeout you get this:

syslogd[51331]: tcp logger "10.188.74.74:32769" connection error: Operation 
timed out
syslogd[51331]: tls logger "10.188.74.74:15557" connection error: read failed: 
error:02FFF03C:system library:func(4095):Operation timed out

ok?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.263
diff -u -p -r1.263 syslogd.c
--- usr.sbin/syslogd/syslogd.c  25 May 2020 10:38:32 -  1.263
+++ usr.sbin/syslogd/syslogd.c  14 Sep 2020 15:09:14 -
@@ -354,6 +354,7 @@ int socket_bind(const char *, const char
 intunix_socket(char *, int, mode_t);
 void   double_sockbuf(int, int, int);
 void   set_sockbuf(int);
+void   set_keepalive(int);
 void   tailify_replytext(char *, int);
 
 int
@@ -979,8 +980,10 @@ socket_bind(const char *proto, const cha
}
if (!shutread && res->ai_protocol == IPPROTO_UDP)
double_sockbuf(*fdp, SO_RCVBUF, 0);
-   else if (res->ai_protocol == IPPROTO_TCP)
+   else if (res->ai_protocol == IPPROTO_TCP) {
set_sockbuf(*fdp);
+   set_keepalive(*fdp);
+   }
reuseaddr = 1;
if (setsockopt(*fdp, SOL_SOCKET, SO_REUSEADDR, ,
sizeof(reuseaddr)) == -1) {
@@ -3104,6 +3107,15 @@ set_sockbuf(int fd)
log_warn("setsockopt sndbufsize %d", size);
if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, , sizeof(size)) == -1)
log_warn("setsockopt rcvbufsize %d", size);
+}
+
+void
+set_keepalive(int fd)
+{
+   int val = 1;
+
+   if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, , sizeof(val)) == -1)
+   log_warn("setsockopt keepalive %d", val);
 }
 
 void



Re: trunk: keep interface up on port removal

2020-09-12 Thread Alexander Bluhm
OK bluhm@

On Sat, Sep 12, 2020 at 05:49:52PM +0200, Klemens Nanni wrote:
> Index: if_trunk.c
> ===
> RCS file: /cvs/src/sys/net/if_trunk.c,v
> retrieving revision 1.149
> diff -u -p -r1.149 if_trunk.c
> --- if_trunk.c28 Jul 2020 09:52:32 -  1.149
> +++ if_trunk.c12 Sep 2020 15:41:14 -
> @@ -423,10 +423,6 @@ trunk_port_destroy(struct trunk_port *tp
>   /* Remove multicast addresses from this port */
>   trunk_ether_cmdmulti(tp, SIOCDELMULTI);
>  
> - /* Port has to be down */
> - if (ifp->if_flags & IFF_UP)
> - if_down(ifp);
> -
>   ifpromisc(ifp, 0);
>  
>   if (tr->tr_port_destroy != NULL)



Re: pf_remove_divert_state

2020-07-25 Thread Alexander Bluhm
On Sat, Jul 25, 2020 at 09:37:37PM +0900, YASUOKA Masahiko wrote:
> Is this part a reason why we have "divert-reply"?

Yes.

Divert rules pass packets to the local network stack.  With divert-to
you specify the socket address.  This works for incoming connections.
The divert-to address can be 127.0.0.1 or anything else with
SO_BINDANY.

When you use SO_BINDANY for outgoing connections and you don't know
the addresses when writing pf.conf, use divert-reply.

As dangling states interfere with new connections, I added the
divert state cleanup.  This is especially necessary for DGRAM or
RAW sockets.

> > Is that not possible for you?
> 
> It's possible.

Fine, then use divert-reply instead of changing the semantics.

bluhm



Re: pf_remove_divert_state

2020-07-25 Thread Alexander Bluhm
On Sat, Jul 25, 2020 at 08:20:21PM +0900, YASUOKA Masahiko wrote:
> Currently SO_BINDANY is usable without any divert or divert-reply
> rule.

This is why we have the divert-reply feature.  Just mark the states
with that keyword when you want to use them with SO_BINDANY.

See man setsockopt

Is that not possible for you?

bluhm



Re: tcp_close: can we delay the reaper for 1 tick?

2020-07-24 Thread Alexander Bluhm
On Fri, Jul 24, 2020 at 01:20:29PM -0500, Scott Cheloha wrote:
> tcp_close() schedules the reaper timeout to run immediately.
> Does it need to run *immediately*?  Can it wait for one tick?

It does not matter.  Free has to happen after timeout thread has
been run.  Some other timeout may be waiting for netlock and reference
the TCB.  So I added the delay.

Just don't wait too long to avoid wasting resources.

OK bluhm@

> Index: tcp_subr.c
> ===
> RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
> retrieving revision 1.174
> diff -u -p -r1.174 tcp_subr.c
> --- tcp_subr.c4 Oct 2018 17:33:41 -   1.174
> +++ tcp_subr.c24 Jul 2020 18:19:00 -
> @@ -518,7 +518,7 @@ tcp_close(struct tcpcb *tp)
>  
>   m_free(tp->t_template);
>   /* Free tcpcb after all pending timers have been run. */
> - TCP_TIMER_ARM(tp, TCPT_REAPER, 0);
> + TCP_TIMER_ARM(tp, TCPT_REAPER, 1);
>  
>   inp->inp_ppcb = NULL;
>   soisdisconnected(so);



Re: sensorsd bad unveil

2020-07-02 Thread Alexander Bluhm
On Thu, Jul 02, 2020 at 12:39:47PM -0600, Theo de Raadt wrote:
> The unveil("/", "x") is to support command executation:

Of course.  Forgot that.

bluhm

Index: usr.sbin/sensorsd/sensorsd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/sensorsd/sensorsd.c,v
retrieving revision 1.66
diff -u -p -r1.66 sensorsd.c
--- usr.sbin/sensorsd/sensorsd.c28 Jun 2019 13:32:50 -  1.66
+++ usr.sbin/sensorsd/sensorsd.c2 Jul 2020 18:43:25 -
@@ -94,7 +94,7 @@ void   reparse_cfg(int);
 TAILQ_HEAD(sdlimhead_t, sdlim_t);
 struct sdlimhead_t sdlims = TAILQ_HEAD_INITIALIZER(sdlims);
 
-char*configfile;
+char*configfile, *configdb;
 volatile sig_atomic_treload = 0;
 int  debug = 0;
 
@@ -125,10 +125,9 @@ main(int argc, char *argv[])
debug = 1;
break;
case 'f':
-   configfile = optarg;
-   if (access(configfile, R_OK) != 0)
-   err(1, "access configuration file %s",
-   configfile);
+   configfile = realpath(optarg, NULL);
+   if (configfile == NULL)
+   err(1, "configuration file %s", optarg);
break;
default:
usage();
@@ -143,9 +142,14 @@ main(int argc, char *argv[])
if (configfile == NULL)
if (asprintf(, "/etc/sensorsd.conf") == -1)
err(1, "out of memory");
+   if (asprintf(, "%s.db", configfile) == -1)
+   err(1, "out of memory");
 
+   chdir("/");
if (unveil(configfile, "r") == -1)
err(1, "unveil");
+   if (unveil(configdb, "r") == -1)
+   err(1, "unveil");
if (unveil("/", "x") == -1)
err(1, "unveil");
 
@@ -158,7 +162,7 @@ main(int argc, char *argv[])
 
parse_config(configfile);
 
-   if (debug == 0 && daemon(0, 0) == -1)
+   if (debug == 0 && daemon(1, 0) == -1)
err(1, "unable to fork");
 
signal(SIGHUP, reparse_cfg);



sensorsd bad unveil

2020-07-02 Thread Alexander Bluhm
Hi,

sensorsd(8) reports an unveil failure due to chdir / .  An additional
"r" permission would be necessary.

- chdir before unveil, do not unveil /
- use absolute config path after chdir, also necessary for SIGHUP
- /etc/sensorsd.conf.db must be unveiled, cgetent(3) tries to open it

ok?

bluhm

Index: usr.sbin/sensorsd/sensorsd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/sensorsd/sensorsd.c,v
retrieving revision 1.66
diff -u -p -r1.66 sensorsd.c
--- usr.sbin/sensorsd/sensorsd.c28 Jun 2019 13:32:50 -  1.66
+++ usr.sbin/sensorsd/sensorsd.c2 Jul 2020 18:14:53 -
@@ -94,7 +94,7 @@ void   reparse_cfg(int);
 TAILQ_HEAD(sdlimhead_t, sdlim_t);
 struct sdlimhead_t sdlims = TAILQ_HEAD_INITIALIZER(sdlims);
 
-char*configfile;
+char*configfile, *configdb;
 volatile sig_atomic_treload = 0;
 int  debug = 0;
 
@@ -125,10 +125,9 @@ main(int argc, char *argv[])
debug = 1;
break;
case 'f':
-   configfile = optarg;
-   if (access(configfile, R_OK) != 0)
-   err(1, "access configuration file %s",
-   configfile);
+   configfile = realpath(optarg, NULL);
+   if (configfile == NULL)
+   err(1, "configuration file %s", optarg);
break;
default:
usage();
@@ -143,10 +142,13 @@ main(int argc, char *argv[])
if (configfile == NULL)
if (asprintf(, "/etc/sensorsd.conf") == -1)
err(1, "out of memory");
+   if (asprintf(, "%s.db", configfile) == -1)
+   err(1, "out of memory");
 
+   chdir("/");
if (unveil(configfile, "r") == -1)
err(1, "unveil");
-   if (unveil("/", "x") == -1)
+   if (unveil(configdb, "r") == -1)
err(1, "unveil");
 
if (pledge("stdio rpath proc exec", NULL) == -1)
@@ -158,7 +160,7 @@ main(int argc, char *argv[])
 
parse_config(configfile);
 
-   if (debug == 0 && daemon(0, 0) == -1)
+   if (debug == 0 && daemon(1, 0) == -1)
err(1, "unable to fork");
 
signal(SIGHUP, reparse_cfg);



Re: Correcty reloading unresolved host in syslogd @Conf lines

2020-05-22 Thread Alexander Bluhm
On Fri, May 22, 2020 at 07:38:30AM -0600, Todd C. Miller wrote:
> I'm a little confused by the protocol handling in cfline.
>
>   if (strcmp(proto, "udp") == 0) {
>   if (fd_udp == -1)
>   proto = "udp6";
>   if (fd_udp6 == -1)
>   proto = "udp4";
>   ipproto = proto;
>   }
>
> Doesn't that mean that in the default case if a syslog server is
> not reachable, proto will end up being set to "udp4" and not "udp"?
> If so, then your diff will only retry udp4 on SIGHUP instead of
> both udp4 and udp6.

What do you mean by "not reachable"?  As we do not connect(2) and
ignore most errors of sendto(2), syslogd(8) knows nothing about
reachability.  I guess you mean "if DNS lookup fails".

fd_udp and fd_udp6 should never become -1 as we cannot reopen them.
If fd_udp6 is -1 we have to restrict ourselves to "udp4".  But it
is better to move this code out of the big if else block.  Then we
get the "no udp4" warning if something went wrong.

There was another problem with my diff.  If DNS server switches
between A and AAAA answers after SIGHUP, the wrong socket has been
closed.  It is better to close the sockets based only on configuration,
not on runtime DNS.  Note that when the config file changes, syslogd
re-execs itself and we start with fresh sockets.

New diff, move the send_udp = 1 a bit up to the config logic.

ok?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.262
diff -u -p -r1.262 syslogd.c
--- usr.sbin/syslogd/syslogd.c  5 Jul 2019 13:23:27 -   1.262
+++ usr.sbin/syslogd/syslogd.c  22 May 2020 18:21:23 -
@@ -853,20 +853,6 @@ main(int argc, char *argv[])
event_add(ev_udp, NULL);
if (fd_udp6 != -1)
event_add(ev_udp6, NULL);
-   } else {
-   /*
-* If generic UDP file descriptors are used neither
-* for receiving nor for sending, close them.  Then
-* there is no useless *.514 in netstat.
-*/
-   if (fd_udp != -1 && !send_udp) {
-   close(fd_udp);
-   fd_udp = -1;
-   }
-   if (fd_udp6 != -1 && !send_udp6) {
-   close(fd_udp6);
-   fd_udp6 = -1;
-   }
}
for (i = 0; i < nbind; i++)
if (fd_bind[i] != -1)
@@ -2416,6 +2402,7 @@ init(void)
s = 0;
strlcpy(progblock, "*", sizeof(progblock));
strlcpy(hostblock, "*", sizeof(hostblock));
+   send_udp = send_udp6 = 0;
while (getline(, , cf) != -1) {
/*
 * check for end-of-section, comments, strip off trailing
@@ -2508,6 +2495,22 @@ init(void)
Initialized = 1;
dropped_warn(_dropped, "during initialization");

+   if (SecureMode) {
+   /*
+* If generic UDP file descriptors are used neither
+* for receiving nor for sending, close them.  Then
+* there is no useless *.514 in netstat.
+*/
+   if (fd_udp != -1 && !send_udp) {
+   close(fd_udp);
+   fd_udp = -1;
+   }
+   if (fd_udp6 != -1 && !send_udp6) {
+   close(fd_udp6);
+   fd_udp6 = -1;
+   }
+   }
+
if (Debug) {
SIMPLEQ_FOREACH(f, , f_next) {
for (i = 0; i <= LOG_NFACILITIES; i++)
@@ -2704,20 +2707,24 @@ cfline(char *line, char *progblock, char
}
if (proto == NULL)
proto = "udp";
-   ipproto = proto;
if (strcmp(proto, "udp") == 0) {
if (fd_udp == -1)
proto = "udp6";
if (fd_udp6 == -1)
proto = "udp4";
-   ipproto = proto;
+   }
+   ipproto = proto;
+   if (strcmp(proto, "udp") == 0) {
+   send_udp = send_udp6 = 1;
} else if (strcmp(proto, "udp4") == 0) {
+   send_udp = 1;
if (fd_udp == -1) {
log_warnx("no udp4 \"%s\"",
f->f_un.f_forw.f_loghost);
break;
}
} else if (strcmp(proto, "udp6") == 0) {
+   send_udp6 = 1;
if (fd_udp6 == -1) {
log_warnx("no udp6 \"%s\"",
f->f_un.f_forw.f_loghost);
@@ -2761,11 +2768,9 @@ cfline(char *line, char *progblock, char
if 

Re: diff: uvm: fix unitialized var and simplify code in km_alloc()

2020-05-22 Thread Alexander Bluhm
On Wed, May 20, 2020 at 11:44:57AM +0200, Jan Klemkow wrote:
> The function km_alloc() returns the uninitialized local variable sva if
> pgl is empty.  It seems to be not possible in the current condition of
> the code, but I'm not sure if this is guaranteed.  Thus, I would prefer
> to initialize sva with zero.

I think initializing sva to 0 is better than returning some stack
memory if something goes wrong.

> It also seems to be unnecessary to loop over the whole pagelist to find
> the first element.  The marco pmap_map_direct() just does some
> calculations and the value of va is discarded.

It is not a macro on other architectures.  You must make sure that
it has no side effects everywhere.  I would not touch this.

> I build and run the code on amd64 without any issue and regress/sys/uvm
> also doesn't show any problems with that diff.

Only testing amd64 is not enough for such a diff.

bluhm

> Index: uvm/uvm_km.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_km.c,v
> retrieving revision 1.136
> diff -u -p -r1.136 uvm_km.c
> --- uvm/uvm_km.c  18 Feb 2020 12:13:40 -  1.136
> +++ uvm/uvm_km.c  20 May 2020 06:17:41 -
> @@ -816,7 +816,7 @@ km_alloc(size_t sz, const struct kmem_va
>   paddr_t pla_align;
>   int pla_flags;
>   int pla_maxseg;
> - vaddr_t va, sva;
> + vaddr_t va, sva = 0;
>
>   KASSERT(sz == round_page(sz));
>
> @@ -851,11 +851,8 @@ km_alloc(size_t sz, const struct kmem_va
>* allocations.
>*/
>   if (kv->kv_singlepage || kp->kp_maxseg == 1) {
> - TAILQ_FOREACH(pg, , pageq) {
> - va = pmap_map_direct(pg);
> - if (pg == TAILQ_FIRST())
> - sva = va;
> - }
> + if ((pg = TAILQ_FIRST()) != NULL)
> + sva = pmap_map_direct(pg);
>   return ((void *)sva);
>   }
>  #endif



Re: Correcty reloading unresolved host in syslogd @Conf lines

2020-05-22 Thread Alexander Bluhm
On Wed, May 20, 2020 at 09:29:54PM -0400, sven falempin wrote:
> ? Will it goes into base this time ?

I need an OK from a developer.  Anyone?

bluhm

> On Mon, May 18, 2020 at 5:31 AM Alexander Bluhm 
> > Index: usr.sbin/syslogd/syslogd.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
> > retrieving revision 1.262
> > diff -u -p -r1.262 syslogd.c
> > --- usr.sbin/syslogd/syslogd.c  5 Jul 2019 13:23:27 -   1.262
> > +++ usr.sbin/syslogd/syslogd.c  9 Feb 2020 20:25:20 -
> > @@ -853,20 +853,6 @@ main(int argc, char *argv[])
> > event_add(ev_udp, NULL);
> > if (fd_udp6 != -1)
> > event_add(ev_udp6, NULL);
> > -   } else {
> > -   /*
> > -* If generic UDP file descriptors are used neither
> > -* for receiving nor for sending, close them.  Then
> > -* there is no useless *.514 in netstat.
> > -*/
> > -   if (fd_udp != -1 && !send_udp) {
> > -   close(fd_udp);
> > -   fd_udp = -1;
> > -   }
> > -   if (fd_udp6 != -1 && !send_udp6) {
> > -   close(fd_udp6);
> > -   fd_udp6 = -1;
> > -   }
> > }
> > for (i = 0; i < nbind; i++)
> > if (fd_bind[i] != -1)
> > @@ -2416,6 +2402,7 @@ init(void)
> > s = 0;
> > strlcpy(progblock, "*", sizeof(progblock));
> > strlcpy(hostblock, "*", sizeof(hostblock));
> > +   send_udp = send_udp6 = 0;
> > while (getline(, , cf) != -1) {
> > /*
> >  * check for end-of-section, comments, strip off trailing
> > @@ -2508,6 +2495,22 @@ init(void)
> > Initialized = 1;
> > dropped_warn(_dropped, "during initialization");
> >
> > +   if (SecureMode) {
> > +   /*
> > +* If generic UDP file descriptors are used neither
> > +* for receiving nor for sending, close them.  Then
> > +* there is no useless *.514 in netstat.
> > +*/
> > +   if (fd_udp != -1 && !send_udp) {
> > +   close(fd_udp);
> > +   fd_udp = -1;
> > +   }
> > +   if (fd_udp6 != -1 && !send_udp6) {
> > +   close(fd_udp6);
> > +   fd_udp6 = -1;
> > +   }
> > +   }
> > +
> > if (Debug) {
> > SIMPLEQ_FOREACH(f, , f_next) {
> > for (i = 0; i <= LOG_NFACILITIES; i++)
> > @@ -2755,6 +2758,13 @@ cfline(char *line, char *progblock, char
> > sizeof(f->f_un.f_forw.f_addr)) != 0) {
> > log_warnx("bad hostname \"%s\"",
> > f->f_un.f_forw.f_loghost);
> > +   /* DNS lookup may work after SIGHUP, keep sockets
> > */
> > +   if (strcmp(proto, "udp") == 0)
> > +   send_udp = send_udp6 = 1;
> > +   else if (strcmp(proto, "udp4") == 0)
> > +   send_udp = 1;
> > +   else if (strcmp(proto, "udp6") == 0)
> > +   send_udp6 = 1;
> > break;
> > }
> > f->f_file = -1;
> >



Re: Correcty reloading unresolved host in syslogd @Conf lines

2020-05-18 Thread Alexander Bluhm
On Sat, May 16, 2020 at 07:23:37PM -0400, sven falempin wrote:
> This was looked at before.
> Did not get through.

The posted diff was not my final solution.  But yes, the issue was
forgotten.  So I would suggest this.

When DNS lookup of an UDP loghost failed, syslogd(8) did close the
UDP sockets for sending messages.  Keep the sockets open in this
case.  Then they can be used if DNS is working during the next
SIGHUP.

ok?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.262
diff -u -p -r1.262 syslogd.c
--- usr.sbin/syslogd/syslogd.c  5 Jul 2019 13:23:27 -   1.262
+++ usr.sbin/syslogd/syslogd.c  9 Feb 2020 20:25:20 -
@@ -853,20 +853,6 @@ main(int argc, char *argv[])
event_add(ev_udp, NULL);
if (fd_udp6 != -1)
event_add(ev_udp6, NULL);
-   } else {
-   /*
-* If generic UDP file descriptors are used neither
-* for receiving nor for sending, close them.  Then
-* there is no useless *.514 in netstat.
-*/
-   if (fd_udp != -1 && !send_udp) {
-   close(fd_udp);
-   fd_udp = -1;
-   }
-   if (fd_udp6 != -1 && !send_udp6) {
-   close(fd_udp6);
-   fd_udp6 = -1;
-   }
}
for (i = 0; i < nbind; i++)
if (fd_bind[i] != -1)
@@ -2416,6 +2402,7 @@ init(void)
s = 0;
strlcpy(progblock, "*", sizeof(progblock));
strlcpy(hostblock, "*", sizeof(hostblock));
+   send_udp = send_udp6 = 0;
while (getline(, , cf) != -1) {
/*
 * check for end-of-section, comments, strip off trailing
@@ -2508,6 +2495,22 @@ init(void)
Initialized = 1;
dropped_warn(_dropped, "during initialization");

+   if (SecureMode) {
+   /*
+* If generic UDP file descriptors are used neither
+* for receiving nor for sending, close them.  Then
+* there is no useless *.514 in netstat.
+*/
+   if (fd_udp != -1 && !send_udp) {
+   close(fd_udp);
+   fd_udp = -1;
+   }
+   if (fd_udp6 != -1 && !send_udp6) {
+   close(fd_udp6);
+   fd_udp6 = -1;
+   }
+   }
+
if (Debug) {
SIMPLEQ_FOREACH(f, , f_next) {
for (i = 0; i <= LOG_NFACILITIES; i++)
@@ -2755,6 +2758,13 @@ cfline(char *line, char *progblock, char
sizeof(f->f_un.f_forw.f_addr)) != 0) {
log_warnx("bad hostname \"%s\"",
f->f_un.f_forw.f_loghost);
+   /* DNS lookup may work after SIGHUP, keep sockets */
+   if (strcmp(proto, "udp") == 0)
+   send_udp = send_udp6 = 1;
+   else if (strcmp(proto, "udp4") == 0)
+   send_udp = 1;
+   else if (strcmp(proto, "udp6") == 0)
+   send_udp6 = 1;
break;
}
f->f_file = -1;



Re: Remove some customization from our perl build

2020-05-18 Thread Alexander Bluhm
On Sun, May 17, 2020 at 09:49:54AM -0700, Andrew Hewus Fresh wrote:
> I think this patch is now cleaned up enough to look for OKs.

OK bluhm@

> The patch to numeric.c works around an issue with clang and
> -Wdeclaration-after-statement that was fixed more correctly upstream,
> but pulling in the full fix in seemed like overkill for now.
>
> https://github.com/Perl/perl5/issues/17725
> https://github.com/Perl/perl5/pull/17750
>
> This patch does miss `rm gnu/usr.bin/perl/DynaLoader.sed` that we no
> longer need, not sure which cvs diff flag I missed on that.
>
> Other than that, as described in my original email:
>
>
> On Fri, Apr 10, 2020 at 06:17:33PM -0700, Andrew Hewus Fresh wrote:
> > Recently it was pointed out that we don't link /usr/lib/libperl.so.* to
> > libm the way is expected for code that also links to libperl.  That led
> > me to go digging again into the customization we have around the perl
> > build and getting terribly confused.  That did somewhat clear up after
> > reading more about bsd.*.mk, but still feel like some of this mess was
> > to make the vax work, but I couldn't actually figure it out from the cvs
> > logs why it exists.
> >
> > In any case, this patch does a few things, some of which I can split up
> > and put in separately if it comes to that.
> >
> > * Puts back some of the upstream Makefile.SH that we removed
> >   * and a little Dynaloader too
> >
> > * Changes Configure flags to -de instead of -dsE
> >   * So Configure does the work previously handled by depend.done
> >
> > * Adjusts the installperl script to put libperl where we want it
> >
> > * Moves some build flag discovery into hint/openbsd.sh
> >   (Which I can then push upstream)
> >   * Figuring out the correct PICFLAG, which means perl will now use the
> > same one, not -fpic for things built by Makefile.bsdwapper on archs
> > that want it and -fPIC for everything else.
> >   * Using no-tree-ter on alpha, due to a compiler bug.
> >
> > * Lets the perl infrastructure build libperl again
> >   * Notably, this stops creating libperl.a, but I have a patch
> > around that puts it back, just not sure if we need it.
> >   * Which means "we" don't actually build anything anymore, we leave all
> > that to the perl upstream Makefile so all the "stuff" to do with
> > that can go away.
> >   * Which means ldd now mentions libm as it should
> >
> > * Some tidying of the rest of Makefile.bsd-wrapper*
> >
> > It seems to build fine on my
> >   alpha, amd64, arm64, armv7, i386, macppc, octeon, and sparc64.
> >
> >
> > The individual changes are committed, in a clean-up-build branch, to the
> > GitHub repo where I keep track of them, along with build logs from my
> > test machines both with and without this patch:
> > https://github.com/afresh1/OpenBSD-perl/tree/clean-up-build
>
>
> Index: gnu/usr.bin/perl/numeric.c
> ===
> RCS file: /cvs/src/gnu/usr.bin/perl/numeric.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 numeric.c
> --- gnu/usr.bin/perl/numeric.c9 Apr 2020 01:32:11 -   1.6
> +++ gnu/usr.bin/perl/numeric.c17 May 2020 16:44:29 -
> @@ -34,8 +34,8 @@ values, including such things as replace
>  PERL_STATIC_INLINE NV
>  S_strtod(pTHX_ const char * const s, char ** e)
>  {
> -DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
>  NV result;
> +DECLARATION_FOR_LC_NUMERIC_MANIPULATION;
>
>  STORE_LC_NUMERIC_SET_TO_NEEDED();
>
> Index: distrib/sets/lists/comp/mi
> ===
> RCS file: /cvs/src/distrib/sets/lists/comp/mi,v
> retrieving revision 1.1498
> diff -u -p -r1.1498 mi
> --- distrib/sets/lists/comp/mi22 Apr 2020 02:04:00 -  1.1498
> +++ distrib/sets/lists/comp/mi17 May 2020 16:44:21 -
> @@ -1536,7 +1536,6 @@
>  ./usr/lib/libpanelw_p.a
>  ./usr/lib/libpcap.a
>  ./usr/lib/libpcap_p.a
> -./usr/lib/libperl.a
>  ./usr/lib/libpthread.a
>  ./usr/lib/libpthread_p.a
>  ./usr/lib/libradius.a
> Index: gnu/usr.bin/perl/DynaLoader.sed
> ===
> RCS file: gnu/usr.bin/perl/DynaLoader.sed
> diff -N gnu/usr.bin/perl/DynaLoader.sed
> --- gnu/usr.bin/perl/DynaLoader.sed   17 Nov 2014 21:00:55 -  1.4
> +++ /dev/null 1 Jan 1970 00:00:00 -
> @@ -1,6 +0,0 @@
> -s@"dlutils.c"@"ext/DynaLoader/dlutils.c"@
> -/^#ifndef RTLD_LAZY/{N;N;c\
> -#define VERSION "1.20"\
> -#define XS_VERSION "1.20"\
> -#define DLOPEN_WONT_DO_RELATIVE_PATHS
> -}
> Index: gnu/usr.bin/perl/Makefile.SH
> ===
> RCS file: /cvs/src/gnu/usr.bin/perl/Makefile.SH,v
> retrieving revision 1.51
> diff -u -p -r1.51 Makefile.SH
> --- gnu/usr.bin/perl/Makefile.SH  25 Apr 2020 21:15:23 -  1.51
> +++ gnu/usr.bin/perl/Makefile.SH  17 May 2020 16:44:23 -
> @@ -822,11 +822,11 @@ CCDLFLAGS   = `echo $ccdlflags|sed -e 's@

Re: Fix occasional signify regression test fail

2020-04-03 Thread Alexander Bluhm
On Thu, Apr 02, 2020 at 08:03:33AM +, Christian Ludwig wrote:
> The signify regression test creates a tar archive from the test's
> directory. Without a symlink to the obj directory, the output tarball is
> part of the input file list. This makes tar complain that archive.tgz
> was modified during copy to archive.

Thanks, committed.

bluhm

> Avoid including the output archive by reducing the input file list to
> text files only.
>
> While there, tweak the list of files to clean.
> ---
>  regress/usr.bin/signify/Makefile   | 3 ++-
>  regress/usr.bin/signify/signify.sh | 2 +-
>  2 files changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/regress/usr.bin/signify/Makefile 
> b/regress/usr.bin/signify/Makefile
> index 208bc5eff38..6fff3450e11 100644
> --- a/regress/usr.bin/signify/Makefile
> +++ b/regress/usr.bin/signify/Makefile
> @@ -1,6 +1,7 @@
>  #$OpenBSD: Makefile,v 1.4 2014/03/17 02:49:02 tedu Exp $
>
> -CLEANFILES += test.sig confirmorders confirmorders.sig HASH.sig
> +CLEANFILES += test.sig confirmorders confirmorders.sig HASH HASH.sig \
> +   archive.tgz signed.tgz
>  REGRESS_TARGETS = t1
>
>  t1:
> diff --git a/regress/usr.bin/signify/signify.sh 
> b/regress/usr.bin/signify/signify.sh
> index d83dff79b19..5c2d86dec98 100644
> --- a/regress/usr.bin/signify/signify.sh
> +++ b/regress/usr.bin/signify/signify.sh
> @@ -28,7 +28,7 @@ signify -S -e -s $seckey -m HASH
>  rm HASH
>  signify -C -q -p $pubkey -x HASH.sig
>
> -tar zcPf archive.tgz $srcdir
> +tar zcPf archive.tgz $srcdir/*.txt
>  signify -zS -s $seckey -m archive.tgz -x signed.tgz
>  # check it's still valid gzip
>  gunzip -t signed.tgz
> --
> 2.26.0



Re: rwsleep and stopped process

2020-03-01 Thread Alexander Bluhm
On Sun, Mar 01, 2020 at 02:16:20PM +0100, Mark Kettenis wrote:
> This probably means that msleep(4) has a similar issue.

Here is the diff for msleep() and rwsleep().

bluhm

Index: kern/kern_synch.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.162
diff -u -p -r1.162 kern_synch.c
--- kern/kern_synch.c   30 Jan 2020 08:51:27 -  1.162
+++ kern/kern_synch.c   1 Mar 2020 13:50:29 -
@@ -259,7 +259,6 @@ msleep(const volatile void *ident, struc

sleep_setup(, ident, priority, wmesg);
sleep_setup_timeout(, timo);
-   sleep_setup_signal();

/* XXX - We need to make sure that the mutex doesn't
 * unblock splsched. This can be made a bit more
@@ -268,6 +267,8 @@ msleep(const volatile void *ident, struc
spl = MUTEX_OLDIPL(mtx);
MUTEX_OLDIPL(mtx) = splsched();
mtx_leave(mtx);
+   /* signal may stop the process, release mutex before that */
+   sleep_setup_signal();

error = sleep_finish_all(, 1);

@@ -320,9 +321,10 @@ rwsleep(const volatile void *ident, stru

sleep_setup(, ident, priority, wmesg);
sleep_setup_timeout(, timo);
-   sleep_setup_signal();

rw_exit(rwl);
+   /* signal may stop the process, release rwlock before that */
+   sleep_setup_signal();

error = sleep_finish_all(, 1);



rwsleep and stopped process

2020-03-01 Thread Alexander Bluhm
Hi,

I had a 6.6 machine where a lot of git processes got stuck sleeping
on "futex".  The process holding the futex rwlock was this one.

 33293  332235  1   2734  30x800483  fsleepgit

It called mi_switch() from proc_stop() with this trace.

issignal(80002acc74a8) at issignal+0x2ec
sleep_setup_signal(120,81e2e168) at sleep_setup_signal+0xdf
rwsleep(12d8,80002acc74a8,23,c010e7fabd0,0) at rwsleep+0x94
futex_wait(2,80002b0fb480,c010e7fabd0,12d8) at futex_wait+0x180
sys_futex(530,80002acc74a8,53) at sys_futex+0x80
syscall(0) at syscall+0x37d
Xsyscall(0,53,0,53,0,c015a954200) at Xsyscall+0x128

So I would say the process was stopped instead of sleeping and did
not release the lock.  Can rwsleep() call rw_exit() before
sleep_setup_signal()?  This diff survived a full make regress on
amd64.

ok?

bluhm

Index: kern/kern_synch.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.162
diff -u -p -r1.162 kern_synch.c
--- kern/kern_synch.c   30 Jan 2020 08:51:27 -  1.162
+++ kern/kern_synch.c   1 Mar 2020 12:11:30 -
@@ -320,9 +320,9 @@ rwsleep(const volatile void *ident, stru

sleep_setup(, ident, priority, wmesg);
sleep_setup_timeout(, timo);
-   sleep_setup_signal();
-
rw_exit(rwl);
+   /* signal may stop the process, release rwlock before that */
+   sleep_setup_signal();

error = sleep_finish_all(, 1);



Re: Update Term::ReadKey to 2.38

2020-02-26 Thread Alexander Bluhm
On Tue, Feb 25, 2020 at 06:34:40PM -0800, Andrew Hewus Fresh wrote:
> > Here the patch is broken.
> > patch:  malformed patch at line 906: /VERS+use strict;
>
> Attached again, not sure how it got broken. Also can be downloaded from:
> http://cvs.afresh1.com/~andrew/Term-ReadKey-2.38.patch

The downloaded patch is fine.  The patch in the mail has replaced
UTF-8 with latin1 or something.  That explains the difference in
signature.

Please remember to cvs rm ReadKey.pm, it was not removed by the
patch.

OK bluhm@



Re: Update Term::ReadKey to 2.38

2020-02-25 Thread Alexander Bluhm
On Mon, Feb 24, 2020 at 07:58:23PM -0800, Andrew Hewus Fresh wrote:
> Index: gnu/usr.bin/perl/cpan/Term-ReadKey/Makefile.PL
> ===
> RCS file: /cvs/src/gnu/usr.bin/perl/cpan/Term-ReadKey/Makefile.PL,v
> retrieving revision 1.3
> diff -u -p -r1.3 Makefile.PL
> --- gnu/usr.bin/perl/cpan/Term-ReadKey/Makefile.PL3 Jul 2016 01:07:58 
> -   1.3
> +++ gnu/usr.bin/perl/cpan/Term-ReadKey/Makefile.PL25 Feb 2020 01:27:42 
> -
> @@ -1,20 +1,37 @@
> /VERS+use strict;
> +use warnings;

Here the patch is broken.
patch:  malformed patch at line 906: /VERS+use strict;

I did run cpansign.  It reports some diffs in the signature file.

-SHA1 616229399c8babf4256fadad2571aad7d72a3ea8 Changes
+SHA1 d7fc030ba69546216f0b78922d3f184657cf1d1c Changes

-SHA1 c009e9a2bbd000588fbcba9a9ae08dfdaa1e8ad1 example/test.pl
+SHA1 f0d06423953f4308c4ab3564ae13c50f910299d2 example/test.pl

-SHA1 24ce32574887ccf5f7b3ec5619f840e5944d8c75 t/02_terminal_functions.t
+SHA1 3c6c986194532d487889312e0fa6892148cdc1cd t/02_terminal_functions.t

Did you change something?

bluhm



Re: syslogd closing all udp is a tiny bit aggressiv

2020-02-09 Thread Alexander Bluhm
On Thu, Feb 06, 2020 at 05:57:15PM -0500, sven falempin wrote:
> > Your DNS lookup fails at startup, sockets are closed.
> > Later at SIGHUP you DNS works again.  Now the sockets are needed.
> > So do not close them if DNS for udp fails.

I thought again about this problem.  The fix can be more specific.
- if user requested udp4 or udp6, close the other af socket.
- after SIGHUP, when DNS works, close the unneeded af socket.

ok?

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.262
diff -u -p -r1.262 syslogd.c
--- usr.sbin/syslogd/syslogd.c  5 Jul 2019 13:23:27 -   1.262
+++ usr.sbin/syslogd/syslogd.c  9 Feb 2020 20:25:20 -
@@ -853,20 +853,6 @@ main(int argc, char *argv[])
event_add(ev_udp, NULL);
if (fd_udp6 != -1)
event_add(ev_udp6, NULL);
-   } else {
-   /*
-* If generic UDP file descriptors are used neither
-* for receiving nor for sending, close them.  Then
-* there is no useless *.514 in netstat.
-*/
-   if (fd_udp != -1 && !send_udp) {
-   close(fd_udp);
-   fd_udp = -1;
-   }
-   if (fd_udp6 != -1 && !send_udp6) {
-   close(fd_udp6);
-   fd_udp6 = -1;
-   }
}
for (i = 0; i < nbind; i++)
if (fd_bind[i] != -1)
@@ -2416,6 +2402,7 @@ init(void)
s = 0;
strlcpy(progblock, "*", sizeof(progblock));
strlcpy(hostblock, "*", sizeof(hostblock));
+   send_udp = send_udp6 = 0;
while (getline(, , cf) != -1) {
/*
 * check for end-of-section, comments, strip off trailing
@@ -2508,6 +2495,22 @@ init(void)
Initialized = 1;
dropped_warn(_dropped, "during initialization");

+   if (SecureMode) {
+   /*
+* If generic UDP file descriptors are used neither
+* for receiving nor for sending, close them.  Then
+* there is no useless *.514 in netstat.
+*/
+   if (fd_udp != -1 && !send_udp) {
+   close(fd_udp);
+   fd_udp = -1;
+   }
+   if (fd_udp6 != -1 && !send_udp6) {
+   close(fd_udp6);
+   fd_udp6 = -1;
+   }
+   }
+
if (Debug) {
SIMPLEQ_FOREACH(f, , f_next) {
for (i = 0; i <= LOG_NFACILITIES; i++)
@@ -2755,6 +2758,13 @@ cfline(char *line, char *progblock, char
sizeof(f->f_un.f_forw.f_addr)) != 0) {
log_warnx("bad hostname \"%s\"",
f->f_un.f_forw.f_loghost);
+   /* DNS lookup may work after SIGHUP, keep sockets */
+   if (strcmp(proto, "udp") == 0)
+   send_udp = send_udp6 = 1;
+   else if (strcmp(proto, "udp4") == 0)
+   send_udp = 1;
+   else if (strcmp(proto, "udp6") == 0)
+   send_udp6 = 1;
break;
}
f->f_file = -1;



Re: syslogd closing all udp is a tiny bit aggressiv

2020-02-06 Thread Alexander Bluhm
On Thu, Feb 06, 2020 at 11:46:25AM -0500, sven falempin wrote:
> If for exemple there s a wrong endpoint in the config file, like
> local1.warn @badhost
> and no other the daemon will close  fd_udp.

Your DNS lookup fails at startup, sockets are closed.

> // reload with a badhost in /etc/hosts for the sake of testing

Later at SIGHUP your DNS works again.  Now the sockets are needed.

So do not close them if DNS for udp fails.
Does this diff fix your setup?

bluhm

Index: usr.sbin/syslogd/syslogd.c
===
RCS file: /data/mirror/openbsd/cvs/src/usr.sbin/syslogd/syslogd.c,v
retrieving revision 1.262
diff -u -p -r1.262 syslogd.c
--- usr.sbin/syslogd/syslogd.c  5 Jul 2019 13:23:27 -   1.262
+++ usr.sbin/syslogd/syslogd.c  6 Feb 2020 21:51:30 -
@@ -2416,6 +2416,7 @@ init(void)
s = 0;
strlcpy(progblock, "*", sizeof(progblock));
strlcpy(hostblock, "*", sizeof(hostblock));
+   send_udp = send_udp6 = 0;
while (getline(, , cf) != -1) {
/*
 * check for end-of-section, comments, strip off trailing
@@ -2755,6 +2756,9 @@ cfline(char *line, char *progblock, char
sizeof(f->f_un.f_forw.f_addr)) != 0) {
log_warnx("bad hostname \"%s\"",
f->f_un.f_forw.f_loghost);
+   /* DNS lookup may work after SIGHUP, keep sockets */
+   if (strncmp(proto, "udp", 3) == 0)
+   send_udp = send_udp6 = 1;
break;
}
f->f_file = -1;



ipsecctl udp encap

2020-02-05 Thread Alexander Bluhm
Hi,

I would like to test IPsec with NAT-T.  For that it would be useful
to set the udpencap flag and port of a SA.  I added that to
ipsecctl(8).

ok?

bluhm

Index: sbin/ipsecctl/ipsec.conf.5
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/ipsecctl/ipsec.conf.5,v
retrieving revision 1.156
diff -u -p -r1.156 ipsec.conf.5
--- sbin/ipsecctl/ipsec.conf.5  10 Nov 2019 20:51:52 -  1.156
+++ sbin/ipsecctl/ipsec.conf.5  6 Feb 2020 00:00:06 -
@@ -890,6 +890,10 @@ and
 The SPI identifies a specific SA.
 .Ar number
 is a 32-bit value and needs to be unique.
+.It Ic udpencap Op Ic port Ar dport
+For NAT-Traversal encapsulate the IPsec traffic in UDP.
+The port number of the peer can be set to
+.Ar dport .
 .It Ic auth Ar algorithm
 For ESP and AH
 an authentication algorithm can be specified.
Index: sbin/ipsecctl/ipsecctl.h
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/ipsecctl/ipsecctl.h,v
retrieving revision 1.73
diff -u -p -r1.73 ipsecctl.h
--- sbin/ipsecctl/ipsecctl.h20 Nov 2017 10:51:24 -  1.73
+++ sbin/ipsecctl/ipsecctl.h5 Feb 2020 20:42:45 -
@@ -208,6 +208,8 @@ struct ipsec_rule {
u_int8_t ikemode;
u_int8_t p1ie;
u_int8_t p2ie;
+   u_int8_t udpencap;
+   u_int16_tudpdport;
u_int16_tsport;
u_int16_tdport;
u_int32_tspi;
Index: sbin/ipsecctl/parse.y
===
RCS file: /data/mirror/openbsd/cvs/src/sbin/ipsecctl/parse.y,v
retrieving revision 1.177
diff -u -p -r1.177 parse.y
--- sbin/ipsecctl/parse.y   26 Aug 2019 18:53:58 -  1.177
+++ sbin/ipsecctl/parse.y   5 Feb 2020 20:46:00 -
@@ -205,7 +205,8 @@ int  validate_sa(u_int32_t, u_int8_t,
 struct ipsec_transforms *, struct ipsec_key *,
 struct ipsec_key *, u_int8_t);
 struct ipsec_rule  *create_sa(u_int8_t, u_int8_t, struct ipsec_hosts *,
-u_int32_t, struct ipsec_transforms *,
+u_int32_t, u_int8_t, u_int16_t,
+struct ipsec_transforms *,
 struct ipsec_key *, struct ipsec_key *);
 struct ipsec_rule  *reverse_sa(struct ipsec_rule *, u_int32_t,
 struct ipsec_key *, struct ipsec_key *);
@@ -257,6 +258,10 @@ typedef struct {
u_int32_t   spiin;
} spis;
struct {
+   u_int8_tencap;
+   u_int16_t   port;
+   } udpencap;
+   struct {
struct ipsec_key *keyout;
struct ipsec_key *keyin;
} authkeys;
@@ -281,7 +286,7 @@ typedef struct {
 %token AUTHKEY ENCKEY FILENAME AUTHXF ENCXF ERROR IKE MAIN QUICK AGGRESSIVE
 %token PASSIVE ACTIVE ANY IPIP IPCOMP COMPXF TUNNEL TRANSPORT DYNAMIC LIFETIME
 %token TYPE DENY BYPASS LOCAL PROTO USE ACQUIRE REQUIRE DONTACQ GROUP PORT TAG
-%token INCLUDE BUNDLE
+%token INCLUDE BUNDLE UDPENCAP
 %token   STRING
 %token   NUMBER
 %typestring
@@ -300,6 +305,7 @@ typedef struct {
 %type   ids
 %typeid
 %type  spispec
+%type  udpencap
 %type  authkeyspec
 %type   enckeyspec
 %typebundlestring
@@ -347,7 +353,7 @@ tcpmd5rule  : TCPMD5 hosts spispec authke
struct ipsec_rule   *r;

r = create_sa(IPSEC_TCPMD5, IPSEC_TRANSPORT, &$2,
-   $3.spiout, NULL, $4.keyout, NULL);
+   $3.spiout, 0, 0, NULL, $4.keyout, NULL);
if (r == NULL)
YYERROR;

@@ -357,17 +363,17 @@ tcpmd5rule: TCPMD5 hosts spispec authke
}
;

-sarule : satype tmode hosts spispec transforms authkeyspec
+sarule : satype tmode hosts spispec udpencap transforms authkeyspec
enckeyspec bundlestring {
struct ipsec_rule   *r;

-   r = create_sa($1, $2, &$3, $4.spiout, $5, $6.keyout,
-   $7.keyout);
+   r = create_sa($1, $2, &$3, $4.spiout, $5.encap, $5.port,
+   $6, $7.keyout, $8.keyout);
if (r == NULL)
YYERROR;

-   if (expand_rule(r, NULL, 0, $4.spiin, $6.keyin,
-   $7.keyin, $8))
+   if (expand_rule(r, NULL, 0, $4.spiin, $7.keyin,
+   $8.keyin, $9))
errx(1, "sarule: expand_rule");

Re: IPv6 Support for umb(4)

2020-02-04 Thread Alexander Bluhm
On Tue, Feb 04, 2020 at 09:16:34AM +0100, Gerhard Roth wrote:
> The updated patch below introduces a UMBFLG_NO_INET6 which is set on
> receipt of a MBIM_STATUS_NO_DEVICE_SUPPORT in response to a
> MBIM_CID_CONNECT. The code will then retry the connect operation in
> IPv4-only mode.
>
> That won't give you any IPv6 support, but at least it won't break
> your setup.

OK bluhm@

Now it works:

umb0: state going up from 'open' to 'radio on'
umb0: packet service changed from unknown to detached, class none, speed: 0 up 
/ 0 down
umb0: none state unlocked (-1 attempts left)
umb0: set/qry MBIM_CID_SUBSCRIBER_READY_STATUS failed: BUSY
umb0: SIM initialized
umb0: state going up from 'radio on' to 'SIM is ready'
umb0: set/qry MBIM_CID_PACKET_SERVICE failed: FAILURE
umb0: packet service changed from detached to attaching, class none, speed: 0 
up / 0 down
umb0: packet service changed from attaching to attached, class HSPA, speed: 
576 up / 1440 down
umb0: state going up from 'SIM is ready' to 'attached'
umb0: connecting ...
umb0: device does not support IPv6
umb0: connecting ...
umb0: connection activating
umb0: network connected
umb0: connection activated
umb0: state going up from 'attached' to 'connected'
umb0: IPv4 addr 100.67.244.231, mask 255.255.255.240, gateway 100.67.244.226
umb0: IPv4 nameserver 139.7.30.126
umb0: IPv4 nameserver 139.7.30.125
umb0: ISP or WWAN module offers no IPv6 support
umb0: MTU 1500
umb0: state going up from 'connected' to 'up'
umb0: link state changed from down to up
umb0: packet service attached, class custom, speed: 576 up / 2100 down
umb0: unable to set IPv4 default route, error 17
umb0: IPv4 addr 100.67.244.231, mask 255.255.255.240, gateway 100.67.244.226
umb0: IPv4 nameserver 139.7.30.126
umb0: IPv4 nameserver 139.7.30.125
umb0: ISP or WWAN module offers no IPv6 support

> Index: sbin/ifconfig/ifconfig.c
> ===
> RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
> retrieving revision 1.417
> diff -u -p -u -p -r1.417 ifconfig.c
> --- sbin/ifconfig/ifconfig.c  27 Dec 2019 14:34:46 -  1.417
> +++ sbin/ifconfig/ifconfig.c  28 Jan 2020 12:16:23 -
> @@ -5666,6 +5666,7 @@ umb_status(void)
>   char apn[UMB_APN_MAXLEN+1];
>   char pn[UMB_PHONENR_MAXLEN+1];
>   int  i, n;
> + char astr[INET6_ADDRSTRLEN];
>
>   memset((char *), 0, sizeof(mi));
>   ifr.ifr_data = (caddr_t)
> @@ -5830,7 +5831,15 @@ umb_status(void)
>   for (i = 0, n = 0; i < UMB_MAX_DNSSRV; i++) {
>   if (mi.ipv4dns[i].s_addr == INADDR_ANY)
>   break;
> - printf("%s %s", n++ ? "" : "\tdns", inet_ntoa(mi.ipv4dns[i]));
> + printf("%s %s", n++ ? "" : "\tdns",
> + inet_ntop(AF_INET, [i], astr, sizeof (astr)));
> + }
> + for (i = 0; i < UMB_MAX_DNSSRV; i++) {
> + if (memcmp([i], _any,
> + sizeof (mi.ipv6dns[i])) == 0)
> + break;
> + printf("%s %s", n++ ? "" : "\tdns",
> + inet_ntop(AF_INET6, [i], astr, sizeof (astr)));
>   }
>   if (n)
>   printf("\n");
> Index: share/man/man4/umb.4
> ===
> RCS file: /cvs/src/share/man/man4/umb.4,v
> retrieving revision 1.9
> diff -u -p -u -p -r1.9 umb.4
> --- share/man/man4/umb.4  23 Nov 2017 20:47:26 -  1.9
> +++ share/man/man4/umb.4  28 Jan 2020 12:16:23 -
> @@ -40,6 +40,11 @@ will remain in this state until the MBIM
>  In case the device is connected to an "always-on" USB port,
>  it may be possible to connect to a provider without entering the
>  PIN again even if the system was rebooted.
> +.Pp
> +If the kernel has been compiled with INET6, the driver will try to
> +obtain an IPv6 address from the provider. To succeed with the IPv6
> +configuration, both the ISP and the MBIM device have to offer IPv6
> +support.
>  .Sh HARDWARE
>  The following devices should work:
>  .Pp
> @@ -64,10 +69,6 @@ The following devices should work:
>  .%U http://www.usb.org/developers/docs/devclass_docs/MBIM10Errata1_073013.zip
>  .Re
>  .Sh CAVEATS
> -The
> -.Nm
> -driver does not support IPv6.
> -.Pp
>  Devices which fail to provide a conforming MBIM implementation will
>  probably be attached as some other driver, such as
>  .Xr umsm 4 .
> Index: sys/dev/usb/if_umb.c
> ===
> RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
> retrieving revision 1.31
> diff -u -p -u -p -r1.31 if_umb.c
> --- sys/dev/usb/if_umb.c  26 Nov 2019 23:04:28 -  1.31
> +++ sys/dev/usb/if_umb.c  4 Feb 2020 07:50:30 -
> @@ -43,6 +43,14 @@
>  #include 
>  #include 
>
> +#ifdef INET6
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#endif
> +
>  #include 
>
>  #include 
> @@ -158,7 +166,9 @@ intumb_decode_connect_info(struct umb
>  void  

Re: IPv6 Support for umb(4)

2020-02-03 Thread Alexander Bluhm
On Tue, Jan 28, 2020 at 03:03:47PM +0100, Gerhard Roth wrote:
> this patch adds IPv6 support to umb(4).

It breaks my IPv4 setup.

umb0 at uhub0 port 4 configuration 1 interface 6 "Lenovo H5321 gw" rev 
2.00/0.00 addr 2
provider Vodafone.de
firmware CXP 901 8700/1 - R3C18

When I apply the diff, my umb device does not get an IPv4 address.

umb0: state going up from 'open' to 'radio on'
umb0: none state unlocked (-1 attempts left)
umb0: set/qry MBIM_CID_SUBSCRIBER_READY_STATUS failed: BUSY
umb0: packet service changed from detached to attaching, class none, speed: 0 
up / 0 down
umb0: SIM initialized
umb0: state going up from 'radio on' to 'SIM is ready'
umb0: packet service changed from attaching to attached, class HSPA, speed: 
576 up / 1440 down
umb0: state going up from 'SIM is ready' to 'attached'
umb0: connecting ...
umb0: set/qry MBIM_CID_CONNECT failed: NO_DEVICE_SUPPORT
umb0: state change timeout
umb0: connecting ...
umb0: set/qry MBIM_CID_CONNECT failed: NO_DEVICE_SUPPORT
umb0: state change timeout
umb0: connecting ...
umb0: set/qry MBIM_CID_CONNECT failed: NO_DEVICE_SUPPORT
umb0: state change timeout
...

A few comments inline.

> +#ifdef INET6
> +int   umb_add_inet6_config(struct umb_softc *, struct in6_addr *,
> + u_int, struct in6_addr *);
> +#endif

Usually I avoid #ifdef for prototypes.  It does not matter whether
the compiler reads them and without #ifdef the code is nicer.

> +tryv6:;

The ; is wrong.

> + if (n == 0 || off + sizeof (ipv6elem) > len)
> + goto done;
> + if (n != 1 && ifp->if_flags & IFF_DEBUG)
> + log(LOG_INFO, "%s: more than one IPv6 addr: %d\n",
> + DEVNAM(ifp->if_softc), n);
> +
> + /* Only pick the first one */
> + memcpy(, data + off, sizeof (ipv6elem));
> + memcpy(, ipv6elem.addr, sizeof (addr6));
> +
> + off = letoh32(ic->ipv6_gwoffs);
> + memcpy(, data + off, sizeof (gw6));

I think we should check the data length like above.

if (off + sizeof (gw6) > len)
goto done;

And IPv4 should get the same check.

> @@ -380,6 +381,6 @@ struct umb_softc {
>
>  #define sc_state sc_info.state
>  #define sc_roaming   sc_info.enable_roaming
> - struct umb_info sc_info;
> + struct umb_info  sc_info;
>  };
>  #endif /* _KERNEL */

This whitespace chunk is wrong.

bluhm



Re: sys/ufs/ffs/ffs_softdep.c: use TAILQ_CONCAT(3)

2020-02-03 Thread Alexander Bluhm
On Mon, Feb 03, 2020 at 05:37:01PM +0100, Björn Ketelaars wrote:
> On Mon 27/01/2020 20:01, Björn Ketelaars wrote:
> > cheloha@ recently replaced custom TAILQ concatenation loops in pool(9)
> > with TAILQ_CONCAT(3) [0]. I was curious as how often these custom
> > concatenations loops are used and grepped src. I found a couple of them.
> > One being in sys/ufs/ffs/ffs_softdep.c, and the others in userland.
> >
> > I build a kernel using the diff below and gave it a spin. So far it
> > didn't blow up.
>
> OK?

OK bluhm@

> Diff enclosed again for your convenience.
>
>
> diff --git sys/ufs/ffs/ffs_softdep.c sys/ufs/ffs/ffs_softdep.c
> index 2f2a2edc224..d0fd3a15278 100644
> --- sys/ufs/ffs/ffs_softdep.c
> +++ sys/ufs/ffs/ffs_softdep.c
> @@ -4486,10 +4486,7 @@ merge_inode_lists(struct inodedep *inodedep)
>   }
>   newadp = TAILQ_FIRST(>id_newinoupdt);
>   }
> - while ((newadp = TAILQ_FIRST(>id_newinoupdt)) != NULL) {
> - TAILQ_REMOVE(>id_newinoupdt, newadp, ad_next);
> - TAILQ_INSERT_TAIL(>id_inoupdt, newadp, ad_next);
> - }
> + TAILQ_CONCAT(>id_inoupdt, >id_newinoupdt, ad_next);
>  }
>
>  /*



Re: em(4) diff to test

2020-01-22 Thread Alexander Bluhm
On Tue, Jan 21, 2020 at 12:31:52PM +0100, Martin Pieuchot wrote:
> New diff that works with 82576, previous breakage reported by Hrvoje
> Popovski.  So far the following models have been tested, I'm looking for
> more tests :o)
>
> em3 at pci2 dev 0 function 0 "Intel 82571EB" rev 0x06: apic 0 int 16
> em0 at pci1 dev 0 function 0 "Intel 82572EI" rev 0x06: apic 0 int 16
> em0 at pci1 dev 0 function 0 "Intel 82576" rev 0x01: msi
> em0 at pci0 dev 25 function 0 "Intel 82577LM" rev 0x06: msi
> em0 at pci0 dev 25 function 0 "Intel 82579LM" rev 0x04: msi
> em0 at pci0 dev 25 function 0 "Intel I217-LM" rev 0x04: msi
> em0 at pci0 dev 25 function 0 "Intel I218-V" rev 0x03: msi
> em0 at pci0 dev 25 function 0 Intel I218-LM rev 0x04: msi
> em0 at pci0 dev 31 function 6 "Intel I219-V" rev 0x21: msi
> em0 at pci7 dev 0 function 0 "Intel I350" rev 0x01: msi

On my regress machines I found these:

em2 at pci0 dev 4 function 0 "Intel 82540EM" rev 0x02: apic 9 int 14
em1 at pci3 dev 1 function 0 "Intel 82545GM" rev 0x04: apic 4 int 0
em0 at pci3 dev 1 function 0 "Intel 82546GB" rev 0x03: apic 3 int 0
em2 at pci5 dev 0 function 0 "Intel 82573E" rev 0x03: msi
em3 at pci6 dev 0 function 0 "Intel 82573L" rev 0x00: msi

Tested on i386.

bluhm



  1   2   3   4   5   6   7   8   9   10   >