Re: snmpd(8): New application layer - step towards agentx support

2021-11-30 Thread Martijn van Duren
On Sun, 2021-11-21 at 14:58 +0100, Martijn van Duren wrote:
> On Sun, 2021-11-14 at 14:35 +, Stuart Henderson wrote:
> > On 2021/11/14 11:49, Martijn van Duren wrote:
> > > sthen@ found an issue when using this diff with netsnmp tools.
> > > 
> > > The problem was that I put the requestID in the msgID, resulting
> > > in a mismatch upon receiving the reply. The reason that snmp(1)
> > > works is because msgID and requestID are the same.
> > > Diff below fixes things.
> > 
> > This version works for me, and the runtime increase with librenms
> > fetches and polls (which use a mixture of get/bulkwalk) is acceptable
> > (10% or so).
> > 
> Anyone else put this through a test? I want to move forward with this.
> 
> martijn@
> 
ping

Index: Makefile
===
RCS file: /cvs/src/usr.sbin/snmpd/Makefile,v
retrieving revision 1.17
diff -u -p -r1.17 Makefile
--- Makefile30 Jun 2020 17:11:49 -  1.17
+++ Makefile1 Dec 2021 06:45:57 -
@@ -2,7 +2,7 @@
 
 PROG=  snmpd
 MAN=   snmpd.8 snmpd.conf.5
-SRCS=  parse.y log.c snmpe.c \
+SRCS=  parse.y log.c snmpe.c application.c application_legacy.c \
mps.c trap.c mib.c smi.c kroute.c snmpd.c timer.c \
pf.c proc.c usm.c traphandler.c util.c
 
Index: application.c
===
RCS file: application.c
diff -N application.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ application.c   1 Dec 2021 06:45:57 -
@@ -0,0 +1,1451 @@
+/* $OpenBSD$   */
+
+/*
+ * Copyright (c) 2021 Martijn van Duren 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "application.h"
+#include "log.h"
+#include "smi.h"
+#include "snmp.h"
+#include "snmpe.h"
+#include "mib.h"
+
+TAILQ_HEAD(, appl_context) contexts = TAILQ_HEAD_INITIALIZER(contexts);
+
+struct appl_context {
+   char ac_name[APPL_CONTEXTNAME_MAX + 1];
+
+   RB_HEAD(appl_regions, appl_region) ac_regions;
+
+   TAILQ_ENTRY(appl_context) ac_entries;
+};
+
+struct appl_region {
+   struct ber_oid ar_oid;
+   uint8_t ar_priority;
+   int32_t ar_timeout;
+   int ar_instance;
+   int ar_subtree; /* Claim entire subtree */
+   struct appl_backend *ar_backend;
+   struct appl_region *ar_next; /* Sorted by priority */
+
+   RB_ENTRY(appl_region) ar_entry;
+};
+
+struct appl_request_upstream {
+   struct appl_context *aru_ctx;
+   struct snmp_message *aru_statereference;
+   int32_t aru_requestid; /* upstream requestid */
+   int32_t aru_transactionid; /* RFC 2741 section 6.1 */
+   int16_t aru_nonrepeaters;
+   int16_t aru_maxrepetitions;
+   struct appl_varbind_internal *aru_vblist;
+   size_t aru_varbindlen;
+   enum appl_error aru_error;
+   int16_t aru_index;
+   int aru_locked; /* Prevent recursion through appl_request_send */
+
+   enum snmp_version aru_pduversion;
+   struct ber_element *aru_pdu; /* Original requested pdu */
+};
+
+struct appl_request_downstream {
+   struct appl_request_upstream *ard_request;
+   struct appl_backend *ard_backend;
+   enum snmp_pdutype ard_requesttype;
+   int16_t ard_nonrepeaters;
+   int16_t ard_maxrepetitions;
+   int32_t ard_requestid;
+   uint8_t ard_retries;
+
+   struct appl_varbind_internal *ard_vblist;
+   struct event ard_timer;
+
+   RB_ENTRY(appl_request_downstream) ard_entry;
+};
+
+enum appl_varbind_state {
+   APPL_VBSTATE_MUSTFILL,
+   APPL_VBSTATE_NEW,
+   APPL_VBSTATE_PENDING,
+   APPL_VBSTATE_DONE
+};
+
+struct appl_varbind_internal {
+   enum appl_varbind_state avi_state;
+   struct appl_varbind avi_varbind;
+   struct appl_region *avi_region;
+   int16_t avi_index;
+   struct appl_request_upstream *avi_request_upstream;
+   struct appl_request_downstream *avi_request_downstream;
+   struct appl_varbind_internal *avi_next;
+   struct appl_varbind_internal *avi_sub;
+};
+
+/* SNMP-TARGET-MIB (RFC 3413) */
+struct snmp_target_mib {
+   uint32_tsnmp_unavailablecontexts;
+   uin

Re: Fix ipsp_spd_lookup() for transport mode

2021-11-30 Thread Alexander Bluhm
On Tue, Nov 30, 2021 at 05:53:34PM +0300, Vitaliy Makkoveev wrote:
> Hi,
> 
> This question is mostly for bluhm@. Should the gettdbbyflow() grab the
> extra reference on returned `tdbp' like other other gettdb*() do? I'm
> pointing this because we are going to not rely on the netlock when doing
> `tdbp' dereference.

Yes.  Call tdb_ref(tdbp) withing the tdb_sadb_mtx mutex.

The interesting question is when to unref it.  You use the same
variable for the tdb parameter and the tdb from gettdbbyflow().
Tracking when you don't use the new TDB anymore, gets tricky.

bluhm



Re: ifconfig description for wireguard peers

2021-11-30 Thread Stefan Sperling
On Tue, Nov 30, 2021 at 02:31:20PM -0500, Noah Meier wrote:
> Hi Stefan,
> 
> Richard Procter offered some kind advice on the ordering of options in the 
> man page
> (to be done alphabetically) and commented on an unnecessary cast.
> 
> I also believe that I goofed by failing to initalize the mutex and zero the 
> description
> upon peer creation (in wg_peer_create).
> 
> I’ve attempted to address these issues and have pasted the diff below.
> 
> NM

This new patch does not apply cleanly.
Leading whitespace was stripped, and thus patch complains as follows:

Patching file if_wg.c using Plan A...
patch:  malformed patch at line 15: };

And it would help if you created a patch where all paths are relative to
the /usr/src directory. Something like this should do it:

 cd /usr/src
 cvs diff -u sbin/ifconfig sys/net  > /tmp/wgdesc.patch

If your mailer cannot preserve whitespace as-is then please try attaching
the patch file instead of inlining it.

Thanks!



ipsp_spd_lookup error

2021-11-30 Thread Alexander Bluhm
Hi,

I want to ref count the TDB that is returned by ipsp_spd_lookup().

Sometimes the TDB returned by ipsp_spd_lookup() is not used.  So
refcounting that, does not make sense.  As a first step I want to
convert ipsp_spd_lookup() to return an error.

- tdb = ipsp_spd_lookup(&error)
+ error = ipsp_spd_lookup(&tdb)

If we don't need the TDB, this call will avoid ref counting.
  error = ipsp_spd_lookup(NULL)

The following diff only changes the parameter and error checking.
No refcountig yet.

ok?

bluhm

Index: net/if_bridge.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.359
diff -u -p -r1.359 if_bridge.c
--- net/if_bridge.c 25 Nov 2021 13:46:02 -  1.359
+++ net/if_bridge.c 30 Nov 2021 21:29:39 -
@@ -1594,9 +1594,9 @@ bridge_ipsec(struct ifnet *ifp, struct e
return (0);
}
} else { /* Outgoing from the bridge. */
-   tdb = ipsp_spd_lookup(m, af, hlen, &error,
-   IPSP_DIRECTION_OUT, NULL, NULL, 0);
-   if (tdb != NULL) {
+   error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_OUT,
+   NULL, NULL, &tdb, 0);
+   if (error == 0 && tdb != NULL) {
/*
 * We don't need to do loop detection, the
 * bridge will do that for us.
Index: netinet/ip_ipsp.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.224
diff -u -p -r1.224 ip_ipsp.h
--- netinet/ip_ipsp.h   30 Nov 2021 13:17:43 -  1.224
+++ netinet/ip_ipsp.h   30 Nov 2021 15:14:46 -
@@ -632,8 +632,8 @@ int checkreplaywindow(struct tdb *, u_in
 /* Packet processing */
 intipsp_process_packet(struct mbuf *, struct tdb *, int, int);
 intipsp_process_done(struct mbuf *, struct tdb *);
-struct tdb *ipsp_spd_lookup(struct mbuf *, int, int, int *, int,
-   struct tdb *, struct inpcb *, u_int32_t);
+intipsp_spd_lookup(struct mbuf *, int, int, int, struct tdb *,
+   struct inpcb *, struct tdb **, u_int32_t);
 intipsp_is_unspecified(union sockaddr_union);
 intipsp_aux_match(struct tdb *, struct ipsec_ids *,
struct sockaddr_encap *, struct sockaddr_encap *);
Index: netinet/ip_output.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.375
diff -u -p -r1.375 ip_output.c
--- netinet/ip_output.c 24 Nov 2021 18:48:33 -  1.375
+++ netinet/ip_output.c 30 Nov 2021 21:41:01 -
@@ -86,8 +86,8 @@ static __inline u_int16_t __attribute__(
 void in_delayed_cksum(struct mbuf *);
 int in_ifcap_cksum(struct mbuf *, struct ifnet *, int);
 
-struct tdb *ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error,
-struct inpcb *inp, int ipsecflowinfo);
+int ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp,
+struct tdb **, int ipsecflowinfo);
 void ip_output_ipsec_pmtu_update(struct tdb *, struct route *, struct in_addr,
 int, int);
 int ip_output_ipsec_send(struct tdb *, struct mbuf *, struct route *, int);
@@ -244,9 +244,9 @@ reroute:
 #ifdef IPSEC
if (ipsec_in_use || inp != NULL) {
/* Do we have any pending SAs to apply ? */
-   tdb = ip_output_ipsec_lookup(m, hlen, &error, inp,
+   error = ip_output_ipsec_lookup(m, hlen, inp, &tdb,
ipsecflowinfo);
-   if (error != 0) {
+   if (error) {
/* Should silently drop packet */
if (error == -EINVAL)
error = 0;
@@ -531,19 +531,22 @@ bad:
 }
 
 #ifdef IPSEC
-struct tdb *
-ip_output_ipsec_lookup(struct mbuf *m, int hlen, int *error, struct inpcb *inp,
-int ipsecflowinfo)
+int
+ip_output_ipsec_lookup(struct mbuf *m, int hlen, struct inpcb *inp,
+struct tdb **tdbout, int ipsecflowinfo)
 {
struct m_tag *mtag;
struct tdb_ident *tdbi;
struct tdb *tdb;
+   int error;
 
/* Do we have any pending SAs to apply ? */
-   tdb = ipsp_spd_lookup(m, AF_INET, hlen, error, IPSP_DIRECTION_OUT,
-   NULL, inp, ipsecflowinfo);
-   if (tdb == NULL)
-   return NULL;
+   error = ipsp_spd_lookup(m, AF_INET, hlen, IPSP_DIRECTION_OUT,
+   NULL, inp, &tdb, ipsecflowinfo);
+   if (error || tdb == NULL) {
+   *tdbout = NULL;
+   return error;
+   }
/* Loop detection */
for (mtag = m_tag_first(m); mtag != NULL; mtag = m_tag_next(m, mtag)) {
if (mtag->m_tag_id != PACKET_TAG_IPSEC_OUT_DONE)
@@ -555,10 +558,12 @@ ip_output_ipsec_lookup(struct mbuf *m, i
!memcmp(&tdbi->dst, &tdb->tdb_dst,
sizeof(union sockaddr_union

Re: lib/Makefile: libfido2 requires libz

2021-11-30 Thread Patrick Wildt
On Tue, Nov 30, 2021 at 03:24:17PM -0700, Theo de Raadt wrote:
> Todd C. Miller  wrote:
> 
> > On Tue, 30 Nov 2021 23:18:23 +0100, Mark Kettenis wrote:
> > 
> > > Maybe add a comment here that libfido2 depends on libz?
> > >
> > > > +SUBDIR+=libfido2
> > 
> > Yes, please.  With a comment this is OK millert@
> 
> Or just mark these as "phase 2", without saying why.
> 

So, this better?

diff --git a/lib/Makefile b/lib/Makefile
index 7f95cb1e3e7..f96aae01984 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -3,9 +3,12 @@
 
 SUBDIR=csu libagentx libarch libc libcbor libcrypto libcurses \
libedit libelf libevent libexpat \
-   libfido2 libform libfuse libkeynote libkvm libl libm libmenu \
+   libform libfuse libkeynote libkvm libl libm libmenu \
libossaudio libpanel libpcap libradius librthread \
librpcsvc libskey libsndio libssl libtls libusbhid \
libutil liby libz
 
+# Phase 2
+SUBDIR+=libfido2
+
 .include 



Re: lib/Makefile: libfido2 requires libz

2021-11-30 Thread Theo de Raadt
Todd C. Miller  wrote:

> On Tue, 30 Nov 2021 23:18:23 +0100, Mark Kettenis wrote:
> 
> > Maybe add a comment here that libfido2 depends on libz?
> >
> > > +SUBDIR+=libfido2
> 
> Yes, please.  With a comment this is OK millert@

Or just mark these as "phase 2", without saying why.



Re: lib/Makefile: libfido2 requires libz

2021-11-30 Thread Todd C . Miller
On Tue, 30 Nov 2021 23:18:23 +0100, Mark Kettenis wrote:

> Maybe add a comment here that libfido2 depends on libz?
>
> > +SUBDIR+=libfido2

Yes, please.  With a comment this is OK millert@

 - todd



Re: lib/Makefile: libfido2 requires libz

2021-11-30 Thread Mark Kettenis
> Date: Tue, 30 Nov 2021 23:04:49 +0100
> From: Patrick Wildt 
> 
> On Thu, Nov 11, 2021 at 08:27:21PM +0900, SASANO Takayoshi wrote:
> > Hi,
> > 
> > Recently I tried to crossbuild for arm64 on amd64.
> > I found "TARGET=arm64 make cross-tools" stops when building libfido2.
> > 
> > libfido2 requires libz but this is not built yet at that time.
> > lib/Makefile needs to tweak like this.
> > 
> > openbsd-current-vm# diff -uNpr Makefile~ Makefile
> > --- Makefile~   Sun Jan  3 05:04:36 2021
> > +++ MakefileThu Nov 11 19:47:42 2021
> > @@ -2,10 +2,10 @@
> >  #  $NetBSD: Makefile,v 1.20.4.1 1996/06/14 17:22:38 cgd Exp $
> > 
> >  SUBDIR=csu libagentx libarch libc libcbor libcrypto libcurses \
> > -   libedit libelf libevent libexpat \
> > +   libedit libelf libevent libexpat libz \
> > libfido2 libform libfuse libkeynote libkvm libl libm libmenu \
> > libossaudio libpanel libpcap libradius librthread \
> > librpcsvc libskey libsndio libssl libtls libusbhid \
> > -   libutil liby libz
> > +   libutil liby
> > 
> >  .include 
> > openbsd-current-vm#
> > 
> > It works good, but I think there is better build order.
> > 
> > -- 
> > SASANO Takayoshi (JG1UAA) 
> > 
> 
> Hi,
> 
> I just had the same issue and after a short discussion it seems like
> it's a better idea to separate libfido2 into a separate line to show
> that it should be built at the end due to having libz as dependency.
> 
> Comments? ok?
> 
> Patrick
> 
> diff --git a/lib/Makefile b/lib/Makefile
> index 7f95cb1e3e7..1f1586b0518 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -3,9 +3,10 @@
>  
>  SUBDIR=  csu libagentx libarch libc libcbor libcrypto libcurses \
>   libedit libelf libevent libexpat \
> - libfido2 libform libfuse libkeynote libkvm libl libm libmenu \
> + libform libfuse libkeynote libkvm libl libm libmenu \
>   libossaudio libpanel libpcap libradius librthread \
>   librpcsvc libskey libsndio libssl libtls libusbhid \
>   libutil liby libz

Maybe add a comment here that libfido2 depends on libz?

> +SUBDIR+=libfido2
>  
>  .include 
> 
> 



Re: lib/Makefile: libfido2 requires libz

2021-11-30 Thread Patrick Wildt
On Thu, Nov 11, 2021 at 08:27:21PM +0900, SASANO Takayoshi wrote:
> Hi,
> 
> Recently I tried to crossbuild for arm64 on amd64.
> I found "TARGET=arm64 make cross-tools" stops when building libfido2.
> 
> libfido2 requires libz but this is not built yet at that time.
> lib/Makefile needs to tweak like this.
> 
> openbsd-current-vm# diff -uNpr Makefile~ Makefile
> --- Makefile~   Sun Jan  3 05:04:36 2021
> +++ MakefileThu Nov 11 19:47:42 2021
> @@ -2,10 +2,10 @@
>  #  $NetBSD: Makefile,v 1.20.4.1 1996/06/14 17:22:38 cgd Exp $
> 
>  SUBDIR=csu libagentx libarch libc libcbor libcrypto libcurses \
> -   libedit libelf libevent libexpat \
> +   libedit libelf libevent libexpat libz \
> libfido2 libform libfuse libkeynote libkvm libl libm libmenu \
> libossaudio libpanel libpcap libradius librthread \
> librpcsvc libskey libsndio libssl libtls libusbhid \
> -   libutil liby libz
> +   libutil liby
> 
>  .include 
> openbsd-current-vm#
> 
> It works good, but I think there is better build order.
> 
> -- 
> SASANO Takayoshi (JG1UAA) 
> 

Hi,

I just had the same issue and after a short discussion it seems like
it's a better idea to separate libfido2 into a separate line to show
that it should be built at the end due to having libz as dependency.

Comments? ok?

Patrick

diff --git a/lib/Makefile b/lib/Makefile
index 7f95cb1e3e7..1f1586b0518 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -3,9 +3,10 @@
 
 SUBDIR=csu libagentx libarch libc libcbor libcrypto libcurses \
libedit libelf libevent libexpat \
-   libfido2 libform libfuse libkeynote libkvm libl libm libmenu \
+   libform libfuse libkeynote libkvm libl libm libmenu \
libossaudio libpanel libpcap libradius librthread \
librpcsvc libskey libsndio libssl libtls libusbhid \
libutil liby libz
+SUBDIR+=libfido2
 
 .include 



Re: ifconfig description for wireguard peers

2021-11-30 Thread Noah Meier
Hi Stefan,

Richard Procter offered some kind advice on the ordering of options in the man 
page
(to be done alphabetically) and commented on an unnecessary cast.

I also believe that I goofed by failing to initalize the mutex and zero the 
description
upon peer creation (in wg_peer_create).

I’ve attempted to address these issues and have pasted the diff below.

NM


Index: if_wg.c
===
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.18
diff -u -p -u -p -r1.18 if_wg.c
--- if_wg.c 5 Aug 2021 13:37:04 -   1.18
+++ if_wg.c 26 Oct 2021 23:01:11 -
@@ -222,6 +222,9 @@ struct wg_peer {

SLIST_ENTRY(wg_peer) p_start_list;
int  p_start_onlist;
+
+   struct mutex p_description_mtx;
+   char p_description[IFDESCRSIZE];
};

struct wg_softc {
@@ -276,6 +279,7 @@ int wg_peer_get_sockaddr(struct wg_peer 
voidwg_peer_clear_src(struct wg_peer *);
voidwg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *);
voidwg_peer_counters_add(struct wg_peer *, uint64_t, uint64_t);
+void   wg_peer_set_description(struct wg_peer *, char *);

int wg_aip_add(struct wg_softc *, struct wg_peer *, struct wg_aip_io *);
struct wg_peer *
@@ -409,6 +413,9 @@ wg_peer_create(struct wg_softc *sc, uint
peer->p_counters_tx = 0;
peer->p_counters_rx = 0;

+   mtx_init(&peer->p_description_mtx, IPL_NET);
+   memset(peer->p_description, 0, IFDESCRSIZE);
+
mtx_init(&peer->p_endpoint_mtx, IPL_NET);
bzero(&peer->p_endpoint, sizeof(peer->p_endpoint));

@@ -583,6 +590,15 @@ wg_peer_counters_add(struct wg_peer *pee
mtx_leave(&peer->p_counters_mtx);
}

+void
+wg_peer_set_description(struct wg_peer *peer, char *description)
+{
+   mtx_enter(&peer->p_description_mtx);
+   memset(peer->p_description, 0, IFDESCRSIZE);
+   strlcpy(peer->p_description, description, IFDESCRSIZE);
+   mtx_leave(&peer->p_description_mtx);
+}
+
int
wg_aip_add(struct wg_softc *sc, struct wg_peer *peer, struct wg_aip_io *d)
{
@@ -2323,6 +2339,10 @@ wg_ioctl_set(struct wg_softc *sc, struct
}
}

+   if (peer_o.p_flags & WG_PEER_SET_DESCRIPTION) {
+   wg_peer_set_description(peer,  peer_o.p_description);
+   }
+
aip_p = &peer_p->p_aips[0];
for (j = 0; j < peer_o.p_aips_count; j++) {
if ((ret = copyin(aip_p, &aip_o, sizeof(aip_o))) != 0)
@@ -2432,6 +2452,8 @@ wg_ioctl_get(struct wg_softc *sc, struct
aip_count++;
}
peer_o.p_aips_count = aip_count;
+
+   strlcpy(peer_o.p_description, peer->p_description, IFDESCRSIZE);

if ((ret = copyout(&peer_o, peer_p, sizeof(peer_o))) != 0)
goto unlock_and_ret_size;
Index: if_wg.h
===
RCS file: /cvs/src/sys/net/if_wg.h,v
retrieving revision 1.4
diff -u -p -u -p -r1.4 if_wg.h
--- if_wg.h 22 Jun 2020 12:20:44 -  1.4
+++ if_wg.h 26 Oct 2021 23:01:11 -
@@ -61,6 +61,7 @@ struct wg_aip_io {
#define WG_PEER_REPLACE_AIPS(1 << 4)
#define WG_PEER_REMOVE  (1 << 5)
#define WG_PEER_UPDATE  (1 << 6)
+#define WG_PEER_SET_DESCRIPTION(1 << 7)

#define p_sap_endpoint.sa_sa
#define p_sin   p_endpoint.sa_sin
@@ -80,6 +81,7 @@ struct wg_peer_io {
uint64_tp_txbytes;
uint64_tp_rxbytes;
struct timespec p_last_handshake; /* nanotime */
+   charp_description[IFDESCRSIZE];
size_t  p_aips_count;
struct wg_aip_iop_aips[];
};
Index: ifconfig.8
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.8,v
retrieving revision 1.375
diff -u -p -u -p -r1.375 ifconfig.8
--- ifconfig.8  18 Aug 2021 18:10:33 -  1.375
+++ ifconfig.8  26 Oct 2021 23:01:34 -
@@ -2309,6 +2309,10 @@ Set the peer's IPv4 or IPv6
range for tunneled traffic.
Repeat the option to set multiple ranges.
By default, no addresses are allowed.
+.It Cm wgdesc Ar value
+Specify a description of the peer.
+.It Cm -wgdesc
+Clear the peer description.
.It Cm wgendpoint Ar peer_address port
Address traffic to the peer's IPv4 or IPv6
.Ar peer_address
Index: ifconfig.c
===
RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v
retrieving revision 1.445
diff -u -p -u -p -r1.445 ifconfig.c
--- ifconfig.c  6 Oct 2021 06:14:08 -   1.445
+++ ifconfig.c  26 Oct 2021 23:01:34 -
@@ -355,12 +355,14 @@ void  setwgpeerep(const char *, const cha
voidsetwgpeeraip(const char *, int);
voidsetwgpeerpsk(const char *, int);
void

Re: Please test: UVM fault unlocking (aka vmobjlock)

2021-11-30 Thread Sebastien Marie
On Tue, Nov 30, 2021 at 07:17:14PM +0100, Jan Stary wrote:
> 
> While here, I notice that sysctl.witness.watch is documented
> in witness(4), but kern.witness.locktrace is not - is that intended?
> 

It is documented on options(4)

   option WITNESS_LOCKTRACE
Enable witness(4) lock stack trace saving at boot.  The feature is
disabled by default and has to be enabled by setting the
kern.witness.locktrace sysctl(8) variable.

Thanks.
-- 
Sebastien Marie



Re: dhcpleased - set ciaddr per RFC

2021-11-30 Thread Florian Obser
I'm making a fine mess off this :(

On 2021-11-25 10:23 -07, Joel Knight  wrote:
> On Wed, Nov 24, 2021 at 4:46 AM Florian Obser  wrote:
>
>> Thanks, I had indeed missed this. I went through the RFC and found that
>> we MUST NOT send the server identifier in rebooting state. While here I
>> also made it explicit that we are not sending server identifier in
>> rebinding state. This was already the case since we only transition from
>> renewing to rebinding, but this is clearer.
>
> The behavior for this option looks consistent with prior behavior.
> However, the RFC confuses me wrt this option and when it should/should
> not be set so I'm only relying on the fact that broadcast and unicast
> requests are working fine in testing.

yeah, I don't get it either

>
>> @@ -1486,9 +1488,17 @@ request_dhcp_request(struct dhcpleased_iface *iface)
>> iface->xid = arc4random();
>> imsg_req_request.if_index = iface->if_index;
>> imsg_req_request.xid = iface->xid;
>> +   if (iface->state == IF_RENEWING || iface->state == IF_REBINDING)
>> +   imsg_req_request.ciaddr.s_addr = iface->requested_ip.s_addr;
>> +   else
>> +   imsg_req_request.ciaddr.s_addr = 0;
>> imsg_req_request.server_identifier.s_addr =
>> iface->server_identifier.s_addr;
>> -   imsg_req_request.requested_ip.s_addr = iface->requested_ip.s_addr;
>> +   if (iface->state == IF_RENEWING || iface->state == IF_REBINDING)
>> +   imsg_req_request.requested_ip.s_addr = 0;
>> +   else
>> +   imsg_req_request.requested_ip.s_addr =
>> +   iface->requested_ip.s_addr;
>
> These two if/else statements could be merged.
>
>>  ssize_t
>>  build_packet(uint8_t message_type, char *if_name, uint32_t xid,
>> -struct ether_addr *hw_address, struct in_addr *requested_ip,
>> -struct in_addr *server_identifier)
>> +struct ether_addr *hw_address, struct in_addr *ciaddr, struct in_addr
>> +*requested_ip, struct in_addr *server_identifier)
>>  {
>> static uint8_t   dhcp_cookie[] = DHCP_COOKIE;
>> static uint8_t   dhcp_message_type[] = {DHO_DHCP_MESSAGE_TYPE, 1,
>> @@ -926,6 +929,7 @@ build_packet(uint8_t message_type, char *if_name, 
>> uint32_t xid,
>> hdr->hops = 0;
>> hdr->xid = xid;
>> hdr->secs = 0;
>> +   hdr->ciaddr.s_addr = ciaddr->s_addr;
>> memcpy(hdr->chaddr, hw_address, sizeof(*hw_address));
>> p += sizeof(struct dhcp_hdr);
>> memcpy(p, dhcp_cookie, sizeof(dhcp_cookie));
>
> The problem with build_packet() now is that it will unconditionally
> insert opt 50 (requested IP address) even if ciaddr is being set. The
> request packet ends up with ciaddr as the correct client IP and opt 50
> as "0.0.0.0".
>

Thanks for keeping me on my toes.

I have decided to refactor the whole thing. I think this makes it easier
to understand and the logic is contained in request_dhcp_discover() and
request_dhcp_request() in engine.c and not all over the place.

I have now also tested all the cases and carefuly compared them with RFC
2131 4.3.6, Table 4. This might now be correct.

diff --git dhcpleased.h dhcpleased.h
index b3b4938ad3c..6df38be6f5b 100644
--- dhcpleased.h
+++ dhcpleased.h
@@ -287,15 +287,10 @@ struct imsg_dhcp {
uint8_t packet[1500];
 };
 
-
-struct imsg_req_discover {
-   uint32_tif_index;
-   uint32_txid;
-};
-
-struct imsg_req_request {
+struct imsg_req_dhcp {
uint32_tif_index;
uint32_txid;
+   struct in_addr  ciaddr;
struct in_addr  requested_ip;
struct in_addr  server_identifier;
struct in_addr  dhcp_server;
diff --git engine.c engine.c
index 17e65fbe789..87cee256600 100644
--- engine.c
+++ engine.c
@@ -1170,6 +1170,7 @@ parse_dhcp(struct dhcpleased_iface *iface, struct 
imsg_dhcp *dhcp)
return;
}
iface->server_identifier.s_addr = server_identifier.s_addr;
+   iface->dhcp_server.s_addr = server_identifier.s_addr;
iface->requested_ip.s_addr = dhcp_hdr->yiaddr.s_addr;
state_transition(iface, IF_REQUESTING);
break;
@@ -1354,11 +1355,8 @@ state_transition(struct dhcpleased_iface *iface, enum 
if_state new_state)
case IF_REBOOTING:
if (old_state == IF_REBOOTING)
iface->timo.tv_sec *= 2;
-   else {
-   /* make sure we send broadcast */
-   iface->dhcp_server.s_addr = INADDR_ANY;
+   else
iface->timo.tv_sec = START_EXP_BACKOFF;
-   }
request_dhcp_request(iface);
break;
case IF_REQUESTING:
@@ -1377,10 +1375,6 @@ state_transition(struct dhcpleased_iface *iface, enum 
if_state new_state)
  

[patch] urndis: uncomment watchdog

2021-11-30 Thread Mikhail
Currently watchdog functionality for urndis driver is disabled
(commented), I've tested it and it works properly - reset messages are
correctly sent and cmplt packets are received according to RNDIS spec (I
patched the driver to forcedly send reset message and act on it with
device reset every 5 seconds). I suggest to uncomment it.

The hardware is Megafon 4G МM200-1:

urndis0 at uhub3 port 2 configuration 1 interface 0 "Qualcomm MM200-1" rev 
2.00/3.18 addr 5

Tests, comments or objections?

diff --git sys/dev/usb/if_urndis.c sys/dev/usb/if_urndis.c
index 551197fbfc3..7347712f282 100644
--- sys/dev/usb/if_urndis.c
+++ sys/dev/usb/if_urndis.c
@@ -64,9 +64,7 @@
 int urndis_newbuf(struct urndis_softc *, struct urndis_chain *);
 
 int urndis_ioctl(struct ifnet *, u_long, caddr_t);
-#if 0
 void urndis_watchdog(struct ifnet *);
-#endif
 
 void urndis_start(struct ifnet *);
 void urndis_rxeof(struct usbd_xfer *, void *, usbd_status);
@@ -100,10 +98,8 @@ u_int32_t urndis_ctrl_query(struct urndis_softc *, 
u_int32_t, void *, size_t,
 u_int32_t urndis_ctrl_set(struct urndis_softc *, u_int32_t, void *, size_t);
 u_int32_t urndis_ctrl_set_param(struct urndis_softc *, const char *, u_int32_t,
 void *, size_t);
-#if 0
 u_int32_t urndis_ctrl_reset(struct urndis_softc *);
 u_int32_t urndis_ctrl_keepalive(struct urndis_softc *);
-#endif
 
 int urndis_encap(struct urndis_softc *, struct mbuf *, int);
 void urndis_decap(struct urndis_softc *, struct urndis_chain *, u_int32_t);
@@ -680,7 +676,6 @@ urndis_ctrl_set_param(struct urndis_softc *sc,
return rval;
 }
 
-#if 0
 /* XXX : adrreset, get it from response */
 u_int32_t
 urndis_ctrl_reset(struct urndis_softc *sc)
@@ -765,7 +760,6 @@ urndis_ctrl_keepalive(struct urndis_softc *sc)
 
return rval;
 }
-#endif
 
 int
 urndis_encap(struct urndis_softc *sc, struct mbuf *m, int idx)
@@ -1048,7 +1042,6 @@ urndis_ioctl(struct ifnet *ifp, u_long command, caddr_t 
data)
return (error);
 }
 
-#if 0
 void
 urndis_watchdog(struct ifnet *ifp)
 {
@@ -1064,7 +1057,6 @@ urndis_watchdog(struct ifnet *ifp)
 
urndis_ctrl_keepalive(sc);
 }
-#endif
 
 void
 urndis_init(struct urndis_softc *sc)
@@ -1451,9 +1443,7 @@ urndis_attach(struct device *parent, struct device *self, 
void *aux)
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_start = urndis_start;
ifp->if_ioctl = urndis_ioctl;
-#if 0
ifp->if_watchdog = urndis_watchdog;
-#endif
 
strlcpy(ifp->if_xname, DEVNAME(sc), IFNAMSIZ);
 



Re: Please test: UVM fault unlocking (aka vmobjlock)

2021-11-30 Thread Jan Stary
On Nov 30 10:40:25, s...@spacehopper.org wrote:
> On 2021/11/29 22:50, Martin Pieuchot wrote:
> > On 24/11/21(Wed) 11:16, Martin Pieuchot wrote:
> > > Diff below unlock the bottom part of the UVM fault handler.  I'm
> > > interested in squashing the remaining bugs.  Please test with your usual
> > > setup & report back.
> > 
> > Thanks to all the testers, here's a new version that includes a bug fix.
> > Tests on !x86 architectures are much appreciated!
> 

This is current/amd64 with the patch on a PC (dmesg below);
this is a work machine, running chrome and all, with

kern.witness.watch=2
kern.witness.locktrace=0

Shortly after boot, dmesg says

witness: lock order reversal:
 1st 0xfd83bbfd8788 uobjlk (&uobj->vmobjlock)
 2nd 0x80b74f00 objmm (&obj->mm.lock)
lock order "&obj->mm.lock"(rwlock) -> "&uobj->vmobjlock"(rwlock) first seen at:
#0  rw_enter+0x65
#1  uvm_obj_wire+0x46
#2  shmem_get_pages+0xaf
#3  __i915_gem_object_get_pages+0x9d
#4  i915_vma_pin_ww+0x49b
#5  i915_ggtt_pin+0x61
#6  __intel_context_do_pin_ww+0x2d1
#7  __intel_context_do_pin+0x4b
#8  intel_engines_init+0x4a1
#9  intel_gt_init+0x133
#10 i915_gem_init+0xa3
#11 i915_driver_probe+0x75a
#12 inteldrm_attachhook+0x45
#13 config_process_deferred_mountroot+0x6b
#14 main+0x743
lock order "&uobj->vmobjlock"(rwlock) -> "&obj->mm.lock"(rwlock) first seen at:
#0  rw_enter+0x65
#1  __i915_gem_object_get_pages+0x30
#2  i915_gem_fault+0x1aa
#3  drm_fault+0x156
#4  uvm_fault+0x179
#5  upageflttrap+0x62
#6  usertrap+0x18b
#7  recall_trap+0x8

While here, I notice that sysctl.witness.watch is documented
in witness(4), but kern.witness.locktrace is not - is that intended?

Jan


OpenBSD 7.0-current (GENERIC.MP) #0: Tue Nov 30 15:37:35 CET 2021
h...@box.stare.cz:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 17009606656 (16221MB)
avail mem = 16347242496 (15589MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.4 @ 0xf0100 (36 entries)
bios0: vendor Award Software International, Inc. version "F3" date 03/31/2011
bios0: Gigabyte Technology Co., Ltd. H67MA-USB3-B3
acpi0 at bios0: ACPI 1.0
acpi0: sleep states S0 S3 S4 S5
acpi0: tables DSDT FACP HPET MCFG ASPT SSPT EUDS MATS TAMG APIC SSDT
acpi0: wakeup devices PCI0(S5) PEX0(S5) PEX1(S5) PEX2(S5) PEX3(S5) PEX4(S5) 
PEX5(S5) PEX6(S5) PEX7(S5) HUB0(S5) UAR1(S3) USBE(S3) USE2(S3) AZAL(S5)
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpihpet0 at acpi0: 14318179 Hz
acpimcfg0 at acpi0
acpimcfg0: addr 0xf400, bus 0-63
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz, 3392.72 MHz, 06-2a-07
cpu0: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
cpu0: 256KB 64b/line 8-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 99MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.1, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz, 3392.31 MHz, 06-2a-07
cpu1: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
cpu1: 256KB 64b/line 8-way L2 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 4 (application processor)
cpu2: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz, 3392.31 MHz, 06-2a-07
cpu2: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
cpu2: 256KB 64b/line 8-way L2 cache
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 6 (application processor)
cpu3: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz, 3392.31 MHz, 06-2a-07
cpu3: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,AES,XSAVE,AVX,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
cpu3: 256KB 64b/line 8-way L2 cache
cpu3: smt 0, core 3, package 0
cpu4 at mainbus0: apid 1 (application processor)
cpu4: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz, 33

Re: Please test: UVM fault unlocking (aka vmobjlock)

2021-11-30 Thread Alexander Bluhm
On Mon, Nov 29, 2021 at 10:50:32PM +0100, Martin Pieuchot wrote:
> On 24/11/21(Wed) 11:16, Martin Pieuchot wrote:
> > Diff below unlock the bottom part of the UVM fault handler.  I'm
> > interested in squashing the remaining bugs.  Please test with your usual
> > setup & report back.
> 
> Thanks to all the testers, here's a new version that includes a bug fix.

Passed amd64 regress with two witness reports.  I don't know whether
they were there without the diff.

witness: lock order reversal:
 1st 0xfd87764d68c8 vmmaplk (&map->lock)
 2nd 0xfd873a6516f8 inode (&ip->i_lock)
lock order "&ip->i_lock"(rrwlock) -> "&map->lock"(rwlock) first seen at:
#0  rw_enter_read+0x38
#1  uvmfault_lookup+0x92
#2  uvm_fault_check+0x32
#3  uvm_fault+0xfb
#4  kpageflttrap+0x12b
#5  kerntrap+0x91
#6  alltraps_kern_meltdown+0x7b
#7  copyout+0x53
#8  ffs_read+0x1f6
#9  VOP_READ+0x41
#10 vn_rdwr+0xa1
#11 vmcmd_map_readvn+0xa6
#12 exec_process_vmcmds+0x84
#13 sys_execve+0x79f
#14 start_init+0x29f
#15 proc_trampoline+0x1c
lock order "&map->lock"(rwlock) -> "&ip->i_lock"(rrwlock) first seen at:
#0  rw_enter+0x65
#1  rrw_enter+0x56
#2  VOP_LOCK+0x5b
#3  vn_lock+0xad
#4  uvn_io+0x1e7
#5  uvm_pager_put+0xf7
#6  uvn_flush+0x260
#7  uvm_map_clean+0x21d
#8  syscall+0x3a9
#9  Xsyscall+0x128

witness: lock order reversal:
 1st 0xfd886a277768 vmmaplk (&map->lock)
 2nd 0x800022c46360 nfsnode (&np->n_lock)
lock order data w2 -> w1 missing
lock order "&map->lock"(rwlock) -> "&np->n_lock"(rrwlock) first seen at:
#0  rw_enter+0x65
#1  rrw_enter+0x56
#2  VOP_LOCK+0x5b
#3  vn_lock+0xad
#4  vn_rdwr+0x7f
#5  vndstrategy+0x2e6
#6  physio+0x227
#7  spec_write+0x95
#8  VOP_WRITE+0x41
#9  vn_write+0xfc
#10 dofilewritev+0x14d
#11 sys_pwrite+0x5c
#12 syscall+0x3a9
#13 Xsyscall+0x128

bluhm



Re: Rework UNIX sockets locking to be fine grained

2021-11-30 Thread Vitaliy Makkoveev
Include missing "sys/refcnt.h" header to unpcb.h to fix libkvm and
netstat(1) build. No functional changes.

Index: sys/kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.269
diff -u -p -r1.269 uipc_socket.c
--- sys/kern/uipc_socket.c  11 Nov 2021 16:35:09 -  1.269
+++ sys/kern/uipc_socket.c  30 Nov 2021 16:44:39 -
@@ -52,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef DDB
 #include 
@@ -156,7 +157,9 @@ soalloc(int prflags)
so = pool_get(&socket_pool, prflags);
if (so == NULL)
return (NULL);
-   rw_init(&so->so_lock, "solock");
+   rw_init_flags(&so->so_lock, "solock", RWL_DUPOK);
+   refcnt_init(&so->so_refcnt);
+
return (so);
 }
 
@@ -257,6 +260,8 @@ solisten(struct socket *so, int backlog)
 void
 sofree(struct socket *so, int s)
 {
+   int persocket = solock_persocket(so);
+
soassertlocked(so);
 
if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) {
@@ -264,16 +269,53 @@ sofree(struct socket *so, int s)
return;
}
if (so->so_head) {
+   struct socket *head = so->so_head;
+
/*
 * We must not decommission a socket that's on the accept(2)
 * queue.  If we do, then accept(2) may hang after select(2)
 * indicated that the listening socket was ready.
 */
-   if (!soqremque(so, 0)) {
+   if (so->so_onq == &head->so_q) {
sounlock(so, s);
return;
}
+
+   if (persocket) {
+   /*
+* Concurrent close of `head' could
+* abort `so' due to re-lock.
+*/
+   soref(so);
+   soref(head);
+   sounlock(so, SL_LOCKED);
+   solock(head);
+   solock(so);
+
+   if (so->so_onq != &head->so_q0) {
+   sounlock(head, SL_LOCKED);
+   sounlock(so, SL_LOCKED);
+   sorele(head);
+   sorele(so);
+   return;
+   }
+
+   sorele(head);
+   sorele(so);
+   }
+
+   soqremque(so, 0);
+
+   if (persocket)
+   sounlock(head, SL_LOCKED);
}
+
+   if (persocket) {
+   sounlock(so, SL_LOCKED);
+   refcnt_finalize(&so->so_refcnt, "sofinal");
+   solock(so);
+   }
+
sigio_free(&so->so_sigio);
klist_free(&so->so_rcv.sb_sel.si_note);
klist_free(&so->so_snd.sb_sel.si_note);
@@ -363,13 +405,36 @@ drop:
error = error2;
}
if (so->so_options & SO_ACCEPTCONN) {
+   int persocket = solock_persocket(so);
+
+   if (persocket) {
+   /* Wait concurrent sonewconn() threads. */
+   while (so->so_newconn > 0) {
+   so->so_state |= SS_NEWCONN_WAIT;
+   sosleep_nsec(so, &so->so_newconn, PSOCK,
+   "netlck", INFSLP);
+   }
+   }
+
while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) {
+   if (persocket)
+   solock(so2);
(void) soqremque(so2, 0);
+   if (persocket)
+   sounlock(so, SL_LOCKED);
(void) soabort(so2);
+   if (persocket)
+   solock(so);
}
while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) {
+   if (persocket)
+   solock(so2);
(void) soqremque(so2, 1);
+   if (persocket)
+   sounlock(so, SL_LOCKED);
(void) soabort(so2);
+   if (persocket)
+   solock(so);
}
}
 discard:
@@ -437,11 +502,19 @@ soconnect(struct socket *so, struct mbuf
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
-   int s, error;
+   int persocket, s, error;
+
+   if ((persocket = solock_persocket(so1))) {
+   solock_pair(so1, so2);
+   s = SL_LOCKED;
+   } else
+   s = solock(so1);
 
-   s = solock(so1);
error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL,
(struct mbuf *)so2, NULL, curproc);
+
+   if (persocket)
+   sounlock(so2, s);
sounlock(so1, s);
   

Re: vmm(4): restore vmcs after sleep points [vmx 2/3]

2021-11-30 Thread Dave Voutila


Dave Voutila  writes:

> This diff removes instability from VMX-based hosts by either removing
> the possibility of the process sleeping while the VMCS is active or
> reloading it if we had no choice.
>
> A mutex is added to help guard the VMCS state so testing with witness
> has helped verify the diff.
>

Removed the mutex as it has served its purpose in ferreting out some
sleep points.

> The rwlock on the cpu originally used in the remote vmclear routine is
> changed to a mutex accordingly.
>

Reverted this. This update doesn't change the rwlock to a mutex...it's
fine if we sleep while we wait for a remote clear as it doesn't matter
which CPU we wake up on as we're about to reload the VMCS anyways.

> This diff does not remote possible calls to printf(9) via the DPRINTF
> macro as that's part of the next diff.
>

Moot at this point.

> One area of note: in vmx_load_pdptes() there's a XXX to call out that
> because of the printf(9) call on failure to km_alloc that the VMCS is
> potentially no longer valid. The upcoming diff to swap out printf(9) for
> log(9) will remove that.
>

Revisited the above now that we're holding off on this printf -> log
changeover.

It was in the previous diff as well, but just to point out this removes
the KERNEL_LOCK dance around uvm_fault. We were only doing this on Intel
hosts as it wasn't understood (at that time) what was causing the VMCS
corruption. AMD hosts haven't done this during nested page fault exit
handling since my work to unlock vmm(4) at k2k21.

ok?


blob - 8e588f7dcbd1cec2e61e7b7292ee32ff4eb9a2e1
blob + ac91b74fd4d5da774808ad1c78d75469ff89b458
--- sys/arch/amd64/amd64/vmm.c
+++ sys/arch/amd64/amd64/vmm.c
@@ -3028,12 +3028,22 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg
IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
IA32_VMX_ENABLE_VPID, 1)) {
-   if (vmm_alloc_vpid(&vpid)) {
+
+   /* We may sleep during allocation, so reload VMCS. */
+   vcpu->vc_last_pcpu = curcpu();
+   ret = vmm_alloc_vpid(&vpid);
+   if (vcpu_reload_vmcs_vmx(vcpu)) {
+   printf("%s: failed to reload vmcs\n", __func__);
+   ret = EINVAL;
+   goto exit;
+   }
+   if (ret) {
DPRINTF("%s: could not allocate VPID\n",
__func__);
ret = EINVAL;
goto exit;
}
+
if (vmwrite(VMCS_GUEST_VPID, vpid)) {
DPRINTF("%s: error setting guest VPID\n",
__func__);
@@ -5549,7 +5559,7 @@ svm_handle_np_fault(struct vcpu *vcpu)
  *
  * Return Values:
  *  0: if successful
- *  EINVAL: if fault type could not be determined
+ *  EINVAL: if fault type could not be determined or VMCS reload fails
  *  EAGAIN: if a protection fault occurred, ie writing to a read-only page
  *  errno: if uvm_fault(9) fails to wire in the page
  */
@@ -5569,10 +5579,14 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
return (EAGAIN);
}

-   KERNEL_LOCK();
+   /* We may sleep during uvm_fault(9), so reload VMCS. */
+   vcpu->vc_last_pcpu = curcpu();
ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE,
PROT_READ | PROT_WRITE | PROT_EXEC);
-   KERNEL_UNLOCK();
+   if (vcpu_reload_vmcs_vmx(vcpu)) {
+   printf("%s: failed to reload vmcs\n", __func__);
+   return (EINVAL);
+   }

if (ret)
printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
@@ -5962,7 +5976,16 @@ vmx_load_pdptes(struct vcpu *vcpu)

ret = 0;

-   cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none, 
&kd_waitok);
+   /* We may sleep during km_alloc(9), so reload VMCS. */
+   vcpu->vc_last_pcpu = curcpu();
+   cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE, &kv_any, &kp_none,
+   &kd_waitok);
+   if (vcpu_reload_vmcs_vmx(vcpu)) {
+   printf("%s: failed to reload vmcs\n", __func__);
+   ret = EINVAL;
+   goto exit;
+   }
+
if (!cr3_host_virt) {
printf("%s: can't allocate address for guest CR3 mapping\n",
__func__);
@@ -5998,7 +6021,15 @@ vmx_load_pdptes(struct vcpu *vcpu)

 exit:
pmap_kremove(cr3_host_virt, PAGE_SIZE);
+
+   /* km_free(9) might sleep, so we need to reload VMCS. */
+   vcpu->vc_last_pcpu = curcpu();
km_free((void *)cr3_host_virt, PAGE_SIZE, &kv_any, &kp_none);
+   if (vcpu_reload_vmcs_vmx(vcpu)) {
+   printf("%s: failed to reload vmcs after km_free\n", __func__);
+   ret = EINVAL;
+ 

Revised version of kqueue-based poll(2)

2021-11-30 Thread Visa Hankala
Here is a revised version of kqueue-based poll(2).

The two most important changes to the previous version are:

* Properly handle pollfd arrays where more than one entries refer
  to the same file descriptor.

* Fix poll(2)'s return value.

The first change would be somewhat tricky with plain kqueue since
an fd-based knote is identified by a (filter, fd) pair. The poll(2)
translation layer could manage with this by using an auxiliary array
to map overlapping pollfd entries into a single knote. However, most
users of poll(2) appear to avoid fd overlaps. So the mapping would be
a hindrance in the typical case. Also, overlaps might suggest
suboptimal I/O patterns in the userspace software.

The patch handles fd overlaps by adding a pollid field to the knote
identifier. This allows the co-existence of multiple knotes with the
same filter and fd. Of course, the extra identifier is not totally free
either. Nevertheless, it looks to solve the problem relatively nicely.

I have shuffled the code a bit so that the register and collect
routines are next to each other.

In addition, the collect routine now warns if kqueue_scan() returns
an event that does not activate a pollfd entry. Such a situation is
likely the result of a bug somewhere.

Please test. Once this way of implementing poll(2) has proven reliable
enough, it should be possible to replace selwakeup() with kernel lock
free KNOTE() in subsystems that are sufficiently MP-safe.

Index: sys/kern/kern_event.c
===
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.174
diff -u -p -r1.174 kern_event.c
--- sys/kern/kern_event.c   29 Nov 2021 15:54:04 -  1.174
+++ sys/kern/kern_event.c   30 Nov 2021 15:05:06 -
@@ -450,7 +450,7 @@ filt_proc(struct knote *kn, long hint)
kev.fflags = kn->kn_sfflags;
kev.data = kn->kn_id;   /* parent */
kev.udata = kn->kn_udata;   /* preserve udata */
-   error = kqueue_register(kq, &kev, NULL);
+   error = kqueue_register(kq, &kev, 0, NULL);
if (error)
kn->kn_fflags |= NOTE_TRACKERR;
}
@@ -928,7 +928,7 @@ sys_kevent(struct proc *p, void *v, regi
for (i = 0; i < n; i++) {
kevp = &kev[i];
kevp->flags &= ~EV_SYSFLAGS;
-   error = kqueue_register(kq, kevp, p);
+   error = kqueue_register(kq, kevp, 0, p);
if (error || (kevp->flags & EV_RECEIPT)) {
if (SCARG(uap, nevents) != 0) {
kevp->flags = EV_ERROR;
@@ -1024,7 +1024,8 @@ bad:
 #endif
 
 int
-kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p)
+kqueue_register(struct kqueue *kq, struct kevent *kev, unsigned int pollid,
+struct proc *p)
 {
struct filedesc *fdp = kq->kq_fdp;
const struct filterops *fops = NULL;
@@ -1033,6 +1034,8 @@ kqueue_register(struct kqueue *kq, struc
struct knlist *list = NULL;
int active, error = 0;
 
+   KASSERT(pollid == 0 || (p != NULL && p->p_kq == kq));
+
if (kev->filter < 0) {
if (kev->filter + EVFILT_SYSCOUNT < 0)
return (EINVAL);
@@ -1080,7 +1083,8 @@ again:
if (list != NULL) {
SLIST_FOREACH(kn, list, kn_link) {
if (kev->filter == kn->kn_filter &&
-   kev->ident == kn->kn_id) {
+   kev->ident == kn->kn_id &&
+   pollid == kn->kn_pollid) {
if (!knote_acquire(kn, NULL, 0)) {
/* knote_acquire() has released
 * kq_lock. */
@@ -1125,6 +1129,7 @@ again:
kev->fflags = 0;
kev->data = 0;
kn->kn_kevent = *kev;
+   kn->kn_pollid = pollid;
 
knote_attach(kn);
mtx_leave(&kq->kq_lock);
Index: sys/kern/sys_generic.c
===
RCS file: src/sys/kern/sys_generic.c,v
retrieving revision 1.144
diff -u -p -r1.144 sys_generic.c
--- sys/kern/sys_generic.c  30 Nov 2021 02:58:33 -  1.144
+++ sys/kern/sys_generic.c  30 Nov 2021 15:05:06 -
@@ -81,8 +81,11 @@ int kqpoll_debug = 0;
 
 int pselregister(struct proc *, fd_set *[], fd_set *[], int, int *, int *);
 int pselcollect(struct proc *, struct kevent *, fd_set *[], int *);
+void ppollregister(struct proc *, struct pollfd *, int, int *, int *);
+int ppollregister_evts(struct proc *, struct kevent *, int, struct pollfd *,
+unsigned int);
+int ppollcollect(struct proc *, struct kevent *, struct pollfd *, u_int);
 
-void pollscan(struct proc *, struct 

Re: Fix ipsp_spd_lookup() for transport mode

2021-11-30 Thread Vitaliy Makkoveev
Hi,

This question is mostly for bluhm@. Should the gettdbbyflow() grab the
extra reference on returned `tdbp' like other other gettdb*() do? I'm
pointing this because we are going to not rely on the netlock when doing
`tdbp' dereference.

Also could this block be rewritten? It looks a little ugly.

> + if (ipo->ipo_ids != NULL) {
> + if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0 &&
> + (tdbp->tdb_flags & TDBF_UDPENCAP) != 0) {
> + /*
> +  * Skip IDs check for transport mode
> +  * with NAT-T.  Multiple clients (IDs)
> +  * can use a same policy.
> +  */
> + } else if (tdbp->tdb_ids == NULL &&
> + !ipsp_ids_match(ipo->ipo_ids,
> + tdbp->tdb_ids))
>   goto nomatchin;
> + }

On Tue, Nov 30, 2021 at 01:50:21PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> Let me update the diff.  Previous has a problem in ipsp_spd_lookup()
> which uses "rn" without initialization.
> 
> On Sat, 20 Nov 2021 21:44:20 +0900 (JST)
> YASUOKA Masahiko  wrote:
> > On Wed, 12 May 2021 19:11:09 +0900 (JST)
> > YASUOKA Masahiko  wrote:
> >> Radek reported a problem to misc@ that multiple Windows clients behind
> >> a NAT cannot use a L2TP/IPsec server simultaneously.
> >> 
> >> https://marc.info/?t=16099681611&r=1&w=2
> >> 
> >> There is two problems.  First is pipex(4) doesn't pass the proper
> >> ipsecflowinfo to ip_output().  Second is the IPsec policy check which
> >> is done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is
> >> not cached.  This happens when its flow is shared by another tdb (for
> >> another client of the same NAT).
> > 
> > This problem is not fixed yet.  The diff for the second problem was
> > not committed in.  It was to fix the check in ipsp_spd_lookup() by
> > making a IPsec policy have a list of IDs.
> > 
> > Also my colleague Kawai pointed out there is another problem if there
> > is a Linux client among with Windows clients behind a NAT.  Windows
> > uses 1701/udp for its local ID, but the Linux uses ANY/udp for its
> > local ID.
> > 
> > In the situation, policies will be overlapped.
> > 
> >   (a) Windows:  REMOTE_IP:1701/udp <=> LOCAL_IP:1701/udp
> >   (b) Linux:REMOTE_IP:ANY/udp  <=> LOCAL_IP:1701/udp
> >   
> > Since we use a radix tree for the policies, when rn_match() is used to
> > find a policy, as it's best match, (b) is never selected.
> > 
> > Let me update the diff.
> > 
> > As for the incomming, we know the tdb when is used.  The diff uses the
> > tdb to find the proper policy.
> > 
> > As for the outgoing, other than using "ipsecflowinfo" there is no way
> > to select a proper policy.  So only when "ipsecflowinfo" is used, get
> > a tdb from the packet flow and the IDs (retributed by the
> > ipsecflowinfo), then we can find the proper policy by the tdb.
> > 
> > Also the diff skips the IDs check against the policy only if it is
> > transport mode and using NAT-T.  Since when NAT-T is used for a policy
> > for transport mode is shared by multiple clients which has a different
> > IDs, checking the IDs is difficult and I think the checks other than
> > is enough.
> > 
> > ok?  comments?
> > 
> > Fix some problems when accepting IPsec transport mode connections from
> > multiple clients behind a NAT.  In the situation, policies can be
> > overlapped, but previous could not choice a proper policy both for
> > incoming and outgoing.  To solve this problem, use
> > tdb->tdb_filter{,mask} to find a proper policy for incoming and find the
> > tdb by the given ipsecflowinfo and use it for outgoing.  Also skip
> > checking IDs of the policy since a policy is shared by multiple clients
> > in the situation.
> 
> Index: sys/netinet/ip_ipsp.c
> ===
> RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.c,v
> retrieving revision 1.258
> diff -u -p -r1.258 ip_ipsp.c
> --- sys/netinet/ip_ipsp.c 29 Nov 2021 19:19:00 -  1.258
> +++ sys/netinet/ip_ipsp.c 30 Nov 2021 04:44:48 -
> @@ -90,6 +90,8 @@ voidtdb_firstuse(void *);
>  void tdb_soft_timeout(void *);
>  void tdb_soft_firstuse(void *);
>  int  tdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
> +int  sockaddr_encap_match(struct sockaddr_encap *,
> + struct sockaddr_encap *, struct sockaddr_encap *);
>  
>  int ipsec_in_use = 0;
>  u_int64_t ipsec_last_added = 0;
> @@ -507,6 +509,76 @@ gettdbbysrc(u_int rdomain, union sockadd
>   tdb_ref(tdbp);
>   mtx_leave(&tdb_sadb_mtx);
>   return tdbp;
> +}
> +
> +/*
> + * Get an SA given the flow, the direction, the security protocol type, and

Re: Please test: UVM fault unlocking (aka vmobjlock)

2021-11-30 Thread Stuart Henderson
On 2021/11/29 22:50, Martin Pieuchot wrote:
> On 24/11/21(Wed) 11:16, Martin Pieuchot wrote:
> > Diff below unlock the bottom part of the UVM fault handler.  I'm
> > interested in squashing the remaining bugs.  Please test with your usual
> > setup & report back.
> 
> Thanks to all the testers, here's a new version that includes a bug fix.
> 
> Tests on !x86 architectures are much appreciated!

Running here on amd64 and arm64 (plus I added WITNESS to kernel config on
arm64 which is only for x86 in your diff). No failures so far, though
the machine is quiet (mostly just running conserver + NSD) so it's not
the best test ever - if somebody has a more active machine that would be
good to get tested.

i386 will get a good workout shortly (mkr'ing now).

I have one lock order reversal showing at boot on arm64:

witness: lock order reversal:
 1st 0xff800dab18c0 vmmaplk (&map->lock)
 2nd 0xff800da32098 inode (&ip->i_lock)
lock order "&ip->i_lock"(rrwlock) -> "&map->lock"(rwlock) first seen at:
#0  rw_enter_read+0x44
#1  uvmfault_lookup+0xd8
#2  uvm_fault_check+0x40
#3  uvm_fault+0xdc
#4  do_el1h_sync+0x118
#5  handle_el1h_sync+0x70
#6  uiomove+0x9c
#7  ffs_read+0x1cc
#8  VOP_READ+0x3c
#9  vn_rdwr+0xac
#10 vmcmd_map_readvn+0x8c
#11 exec_process_vmcmds+0x84
#12 sys_execve+0x610
#13 start_init+0x23c
#14 proc_trampoline+0x14
lock order "&map->lock"(rwlock) -> "&ip->i_lock"(rrwlock) first seen at:
#0  rw_enter+0x84
#1  rrw_enter+0x60
#2  VOP_LOCK+0x64
#3  vn_lock+0xa8
#4  uvn_io+0x19c
#5  uvm_pager_put+0xf0
#6  uvn_flush+0x274
#7  uvm_map_clean+0x228
#8  svc_handler+0x310
#9  do_el0_sync+0xe0
#10 handle_el0_sync+0x78

OpenBSD 7.0-current (GENERIC.MP) #4: Tue Nov 30 10:01:15 GMT 2021
st...@mala.spacehopper.org:/sys/arch/arm64/compile/GENERIC.MP
real mem  = 4134567936 (3943MB)
avail mem = 3937853440 (3755MB)
random: good seed from bootblocks
mainbus0 at root: Raspberry Pi 4 Model B Rev 1.2
psci0 at mainbus0: PSCI 1.1, SMCCC 1.2
cpu0 at mainbus0 mpidr 0: ARM Cortex-A72 r0p3
cpu0: 48KB 64b/line 3-way L1 PIPT I-cache, 32KB 64b/line 2-way L1 D-cache
cpu0: 1024KB 64b/line 16-way L2 cache
cpu0: CRC32,ASID16
cpu1 at mainbus0 mpidr 1: ARM Cortex-A72 r0p3
cpu1: 48KB 64b/line 3-way L1 PIPT I-cache, 32KB 64b/line 2-way L1 D-cache
cpu1: 1024KB 64b/line 16-way L2 cache
cpu1: CRC32,ASID16
cpu2 at mainbus0 mpidr 2: ARM Cortex-A72 r0p3
cpu2: 48KB 64b/line 3-way L1 PIPT I-cache, 32KB 64b/line 2-way L1 D-cache
cpu2: 1024KB 64b/line 16-way L2 cache
cpu2: CRC32,ASID16
cpu3 at mainbus0 mpidr 3: ARM Cortex-A72 r0p3
cpu3: 48KB 64b/line 3-way L1 PIPT I-cache, 32KB 64b/line 2-way L1 D-cache
cpu3: 1024KB 64b/line 16-way L2 cache
cpu3: CRC32,ASID16
efi0 at mainbus0: UEFI 2.7
efi0: https://github.com/pftf/RPi4 rev 0x1
smbios0 at efi0: SMBIOS 3.3.0
smbios0: vendor https://github.com/pftf/RPi4 version "UEFI Firmware v1.31" date 
09/09/2021
smbios0: Raspberry Pi Foundation Raspberry Pi 4 Model B
apm0 at mainbus0
"system" at mainbus0 not configured
"axi" at mainbus0 not configured
simplebus0 at mainbus0: "soc"
bcmclock0 at simplebus0
bcmmbox0 at simplebus0
bcmgpio0 at simplebus0
bcmaux0 at simplebus0
ampintc0 at simplebus0 nirq 256, ncpu 4 ipi: 0, 1: "interrupt-controller"
bcmtmon0 at simplebus0
bcmdmac0 at simplebus0: DMA0 DMA2 DMA4 DMA5 DMA6 DMA7 DMA8 DMA9
"timer" at simplebus0 not configured
pluart0 at simplebus0: console
com0 at simplebus0: ns16550, no working fifo
"local_intc" at simplebus0 not configured
bcmdog0 at simplebus0
bcmirng0 at simplebus0
"firmware" at simplebus0 not configured
"power" at simplebus0 not configured
"mailbox" at simplebus0 not configured
sdhc0 at simplebus0
sdhc0: SDHC 3.0, 250 MHz base clock
sdmmc0 at sdhc0: 4-bit, sd high-speed, mmc high-speed
"gpiomem" at simplebus0 not configured
"fb" at simplebus0 not configured
"vcsm" at simplebus0 not configured
"clocks" at mainbus0 not configured
"phy" at mainbus0 not configured
"clk-27M" at mainbus0 not configured
"clk-108M" at mainbus0 not configured
simplebus1 at mainbus0: "emmc2bus"
sdhc1 at simplebus1
sdhc1: SDHC 3.0, 100 MHz base clock
sdmmc1 at sdhc1: 8-bit, sd high-speed, mmc high-speed, ddr52, dma
"arm-pmu" at mainbus0 not configured
agtimer0 at mainbus0: 54000 kHz
simplebus2 at mainbus0: "scb"
bcmpcie0 at simplebus2
pci0 at bcmpcie0
ppb0 at pci0 dev 0 function 0 "Broadcom BCM2711" rev 0x10
pci1 at ppb0 bus 1
xhci0 at pci1 dev 0 function 0 "VIA VL805 xHCI" rev 0x01: intx, xHCI 1.0
usb0 at xhci0: USB revision 3.0
uhub0 at usb0 configuration 1 interface 0 "VIA xHCI root hub" rev 3.00/1.00 
addr 1
bse0 at simplebus2: address dc:a6:32:8b:e1:b7
brgphy0 at bse0 phy 1: BCM54210E 10/100/1000baseT PHY, rev. 2
"dma" at simplebus2 not configured
"hevc-decoder" at simplebus2 not configured
"rpivid-local-intc" at simplebus2 not configured
"h264-decoder" at simplebus2 not configured
"vp9-decoder" at simplebus2 not configured
gpioleds0 at mainbus0: "led0", "led1"
"sd_io_1v8_reg" at mainbus0 not configured
"sd_vcc_reg" at mainbus0 not configured
"fixedregulator_3v3" at m