com at acpi: minor nit

2022-06-27 Thread Anton Lindqvist
Hi,
A com_acpi_softc pointer is used as the interrupt callback cookie which
is later on interpreted as a com_softc pointer. This is not a problem in
practice as a com_softc structure is the first member of the
com_acpi_softc structure.

Using the actual types consistently yields a better symmetry in my
opinion between registering the interrupt and the corresponding
interrupt handler.

Comments? OK?

diff --git sys/dev/acpi/com_acpi.c sys/dev/acpi/com_acpi.c
index 9c1e4af0426..b9f2a14edd3 100644
--- sys/dev/acpi/com_acpi.c
+++ sys/dev/acpi/com_acpi.c
@@ -159,9 +159,9 @@ com_acpi_is_designware(const char *hid)
 int
 com_acpi_intr_designware(void *cookie)
 {
-   struct com_softc *sc = cookie;
+   struct com_acpi_softc *sc = cookie;
 
-   com_read_reg(sc, com_usr);
+   com_read_reg(>sc, com_usr);
 
-   return comintr(sc);
+   return comintr(>sc);
 }



bcmdmac: minor nit

2022-06-27 Thread Anton Lindqvist
Hi,
No need to pass a copy of the bcmdmac_channel structure to predicate
routines.

Comments? OK?

diff --git sys/dev/fdt/bcm2835_dmac.c sys/dev/fdt/bcm2835_dmac.c
index 145810dd7af..e9b31af568c 100644
--- sys/dev/fdt/bcm2835_dmac.c
+++ sys/dev/fdt/bcm2835_dmac.c
@@ -97,18 +97,18 @@ struct cfdriver bcmdmac_cd = { NULL, "bcmdmac", DV_DULL };
 
 /* utilities */
 enum bcmdmac_type
-bcmdmac_channel_type(struct bcmdmac_channel ch)
+bcmdmac_channel_type(struct bcmdmac_channel *ch)
 {
-   if (ISSET(ch.ch_debug, DMAC_DEBUG_LITE))
+   if (ISSET(ch->ch_debug, DMAC_DEBUG_LITE))
return BCMDMAC_TYPE_LITE;
else
return BCMDMAC_TYPE_NORMAL;
 }
 
 int
-bcmdmac_channel_used(struct bcmdmac_channel ch)
+bcmdmac_channel_used(struct bcmdmac_channel *ch)
 {
-   return ch.ch_callback != NULL;
+   return ch->ch_callback != NULL;
 }
 
 void
@@ -233,9 +233,9 @@ bcmdmac_alloc(enum bcmdmac_type type, int ipl,
for (index = 0; index < sc->sc_nchannels; index++) {
if (!ISSET(sc->sc_channelmask, (1 << index)))
continue;
-   if (bcmdmac_channel_type(sc->sc_channels[index]) != type)
+   if (bcmdmac_channel_type(>sc_channels[index]) != type)
continue;
-   if (bcmdmac_channel_used(sc->sc_channels[index]))
+   if (bcmdmac_channel_used(>sc_channels[index]))
continue;
 
ch = >sc_channels[index];



Re: acpitz(4): perform passive cooling only when perfpolicy is AUTO

2022-06-27 Thread Sebastien Marie
On Mon, Jun 27, 2022 at 11:47:58PM +0200, Stefan Hagen wrote:
> Bryan Steele wrote (2022-06-27 23:12 CEST):
> > On Mon, Jun 27, 2022 at 11:01:31PM +0200, Stefan Hagen wrote:
> > > acpitz(4) implements passive cooling, which starts throttling the CPU to 
> > > keep it under the temperature reported by the _PSV trip point.
> > > 
> > > https://uefi.org/specs/ACPI/6.4/11_Thermal_Management/thermal-control.html
> > > 
> > > The specs (1.1.5.1) leave the decision to activate passive cooling to
> > > the OS. It is a way to limit noise and heat rather than to protect the
> > > CPU (for which _HOT and _CRT are the better trip points).
> > > 
> > > I would like to restrict passive cooling to the AUTO perfpolicy.
> > > 
> > > For low (apm -L) and high (apm -H) it doesn't make much sense, because
> > >  - low is setting a low pstate anyway
> > >  - high is probably never set with the intention to have a cool and 
> > >quiet machine.
> > 
> > Shouldn't this also take into consideration hw.power as well? If it
> > doesn't make sense for perfpolicy=high then it probably doesn't for
> > perfpolicy=auto when on AC power?
> 
> I would say yes.

no particular opinion for now regarding the logic, but please avoid magic 
number when constant/define exists.

AUTO perfpolicy is PERFPOL_AUTO (which is 1).

Thanks.
 
> Index: sys/dev/acpi/acpitz.c
> ===
> RCS file: /home/cvs/src/sys/dev/acpi/acpitz.c,v
> retrieving revision 1.58
> diff -u -p -r1.58 acpitz.c
> --- sys/dev/acpi/acpitz.c 6 Apr 2022 18:59:27 -   1.58
> +++ sys/dev/acpi/acpitz.c 27 Jun 2022 21:23:06 -
> @@ -89,7 +89,9 @@ voidacpitz_init(struct acpitz_softc *, 
>  void (*acpitz_cpu_setperf)(int);
>  int  acpitz_perflevel = -1;
>  extern void  (*cpu_setperf)(int);
> +extern int   hw_power;
>  extern int   perflevel;
> +extern int   perfpolicy;
>  #define PERFSTEP 10
>  
>  #define ACPITZ_TRIPS (1L << 0)
> @@ -381,7 +383,7 @@ acpitz_refresh(void *arg)
>   sc->sc_tc1, sc->sc_tc2, sc->sc_psv);
>  
>   nperf = acpitz_perflevel;
> - if (sc->sc_psv <= sc->sc_tmp) {
> + if (sc->sc_psv <= sc->sc_tmp && perfpolicy == 1 && hw_power == 
> 0) {
>   /* Passive cooling enabled */
>   dnprintf(1, "%s: enabling passive %d %d\n",
>   DEVNAME(sc), sc->sc_tmp, sc->sc_psv);
> 

-- 
Sebastien Marie



ip6 hbhchcheck next protocol

2022-06-27 Thread Alexander Bluhm
Hi,

The ip6_hbhchcheck() function never reads the nxtp parameter, it
only sets its value.  It is more obvious if we return the next
protocol and return IPPROTO_DONE to signal error.  All IP protocol
functions do that.

ok?

bluhm

Index: netinet6/ip6_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.245
diff -u -p -r1.245 ip6_input.c
--- netinet6/ip6_input.c5 May 2022 13:57:40 -   1.245
+++ netinet6/ip6_input.c28 Jun 2022 00:15:32 -
@@ -122,7 +122,7 @@ uint8_t ip6_soiikey[IP6_SOIIKEY_LEN];
 int ip6_ours(struct mbuf **, int *, int, int);
 int ip6_local(struct mbuf **, int *, int, int);
 int ip6_check_rh0hdr(struct mbuf *, int *);
-int ip6_hbhchcheck(struct mbuf *, int *, int *, int *);
+int ip6_hbhchcheck(struct mbuf *, int *, int *);
 int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
 struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
 int ip6_sysctl_soiikey(void *, size_t *, void *, size_t);
@@ -424,7 +424,8 @@ ip6_input_if(struct mbuf **mp, int *offp
if (ip6_mforwarding && ip6_mrouter[ifp->if_rdomain]) {
int error;
 
-   if (ip6_hbhchcheck(m, offp, , ))
+   nxt = ip6_hbhchcheck(m, offp, );
+   if (nxt == IPPROTO_DONE)
goto out;
 
ip6 = mtod(m, struct ip6_hdr *);
@@ -543,7 +544,8 @@ ip6_input_if(struct mbuf **mp, int *offp
goto bad;
}
 
-   if (ip6_hbhchcheck(m, offp, , ))
+   nxt = ip6_hbhchcheck(m, offp, );
+   if (nxt == IPPROTO_DONE)
goto out;
 
if (ours) {
@@ -584,7 +586,8 @@ ip6_local(struct mbuf **mp, int *offp, i
 {
NET_ASSERT_WLOCKED();
 
-   if (ip6_hbhchcheck(*mp, offp, , NULL))
+   nxt = ip6_hbhchcheck(*mp, offp, NULL);
+   if (nxt == IPPROTO_DONE)
return IPPROTO_DONE;
 
/* Check whether we are already in a IPv4/IPv6 local deliver loop. */
@@ -594,10 +597,11 @@ ip6_local(struct mbuf **mp, int *offp, i
 }
 
 int
-ip6_hbhchcheck(struct mbuf *m, int *offp, int *nxtp, int *oursp)
+ip6_hbhchcheck(struct mbuf *m, int *offp, int *oursp)
 {
struct ip6_hdr *ip6;
u_int32_t plen, rtalert = ~0;
+   int nxt;
 
ip6 = mtod(m, struct ip6_hdr *);
 
@@ -641,7 +645,7 @@ ip6_hbhchcheck(struct mbuf *m, int *offp
ip6stat_inc(ip6s_tooshort);
goto bad;
}
-   *nxtp = hbh->ip6h_nxt;
+   nxt = hbh->ip6h_nxt;
 
/*
 * accept the packet if a router alert option is included
@@ -650,7 +654,7 @@ ip6_hbhchcheck(struct mbuf *m, int *offp
if (rtalert != ~0 && ip6_forwarding && oursp != NULL)
*oursp = 1;
} else
-   *nxtp = ip6->ip6_nxt;
+   nxt = ip6->ip6_nxt;
 
/*
 * Check that the amount of data in the buffers
@@ -673,11 +677,9 @@ ip6_hbhchcheck(struct mbuf *m, int *offp
}
}
 
-   return (0);
-
+   return nxt;
  bad:
-   *nxtp = IPPROTO_DONE;
-   return (-1);
+   return IPPROTO_DONE;
 }
 
 /* scan packet for RH0 routing header. Mostly stolen from pf.c:pf_test() */



Re: pipex(4): do pppoe output through netisr

2022-06-27 Thread Alexander Bluhm
On Mon, Jun 27, 2022 at 03:58:52PM +0300, Vitaliy Makkoveev wrote:
> We can't predict netlock state when pipex(4) related (*if_qstart)()
> handlers called. This means we can't use netlock within pppac_qstart()
> and pppx_if_qstart() handlers. But actually we can't avoid netlock only
> when we call (*if_output)() in pipex(4) PPPOE output path.
> 
> Introduce `pipexoutq' mbuf(9) queue, and put PPPOE related mbufs within.
> Do (*if_output)() calls within netisr handler with netlock held.
> 
> The netlock assertion is still kept within pppac_qstart(),
> pppx_if_qstart() and underlay code because some per session data relies
> on netlock. These assertions will be removed with following diffs.
> 
> I also want to use mbuf(9) queue for pppoe(4) input path and remove
> existing serialization hack provided by kernel lock.

OK bluhm@

>  #endif
> +#ifdef PIPEX
> + if (n & (1 << NETISR_PIPEX))
> + pipexintr();
> +#endif
> +
>   t |= n;
>   }

The empty line looks useless.

> @@ -45,6 +45,7 @@
>  #define  NETISR_PFSYNC   5   /* for pfsync "immediate" tx */
>  #define  NETISR_ARP  18  /* same as AF_LINK */
>  #define  NETISR_IPV6 24  /* same as AF_INET6 */
> +#define NETISR_PIPEX 27  /* for pipex processing */
>  #define  NETISR_PPP  28  /* for PPP processing */
>  #define  NETISR_BRIDGE   29  /* for bridge processing */
>  #define  NETISR_SWITCH   31  /* for switch dataplane */

Tab and space are inconsistent.  I prefer space after #define, then
it does not jump when you look at the diff.  Mixing looks ugly.



amd64 serial console changes

2022-06-27 Thread Mark Kettenis
The Ryzen Embedded V1000 processors have an arm64-style Synposys
DesignWare UART instead if a PC-compatible NS16x50 UART.  To make this
UART work as a serial console, we need to pass some more information
from the bootloader to the kernel.  This diff adds the logic to handle
that information to the kernel.  I'd like some folks that use a serial
console on their amd64 machines to test this.  But testing this diff
on amd64 machines with a glass console doesn't hurt.

Thanks,

Mark


Index: dev/acpi/com_acpi.c
===
RCS file: /cvs/src/sys/dev/acpi/com_acpi.c,v
retrieving revision 1.8
diff -u -p -r1.8 com_acpi.c
--- dev/acpi/com_acpi.c 6 Apr 2022 18:59:27 -   1.8
+++ dev/acpi/com_acpi.c 27 Jun 2022 21:42:08 -
@@ -49,6 +49,7 @@ const struct cfattach com_acpi_ca = {
 };
 
 const char *com_hids[] = {
+   "AMDI0020",
"HISI0031",
"PNP0501",
NULL
@@ -86,10 +87,14 @@ com_acpi_attach(struct device *parent, s
printf(" addr 0x%llx/0x%llx", aaa->aaa_addr[0], aaa->aaa_size[0]);
printf(" irq %d", aaa->aaa_irq[0]);
 
+   sc->sc.sc_frequency = COM_FREQ;
+   if (strcmp(aaa->aaa_dev, "AMDI0020") == 0)
+   sc->sc.sc_frequency = 4800;
+
sc->sc.sc_iot = aaa->aaa_bst[0];
sc->sc.sc_iobase = aaa->aaa_addr[0];
sc->sc.sc_frequency = acpi_getpropint(sc->sc_node, "clock-frequency",
-   COM_FREQ);
+   sc->sc.sc_frequency);
 
if (com_acpi_is_designware(aaa->aaa_dev)) {
intr = com_acpi_intr_designware;
@@ -153,7 +158,8 @@ com_acpi_is_console(struct com_acpi_soft
 int
 com_acpi_is_designware(const char *hid)
 {
-   return strcmp("HISI0031", hid) == 0;
+   return strcmp(hid, "AMDI0020") == 0 ||
+   strcmp(hid, "HISI0031") == 0;
 }
 
 int
Index: arch/amd64/amd64/bus_space.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/bus_space.c,v
retrieving revision 1.26
diff -u -p -r1.26 bus_space.c
--- arch/amd64/amd64/bus_space.c25 Apr 2015 21:31:24 -  1.26
+++ arch/amd64/amd64/bus_space.c27 Jun 2022 21:42:09 -
@@ -43,6 +43,8 @@
 #include 
 #include 
 
+extern int pmap_initialized;
+
 /*
  * Extent maps to manage I/O and memory space.  Allocate
  * storage for 16 regions in each, initially.  Later, ioport_malloc_safe
@@ -376,6 +378,11 @@ bus_space_map(bus_space_tag_t t, bus_add
return(0);
}
 
+   if (!pmap_initialized && bpa < 0x1) {
+   *bshp = (bus_space_handle_t)PMAP_DIRECT_MAP(bpa);
+   return(0);
+   }
+
/*
 * For memory space, map the bus physical address to
 * a kernel virtual address.
@@ -585,6 +592,11 @@ bus_space_unmap(bus_space_tag_t t, bus_s
bpa = (bus_addr_t)ISA_PHYSADDR(bsh);
if (IOM_BEGIN <= bpa && bpa <= IOM_END)
goto ok;
+
+   if (bsh >= PMAP_DIRECT_BASE && bsh < PMAP_DIRECT_END) {
+   bpa = PMAP_DIRECT_UNMAP(bsh);
+   goto ok;
+   }
 
va = trunc_page(bsh);
endva = round_page(bsh + size);
Index: arch/amd64/amd64/machdep.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.277
diff -u -p -r1.277 machdep.c
--- arch/amd64/amd64/machdep.c  1 Feb 2022 20:29:53 -   1.277
+++ arch/amd64/amd64/machdep.c  27 Jun 2022 21:42:09 -
@@ -1945,11 +1945,40 @@ getbootinfo(char *bootinfo, int bootinfo
/* generated by i386 boot loader */
break;
case BOOTARG_CONSDEV:
-   if (q->ba_size >= sizeof(bios_consdev_t) +
+   if (q->ba_size > sizeof(bios_oconsdev_t) +
offsetof(struct _boot_args32, ba_arg)) {
 #if NCOM > 0
bios_consdev_t *cdp =
(bios_consdev_t*)q->ba_arg;
+   static const int ports[] =
+   { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
+   int unit = minor(cdp->consdev);
+   uint64_t consaddr = cdp->consaddr;
+   if (consaddr == -1 && unit >= 0 &&
+   unit < nitems(ports))
+   consaddr = ports[unit];
+   if (major(cdp->consdev) == 8 &&
+   consaddr != -1) {
+   comconsunit = unit;
+   comconsaddr = consaddr;
+   comconsrate = cdp->conspeed;
+   comconsfreq = cdp->consfreq;
+   

Re: acpitz(4): perform passive cooling only when perfpolicy is AUTO

2022-06-27 Thread Bryan Steele
On Mon, Jun 27, 2022 at 11:01:31PM +0200, Stefan Hagen wrote:
> Hi,
> 
> acpitz(4) implements passive cooling, which starts throttling the CPU to 
> keep it under the temperature reported by the _PSV trip point.
> 
> https://uefi.org/specs/ACPI/6.4/11_Thermal_Management/thermal-control.html
> 
> The specs (1.1.5.1) leave the decision to activate passive cooling to
> the OS. It is a way to limit noise and heat rather than to protect the
> CPU (for which _HOT and _CRT are the better trip points).
> 
> I would like to restrict passive cooling to the AUTO perfpolicy.
> 
> For low (apm -L) and high (apm -H) it doesn't make much sense, because
>  - low is setting a low pstate anyway
>  - high is probably never set with the intention to have a cool and 
>quiet machine.
> 
> Compiling a kernel with apm -H before diff:
> 4m52.32s real21m02.62s user 4m47.00s system
> 
> Compiling a kernel with apm -H after diff:
> 2m42.65s real11m36.07s user 2m36.83s system
> 
> OK? Comments?
> 
> Best Regards,
> Stefan

Shouldn't this also take into consideration hw.power as well? If it
doesn't make sense for perfpolicy=high then it probably doesn't for
perfpolicy=auto when on AC power?

> 
> Index: sys/dev/acpi/acpitz.c
> ===
> RCS file: /home/cvs/src/sys/dev/acpi/acpitz.c,v
> retrieving revision 1.58
> diff -u -p -r1.58 acpitz.c
> --- sys/dev/acpi/acpitz.c 6 Apr 2022 18:59:27 -   1.58
> +++ sys/dev/acpi/acpitz.c 27 Jun 2022 19:25:55 -
> @@ -90,6 +90,7 @@ void(*acpitz_cpu_setperf)(int);
>  int  acpitz_perflevel = -1;
>  extern void  (*cpu_setperf)(int);
>  extern int   perflevel;
> +extern int   perfpolicy;
>  #define PERFSTEP 10
>  
>  #define ACPITZ_TRIPS (1L << 0)
> @@ -381,7 +382,7 @@ acpitz_refresh(void *arg)
>   sc->sc_tc1, sc->sc_tc2, sc->sc_psv);
>  
>   nperf = acpitz_perflevel;
> - if (sc->sc_psv <= sc->sc_tmp) {
> + if (sc->sc_psv <= sc->sc_tmp && perfpolicy == 1) {
>   /* Passive cooling enabled */
>   dnprintf(1, "%s: enabling passive %d %d\n",
>   DEVNAME(sc), sc->sc_tmp, sc->sc_psv);
> 
> 



Re: kernel lock in arp

2022-06-27 Thread Alexander Bluhm
On Mon, Jun 27, 2022 at 07:23:20PM +0200, Martin Pieuchot wrote:
> On 27/06/22(Mon) 19:11, Alexander Bluhm wrote:
> > On Mon, Jun 27, 2022 at 11:49:23AM +0200, Alexander Bluhm wrote:
> > > On Sat, May 21, 2022 at 10:50:28PM +0300, Vitaliy Makkoveev wrote:
> > > > This diff looks good, except the re-check after kernel lock. It???s
> > > > supposed `rt??? could became inconsistent, right? But what stops to
> > > > make it inconsistent after first unlocked RTF_LLINFO flag check?
> > > >
> > > > I think this re-check should gone.
> > >
> > > I have copied the re-check from intenal genua code.  I am not sure
> > > if it is really needed.  We know from Hrvoje that the diff with
> > > re-check is stable.  And we know that it crashes without kernel
> > > lock at all.
> > >
> > > I have talked with mpi@ about it.  The main problem is that we have
> > > no write lock when we change RTF_LLINFO.  Then rt_llinfo can get
> > > NULL or inconsistent.
> > >
> > > Plan is that I put some lock asserts into route add and delete.
> > > This helps to find the parts that modify RTF_LLINFO and rt_llinfo
> > > without exclusive lock.
> > >
> > > Maybe we need some kernel lock somewhere else.  Or we want to use
> > > some ARP mutex.  We could also add some comment and commit the diff
> > > that I have.  We know that it is faster and stable.  Pushing the
> > > kernel lock down or replacing it with something clever can always
> > > be done later.
> > 
> > We need the re-check.  I have tested it with a printf.  It is
> > triggered by running arp -d in a loop while forwarding.
> > 
> > The concurrent threads are these:
> > 
> > rtrequest_delete(8000246b7428,3,80775048,8000246b7510,0) at 
> > rtrequest_delete+0x67
> > rtdeletemsg(fd8834a23550,80775048,0) at rtdeletemsg+0x1ad
> > rtrequest(b,8000246b7678,3,8000246b7718,0) at rtrequest+0x55c
> > rt_clone(8000246b7780,8000246b78f8,0) at rt_clone+0x73
> > rtalloc_mpath(8000246b78f8,fd8003169ad8,0) at rtalloc_mpath+0x4c
> > ip_forward(fd80b8cc7e00,8077d048,fd8834a230f0,0) at 
> > ip_forward+0x137
> > ip_input_if(8000246b7a28,8000246b7a34,4,0,8077d048) at 
> > ip_input_if+0x353
> > ipv4_input(8077d048,fd80b8cc7e00) at ipv4_input+0x39
> > ether_input(8077d048,fd80b8cc7e00) at ether_input+0x3ad
> > if_input_process(8077d048,8000246b7b18) at if_input_process+0x6f
> > ifiq_process(8077d458) at ifiq_process+0x69
> > taskq_thread(80036080) at taskq_thread+0x100
> > 
> > rtrequest_delete(8000246c8d08,3,80775048,8000246c8df0,0) at 
> > rtrequest_delete+0x67
> > rtdeletemsg(fd8834a230f0,80775048,0) at rtdeletemsg+0x1ad
> > rtrequest(b,8000246c8f58,3,8000246c8ff8,0) at rtrequest+0x55c
> > rt_clone(8000246c9060,8000246c90b8,0) at rt_clone+0x73
> > rtalloc_mpath(8000246c90b8,fd8002c754d8,0) at rtalloc_mpath+0x4c
> > in_ouraddr(fd8094771b00,8077d048,8000246c9138) at 
> > in_ouraddr+0x84
> > ip_input_if(8000246c91d8,8000246c91e4,4,0,8077d048) at 
> > ip_input_if+0x1cd
> > ipv4_input(8077d048,fd8094771b00) at ipv4_input+0x39
> > ether_input(8077d048,fd8094771b00) at ether_input+0x3ad
> > if_input_process(8077d048,8000246c92c8) at if_input_process+0x6f
> > ifiq_process(80781400) at ifiq_process+0x69
> > taskq_thread(80036200) at taskq_thread+0x100
> > 
> > I have added a comment why kernel lock protects us.  I would like
> > to get this in.  It has been tested, reduces the kernel lock and
> > is faster.  A more clever lock can be done later.
> > 
> > ok?
> 
> I don't understand how the KERNEL_LOCK() there prevents rtdeletemsg()
> from running.  rtrequest_delete() seems completely broken it assumes it
> holds an exclusive lock.
> 
> To "fix" arp the KERNEL_LOCK() should also be taken in RTM_DELETE and
> RTM_RESOLVE inside arp_rtrequest().  Or maybe around ifp->if_rtrequest()

The kernel lock is already here:

int
rt_clone(struct rtentry **rtp, struct sockaddr *dst, unsigned int rtableid)
{
...
KERNEL_LOCK();
error = rtrequest(RTM_RESOLVE, , rt->rt_priority - 1, ,
rtableid);
KERNEL_UNLOCK();
...
}

> 
> But it doesn't mean there isn't another problem in rtdeletemsg()...
> 
> > Index: net/if_ethersubr.c
> > ===
> > RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
> > retrieving revision 1.281
> > diff -u -p -r1.281 if_ethersubr.c
> > --- net/if_ethersubr.c  26 Jun 2022 21:19:53 -  1.281
> > +++ net/if_ethersubr.c  27 Jun 2022 16:55:15 -
> > @@ -221,10 +221,7 @@ ether_resolve(struct ifnet *ifp, struct 
> >  
> > switch (af) {
> > case AF_INET:
> > -   KERNEL_LOCK();
> > -   /* XXXSMP there is a MP race in arpresolve() */
> > error = arpresolve(ifp, rt, m, dst, 

Re: kernel lock in arp

2022-06-27 Thread Martin Pieuchot
On 27/06/22(Mon) 19:11, Alexander Bluhm wrote:
> On Mon, Jun 27, 2022 at 11:49:23AM +0200, Alexander Bluhm wrote:
> > On Sat, May 21, 2022 at 10:50:28PM +0300, Vitaliy Makkoveev wrote:
> > > This diff looks good, except the re-check after kernel lock. It???s
> > > supposed `rt??? could became inconsistent, right? But what stops to
> > > make it inconsistent after first unlocked RTF_LLINFO flag check?
> > >
> > > I think this re-check should gone.
> >
> > I have copied the re-check from intenal genua code.  I am not sure
> > if it is really needed.  We know from Hrvoje that the diff with
> > re-check is stable.  And we know that it crashes without kernel
> > lock at all.
> >
> > I have talked with mpi@ about it.  The main problem is that we have
> > no write lock when we change RTF_LLINFO.  Then rt_llinfo can get
> > NULL or inconsistent.
> >
> > Plan is that I put some lock asserts into route add and delete.
> > This helps to find the parts that modify RTF_LLINFO and rt_llinfo
> > without exclusive lock.
> >
> > Maybe we need some kernel lock somewhere else.  Or we want to use
> > some ARP mutex.  We could also add some comment and commit the diff
> > that I have.  We know that it is faster and stable.  Pushing the
> > kernel lock down or replacing it with something clever can always
> > be done later.
> 
> We need the re-check.  I have tested it with a printf.  It is
> triggered by running arp -d in a loop while forwarding.
> 
> The concurrent threads are these:
> 
> rtrequest_delete(8000246b7428,3,80775048,8000246b7510,0) at 
> rtrequest_delete+0x67
> rtdeletemsg(fd8834a23550,80775048,0) at rtdeletemsg+0x1ad
> rtrequest(b,8000246b7678,3,8000246b7718,0) at rtrequest+0x55c
> rt_clone(8000246b7780,8000246b78f8,0) at rt_clone+0x73
> rtalloc_mpath(8000246b78f8,fd8003169ad8,0) at rtalloc_mpath+0x4c
> ip_forward(fd80b8cc7e00,8077d048,fd8834a230f0,0) at 
> ip_forward+0x137
> ip_input_if(8000246b7a28,8000246b7a34,4,0,8077d048) at 
> ip_input_if+0x353
> ipv4_input(8077d048,fd80b8cc7e00) at ipv4_input+0x39
> ether_input(8077d048,fd80b8cc7e00) at ether_input+0x3ad
> if_input_process(8077d048,8000246b7b18) at if_input_process+0x6f
> ifiq_process(8077d458) at ifiq_process+0x69
> taskq_thread(80036080) at taskq_thread+0x100
> 
> rtrequest_delete(8000246c8d08,3,80775048,8000246c8df0,0) at 
> rtrequest_delete+0x67
> rtdeletemsg(fd8834a230f0,80775048,0) at rtdeletemsg+0x1ad
> rtrequest(b,8000246c8f58,3,8000246c8ff8,0) at rtrequest+0x55c
> rt_clone(8000246c9060,8000246c90b8,0) at rt_clone+0x73
> rtalloc_mpath(8000246c90b8,fd8002c754d8,0) at rtalloc_mpath+0x4c
> in_ouraddr(fd8094771b00,8077d048,8000246c9138) at 
> in_ouraddr+0x84
> ip_input_if(8000246c91d8,8000246c91e4,4,0,8077d048) at 
> ip_input_if+0x1cd
> ipv4_input(8077d048,fd8094771b00) at ipv4_input+0x39
> ether_input(8077d048,fd8094771b00) at ether_input+0x3ad
> if_input_process(8077d048,8000246c92c8) at if_input_process+0x6f
> ifiq_process(80781400) at ifiq_process+0x69
> taskq_thread(80036200) at taskq_thread+0x100
> 
> I have added a comment why kernel lock protects us.  I would like
> to get this in.  It has been tested, reduces the kernel lock and
> is faster.  A more clever lock can be done later.
> 
> ok?

I don't understand how the KERNEL_LOCK() there prevents rtdeletemsg()
from running.  rtrequest_delete() seems completely broken it assumes it
holds an exclusive lock.

To "fix" arp the KERNEL_LOCK() should also be taken in RTM_DELETE and
RTM_RESOLVE inside arp_rtrequest().  Or maybe around ifp->if_rtrequest()

But it doesn't mean there isn't another problem in rtdeletemsg()...

> Index: net/if_ethersubr.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
> retrieving revision 1.281
> diff -u -p -r1.281 if_ethersubr.c
> --- net/if_ethersubr.c26 Jun 2022 21:19:53 -  1.281
> +++ net/if_ethersubr.c27 Jun 2022 16:55:15 -
> @@ -221,10 +221,7 @@ ether_resolve(struct ifnet *ifp, struct 
>  
>   switch (af) {
>   case AF_INET:
> - KERNEL_LOCK();
> - /* XXXSMP there is a MP race in arpresolve() */
>   error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> - KERNEL_UNLOCK();
>   if (error)
>   return (error);
>   eh->ether_type = htons(ETHERTYPE_IP);
> @@ -285,10 +282,7 @@ ether_resolve(struct ifnet *ifp, struct 
>   break;
>  #endif
>   case AF_INET:
> - KERNEL_LOCK();
> - /* XXXSMP there is a MP race in arpresolve() */
>   error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
> -

Re: kernel lock in arp

2022-06-27 Thread Alexander Bluhm
On Mon, Jun 27, 2022 at 11:49:23AM +0200, Alexander Bluhm wrote:
> On Sat, May 21, 2022 at 10:50:28PM +0300, Vitaliy Makkoveev wrote:
> > This diff looks good, except the re-check after kernel lock. It???s
> > supposed `rt??? could became inconsistent, right? But what stops to
> > make it inconsistent after first unlocked RTF_LLINFO flag check?
> >
> > I think this re-check should gone.
>
> I have copied the re-check from intenal genua code.  I am not sure
> if it is really needed.  We know from Hrvoje that the diff with
> re-check is stable.  And we know that it crashes without kernel
> lock at all.
>
> I have talked with mpi@ about it.  The main problem is that we have
> no write lock when we change RTF_LLINFO.  Then rt_llinfo can get
> NULL or inconsistent.
>
> Plan is that I put some lock asserts into route add and delete.
> This helps to find the parts that modify RTF_LLINFO and rt_llinfo
> without exclusive lock.
>
> Maybe we need some kernel lock somewhere else.  Or we want to use
> some ARP mutex.  We could also add some comment and commit the diff
> that I have.  We know that it is faster and stable.  Pushing the
> kernel lock down or replacing it with something clever can always
> be done later.

We need the re-check.  I have tested it with a printf.  It is
triggered by running arp -d in a loop while forwarding.

The concurrent threads are these:

rtrequest_delete(8000246b7428,3,80775048,8000246b7510,0) at 
rtrequest_delete+0x67
rtdeletemsg(fd8834a23550,80775048,0) at rtdeletemsg+0x1ad
rtrequest(b,8000246b7678,3,8000246b7718,0) at rtrequest+0x55c
rt_clone(8000246b7780,8000246b78f8,0) at rt_clone+0x73
rtalloc_mpath(8000246b78f8,fd8003169ad8,0) at rtalloc_mpath+0x4c
ip_forward(fd80b8cc7e00,8077d048,fd8834a230f0,0) at 
ip_forward+0x137
ip_input_if(8000246b7a28,8000246b7a34,4,0,8077d048) at 
ip_input_if+0x353
ipv4_input(8077d048,fd80b8cc7e00) at ipv4_input+0x39
ether_input(8077d048,fd80b8cc7e00) at ether_input+0x3ad
if_input_process(8077d048,8000246b7b18) at if_input_process+0x6f
ifiq_process(8077d458) at ifiq_process+0x69
taskq_thread(80036080) at taskq_thread+0x100

rtrequest_delete(8000246c8d08,3,80775048,8000246c8df0,0) at 
rtrequest_delete+0x67
rtdeletemsg(fd8834a230f0,80775048,0) at rtdeletemsg+0x1ad
rtrequest(b,8000246c8f58,3,8000246c8ff8,0) at rtrequest+0x55c
rt_clone(8000246c9060,8000246c90b8,0) at rt_clone+0x73
rtalloc_mpath(8000246c90b8,fd8002c754d8,0) at rtalloc_mpath+0x4c
in_ouraddr(fd8094771b00,8077d048,8000246c9138) at 
in_ouraddr+0x84
ip_input_if(8000246c91d8,8000246c91e4,4,0,8077d048) at 
ip_input_if+0x1cd
ipv4_input(8077d048,fd8094771b00) at ipv4_input+0x39
ether_input(8077d048,fd8094771b00) at ether_input+0x3ad
if_input_process(8077d048,8000246c92c8) at if_input_process+0x6f
ifiq_process(80781400) at ifiq_process+0x69
taskq_thread(80036200) at taskq_thread+0x100

I have added a comment why kernel lock protects us.  I would like
to get this in.  It has been tested, reduces the kernel lock and
is faster.  A more clever lock can be done later.

ok?

bluhm

Index: net/if_ethersubr.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.281
diff -u -p -r1.281 if_ethersubr.c
--- net/if_ethersubr.c  26 Jun 2022 21:19:53 -  1.281
+++ net/if_ethersubr.c  27 Jun 2022 16:55:15 -
@@ -221,10 +221,7 @@ ether_resolve(struct ifnet *ifp, struct 
 
switch (af) {
case AF_INET:
-   KERNEL_LOCK();
-   /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
-   KERNEL_UNLOCK();
if (error)
return (error);
eh->ether_type = htons(ETHERTYPE_IP);
@@ -285,10 +282,7 @@ ether_resolve(struct ifnet *ifp, struct 
break;
 #endif
case AF_INET:
-   KERNEL_LOCK();
-   /* XXXSMP there is a MP race in arpresolve() */
error = arpresolve(ifp, rt, m, dst, eh->ether_dhost);
-   KERNEL_UNLOCK();
if (error)
return (error);
break;
Index: netinet/if_ether.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.249
diff -u -p -r1.249 if_ether.c
--- netinet/if_ether.c  27 Jun 2022 12:47:07 -  1.249
+++ netinet/if_ether.c  27 Jun 2022 16:55:15 -
@@ -352,8 +352,7 @@ arpresolve(struct ifnet *ifp, struct rte
log(LOG_DEBUG, "%s: %s: route contains no arp 

CoW & neighbor pages

2022-06-27 Thread Martin Pieuchot
When faulting a page after COW neighborhood pages are likely to already
be entered.   So speed up the fault by doing a narrow fault (do not try
to map in adjacent pages).

This is stolen from NetBSD.

ok?

Index: uvm/uvm_fault.c
===
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.129
diff -u -p -r1.129 uvm_fault.c
--- uvm/uvm_fault.c 4 Apr 2022 09:27:05 -   1.129
+++ uvm/uvm_fault.c 27 Jun 2022 17:05:26 -
@@ -737,6 +737,16 @@ uvm_fault_check(struct uvm_faultinfo *uf
}
 
/*
+* for a case 2B fault waste no time on adjacent pages because
+* they are likely already entered.
+*/
+   if (uobj != NULL && amap != NULL &&
+   (flt->access_type & PROT_WRITE) != 0) {
+   /* wide fault (!narrow) */
+   flt->narrow = TRUE;
+   }
+
+   /*
 * establish range of interest based on advice from mapper
 * and then clip to fit map entry.   note that we only want
 * to do this the first time through the fault.   if we



Re: xhci@acpi

2022-06-27 Thread Jonathan Gray
On Mon, Jun 27, 2022 at 05:54:03PM +0200, Mark Kettenis wrote:
> So ACPI uses separate HIDs/CIDs for xhci(4) with and without debug
> support.  PNP0D15 is the one without.  Seen on the Lenovo x13s.
> 
> ok?
> 

Might as well add PNP0D25 to ehci_hids[] also.

ok jsg@ for both

> 
> Index: xhci_acpi.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/xhci_acpi.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 xhci_acpi.c
> --- xhci_acpi.c   6 Apr 2022 18:59:27 -   1.6
> +++ xhci_acpi.c   27 Jun 2022 15:51:04 -
> @@ -52,6 +52,7 @@ const struct cfattach xhci_acpi_ca = {
>  
>  const char *xhci_hids[] = {
>   "PNP0D10",
> + "PNP0D15",
>   NULL
>  };
>  
> 
> 



xhci@acpi

2022-06-27 Thread Mark Kettenis
So ACPI uses separate HIDs/CIDs for xhci(4) with and without debug
support.  PNP0D15 is the one without.  Seen on the Lenovo x13s.

ok?


Index: xhci_acpi.c
===
RCS file: /cvs/src/sys/dev/acpi/xhci_acpi.c,v
retrieving revision 1.6
diff -u -p -r1.6 xhci_acpi.c
--- xhci_acpi.c 6 Apr 2022 18:59:27 -   1.6
+++ xhci_acpi.c 27 Jun 2022 15:51:04 -
@@ -52,6 +52,7 @@ const struct cfattach xhci_acpi_ca = {
 
 const char *xhci_hids[] = {
"PNP0D10",
+   "PNP0D15",
NULL
 };
 



Remove d_poll from struct cdevsw

2022-06-27 Thread Visa Hankala
Remove the now-unused d_poll field from struct cdevsw.

This diff adjusts the various conf.{c,h} bits. To avoid making this
patch too unwieldy, I leave the removal of the device poll functions
for another patch.

(Compile-)tested on amd64, arm64, armv7, i386, loongson, macppc, octeon,
powerpc64, riscv64 and sparc64.

OK?

Index: arch/amd64/amd64/conf.c
===
RCS file: src/sys/arch/amd64/amd64/conf.c,v
retrieving revision 1.74
diff -u -p -r1.74 conf.c
--- arch/amd64/amd64/conf.c 11 Nov 2021 10:03:08 -  1.74
+++ arch/amd64/amd64/conf.c 27 Jun 2022 14:19:10 -
@@ -79,14 +79,14 @@ int nblkdev = nitems(bdevsw);
 #define cdev_ocis_init(c,n) { \
 dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
 (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-(dev_type_stop((*))) enodev, 0,  seltrue, \
+(dev_type_stop((*))) enodev, 0, \
 (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
 
 /* open, close, read */
 #define cdev_nvram_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read), \
(dev_type_write((*))) enodev, (dev_type_ioctl((*))) enodev, \
-   (dev_type_stop((*))) enodev, 0, seltrue, \
+   (dev_type_stop((*))) enodev, 0, \
(dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
 
 /* open, close, ioctl */
@@ -95,7 +95,7 @@ int   nblkdev = nitems(bdevsw);
(dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, \
 dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) enodev, 0, seltrue, \
+   (dev_type_stop((*))) enodev, 0, \
(dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
 
 #definemmread  mmrw
Index: arch/amd64/include/conf.h
===
RCS file: src/sys/arch/amd64/include/conf.h,v
retrieving revision 1.8
diff -u -p -r1.8 conf.h
--- arch/amd64/include/conf.h   13 May 2020 08:32:43 -  1.8
+++ arch/amd64/include/conf.h   27 Jun 2022 14:19:10 -
@@ -46,7 +46,7 @@ cdev_decl(bios);
 #definecdev_acpi_init(c,n) {\
dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) enodev, 0, selfalse, \
+   (dev_type_stop((*))) enodev, 0, \
(dev_type_mmap((*))) enodev, 0, 0, dev_init(c,n,kqfilter) }
 cdev_decl(acpi);
 
Index: arch/arm/include/conf.h
===
RCS file: src/sys/arch/arm/include/conf.h,v
retrieving revision 1.11
diff -u -p -r1.11 conf.h
--- arch/arm/include/conf.h 21 May 2016 21:24:36 -  1.11
+++ arch/arm/include/conf.h 27 Jun 2022 14:19:10 -
@@ -61,7 +61,7 @@ cdev_decl(fd);
 #define cdev_apm_init(c,n) { \
 dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
 (dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) enodev, 0, selfalse, \
+   (dev_type_stop((*))) enodev, 0, \
(dev_type_mmap((*))) enodev, 0, 0, dev_init(c,n,kqfilter) }
 
 cdev_decl(com);
@@ -74,7 +74,7 @@ cdev_decl(spkr);
 #define cdev_openprom_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) nullop, 0, selfalse, \
+   (dev_type_stop((*))) nullop, 0, \
(dev_type_mmap((*))) enodev }
 
 cdev_decl(openprom);
Index: arch/arm64/include/conf.h
===
RCS file: src/sys/arch/arm64/include/conf.h,v
retrieving revision 1.3
diff -u -p -r1.3 conf.h
--- arch/arm64/include/conf.h   23 Jan 2019 09:57:36 -  1.3
+++ arch/arm64/include/conf.h   27 Jun 2022 14:19:10 -
@@ -43,7 +43,7 @@ cdev_decl(mm);
 #define cdev_openprom_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) nullop, 0, selfalse, \
+   (dev_type_stop((*))) nullop, 0, \
(dev_type_mmap((*))) enodev }
 
 cdev_decl(openprom);
@@ -52,7 +52,7 @@ cdev_decl(openprom);
 #define cdev_acpiapm_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), (dev_type_read((*))) enodev, \
(dev_type_write((*))) enodev, dev_init(c,n,ioctl), \
-   (dev_type_stop((*))) enodev, 0, selfalse, \
+   (dev_type_stop((*))) enodev, 0, \
(dev_type_mmap((*))) enodev, 0, 0, dev_init(c,n,kqfilter) }
 
 cdev_decl(apm);
Index: arch/i386/i386/conf.c
===
RCS file: src/sys/arch/i386/i386/conf.c,v
retrieving revision 1.172
diff -u -p -r1.172 conf.c
--- arch/i386/i386/conf.c   11 Nov 2021 10:03:09 -  1.172
+++ arch/i386/i386/conf.c   27 Jun 2022 14:19:10 -
@@ -81,21 +81,21 @@ int nblkdev 

Re: Remove switch(4) leftovers

2022-06-27 Thread Vitaliy Makkoveev
On Mon, Jun 27, 2022 at 01:44:55PM +, Visa Hankala wrote:
> Remove some switch(4) leftovers.
> 
> OK?
> 

ok mvs

> Index: etc/etc.hppa/MAKEDEV.md
> ===
> RCS file: src/etc/etc.hppa/MAKEDEV.md,v
> retrieving revision 1.68
> diff -u -p -r1.68 MAKEDEV.md
> --- etc/etc.hppa/MAKEDEV.md   11 Nov 2021 09:47:33 -  1.68
> +++ etc/etc.hppa/MAKEDEV.md   27 Jun 2022 13:41:53 -
> @@ -108,4 +108,3 @@ target(all, rd, 0)dnl
>  target(all, cd, 0, 1)dnl
>  target(all, sd, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)dnl
>  target(all, vnd, 0, 1, 2, 3)dnl
> -target(all, switch, 0, 1, 2, 3)dnl
> Index: sys/sys/conf.h
> ===
> RCS file: src/sys/sys/conf.h,v
> retrieving revision 1.156
> diff -u -p -r1.156 conf.h
> --- sys/sys/conf.h23 Jan 2021 05:08:36 -  1.156
> +++ sys/sys/conf.h27 Jun 2022 13:41:53 -
> @@ -257,13 +257,6 @@ extern struct cdevsw cdevsw[];
>   0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
>   0, 0, dev_init(c,n,kqfilter) }
>  
> -/* open, close, read, write, ioctl, poll, kqfilter -- XXX should be generic 
> device */
> -#define cdev_switch_init(c,n) {  
> \
> - dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read),\
> - dev_init(c,n,write), dev_init(c,n,ioctl), (dev_type_stop((*))) enodev, \
> - 0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
> - 0, 0, dev_init(c,n,kqfilter) }
> -
>  /* open, close, ioctl, poll, kqfilter -- XXX should be generic device */
>  #define cdev_vscsi_init(c,n) { \
>   dev_init(c,n,open), dev_init(c,n,close), \
> @@ -607,7 +600,6 @@ cdev_decl(pf);
>  
>  cdev_decl(tun);
>  cdev_decl(tap);
> -cdev_decl(switch);
>  cdev_decl(pppx);
>  cdev_decl(pppac);
>  
> 



Re: proctree lock diff

2022-06-27 Thread Jeremie Courreges-Anglas
On Fri, Nov 29 2019, Martin Pieuchot  wrote:
> For archive, here's the diff on top of -current.

Here's a refreshed diff with the bare minimum changes needed to prevent
a WITNESS kernel from crashing when running /usr/src/regress/sys.


Index: kern/exec_elf.c
===
RCS file: /home/cvs/src/sys/kern/exec_elf.c,v
retrieving revision 1.166
diff -u -p -r1.166 exec_elf.c
--- kern/exec_elf.c 12 May 2022 16:29:58 -  1.166
+++ kern/exec_elf.c 27 Jun 2022 13:37:58 -
@@ -1185,12 +1185,14 @@ coredump_notes_elf(struct proc *p, void 
cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
 
cpi.cpi_pid = pr->ps_pid;
+   rw_enter_read();
cpi.cpi_ppid = pr->ps_ppid;
cpi.cpi_pgrp = pr->ps_pgid;
if (pr->ps_session->s_leader)
cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
else
cpi.cpi_sid = 0;
+   rw_exit_read();
 
cpi.cpi_ruid = p->p_ucred->cr_ruid;
cpi.cpi_euid = p->p_ucred->cr_uid;
Index: kern/kern_acct.c
===
RCS file: /home/cvs/src/sys/kern/kern_acct.c,v
retrieving revision 1.46
diff -u -p -r1.46 kern_acct.c
--- kern/kern_acct.c22 Feb 2022 17:22:29 -  1.46
+++ kern/kern_acct.c27 Jun 2022 13:37:58 -
@@ -226,11 +226,13 @@ acct_process(struct proc *p)
acct.ac_gid = pr->ps_ucred->cr_rgid;
 
/* (7) The terminal from which the process was started */
+   rw_enter_read();
if ((pr->ps_flags & PS_CONTROLT) &&
pr->ps_pgrp->pg_session->s_ttyp)
acct.ac_tty = pr->ps_pgrp->pg_session->s_ttyp->t_dev;
else
acct.ac_tty = -1;
+   rw_exit_read();
 
/* (8) The boolean flags that tell how process terminated or 
misbehaved. */
acct.ac_flag = pr->ps_acflag;
Index: kern/kern_exec.c
===
RCS file: /home/cvs/src/sys/kern/kern_exec.c,v
retrieving revision 1.230
diff -u -p -r1.230 kern_exec.c
--- kern/kern_exec.c22 Feb 2022 17:14:14 -  1.230
+++ kern/kern_exec.c27 Jun 2022 13:37:58 -
@@ -520,9 +520,11 @@ sys_execve(struct proc *p, void *v, regi
 
atomic_setbits_int(>ps_flags, PS_EXEC);
if (pr->ps_flags & PS_PPWAIT) {
+   rw_enter_read();
atomic_clearbits_int(>ps_flags, PS_PPWAIT);
atomic_clearbits_int(>ps_pptr->ps_flags, PS_ISPWAIT);
wakeup(pr->ps_pptr);
+   rw_exit_read();
}
 
/*
Index: kern/kern_exit.c
===
RCS file: /home/cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.203
diff -u -p -r1.203 kern_exit.c
--- kern/kern_exit.c31 Mar 2022 01:41:22 -  1.203
+++ kern/kern_exit.c27 Jun 2022 13:37:58 -
@@ -154,6 +154,7 @@ exit1(struct proc *p, int xexit, int xsi
 * is set; we wake up the parent early to avoid deadlock.
 */
if (pr->ps_flags & PS_PPWAIT) {
+   /* XXXPT `proctreelk` ? */
atomic_clearbits_int(>ps_flags, PS_PPWAIT);
atomic_clearbits_int(>ps_pptr->ps_flags,
PS_ISPWAIT);
@@ -222,6 +223,7 @@ exit1(struct proc *p, int xexit, int xsi
 * If parent has the SAS_NOCLDWAIT flag set, we're not
 * going to become a zombie.
 */
+   /* XXXPT `proctreelk` ? */
if (pr->ps_pptr->ps_sigacts->ps_sigflags & SAS_NOCLDWAIT)
atomic_setbits_int(>ps_flags, PS_NOZOMBIE);
}
@@ -236,11 +238,7 @@ exit1(struct proc *p, int xexit, int xsi
 * thread of a process that isn't PS_NOZOMBIE, we'll put
 * the process on the zombprocess list below.
 */
-   /*
-* NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
-*/
-   p->p_stat = SDEAD;
-
+   rw_enter_write();
LIST_REMOVE(p, p_hash);
LIST_REMOVE(p, p_list);
 
@@ -300,6 +298,7 @@ exit1(struct proc *p, int xexit, int xsi
process_clear_orphan(qr);
}
}
+   rw_exit_write();
 
/* add thread's accumulated rusage into the process's total */
ruadd(rup, >p_ru);
@@ -325,9 +324,13 @@ exit1(struct proc *p, int xexit, int xsi
 * wait4() to return ECHILD.
 */
if (pr->ps_flags & PS_NOZOMBIE) {
-   struct process *ppr = pr->ps_pptr;
+   struct process *ppr;
+
+   rw_enter_write();
+   ppr = pr->ps_pptr;
process_reparent(pr, initprocess);
wakeup(ppr);
+   

Re: struct refcnt for routes

2022-06-27 Thread Vitaliy Makkoveev
On Mon, Jun 27, 2022 at 03:21:56PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Use ref count API for routes.
> 
> ok?
> 

Some time ago, I posted the diff which removes the logic around
negative `refcnt' within rtfree(). This diff was stopped by mpi@, he
said we could underflow rt->rt_refcnt and this logic is required to
prevent panics. I don't know, is it true, but I didn't find any report
about "rtfree: ... not freed (neg refs)" in the dmesg(8) output.

I think the logic around negative `refcnt' is much worse then panic,
because we trying hide the problem instead of catch and fix it.

ok mvs@ unless someone else denies this diff.

> bluhm
> 
> Index: net/route.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
> retrieving revision 1.410
> diff -u -p -r1.410 route.c
> --- net/route.c   5 May 2022 13:57:40 -   1.410
> +++ net/route.c   27 Jun 2022 13:03:16 -
> @@ -488,40 +488,34 @@ rt_putgwroute(struct rtentry *rt)
>  void
>  rtref(struct rtentry *rt)
>  {
> - atomic_inc_int(>rt_refcnt);
> + refcnt_take(>rt_refcnt);
>  }
>  
>  void
>  rtfree(struct rtentry *rt)
>  {
> - int  refcnt;
> -
>   if (rt == NULL)
>   return;
>  
> - refcnt = (int)atomic_dec_int_nv(>rt_refcnt);
> - if (refcnt <= 0) {
> - KASSERT(!ISSET(rt->rt_flags, RTF_UP));
> - KASSERT(!RT_ROOT(rt));
> - atomic_dec_int();
> - if (refcnt < 0) {
> - printf("rtfree: %p not freed (neg refs)\n", rt);
> - return;
> - }
> + if (refcnt_rele(>rt_refcnt) == 0)
> + return;
>  
> - KERNEL_LOCK();
> - rt_timer_remove_all(rt);
> - ifafree(rt->rt_ifa);
> - rtlabel_unref(rt->rt_labelid);
> + KASSERT(!ISSET(rt->rt_flags, RTF_UP));
> + KASSERT(!RT_ROOT(rt));
> + atomic_dec_int();
> +
> + KERNEL_LOCK();
> + rt_timer_remove_all(rt);
> + ifafree(rt->rt_ifa);
> + rtlabel_unref(rt->rt_labelid);
>  #ifdef MPLS
> - rt_mpls_clear(rt);
> + rt_mpls_clear(rt);
>  #endif
> - free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
> - free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
> - KERNEL_UNLOCK();
> + free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
> + free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
> + KERNEL_UNLOCK();
>  
> - pool_put(_pool, rt);
> - }
> + pool_put(_pool, rt);
>  }
>  
>  void
> @@ -877,7 +871,7 @@ rtrequest(int req, struct rt_addrinfo *i
>   return (ENOBUFS);
>   }
>  
> - rt->rt_refcnt = 1;
> + refcnt_init(>rt_refcnt);
>   rt->rt_flags = info->rti_flags | RTF_UP;
>   rt->rt_priority = prio; /* init routing priority */
>   LIST_INIT(>rt_timer);
> @@ -1864,8 +1858,8 @@ db_show_rtentry(struct rtentry *rt, void
>  {
>   db_printf("rtentry=%p", rt);
>  
> - db_printf(" flags=0x%x refcnt=%d use=%llu expire=%lld rtableid=%u\n",
> - rt->rt_flags, rt->rt_refcnt, rt->rt_use, rt->rt_expire, id);
> + db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld rtableid=%u\n",
> + rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire, id);
>  
>   db_printf(" key="); db_print_sa(rt_key(rt));
>   db_printf(" plen=%d", rt_plen(rt));
> Index: net/route.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
> retrieving revision 1.194
> diff -u -p -r1.194 route.h
> --- net/route.h   5 May 2022 13:57:40 -   1.194
> +++ net/route.h   27 Jun 2022 13:03:16 -
> @@ -119,7 +119,7 @@ struct rtentry {
>   struct rt_kmetrics rt_rmx;  /* metrics used by rx'ing protocols */
>   unsigned int rt_ifidx;  /* the answer: interface to use */
>   unsigned int rt_flags;  /* up/down?, host/net */
> - int  rt_refcnt; /* # held references */
> + struct refcntrt_refcnt; /* # held references */
>   int  rt_plen;   /* prefix length */
>   uint16_t rt_labelid;/* route label ID */
>   uint8_t  rt_priority;   /* routing priority to use */
> Index: net/rtable.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtable.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 rtable.c
> --- net/rtable.c  19 Apr 2022 15:44:56 -  1.77
> +++ net/rtable.c  27 Jun 2022 13:03:16 -
> @@ -680,7 +680,7 @@ rtable_delete(unsigned int rtableid, str
>   npaths++;
>  
>   if (npaths > 1) {
> - KASSERT(rt->rt_refcnt >= 1);
> + KASSERT(refcnt_read(>rt_refcnt) >= 1);
>   SRPL_REMOVE_LOCKED(_rc, >an_rtlist, rt, 

Re: Remove switch(4) leftovers

2022-06-27 Thread Claudio Jeker
On Mon, Jun 27, 2022 at 01:44:55PM +, Visa Hankala wrote:
> Remove some switch(4) leftovers.
> 
> OK?

OK claudio
 
> Index: etc/etc.hppa/MAKEDEV.md
> ===
> RCS file: src/etc/etc.hppa/MAKEDEV.md,v
> retrieving revision 1.68
> diff -u -p -r1.68 MAKEDEV.md
> --- etc/etc.hppa/MAKEDEV.md   11 Nov 2021 09:47:33 -  1.68
> +++ etc/etc.hppa/MAKEDEV.md   27 Jun 2022 13:41:53 -
> @@ -108,4 +108,3 @@ target(all, rd, 0)dnl
>  target(all, cd, 0, 1)dnl
>  target(all, sd, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)dnl
>  target(all, vnd, 0, 1, 2, 3)dnl
> -target(all, switch, 0, 1, 2, 3)dnl
> Index: sys/sys/conf.h
> ===
> RCS file: src/sys/sys/conf.h,v
> retrieving revision 1.156
> diff -u -p -r1.156 conf.h
> --- sys/sys/conf.h23 Jan 2021 05:08:36 -  1.156
> +++ sys/sys/conf.h27 Jun 2022 13:41:53 -
> @@ -257,13 +257,6 @@ extern struct cdevsw cdevsw[];
>   0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
>   0, 0, dev_init(c,n,kqfilter) }
>  
> -/* open, close, read, write, ioctl, poll, kqfilter -- XXX should be generic 
> device */
> -#define cdev_switch_init(c,n) {  
> \
> - dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read),\
> - dev_init(c,n,write), dev_init(c,n,ioctl), (dev_type_stop((*))) enodev, \
> - 0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
> - 0, 0, dev_init(c,n,kqfilter) }
> -
>  /* open, close, ioctl, poll, kqfilter -- XXX should be generic device */
>  #define cdev_vscsi_init(c,n) { \
>   dev_init(c,n,open), dev_init(c,n,close), \
> @@ -607,7 +600,6 @@ cdev_decl(pf);
>  
>  cdev_decl(tun);
>  cdev_decl(tap);
> -cdev_decl(switch);
>  cdev_decl(pppx);
>  cdev_decl(pppac);
>  
> 

-- 
:wq Claudio



Remove switch(4) leftovers

2022-06-27 Thread Visa Hankala
Remove some switch(4) leftovers.

OK?

Index: etc/etc.hppa/MAKEDEV.md
===
RCS file: src/etc/etc.hppa/MAKEDEV.md,v
retrieving revision 1.68
diff -u -p -r1.68 MAKEDEV.md
--- etc/etc.hppa/MAKEDEV.md 11 Nov 2021 09:47:33 -  1.68
+++ etc/etc.hppa/MAKEDEV.md 27 Jun 2022 13:41:53 -
@@ -108,4 +108,3 @@ target(all, rd, 0)dnl
 target(all, cd, 0, 1)dnl
 target(all, sd, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9)dnl
 target(all, vnd, 0, 1, 2, 3)dnl
-target(all, switch, 0, 1, 2, 3)dnl
Index: sys/sys/conf.h
===
RCS file: src/sys/sys/conf.h,v
retrieving revision 1.156
diff -u -p -r1.156 conf.h
--- sys/sys/conf.h  23 Jan 2021 05:08:36 -  1.156
+++ sys/sys/conf.h  27 Jun 2022 13:41:53 -
@@ -257,13 +257,6 @@ extern struct cdevsw cdevsw[];
0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
0, 0, dev_init(c,n,kqfilter) }
 
-/* open, close, read, write, ioctl, poll, kqfilter -- XXX should be generic 
device */
-#define cdev_switch_init(c,n) {
\
-   dev_init(c,n,open), dev_init(c,n,close), dev_init(c,n,read),\
-   dev_init(c,n,write), dev_init(c,n,ioctl), (dev_type_stop((*))) enodev, \
-   0, dev_init(c,n,poll), (dev_type_mmap((*))) enodev, \
-   0, 0, dev_init(c,n,kqfilter) }
-
 /* open, close, ioctl, poll, kqfilter -- XXX should be generic device */
 #define cdev_vscsi_init(c,n) { \
dev_init(c,n,open), dev_init(c,n,close), \
@@ -607,7 +600,6 @@ cdev_decl(pf);
 
 cdev_decl(tun);
 cdev_decl(tap);
-cdev_decl(switch);
 cdev_decl(pppx);
 cdev_decl(pppac);
 



Fix the swapper

2022-06-27 Thread Martin Pieuchot
Diff below contain 3 parts that can be committed independently.  The 3
of them are necessary to allow the pagedaemon to make progress in OOM
situation and to satisfy all the allocations waiting for pages in
specific ranges.

* uvm/uvm_pager.c part reserves a second segment for the page daemon.
  This is necessary to ensure the two uvm_pagermapin() calls needed by
  uvm_swap_io() succeed in emergency OOM situation.  (the 2nd segment is
  necessary when encryption or bouncing is required)

* uvm/uvm_swap.c part pre-allocates 16 pages in the DMA-reachable region
  for the same reason.  Note that a sleeping point is introduced because
  the pagedaemon is faster than the asynchronous I/O and in OOM
  situation it tends to stay busy building cluster that it then discard
  because no memory is available.

* uvm/uvm_pdaemon.c part changes the inner-loop scanning the inactive 
  list of pages to account for a given memory range.  Without this the
  daemon could spin infinitely doing nothing because the global limits
  are reached.

At lot could be improved, but this at least makes swapping work in OOM
situations.

Index: uvm/uvm_pager.c
===
RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
retrieving revision 1.78
diff -u -p -r1.78 uvm_pager.c
--- uvm/uvm_pager.c 18 Feb 2022 09:04:38 -  1.78
+++ uvm/uvm_pager.c 27 Jun 2022 08:44:41 -
@@ -58,8 +58,8 @@ const struct uvm_pagerops *uvmpagerops[]
  * The number of uvm_pseg instances is dynamic using an array segs.
  * At most UVM_PSEG_COUNT instances can exist.
  *
- * psegs[0] always exists (so that the pager can always map in pages).
- * psegs[0] element 0 is always reserved for the pagedaemon.
+ * psegs[0/1] always exist (so that the pager can always map in pages).
+ * psegs[0/1] element 0 are always reserved for the pagedaemon.
  *
  * Any other pseg is automatically created when no space is available
  * and automatically destroyed when it is no longer in use.
@@ -93,6 +93,7 @@ uvm_pager_init(void)
 
/* init pager map */
uvm_pseg_init([0]);
+   uvm_pseg_init([1]);
mtx_init(_pseg_lck, IPL_VM);
 
/* init ASYNC I/O queue */
@@ -168,9 +169,10 @@ pager_seg_restart:
goto pager_seg_fail;
}
 
-   /* Keep index 0 reserved for pagedaemon. */
-   if (pseg == [0] && curproc != uvm.pagedaemon_proc)
-   i = 1;
+   /* Keep indexes 0,1 reserved for pagedaemon. */
+   if ((pseg == [0] || pseg == [1]) &&
+   (curproc != uvm.pagedaemon_proc))
+   i = 2;
else
i = 0;
 
@@ -229,7 +231,7 @@ uvm_pseg_release(vaddr_t segaddr)
pseg->use &= ~(1 << id);
wakeup();
 
-   if (pseg != [0] && UVM_PSEG_EMPTY(pseg)) {
+   if ((pseg != [0] && pseg != [1]) && UVM_PSEG_EMPTY(pseg)) {
va = pseg->start;
pseg->start = 0;
}
Index: uvm/uvm_pdaemon.c
===
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.99
diff -u -p -r1.99 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c   12 May 2022 12:49:31 -  1.99
+++ uvm/uvm_pdaemon.c   27 Jun 2022 13:24:54 -
@@ -101,8 +101,8 @@ extern void drmbackoff(long);
  * local prototypes
  */
 
-void   uvmpd_scan(void);
-boolean_t  uvmpd_scan_inactive(struct pglist *);
+void   uvmpd_scan(struct uvm_pmalloc *);
+boolean_t  uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
 void   uvmpd_tune(void);
 void   uvmpd_drop(struct pglist *);
 
@@ -281,7 +281,7 @@ uvm_pageout(void *arg)
if (pma != NULL ||
((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
-   uvmpd_scan();
+   uvmpd_scan(pma);
}
 
/*
@@ -379,15 +379,15 @@ uvm_aiodone_daemon(void *arg)
  */
 
 boolean_t
-uvmpd_scan_inactive(struct pglist *pglst)
+uvmpd_scan_inactive(struct uvm_pmalloc *pma, struct pglist *pglst)
 {
boolean_t retval = FALSE;   /* assume we haven't hit target */
int free, result;
struct vm_page *p, *nextpg;
struct uvm_object *uobj;
-   struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
+   struct vm_page *pps[SWCLUSTPAGES], **ppsp;
int npages;
-   struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT];  /* XXX: see below */
+   struct vm_page *swpps[SWCLUSTPAGES];/* XXX: see below */
int swnpages, swcpages; /* XXX: see below */
int swslot;
struct vm_anon *anon;
@@ -404,8 +404,27 @@ uvmpd_scan_inactive(struct pglist *pglst
swnpages = swcpages = 0;
free = 0;
dirtyreacts = 0;
+   p = NULL;
 
-   for (p 

struct refcnt for routes

2022-06-27 Thread Alexander Bluhm
Hi,

Use ref count API for routes.

ok?

bluhm

Index: net/route.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.c,v
retrieving revision 1.410
diff -u -p -r1.410 route.c
--- net/route.c 5 May 2022 13:57:40 -   1.410
+++ net/route.c 27 Jun 2022 13:03:16 -
@@ -488,40 +488,34 @@ rt_putgwroute(struct rtentry *rt)
 void
 rtref(struct rtentry *rt)
 {
-   atomic_inc_int(>rt_refcnt);
+   refcnt_take(>rt_refcnt);
 }
 
 void
 rtfree(struct rtentry *rt)
 {
-   int  refcnt;
-
if (rt == NULL)
return;
 
-   refcnt = (int)atomic_dec_int_nv(>rt_refcnt);
-   if (refcnt <= 0) {
-   KASSERT(!ISSET(rt->rt_flags, RTF_UP));
-   KASSERT(!RT_ROOT(rt));
-   atomic_dec_int();
-   if (refcnt < 0) {
-   printf("rtfree: %p not freed (neg refs)\n", rt);
-   return;
-   }
+   if (refcnt_rele(>rt_refcnt) == 0)
+   return;
 
-   KERNEL_LOCK();
-   rt_timer_remove_all(rt);
-   ifafree(rt->rt_ifa);
-   rtlabel_unref(rt->rt_labelid);
+   KASSERT(!ISSET(rt->rt_flags, RTF_UP));
+   KASSERT(!RT_ROOT(rt));
+   atomic_dec_int();
+
+   KERNEL_LOCK();
+   rt_timer_remove_all(rt);
+   ifafree(rt->rt_ifa);
+   rtlabel_unref(rt->rt_labelid);
 #ifdef MPLS
-   rt_mpls_clear(rt);
+   rt_mpls_clear(rt);
 #endif
-   free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
-   free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
-   KERNEL_UNLOCK();
+   free(rt->rt_gateway, M_RTABLE, ROUNDUP(rt->rt_gateway->sa_len));
+   free(rt_key(rt), M_RTABLE, rt_key(rt)->sa_len);
+   KERNEL_UNLOCK();
 
-   pool_put(_pool, rt);
-   }
+   pool_put(_pool, rt);
 }
 
 void
@@ -877,7 +871,7 @@ rtrequest(int req, struct rt_addrinfo *i
return (ENOBUFS);
}
 
-   rt->rt_refcnt = 1;
+   refcnt_init(>rt_refcnt);
rt->rt_flags = info->rti_flags | RTF_UP;
rt->rt_priority = prio; /* init routing priority */
LIST_INIT(>rt_timer);
@@ -1864,8 +1858,8 @@ db_show_rtentry(struct rtentry *rt, void
 {
db_printf("rtentry=%p", rt);
 
-   db_printf(" flags=0x%x refcnt=%d use=%llu expire=%lld rtableid=%u\n",
-   rt->rt_flags, rt->rt_refcnt, rt->rt_use, rt->rt_expire, id);
+   db_printf(" flags=0x%x refcnt=%u use=%llu expire=%lld rtableid=%u\n",
+   rt->rt_flags, rt->rt_refcnt.r_refs, rt->rt_use, rt->rt_expire, id);
 
db_printf(" key="); db_print_sa(rt_key(rt));
db_printf(" plen=%d", rt_plen(rt));
Index: net/route.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/route.h,v
retrieving revision 1.194
diff -u -p -r1.194 route.h
--- net/route.h 5 May 2022 13:57:40 -   1.194
+++ net/route.h 27 Jun 2022 13:03:16 -
@@ -119,7 +119,7 @@ struct rtentry {
struct rt_kmetrics rt_rmx;  /* metrics used by rx'ing protocols */
unsigned int rt_ifidx;  /* the answer: interface to use */
unsigned int rt_flags;  /* up/down?, host/net */
-   int  rt_refcnt; /* # held references */
+   struct refcntrt_refcnt; /* # held references */
int  rt_plen;   /* prefix length */
uint16_t rt_labelid;/* route label ID */
uint8_t  rt_priority;   /* routing priority to use */
Index: net/rtable.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtable.c,v
retrieving revision 1.77
diff -u -p -r1.77 rtable.c
--- net/rtable.c19 Apr 2022 15:44:56 -  1.77
+++ net/rtable.c27 Jun 2022 13:03:16 -
@@ -680,7 +680,7 @@ rtable_delete(unsigned int rtableid, str
npaths++;
 
if (npaths > 1) {
-   KASSERT(rt->rt_refcnt >= 1);
+   KASSERT(refcnt_read(>rt_refcnt) >= 1);
SRPL_REMOVE_LOCKED(_rc, >an_rtlist, rt, rtentry,
rt_next);
 
@@ -694,7 +694,7 @@ rtable_delete(unsigned int rtableid, str
if (art_delete(ar, an, addr, plen) == NULL)
panic("art_delete failed to find node %p", an);
 
-   KASSERT(rt->rt_refcnt >= 1);
+   KASSERT(refcnt_read(>rt_refcnt) >= 1);
SRPL_REMOVE_LOCKED(_rc, >an_rtlist, rt, rtentry, rt_next);
art_put(an);
 
Index: net/rtsock.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/net/rtsock.c,v
retrieving revision 1.330
diff -u -p -r1.330 rtsock.c
--- net/rtsock.c26 Jun 2022 16:07:00 -  1.330
+++ net/rtsock.c27 Jun 2022 13:03:16 -
@@ -1992,7 

Re: snmpd(8): Add rudimentary AgentX support

2022-06-27 Thread Theo de Raadt
Florian Obser  wrote:

> On 2022-06-27 13:32 +02, Martijn van Duren  
> wrote:
> > For the group-id I went with 92, which was used by _rtadvd. It's one up
> > from _snmpd and has been used previously by _rtadvd, which should make
> > it the perfect candidate. According to florian rtadvd never stored
> > anything on disk and chances of any lingering rtadvd binary still around
> > on a modern install still running approach 0. So I don't see any risks
> > there. If someone kept the old user/group around sysmerge should change
> > _rtadvd to _agentx, which florian, sthen, and I also don't see an issue
> > with.
> 
> What I mean was, if they didn't clean up they still have _rtadvd:_rtadvd
> which will become _rtadvd:_agentx. Operationally this should not a problem,
> nothing is supposed to use that user.
> 

We discussed this;  After a few years, we don't care about the potential for
problems users create by having ancient master.passwd left over.



pipex(4): do pppoe output through netisr

2022-06-27 Thread Vitaliy Makkoveev
We can't predict netlock state when pipex(4) related (*if_qstart)()
handlers called. This means we can't use netlock within pppac_qstart()
and pppx_if_qstart() handlers. But actually we can't avoid netlock only
when we call (*if_output)() in pipex(4) PPPOE output path.

Introduce `pipexoutq' mbuf(9) queue, and put PPPOE related mbufs within.
Do (*if_output)() calls within netisr handler with netlock held.

The netlock assertion is still kept within pppac_qstart(),
pppx_if_qstart() and underlay code because some per session data relies
on netlock. These assertions will be removed with following diffs.

I also want to use mbuf(9) queue for pppoe(4) input path and remove
existing serialization hack provided by kernel lock.

Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.653
diff -u -p -r1.653 if.c
--- sys/net/if.c7 Jun 2022 22:18:34 -   1.653
+++ sys/net/if.c27 Jun 2022 12:53:37 -
@@ -914,6 +914,11 @@ if_netisr(void *unused)
if (n & (1 << NETISR_BRIDGE))
bridgeintr();
 #endif
+#ifdef PIPEX
+   if (n & (1 << NETISR_PIPEX))
+   pipexintr();
+#endif
+
t |= n;
}
 
Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.56
diff -u -p -r1.56 netisr.h
--- sys/net/netisr.h28 Apr 2022 16:56:39 -  1.56
+++ sys/net/netisr.h27 Jun 2022 12:53:37 -
@@ -45,6 +45,7 @@
 #defineNETISR_PFSYNC   5   /* for pfsync "immediate" tx */
 #defineNETISR_ARP  18  /* same as AF_LINK */
 #defineNETISR_IPV6 24  /* same as AF_INET6 */
+#define NETISR_PIPEX   27  /* for pipex processing */
 #defineNETISR_PPP  28  /* for PPP processing */
 #defineNETISR_BRIDGE   29  /* for bridge processing */
 #defineNETISR_SWITCH   31  /* for switch dataplane */
@@ -65,6 +66,7 @@ void  pppintr(void);
 void   bridgeintr(void);
 void   switchintr(void);
 void   pfsyncintr(void);
+void   pipexintr(void);
 
 #defineschednetisr(anisr)  
\
 do {   \
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.141
diff -u -p -r1.141 pipex.c
--- sys/net/pipex.c 26 Jun 2022 22:51:58 -  1.141
+++ sys/net/pipex.c 27 Jun 2022 12:53:37 -
@@ -52,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "pf.h"
 #if NPF > 0
@@ -106,6 +107,9 @@ struct radix_node_head  *pipex_rd_head6 =
 struct timeout pipex_timer_ch; /* callout timer context */
 int pipex_prune = 1;   /* [I] walk list every seconds */
 
+struct mbuf_queue pipexoutq = MBUF_QUEUE_INITIALIZER(
+IFQ_MAXLEN, IPL_SOFTNET);
+
 /* borrow an mbuf pkthdr field */
 #define ph_ppp_proto ether_vtag
 
@@ -194,6 +198,46 @@ pipex_ioctl(void *ownersc, u_long cmd, c
 }
 
 /
+ * Software Interrupt Handler
+ /
+
+void
+pipexintr(void)
+{
+   struct mbuf_list ml;
+   struct mbuf *m;
+   struct pipex_session *session;
+
+   NET_ASSERT_LOCKED();
+
+   mq_delist(, );
+
+   while ((m = ml_dequeue()) != NULL) {
+   struct ifnet *ifp;
+
+   session = m->m_pkthdr.ph_cookie;
+
+   ifp = if_get(session->proto.pppoe.over_ifidx);
+   if (ifp != NULL) {
+   struct pipex_pppoe_header *pppoe;
+   int len;
+
+   pppoe = mtod(m, struct pipex_pppoe_header *);
+   len = ntohs(pppoe->length);
+   ifp->if_output(ifp, m, >peer.sa, NULL);
+   counters_pkt(session->stat_counters, pxc_opackets,
+   pxc_obytes, len);
+   } else {
+   m_freem(m);
+   counters_inc(session->stat_counters, pxc_oerrors);
+   }
+   if_put(ifp);
+
+   pipex_rele_session(session);
+   }
+}
+
+/
  * Session management functions
  /
 int
@@ -1259,7 +1303,6 @@ Static void
 pipex_pppoe_output(struct mbuf *m0, struct pipex_session *session)
 {
struct pipex_pppoe_header *pppoe;
-   struct ifnet *ifp;
int len, padlen;
 
/* save length for pppoe header */
@@ -1286,18 +1329,15 @@ pipex_pppoe_output(struct mbuf *m0, stru
pppoe->length 

Re: arp getuptime

2022-06-27 Thread Vitaliy Makkoveev
On Mon, Jun 27, 2022 at 01:58:11PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Instead of calling getuptime() all the time in ARP code, I would
> like to do it only once per function.  This should give us a more
> consistent time value.
> 
> ok?
> 

ok mvs@

> bluhm
> 
> Index: netinet/if_ether.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.248
> diff -u -p -r1.248 if_ether.c
> --- netinet/if_ether.c28 Apr 2021 21:21:44 -  1.248
> +++ netinet/if_ether.c27 Jun 2022 11:33:12 -
> @@ -124,14 +124,16 @@ arptimer(void *arg)
>  {
>   struct timeout *to = arg;
>   struct llinfo_arp *la, *nla;
> + time_t uptime;
>  
>   NET_LOCK();
> + uptime = getuptime();
>   timeout_add_sec(to, arpt_prune);
>   /* Net lock is exclusive, no arp mutex needed for arp_list here. */
>   LIST_FOREACH_SAFE(la, _list, la_list, nla) {
>   struct rtentry *rt = la->la_rt;
>  
> - if (rt->rt_expire && rt->rt_expire < getuptime())
> + if (rt->rt_expire && rt->rt_expire < uptime)
>   arptfree(rt); /* timer has expired; clear */
>   }
>   NET_UNLOCK();
> @@ -154,6 +156,7 @@ arp_rtrequest(struct ifnet *ifp, int req
>  {
>   struct sockaddr *gate = rt->rt_gateway;
>   struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
> + time_t uptime;
>  
>   NET_ASSERT_LOCKED();
>  
> @@ -161,8 +164,8 @@ arp_rtrequest(struct ifnet *ifp, int req
>   RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
>   return;
>  
> + uptime = getuptime();
>   switch (req) {
> -
>   case RTM_ADD:
>   if (rt->rt_flags & RTF_CLONING) {
>   rt->rt_expire = 0;
> @@ -206,7 +209,7 @@ arp_rtrequest(struct ifnet *ifp, int req
>   la->la_rt = rt;
>   rt->rt_flags |= RTF_LLINFO;
>   if ((rt->rt_flags & RTF_LOCAL) == 0)
> - rt->rt_expire = getuptime();
> + rt->rt_expire = uptime;
>   mtx_enter(_mtx);
>   LIST_INSERT_HEAD(_list, la, la_list);
>   mtx_leave(_mtx);
> @@ -325,6 +328,7 @@ arpresolve(struct ifnet *ifp, struct rte
>   struct sockaddr_dl *sdl;
>   struct rtentry *rt = NULL;
>   char addr[INET_ADDRSTRLEN];
> + time_t uptime;
>  
>   if (m->m_flags & M_BCAST) { /* broadcast */
>   memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
> @@ -335,10 +339,11 @@ arpresolve(struct ifnet *ifp, struct rte
>   return (0);
>   }
>  
> + uptime = getuptime();
>   rt = rt_getll(rt0);
>  
>   if (ISSET(rt->rt_flags, RTF_REJECT) &&
> - (rt->rt_expire == 0 || rt->rt_expire > getuptime() )) {
> + (rt->rt_expire == 0 || rt->rt_expire > uptime)) {
>   m_freem(m);
>   return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
>   }
> @@ -366,15 +371,15 @@ arpresolve(struct ifnet *ifp, struct rte
>* Check the address family and length is valid, the address
>* is resolved; otherwise, try to resolve.
>*/
> - if ((rt->rt_expire == 0 || rt->rt_expire > getuptime()) &&
> + if ((rt->rt_expire == 0 || rt->rt_expire > uptime) &&
>   sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
>   memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
>  
>   /* refresh ARP entry when timeout gets close */
>   if (rt->rt_expire != 0 &&
> - rt->rt_expire - arpt_keep / 8 < getuptime() &&
> - la->la_refreshed + 30 < getuptime()) {
> - la->la_refreshed = getuptime();
> + rt->rt_expire - arpt_keep / 8 < uptime &&
> + la->la_refreshed + 30 < uptime) {
> + la->la_refreshed = uptime;
>   arprequest(ifp,
>   (rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
>   (dst)->sin_addr.s_addr,
> @@ -407,13 +412,13 @@ arpresolve(struct ifnet *ifp, struct rte
>   /* This should never happen. (Should it? -gwr) */
>   printf("%s: unresolved and rt_expire == 0\n", __func__);
>   /* Set expiration time to now (expired). */
> - rt->rt_expire = getuptime();
> + rt->rt_expire = uptime;
>   }
>  #endif
>   if (rt->rt_expire) {
>   rt->rt_flags &= ~RTF_REJECT;
> - if (la->la_asked == 0 || rt->rt_expire != getuptime()) {
> - rt->rt_expire = getuptime();
> + if (la->la_asked == 0 || rt->rt_expire != uptime) {
> + rt->rt_expire = uptime;
>   if (la->la_asked++ < arp_maxtries)
>   arprequest(ifp,
>   
> (rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
> @@ -613,6 +618,7 @@ 

Re: arp getuptime

2022-06-27 Thread Claudio Jeker
On Mon, Jun 27, 2022 at 01:58:11PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Instead of calling getuptime() all the time in ARP code, I would
> like to do it only once per function.  This should give us a more
> consistent time value.
> 
> ok?

I would love to see the arp code use rttimer instead of handrolling its
own version.

Also the getuptime() function should be rather cheap. Diff is fine with me
none the less.

> Index: netinet/if_ether.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
> retrieving revision 1.248
> diff -u -p -r1.248 if_ether.c
> --- netinet/if_ether.c28 Apr 2021 21:21:44 -  1.248
> +++ netinet/if_ether.c27 Jun 2022 11:33:12 -
> @@ -124,14 +124,16 @@ arptimer(void *arg)
>  {
>   struct timeout *to = arg;
>   struct llinfo_arp *la, *nla;
> + time_t uptime;
>  
>   NET_LOCK();
> + uptime = getuptime();
>   timeout_add_sec(to, arpt_prune);
>   /* Net lock is exclusive, no arp mutex needed for arp_list here. */
>   LIST_FOREACH_SAFE(la, _list, la_list, nla) {
>   struct rtentry *rt = la->la_rt;
>  
> - if (rt->rt_expire && rt->rt_expire < getuptime())
> + if (rt->rt_expire && rt->rt_expire < uptime)
>   arptfree(rt); /* timer has expired; clear */
>   }
>   NET_UNLOCK();
> @@ -154,6 +156,7 @@ arp_rtrequest(struct ifnet *ifp, int req
>  {
>   struct sockaddr *gate = rt->rt_gateway;
>   struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
> + time_t uptime;
>  
>   NET_ASSERT_LOCKED();
>  
> @@ -161,8 +164,8 @@ arp_rtrequest(struct ifnet *ifp, int req
>   RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
>   return;
>  
> + uptime = getuptime();
>   switch (req) {
> -
>   case RTM_ADD:
>   if (rt->rt_flags & RTF_CLONING) {
>   rt->rt_expire = 0;
> @@ -206,7 +209,7 @@ arp_rtrequest(struct ifnet *ifp, int req
>   la->la_rt = rt;
>   rt->rt_flags |= RTF_LLINFO;
>   if ((rt->rt_flags & RTF_LOCAL) == 0)
> - rt->rt_expire = getuptime();
> + rt->rt_expire = uptime;
>   mtx_enter(_mtx);
>   LIST_INSERT_HEAD(_list, la, la_list);
>   mtx_leave(_mtx);
> @@ -325,6 +328,7 @@ arpresolve(struct ifnet *ifp, struct rte
>   struct sockaddr_dl *sdl;
>   struct rtentry *rt = NULL;
>   char addr[INET_ADDRSTRLEN];
> + time_t uptime;
>  
>   if (m->m_flags & M_BCAST) { /* broadcast */
>   memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
> @@ -335,10 +339,11 @@ arpresolve(struct ifnet *ifp, struct rte
>   return (0);
>   }
>  
> + uptime = getuptime();
>   rt = rt_getll(rt0);
>  
>   if (ISSET(rt->rt_flags, RTF_REJECT) &&
> - (rt->rt_expire == 0 || rt->rt_expire > getuptime() )) {
> + (rt->rt_expire == 0 || rt->rt_expire > uptime)) {
>   m_freem(m);
>   return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
>   }
> @@ -366,15 +371,15 @@ arpresolve(struct ifnet *ifp, struct rte
>* Check the address family and length is valid, the address
>* is resolved; otherwise, try to resolve.
>*/
> - if ((rt->rt_expire == 0 || rt->rt_expire > getuptime()) &&
> + if ((rt->rt_expire == 0 || rt->rt_expire > uptime) &&
>   sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
>   memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
>  
>   /* refresh ARP entry when timeout gets close */
>   if (rt->rt_expire != 0 &&
> - rt->rt_expire - arpt_keep / 8 < getuptime() &&
> - la->la_refreshed + 30 < getuptime()) {
> - la->la_refreshed = getuptime();
> + rt->rt_expire - arpt_keep / 8 < uptime &&
> + la->la_refreshed + 30 < uptime) {
> + la->la_refreshed = uptime;
>   arprequest(ifp,
>   (rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
>   (dst)->sin_addr.s_addr,
> @@ -407,13 +412,13 @@ arpresolve(struct ifnet *ifp, struct rte
>   /* This should never happen. (Should it? -gwr) */
>   printf("%s: unresolved and rt_expire == 0\n", __func__);
>   /* Set expiration time to now (expired). */
> - rt->rt_expire = getuptime();
> + rt->rt_expire = uptime;
>   }
>  #endif
>   if (rt->rt_expire) {
>   rt->rt_flags &= ~RTF_REJECT;
> - if (la->la_asked == 0 || rt->rt_expire != getuptime()) {
> - rt->rt_expire = getuptime();
> + if (la->la_asked == 0 || rt->rt_expire != uptime) {
> + rt->rt_expire = uptime;
>   if (la->la_asked++ < arp_maxtries)
>   

Re: snmpd(8): Add rudimentary AgentX support

2022-06-27 Thread Florian Obser
On 2022-06-27 13:32 +02, Martijn van Duren  
wrote:
> For the group-id I went with 92, which was used by _rtadvd. It's one up
> from _snmpd and has been used previously by _rtadvd, which should make
> it the perfect candidate. According to florian rtadvd never stored
> anything on disk and chances of any lingering rtadvd binary still around
> on a modern install still running approach 0. So I don't see any risks
> there. If someone kept the old user/group around sysmerge should change
> _rtadvd to _agentx, which florian, sthen, and I also don't see an issue
> with.

What I mean was, if they didn't clean up they still have _rtadvd:_rtadvd
which will become _rtadvd:_agentx. Operationally this should not a problem,
nothing is supposed to use that user.



arp getuptime

2022-06-27 Thread Alexander Bluhm
Hi,

Instead of calling getuptime() all the time in ARP code, I would
like to do it only once per function.  This should give us a more
consistent time value.

ok?

bluhm

Index: netinet/if_ether.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.248
diff -u -p -r1.248 if_ether.c
--- netinet/if_ether.c  28 Apr 2021 21:21:44 -  1.248
+++ netinet/if_ether.c  27 Jun 2022 11:33:12 -
@@ -124,14 +124,16 @@ arptimer(void *arg)
 {
struct timeout *to = arg;
struct llinfo_arp *la, *nla;
+   time_t uptime;
 
NET_LOCK();
+   uptime = getuptime();
timeout_add_sec(to, arpt_prune);
/* Net lock is exclusive, no arp mutex needed for arp_list here. */
LIST_FOREACH_SAFE(la, _list, la_list, nla) {
struct rtentry *rt = la->la_rt;
 
-   if (rt->rt_expire && rt->rt_expire < getuptime())
+   if (rt->rt_expire && rt->rt_expire < uptime)
arptfree(rt); /* timer has expired; clear */
}
NET_UNLOCK();
@@ -154,6 +156,7 @@ arp_rtrequest(struct ifnet *ifp, int req
 {
struct sockaddr *gate = rt->rt_gateway;
struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
+   time_t uptime;
 
NET_ASSERT_LOCKED();
 
@@ -161,8 +164,8 @@ arp_rtrequest(struct ifnet *ifp, int req
RTF_GATEWAY|RTF_BROADCAST|RTF_MULTICAST|RTF_MPLS))
return;
 
+   uptime = getuptime();
switch (req) {
-
case RTM_ADD:
if (rt->rt_flags & RTF_CLONING) {
rt->rt_expire = 0;
@@ -206,7 +209,7 @@ arp_rtrequest(struct ifnet *ifp, int req
la->la_rt = rt;
rt->rt_flags |= RTF_LLINFO;
if ((rt->rt_flags & RTF_LOCAL) == 0)
-   rt->rt_expire = getuptime();
+   rt->rt_expire = uptime;
mtx_enter(_mtx);
LIST_INSERT_HEAD(_list, la, la_list);
mtx_leave(_mtx);
@@ -325,6 +328,7 @@ arpresolve(struct ifnet *ifp, struct rte
struct sockaddr_dl *sdl;
struct rtentry *rt = NULL;
char addr[INET_ADDRSTRLEN];
+   time_t uptime;
 
if (m->m_flags & M_BCAST) { /* broadcast */
memcpy(desten, etherbroadcastaddr, sizeof(etherbroadcastaddr));
@@ -335,10 +339,11 @@ arpresolve(struct ifnet *ifp, struct rte
return (0);
}
 
+   uptime = getuptime();
rt = rt_getll(rt0);
 
if (ISSET(rt->rt_flags, RTF_REJECT) &&
-   (rt->rt_expire == 0 || rt->rt_expire > getuptime() )) {
+   (rt->rt_expire == 0 || rt->rt_expire > uptime)) {
m_freem(m);
return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
}
@@ -366,15 +371,15 @@ arpresolve(struct ifnet *ifp, struct rte
 * Check the address family and length is valid, the address
 * is resolved; otherwise, try to resolve.
 */
-   if ((rt->rt_expire == 0 || rt->rt_expire > getuptime()) &&
+   if ((rt->rt_expire == 0 || rt->rt_expire > uptime) &&
sdl->sdl_family == AF_LINK && sdl->sdl_alen != 0) {
memcpy(desten, LLADDR(sdl), sdl->sdl_alen);
 
/* refresh ARP entry when timeout gets close */
if (rt->rt_expire != 0 &&
-   rt->rt_expire - arpt_keep / 8 < getuptime() &&
-   la->la_refreshed + 30 < getuptime()) {
-   la->la_refreshed = getuptime();
+   rt->rt_expire - arpt_keep / 8 < uptime &&
+   la->la_refreshed + 30 < uptime) {
+   la->la_refreshed = uptime;
arprequest(ifp,
(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
(dst)->sin_addr.s_addr,
@@ -407,13 +412,13 @@ arpresolve(struct ifnet *ifp, struct rte
/* This should never happen. (Should it? -gwr) */
printf("%s: unresolved and rt_expire == 0\n", __func__);
/* Set expiration time to now (expired). */
-   rt->rt_expire = getuptime();
+   rt->rt_expire = uptime;
}
 #endif
if (rt->rt_expire) {
rt->rt_flags &= ~RTF_REJECT;
-   if (la->la_asked == 0 || rt->rt_expire != getuptime()) {
-   rt->rt_expire = getuptime();
+   if (la->la_asked == 0 || rt->rt_expire != uptime) {
+   rt->rt_expire = uptime;
if (la->la_asked++ < arp_maxtries)
arprequest(ifp,

(rt->rt_ifa->ifa_addr)->sin_addr.s_addr,
@@ -613,6 +618,7 @@ arpcache(struct ifnet *ifp, struct ether
struct ifnet *rifp;
struct mbuf_list ml;
struct mbuf *m;
+   time_t uptime;
unsigned int len;

snmpd(8): Add rudimentary AgentX support

2022-06-27 Thread Martijn van Duren
Hello tech@,

This diff implements most of the AgentX master side. noticeable
omissions/intendend omissions are:
- Notify: according to the RFC doesn't need guarantee being
  delivered to it's final destination, just that it has been validated.
  This allows us to delay this feature to a later moment.
- Index{,De}allocate: I haven't seen any software in base/ports use
  this feature. So it should be safe to ignore for now.
- {Add,Remove}AgentCaps: This adds an entry to the SysORTable. Our
  implementation isn't ready for this yet and the feature is useless
  enough that I don't see any reason to force this until after we got
  rid of the legacy code-paths.

For the master socket sthen, florian, and I decided it would be nicest
to go for a new group _agentx and default permissions of root:_agentx
660. This appears to work with all our available subagent code:
- relayd(8): Connects as root
- amavisd-snmp-subagent: Should be able to be setup via rc.d
  daemon_user, which honours supplementary groups
- lldpd: Connects as root via an openbsd-like privsep concept
- Asterisk: Honours supplementary groups if the explicit group is not
  set. According to sthen we could change this default in the port.
- gnugk: Afaik only supports AgentX over TCP, which I don't want to
  support.
- Other daemons that would have agentx support added later should
  deal with these default settings, or have it made clear how to set
  up an additional agentx listen socket.

For the group-id I went with 92, which was used by _rtadvd. It's one up
from _snmpd and has been used previously by _rtadvd, which should make
it the perfect candidate. According to florian rtadvd never stored
anything on disk and chances of any lingering rtadvd binary still around
on a modern install still running approach 0. So I don't see any risks
there. If someone kept the old user/group around sysmerge should change
_rtadvd to _agentx, which florian, sthen, and I also don't see an issue
with.

Feedback/OK?

martijn@

Index: etc/group
===
RCS file: /cvs/src/etc/group,v
retrieving revision 1.94
diff -u -p -r1.94 group
--- etc/group   28 Jan 2020 16:51:03 -  1.94
+++ etc/group   27 Jun 2022 11:29:36 -
@@ -59,6 +59,7 @@ _ripd:*:88:
 _relayd:*:89:
 _ospf6d:*:90:
 _snmpd:*:91:
+_agentx:*:92:
 _ypldap:*:93:
 _rad:*:94:
 _smtpd:*:95:
Index: etc/mtree/4.4BSD.dist
===
RCS file: /cvs/src/etc/mtree/4.4BSD.dist,v
retrieving revision 1.319
diff -u -p -r1.319 4.4BSD.dist
--- etc/mtree/4.4BSD.dist   23 Oct 2021 19:40:29 -  1.319
+++ etc/mtree/4.4BSD.dist   27 Jun 2022 11:29:36 -
@@ -608,6 +608,8 @@ usr
 var
 account
 ..
+agentx uname=root gname=wheel mode=755
+..
 authpf uname=root gname=authpf mode=0770
 ..
 empty  mode=0755
Index: usr.sbin/snmpd.application.agentx/Makefile
===
RCS file: /cvs/src/usr.sbin/snmpd/Makefile,v
retrieving revision 1.18
diff -u -p -r1.18 Makefile
--- usr.sbin/snmpd.application.agentx/Makefile  19 Jan 2022 11:00:56 -  
1.18
+++ usr.sbin/snmpd.application.agentx/Makefile  27 Jun 2022 11:29:36 -
@@ -3,6 +3,7 @@
 PROG=  snmpd
 MAN=   snmpd.8 snmpd.conf.5
 SRCS=  parse.y log.c snmpe.c application.c application_legacy.c \
+   application_agentx.c ax.c \
mps.c trap.c mib.c smi.c kroute.c snmpd.c timer.c \
pf.c proc.c usm.c traphandler.c util.c
 
Index: usr.sbin/snmpd.application.agentx/application.c
===
RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
retrieving revision 1.5
diff -u -p -r1.5 application.c
--- usr.sbin/snmpd.application.agentx/application.c 27 Jun 2022 10:31:17 
-  1.5
+++ usr.sbin/snmpd.application.agentx/application.c 27 Jun 2022 11:29:36 
-
@@ -146,9 +146,16 @@ RB_PROTOTYPE_STATIC(appl_requests, appl_
 #define APPL_CONTEXT_NAME(ctx) (ctx->ac_name[0] == '\0' ? NULL : ctx->ac_name)
 
 void
+appl(void)
+{
+   appl_agentx();
+}
+
+void
 appl_init(void)
 {
appl_legacy_init();
+   appl_agentx_init();
 }
 
 void
@@ -157,6 +164,7 @@ appl_shutdown(void)
struct appl_context *ctx, *tctx;
 
appl_legacy_shutdown();
+   appl_agentx_shutdown();
 
TAILQ_FOREACH_SAFE(ctx, , ac_entries, tctx) {
assert(RB_EMPTY(&(ctx->ac_regions)));
@@ -543,8 +551,7 @@ appl_close(struct appl_backend *backend)
 }
 
 struct appl_region *
-appl_region_find(struct appl_context *ctx,
-const struct ber_oid *oid)
+appl_region_find(struct appl_context *ctx, const struct ber_oid *oid)
 {
struct appl_region *region, search;
 
@@ -1120,10 +1127,13 @@ appl_response(struct appl_backend *backe
log_warnx("Invalid error 

Re: refcount btrace

2022-06-27 Thread Alexander Bluhm
On Fri, Apr 22, 2022 at 12:07:44AM +0200, Alexander Bluhm wrote:
> I still think it is worth to have refcount debugging in generic
> kernel dt(4).  Having tools is easier than first adding printf to
> hunt a bug.  I see no downside.

I have talked with mpi@.  We both think it is useful to have refcount
debugging tools available in GENERIC kernel.  If there are no hard
objections, I will put this in tomorrow.

bluhm

> Index: dev/dt/dt_prov_static.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 dt_prov_static.c
> --- dev/dt/dt_prov_static.c   17 Mar 2022 14:53:59 -  1.13
> +++ dev/dt/dt_prov_static.c   21 Apr 2022 21:06:03 -
> @@ -87,6 +87,12 @@ DT_STATIC_PROBE1(smr, barrier_exit, "int
>  DT_STATIC_PROBE0(smr, wakeup);
>  DT_STATIC_PROBE2(smr, thread, "uint64_t", "uint64_t");
> 
> +/*
> + * reference counting
> + */
> +DT_STATIC_PROBE0(refcnt, none);
> +DT_STATIC_PROBE3(refcnt, inpcb, "void *", "int", "int");
> +DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
> 
>  /*
>   * List of all static probes
> @@ -127,15 +133,24 @@ struct dt_probe *const dtps_static[] = {
>   &_DT_STATIC_P(smr, barrier_exit),
>   &_DT_STATIC_P(smr, wakeup),
>   &_DT_STATIC_P(smr, thread),
> + /* refcnt */
> + &_DT_STATIC_P(refcnt, none),
> + &_DT_STATIC_P(refcnt, inpcb),
> + &_DT_STATIC_P(refcnt, tdb),
>  };
> 
> +struct dt_probe *const *dtps_index_refcnt;
> +
>  int
>  dt_prov_static_init(void)
>  {
>   int i;
> 
> - for (i = 0; i < nitems(dtps_static); i++)
> + for (i = 0; i < nitems(dtps_static); i++) {
> + if (dtps_static[i] == &_DT_STATIC_P(refcnt, none))
> + dtps_index_refcnt = _static[i];
>   dt_dev_register_probe(dtps_static[i]);
> + }
> 
>   return i;
>  }
> Index: dev/dt/dtvar.h
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dtvar.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 dtvar.h
> --- dev/dt/dtvar.h27 Feb 2022 10:14:01 -  1.13
> +++ dev/dt/dtvar.h21 Apr 2022 21:06:03 -
> @@ -313,11 +313,30 @@ extern volatile uint32_tdt_tracing; /*
>  #define  DT_STATIC_ENTER(func, name, args...) do {   
> \
>   extern struct dt_probe _DT_STATIC_P(func, name);\
>   struct dt_probe *dtp = &_DT_STATIC_P(func, name);   \
> - struct dt_provider *dtpv = dtp->dtp_prov;   \
>   \
>   if (__predict_false(dt_tracing) &&  \
>   __predict_false(dtp->dtp_recording)) {  \
> + struct dt_provider *dtpv = dtp->dtp_prov;   \
> + \
>   dtpv->dtpv_enter(dtpv, dtp, args);  \
> + }   \
> +} while (0)
> +
> +#define _DT_INDEX_P(func)(dtps_index_##func)
> +
> +#define DT_INDEX_ENTER(func, index, args...) do {\
> + extern struct dt_probe **_DT_INDEX_P(func); \
> + \
> + if (__predict_false(dt_tracing) &&  \
> + __predict_false(index > 0) &&   \
> + __predict_true(_DT_INDEX_P(func) != NULL)) {\
> + struct dt_probe *dtp = _DT_INDEX_P(func)[index];\
> + \
> + if(__predict_false(dtp->dtp_recording)) {   \
> + struct dt_provider *dtpv = dtp->dtp_prov;   \
> + \
> + dtpv->dtpv_enter(dtpv, dtp, args);  \
> + }   \
>   }   \
>  } while (0)
> 
> Index: kern/kern_synch.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.185
> diff -u -p -r1.185 kern_synch.c
> --- kern/kern_synch.c 18 Mar 2022 15:32:06 -  1.185
> +++ kern/kern_synch.c 21 Apr 2022 21:06:03 -
> @@ -804,7 +804,15 @@ sys___thrwakeup(struct proc *p, void *v,
>  void
>  refcnt_init(struct refcnt *r)
>  {
> + refcnt_init_trace(r, 0);
> +}
> +
> +void
> +refcnt_init_trace(struct refcnt *r, int idx)
> +{
> + r->r_traceidx = idx;
>   atomic_store_int(>r_refs, 1);
> + TRACEINDEX(refcnt, r->r_traceidx, r, 0, +1);
>  }
> 
>  void
> @@ -814,6 +822,7 @@ 

Re: snmpd(8): Application.c properly initialize oidbuf for appl_region

2022-06-27 Thread Theo Buehler
On Mon, Jun 27, 2022 at 12:11:42PM +0200, Martijn van Duren wrote:
> When registering a region in appl_region (through appl_register) we
> fill oidbuf with strlcat, but we don't start from a clean state and
> might have garbage prepended.
> 
> This oidbuf is only used on error-conditions, so it's unlikely to
> trigger with the current code. Diff below properly initializes it.
> 
> OK?

ok tb

The regionbuf[] has the same issue, so you should treat it the same way
after the overlap: label.

> 
> martijn@
> 
> Index: application.c
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 application.c
> --- application.c 22 Feb 2022 15:59:13 -  1.3
> +++ application.c 27 Jun 2022 10:10:37 -
> @@ -224,6 +224,7 @@ appl_region(struct appl_context *ctx, ui
>   goto overlap;
>  
>   /* Don't use smi_oid2string, because appl_register can't use it */
> + oidbuf[0] = '\0';
>   for (i = 0; i < oid->bo_n; i++) {
>   if (i != 0)
>   strlcat(oidbuf, ".", sizeof(oidbuf));
> 



snmpd(8): Application.c properly initialize oidbuf for appl_region

2022-06-27 Thread Martijn van Duren
When registering a region in appl_region (through appl_register) we
fill oidbuf with strlcat, but we don't start from a clean state and
might have garbage prepended.

This oidbuf is only used on error-conditions, so it's unlikely to
trigger with the current code. Diff below properly initializes it.

OK?

martijn@

Index: application.c
===
RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
retrieving revision 1.3
diff -u -p -r1.3 application.c
--- application.c   22 Feb 2022 15:59:13 -  1.3
+++ application.c   27 Jun 2022 10:10:37 -
@@ -224,6 +224,7 @@ appl_region(struct appl_context *ctx, ui
goto overlap;
 
/* Don't use smi_oid2string, because appl_register can't use it */
+   oidbuf[0] = '\0';
for (i = 0; i < oid->bo_n; i++) {
if (i != 0)
strlcat(oidbuf, ".", sizeof(oidbuf));



Re: kernel lock in arp

2022-06-27 Thread Alexander Bluhm
On Sat, May 21, 2022 at 10:50:28PM +0300, Vitaliy Makkoveev wrote:
> This diff looks good, except the re-check after kernel lock. It???s
> supposed `rt??? could became inconsistent, right? But what stops to
> make it inconsistent after first unlocked RTF_LLINFO flag check?
> 
> I think this re-check should gone.

I have copied the re-check from intenal genua code.  I am not sure
if it is really needed.  We know from Hrvoje that the diff with
re-check is stable.  And we know that it crashes without kernel
lock at all.

I have talked with mpi@ about it.  The main problem is that we have
no write lock when we change RTF_LLINFO.  Then rt_llinfo can get
NULL or inconsistent.

Plan is that I put some lock asserts into route add and delete.
This helps to find the parts that modify RTF_LLINFO and rt_llinfo
without exclusive lock.

Maybe we need some kernel lock somewhere else.  Or we want to use
some ARP mutex.  We could also add some comment and commit the diff
that I have.  We know that it is faster and stable.  Pushing the
kernel lock down or replacing it with something clever can always
be done later.

bluhm

> > On 21 May 2022, at 19:45, Hrvoje Popovski  wrote:
> > 
> > On 18.5.2022. 20:52, Hrvoje Popovski wrote:
> >> On 18.5.2022. 17:31, Alexander Bluhm wrote:
> >>> Hi,
> >>> 
> >>> For parallel IP forwarding I had put kernel lock around arpresolve()
> >>> as a quick workaround for crashes.  Moving the kernel lock inside
> >>> the function makes the hot path lock free.  I see slight prerformace
> >>> increase in my test and no lock contention in kstack flamegraph.
> >>> 
> >>> http://bluhm.genua.de/perform/results/latest/2022-05-16T00%3A00%3A00Z/btrace/tcpbench_-S100_-t10_-n100_10.3.45.35-btrace-kstack.0.svg
> >>> http://bluhm.genua.de/perform/results/latest/patch-sys-arpresolve-kernel-lock.1/btrace/tcpbench_-S100_-t10_-n100_10.3.45.35-btrace-kstack.0.svg
> >>> 
> >>> Search for kernel_lock.  Matched goes from 0.6% to 0.2%
> >>> 
> >>> We are running such a diff in our genua code for a while.  I think
> >>> route flags need more love.  I doubt that all flags and fields are
> >>> consistent when run on multiple CPU.  But this diff does not make
> >>> it worse and increases MP pressure.
> >> 
> >> Hi,
> >> 
> >> I'm seeing increase of forwarding performance from 2Mpps to 2.4Mpps with
> >> this diff and NET_TASKQ=4 on 6 x E5-2643 v2 @ 3.50GHz, 3600.01 MHz
> >> 
> >> Thank you ..
> >> 
> >> 
> > 
> > Hi,
> > 
> > I'm not seeing any fallout's with this diff. I'm running it on
> > production and test firewalls.
> > 
> 



Re: kernel lock in arp

2022-06-27 Thread Hrvoje Popovski
On 18.5.2022. 17:31, Alexander Bluhm wrote:
> Hi,
> 
> For parallel IP forwarding I had put kernel lock around arpresolve()
> as a quick workaround for crashes.  Moving the kernel lock inside
> the function makes the hot path lock free.  I see slight prerformace
> increase in my test and no lock contention in kstack flamegraph.
> 
> http://bluhm.genua.de/perform/results/latest/2022-05-16T00%3A00%3A00Z/btrace/tcpbench_-S100_-t10_-n100_10.3.45.35-btrace-kstack.0.svg
> http://bluhm.genua.de/perform/results/latest/patch-sys-arpresolve-kernel-lock.1/btrace/tcpbench_-S100_-t10_-n100_10.3.45.35-btrace-kstack.0.svg
> 
> Search for kernel_lock.  Matched goes from 0.6% to 0.2%
> 
> We are running such a diff in our genua code for a while.  I think
> route flags need more love.  I doubt that all flags and fields are
> consistent when run on multiple CPU.  But this diff does not make
> it worse and increases MP pressure.

Hi,

I'm running this diff on messy network with lots of
"arp: attempt to add entry" and sometimes
"arpresolve: XXX: route contains no arp information" logs and everything
seems fine ..




Re: Check `flags' on target session, not multicast session

2022-06-27 Thread Vitaliy Makkoveev
On Mon, Jun 27, 2022 at 01:29:26AM +0300, Vitaliy Makkoveev wrote:
> We should check PIPEX_SFLAGS_IP{,6}_FORWARD bits on the session which we
> will output packet, not on the dummy multicast session.
> 
> npppd(8) clears these flags before release IP address assigned to
> session. So this IP address could be assigned to other session while our
> session is still alive.
> 
> We should also do this check within pppx_if_qstart(), but I want to do
> this with separate diff.
> 
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.138
> diff -u -p -r1.138 pipex.c
> --- sys/net/pipex.c   26 Jun 2022 15:50:21 -  1.138
> +++ sys/net/pipex.c   26 Jun 2022 22:16:17 -
> @@ -801,7 +801,7 @@ pipex_ip_output(struct mbuf *m0, struct 
>   LIST_FOREACH(session_tmp, _session_list, session_list) {
>   if (session_tmp->ownersc != session->ownersc)
>   continue;
> - if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
> + if ((session_tmp->flags & (PIPEX_SFLAGS_IP_FORWARD |
>   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
>   continue;
>   m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
>

I changed my opinion about PIPEX_SFLAGS_IP{,6}_FORWARD flags in pipex(4)
output path. We have a lot of buffers between pipex(4) server and ppp
client. Some of such buffers could be located in other routers. And
outgoing pipex(4) packets will be delivered regardless on session's
`flags'. pppx(4) output has no PIPEX_SFLAGS_IP{,6}_FORWARD flags check,
so remove them from pppac(4) output too.

This allow us to make output lockless for pppoe sessions and cover very
small code sections within pipex_{pptp,l2tp}_output().

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.141
diff -u -p -r1.141 pipex.c
--- sys/net/pipex.c 26 Jun 2022 22:51:58 -  1.141
+++ sys/net/pipex.c 27 Jun 2022 08:57:38 -
@@ -767,9 +767,7 @@ pipex_ip_output(struct mbuf *m0, struct 
/*
 * Multicast packet is a idle packet and it's not TCP.
 */
-   if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
-   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
-   goto drop;
+
/* reset idle timer */
if (session->timeout_sec != 0) {
is_idle = 0;
@@ -802,9 +800,6 @@ pipex_ip_output(struct mbuf *m0, struct 
LIST_FOREACH(session_tmp, _session_list, session_list) {
if (session_tmp->ownersc != session->ownersc)
continue;
-   if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
-   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
-   continue;
m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
if (m == NULL) {
counters_inc(session->stat_counters,
@@ -817,8 +812,6 @@ pipex_ip_output(struct mbuf *m0, struct 
}
 
return;
-drop:
-   m_freem(m0);
 dropped:
counters_inc(session->stat_counters, pxc_oerrors);
 }