if_delgroup(): Add size to free(9) call

2020-06-29 Thread Klemens Nanni
Interface groups are allocated as follows:

struct ifg_group *
if_creategroup(const char *groupname)
{
struct ifg_group*ifg;

if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
return (NULL);

...
}

Since this allocation per group does not change, we can use the same
size when freeing it in if_delgroup() accordingly.

Tested on sparc64.

Feedback? OK?


Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- sys/net/if.c22 Jun 2020 09:45:13 -  1.610
+++ sys/net/if.c30 Jun 2020 02:00:31 -
@@ -2774,7 +2774,7 @@ if_delgroup(struct ifnet *ifp, const cha
 #if NPF > 0
pfi_detach_ifgroup(ifgl->ifgl_group);
 #endif
-   free(ifgl->ifgl_group, M_TEMP, 0);
+   free(ifgl->ifgl_group, M_TEMP, sizeof(*ifgl->ifgl_group));
}
 
free(ifgl, M_TEMP, sizeof(*ifgl));



Re: route add ::/0 ...

2020-06-29 Thread Klemens Nanni
On Tue, Jun 30, 2020 at 09:00:30AM +0900, YASUOKA Masahiko wrote:
> inet_makenetandmask() had required another treatment.
> 
> Also -prefixlen 0 for -inet has a bug
> 
>  % doas ./obj/route -T100 add -inet 0.0.0.0 -prefixlen 0 127.0.0.1
>  add net 0.0.0.0: gateway 127.0.0.1
>  % netstat -nrf inet -T 100
>  Routing tables
> 
>  Internet:
>  DestinationGatewayFlags   Refs  Use   Mtu  Prio Iface
>  0.0.0.0/32 127.0.0.1  UGS00 32768 8 lo100
> 
> /0 becomes /32.  The diff following also fixes the problem.
Yes, this looks correct to me;  regress is also happy (again).

OK kn



Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
On Mon, 29 Jun 2020 19:18:17 +0200
Klemens Nanni  wrote:
> On Mon, Jun 29, 2020 at 11:55:10PM +0900, YASUOKA Masahiko wrote:
>> The function mask_addr() doesn't mask address for IPv4 and IPv6.  Does
>> any address family other than IPv4 or IPv6 require #1142:1148?  The
>> function seems to just trim the trailing zero.  Is it neccesaary?  And
>> it causes the confusion on the kernel.  How about deleting
>> mask_addr()?
>> 
>> The diff following also fixes the problem.
> Removing it breaks IPv4 default routes:
> 
>   # ifconfig lo1 rdomain 1 127.1.1.1
>   # ./obj/route -nT1 add 0.0.0.0/0 127.1.1.1
>   add net 0.0.0.0/0: gateway 127.1.1.1: Invalid argument
>   # route -nT1 add 0.0.0.0/0 127.1.1.1  
>   add net 0.0.0.0/0: gateway 127.1.1.1

Thanks,

inet_makenetandmask() had required another treatment.

Also -prefixlen 0 for -inet has a bug

 % doas ./obj/route -T100 add -inet 0.0.0.0 -prefixlen 0 127.0.0.1
 add net 0.0.0.0: gateway 127.0.0.1
 % netstat -nrf inet -T 100
 Routing tables

 Internet:
 DestinationGatewayFlags   Refs  Use   Mtu  Prio Iface
 0.0.0.0/32 127.0.0.1  UGS00 32768 8 lo100

/0 becomes /32.  The diff following also fixes the problem.


diff --git a/sbin/route/route.c b/sbin/route/route.c
index 9e43d8e89b6..532a918148d 100644
--- a/sbin/route/route.c
+++ b/sbin/route/route.c
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, int);
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -781,12 +780,9 @@ inet_makenetandmask(u_int32_t net, struct sockaddr_in 
*sin, int bits)
sin->sin_addr.s_addr = htonl(net);
sin = _mask.sin;
sin->sin_addr.s_addr = htonl(mask);
-   sin->sin_len = 0;
-   sin->sin_family = 0;
+   sin->sin_family = AF_INET;
cp = (char *)(>sin_addr + 1);
-   while (*--cp == '\0' && cp > (char *)sin)
-   continue;
-   sin->sin_len = 1 + cp - (char *)sin;
+   sin->sin_len = sizeof(struct sockaddr_in);
 }
 
 /*
@@ -1001,7 +997,8 @@ prefixlen(int af, char *s)
memset(_mask, 0, sizeof(so_mask));
so_mask.sin.sin_family = AF_INET;
so_mask.sin.sin_len = sizeof(struct sockaddr_in);
-   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - len));
+   if (len != 0)
+   so_mask.sin.sin_addr.s_addr = htonl(0x << (32 - 
len));
break;
case AF_INET6:
so_mask.sin6.sin6_family = AF_INET6;
@@ -1088,8 +1085,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1120,34 +1115,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
return (0);
 }
 
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
-   if (*--cp1 != '\0') {
-   mask->sa.sa_len = 1 + cp1 - (char *)mask;
-   break;
-   }
-   if ((rtm_addrs & which) == 0)
-   return;
-   switch (addr->sa.sa_family) {
-   case AF_INET:
-   case AF_INET6:
-   case AF_UNSPEC:
-   return;
-   }
-   cp1 = mask->sa.sa_len + 1 + (char *)addr;
-   cp2 = addr->sa.sa_len + 1 + (char *)addr;
-   while (cp2 > cp1)
-   *--cp2 = '\0';
-   cp2 = mask->sa.sa_len + 1 + (char *)mask;
-   while (cp1 > addr->sa.sa_data)
-   *--cp1 &= *--cp2;
-}
-
 char *msgtypes[] = {
"",
"RTM_ADD: Add Route",



Re: wg(4): encapsulated transport checksums

2020-06-29 Thread richard . n . procter
Hi Jason,

On 27/06/2020, at 10:09 PM, Jason A. Donenfeld  wrote:
> [...] I thought I'd lay out my understanding of the matter,
> and you can let me know whether or not this corresponds with what you  
> had in mind:
> [...]

My main concern is the end-to-end TCP or UDP transport checksum of the
tunneled (or inner, or encapsulated) packet. Your argument seems to
concern instead the per-hop IPv4 header checksum (which is also
interesting to look at, but first things first).

As I read it, wg_decap() tells the stack to ignore the transport checksum
of the tunneled packet, and I think this is incorrect for the following
reason: the packet may have been corrupted outside of the tunnel, because
the tunnel needn't cover the entire path the packet took through the
network.

So a tunneled packet may be corrupt, and one addressed to the tunnel
endpoint will be delivered with its end-to-end transport checksum
unverified. Higher layers may receive corrupt data as as result.[*]  

(Routers, as intermediate network elements, are under no obligation to
check end-to-end transport checksums. It isn't their job.)

One might argue (I do not) that this isn't a concern, because encryption
these days is ubiquitous and comes with far stronger integrity checks.
Nonetheless, even here transport checksums remain relevant for two
reasons. Firsly, because networks remain unreliable: I see non-zero
numbers of failed checksums on my systems. And, secondly, because higher
layers require a reliable transport: TLS for instance will drop the
connection when the MAC check fails[1].

Let's turn to the per-hop IPv4 checksum, which wg_decap() also tells the 
stack to ignore. I suspect the cost of verifying that checksum -- 20 bytes 
on a hot cache line -- is negligible both absolutely and relative to the 
cost of decrypting the packet. And as the optimisation appears nowhere 
else in the tree that I can find, removing it would make wg(4) no worse 
than the other tunnels. Do you have evidence that the cost is in fact 
significant?

Further, as Theo pointed out, the link layer has in any case no business 
with the IPv4 checksum, being part of the network layer. Layer violations 
are problematic for at least two reasons. Firstly, they constrict the 
design space. For instance, I suppose the IPv4 checksum needn't be 
per-hop. It might be updated incrementally, increasing its scope. But this 
would be nullified by any link layer that, acting on a false assumption 
about the layer above it, told the stack to ignore that layer's checksum. 
Secondly, layer violations, an optimisation, which almost by definition 
rely on additional premises, burden the correctness obligation and so 
trade less work for the computer in narrow circumstances for (often much) 
more work for the fallible human to show that the process remains sound.

On that balance I now think that skipping the IPv4 check a false economy,
too. Hopefully I have managed to make my reasons clearer this time around, 
please let me know if not, or if you disagree. 

See updated patch below, looking for oks to commit that.
 
cheers,
Richard.

[*] By way of background, I understand the transport checksum was added to
the early ARPANET primarily to guard against corruption inside its
routers[0], after a router fault which "[brought] the network to its
knees". In other words, end-to-end checks are necessary to catch faults in
the routers; link-level checks aren't enough.

More generally, transport checksums set a lower bound on the reliability
of the end-to-end transport, which decouples it from the reliability of  
the underlying network.

[0] Kleinrock "Queuing Systems, Volume 2: Computer Applications",
John Wiley and Sons (1976), p441

[1] RFC5246 TLS v1.2 (2008), section 7.2.2.

TLS's requirement is reasonable: it would otherwise be obliged to
reimplement for itself retransmission, which would drag with it other
transport layer mechanisms like acknowledgements and (for performance)
sequence numbers and sliding windows.

Index: net/if_wg.c
===
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.7
diff -u -p -u -p -r1.7 if_wg.c
--- net/if_wg.c 23 Jun 2020 10:03:49 -  1.7
+++ net/if_wg.c 28 Jun 2020 20:17:07 -
@@ -1660,14 +1660,8 @@ wg_decap(struct wg_softc *sc, struct mbu
goto error;
}
 
-   /*
-* We can mark incoming packet csum OK. We mark all flags OK
-* irrespective to the packet type.
-*/
-   m->m_pkthdr.csum_flags |= (M_IPV4_CSUM_IN_OK | M_TCP_CSUM_IN_OK |
-   M_UDP_CSUM_IN_OK | M_ICMP_CSUM_IN_OK);
-   m->m_pkthdr.csum_flags &= ~(M_IPV4_CSUM_IN_BAD | M_TCP_CSUM_IN_BAD |
-   M_UDP_CSUM_IN_BAD | M_ICMP_CSUM_IN_BAD);
+   /* tunneled packet was not offloaded */
+   m->m_pkthdr.csum_flags = 0;
 
m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;



Re: Stuck in Needbuf state, trying to understand (6.7)

2020-06-29 Thread sven falempin
On Mon, Jun 29, 2020 at 12:58 PM sven falempin 
wrote:

>
>
> On Mon, Jun 29, 2020 at 12:12 PM Bob Beck  wrote:
>
>>
>> > Awesome, thanks!
>> >
>> > I will test that, ASAP,
>> > do not hesitate to slay dragon,
>> > i heard the bathing in the blood pool is good for the skin
>> >
>> > Little concern, I did the test without the MFS and ran into issues ,
>> > anyway i get back to you (or list ?) when i have test report with
>> patched
>> > kernel
>>
>> Yes, howver, you didn't tell my what options you had on the filesystem
>> mounted
>> when you did the test without MFS, because it matters. If you had your
>> filesystem
>> mounted ASYNC it would have exhibited the same behavoir.  the issue is
>> due to the
>> async mount, which MFS does by default, not strictly to do with MFS.
>>
>>
> tmpfs was just not mounted so it was the option of the underlying /home
>
> /dev/sd0d on /home type ffs (local, nodev, nosuid)
>
> So far the above patch improve the situation drastically
>
> I will now perform a test in the original device.
>
>
>
It works in the original problematic setup.

Will it go to base ?

-- 
--
-
Knowing is not enough; we must apply. Willing is not enough; we must do


Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Mon, Jun 29, 2020 at 04:27:50PM +0200, Hrvoje Popovski wrote:
> On 29.6.2020. 10:59, Vitaliy Makkoveev wrote:
> > I reworked tool for reproduce. Now I avoided fork()/exec() route and it
> > takes couple of minutes to take panic on 4 cores. Also some screenshots
> > attached.
> > 
> > I hope anyone else will try it.
> 
> Hi,
> 
> i'm getting panic quite fast :)
> i will leave box in ddb if more information is needed
>

Thanks. Right now it takes seconds to catch panic at least with switch(4),
bridge(4), pflog(4), vether(4) and etherip(4). So you can leave ddb(4).

I like to someone will try my solution for this issue. And reviews are
welcomed :)

The latest diff below. If the `unit' was obtained it's guaranteed that
there is no pseudo interface with `name' is system. ifunit() now useless
here and can be dropped.


Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- sys/net/if.c22 Jun 2020 09:45:13 -  1.610
+++ sys/net/if.c29 Jun 2020 13:54:29 -
@@ -157,6 +157,8 @@ voidif_linkstate_task(void *);
 
 intif_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
+intif_clone_alloc_unit(struct if_clone *, int);
+void   if_clone_rele_unit(struct if_clone *, int);
 
 intif_group_egress_build(void);
 
@@ -1244,19 +1246,18 @@ if_clone_create(const char *name, int rd
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int unit, ret;
+   int unit, error;
 
ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
+   error = if_clone_alloc_unit(ifc, unit);
+   if (error != 0)
+   return (error);
 
-   if (ifunit(name) != NULL)
-   return (EEXIST);
-
-   ret = (*ifc->ifc_create)(ifc, unit);
-
-   if (ret != 0 || (ifp = ifunit(name)) == NULL)
-   return (ret);
+   error = (*ifc->ifc_create)(ifc, unit);
+   if (error != 0 || (ifp = ifunit(name)) == NULL)
+   return (error);
 
NET_LOCK();
if_addgroup(ifp, ifc->ifc_name);
@@ -1264,7 +1265,7 @@ if_clone_create(const char *name, int rd
if_setrdomain(ifp, rdomain);
NET_UNLOCK();
 
-   return (ret);
+   return (0);
 }
 
 /*
@@ -1275,9 +1276,9 @@ if_clone_destroy(const char *name)
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int ret;
+   int ret, unit;
 
-   ifc = if_clone_lookup(name, NULL);
+   ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
 
@@ -1297,6 +1298,7 @@ if_clone_destroy(const char *name)
}
NET_UNLOCK();
ret = (*ifc->ifc_destroy)(ifp);
+   if_clone_rele_unit(ifc, unit);
 
return (ret);
 }
@@ -1342,12 +1344,95 @@ if_clone_lookup(const char *name, int *u
unit = (unit * 10) + (*cp++ - '0');
}
 
-   if (unitp != NULL)
-   *unitp = unit;
+   *unitp = unit;
return (ifc);
 }
 
 /*
+ * Allocate unit for cloned network interface.
+ */
+int if_clone_alloc_unit(struct if_clone *ifc, int unit)
+{
+   int word, bit, ret;
+
+   word = unit / (sizeof(*ifc->ifc_map) * 8);
+   bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+   rw_enter_write(>ifc_lock);
+
+   if(word >= ifc->ifc_map_size) {
+   u_long *map;
+   int size;
+
+   size = word + 1;
+   map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK |
+   M_ZERO);
+
+   if (ifc->ifc_map != NULL) {
+   memcpy(map, ifc->ifc_map, ifc->ifc_map_size);
+   free(ifc->ifc_map, M_TEMP,
+   ifc->ifc_map_size * sizeof(*map));
+   }
+
+   ifc->ifc_map = map;
+   ifc->ifc_map_size = size;
+   }
+
+   if (ifc->ifc_map[word] & (1UL << bit))
+   ret = EEXIST;
+   else {
+   ifc->ifc_map[word] |= (1UL << bit);
+   ret = 0;
+   }
+
+   rw_exit_write(>ifc_lock);
+
+   return ret;
+}
+
+/*
+ * Release allocated unit for cloned network interface.
+ */
+void if_clone_rele_unit(struct if_clone *ifc, int unit)
+{
+   int word, bit;
+
+   word = unit / (sizeof(*ifc->ifc_map) * 8);
+   bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+   rw_enter_write(>ifc_lock);
+   KASSERT(word < ifc->ifc_map_size);
+
+   ifc->ifc_map[word] &= ~(1UL << bit);
+
+   if (ifc->ifc_map[word] == 0) {
+   u_long *map;
+   int size;
+
+   size = ifc->ifc_map_size - 2;
+   while (size>=0) {
+   if (ifc->ifc_map[size] != 0)
+   break;
+   --size;
+   }
+   if (size<0) {
+   size = 0;
+  

harmonize locking annotations

2020-06-29 Thread Anton Lindqvist
Hi,
I think we all agree that global locks should be represented using
uppercase letters in locking annotations. This is an attempt to
harmonize the existing annotations.

Comments? OK?

Index: dev/dt/dt_dev.c
===
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.7
diff -u -p -r1.7 dt_dev.c
--- dev/dt/dt_dev.c 27 Jun 2020 07:22:09 -  1.7
+++ dev/dt/dt_dev.c 29 Jun 2020 18:47:30 -
@@ -74,19 +74,19 @@
  *
  *  Locks used to protect struct members in this file:
  * m   per-softc mutex
- * k   kernel lock
+ * K   kernel lock
  */
 struct dt_softc {
-   SLIST_ENTRY(dt_softc)ds_next;   /* [k] descriptor list */
+   SLIST_ENTRY(dt_softc)ds_next;   /* [K] descriptor list */
int  ds_unit;   /* [I] D_CLONE unique unit */
pid_tds_pid;/* [I] PID of tracing program */
 
struct mutex ds_mtx;
 
-   struct dt_pcb_list   ds_pcbs;   /* [k] list of enabled PCBs */
-   struct dt_evt   *ds_bufqueue;   /* [k] copy evts to userland */
-   size_t   ds_bufqlen;/* [k] length of the queue */
-   int  ds_recording;  /* [k] currently recording? */
+   struct dt_pcb_list   ds_pcbs;   /* [K] list of enabled PCBs */
+   struct dt_evt   *ds_bufqueue;   /* [K] copy evts to userland */
+   size_t   ds_bufqlen;/* [K] length of the queue */
+   int  ds_recording;  /* [K] currently recording? */
int  ds_evtcnt; /* [m] # of readable evts */
 
/* Counters */
@@ -94,7 +94,7 @@ struct dt_softc {
uint64_t ds_dropevt;/* [m] # of events dropped */
 };
 
-SLIST_HEAD(, dt_softc) dtdev_list; /* [k] list of open /dev/dt nodes */
+SLIST_HEAD(, dt_softc) dtdev_list; /* [K] list of open /dev/dt nodes */
 
 /*
  * Probes are created during dt_attach() and never modified/freed during
@@ -104,7 +104,7 @@ unsigned intdt_nprobes; /* [I] 
# of p
 SIMPLEQ_HEAD(, dt_probe)   dt_probe_list;  /* [I] list of probes */
 
 struct rwlock  dt_lock = RWLOCK_INITIALIZER("dtlk");
-volatile uint32_t  dt_tracing = 0; /* [k] # of processes tracing */
+volatile uint32_t  dt_tracing = 0; /* [K] # of processes tracing */
 
 void   dtattach(struct device *, struct device *, void *);
 intdtopen(dev_t, int, int, struct proc *);
Index: dev/dt/dtvar.h
===
RCS file: /cvs/src/sys/dev/dt/dtvar.h,v
retrieving revision 1.3
diff -u -p -r1.3 dtvar.h
--- dev/dt/dtvar.h  28 Mar 2020 15:42:25 -  1.3
+++ dev/dt/dtvar.h  29 Jun 2020 18:47:30 -
@@ -159,14 +159,14 @@ int   dtioc_req_isvalid(struct dtioc_req 
  *
  *  Locks used to protect struct members in this file:
  * I   immutable after creation
- * k   kernel lock
- * k,s kernel lock for writting and SMR for reading
+ * K   kernel lock
+ * K,S kernel lock for writting and SMR for reading
  * m   per-pcb mutex
  * c   owned (read & modified) by a single CPU
  */
 struct dt_pcb {
-   SMR_SLIST_ENTRY(dt_pcb)  dp_pnext;  /* [k,s] next PCB per probe */
-   TAILQ_ENTRY(dt_pcb)  dp_snext;  /* [k] next PCB per softc */
+   SMR_SLIST_ENTRY(dt_pcb)  dp_pnext;  /* [K,S] next PCB per probe */
+   TAILQ_ENTRY(dt_pcb)  dp_snext;  /* [K] next PCB per softc */
 
/* Event states ring */
unsigned int dp_prod;   /* [m] read index */
@@ -203,18 +203,18 @@ void   dt_pcb_ring_consume(struct dt_pcb
  *
  *  Locks used to protect struct members in this file:
  * I   immutable after creation
- * k   kernel lock
- * d   dt_lock
- * d,s dt_lock for writting and SMR for reading
+ * K   kernel lock
+ * D   dt_lock
+ * D,S dt_lock for writting and SMR for reading
  */
 struct dt_probe {
-   SIMPLEQ_ENTRY(dt_probe)  dtp_next;  /* [k] global list of probes */
-   SMR_SLIST_HEAD(, dt_pcb) dtp_pcbs;  /* [d,s] list of enabled PCBs */
+   SIMPLEQ_ENTRY(dt_probe)  dtp_next;  /* [K] global list of probes */
+   SMR_SLIST_HEAD(, dt_pcb) dtp_pcbs;  /* [D,S] list of enabled PCBs */
struct dt_provider  *dtp_prov;  /* [I] its to provider */
const char  *dtp_func;  /* [I] probe function */
const char  *dtp_name;  /* [I] probe name */
uint32_t dtp_pbn;   /* [I] unique ID */
-   volatile uint32_tdtp_recording; /* [d] is it recording? */
+   volatile uint32_tdtp_recording; /* [D] is it recording? */
uint8_t  

Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote:
> On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote:
> > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote:
> > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote:
> > > > if_clone_create() has the races caused by context switch.
> > > 
> > > Can you share a backtrace of such race?  Where does the kernel panic?
> > >
> > 
> > This diff was inspired by thread [1]. As I explained [2] here is 3
> > issues that cause panics produced by command below:
> > 
> >  cut begin 
> > for i in 1 2 3; do while true; do ifconfig bridge0 create& \
> > ifconfig bridge0 destroy& done& done
> >  cut end 
> 
> Thanks, I couldn't reproduce it on any of the machines I tried.  Did you
> managed to reproduce it with other pseudo-devices or just with bridge0?
> 
> > My system was stable with the last diff I did for thread [1]. But since
> > this final diff [3] which include fixes for tun(4) is quick and dirty
> > and not for commit I decided to make the diff to fix the races caused by
> > if_clone_create() at first.
> > 
> > I included screenshot with panic.
> 
> Thanks, interesting that the corruption happens on a list that should be
> initialized.  Does that mean the context switch on Thread 1 is happening
> before if_attach_common() is called?
> 
> You said your previous email that there's a context switch.  Do you know
> when it happens?  You could see that in ddb by looking at the backtrace
> of the other CPU.
> 
> Is the context switch leading to the race common to all pseudo-drivers
> or is it in the bridge(4) driver?
> 
> Regarding your solution, do I understand correctly that the goal is to
> serialize all if_clone_create()?  Is it really needed to remember which
> unit is being currently created or can't we just serialize all of them?
> 
> The fact that a lock is not held over the cloning operation is imho
> positive.
> 

I reworked tool for reproduce. Now I avoided fork()/exec() route and it
takes couple of minutes to take panic on 4 cores. Also some screenshots
attached.

I hope anyone else will try it.

 cut begin 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct ifreq ifr;

static void *clone_create(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFCREATE, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

static void *clone_destroy(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFDESTROY, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t thr;
int i;

if(argc!=2){
fprintf(stderr, "usage: %s ifname\n", getprogname());
return 1;
}

if(getuid()!=0){
fprintf(stderr, "should be root\n");
return 1;
}

memset(, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

for(i=0; i<8*4; ++i){
if(pthread_create(, NULL, clone_create, NULL)!=0)
errx(1, "pthread_create(clone_create)");
}

clone_destroy(NULL);

return 0;
}

 cut end 


Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
screenshot


Re: route add ::/0 ...

2020-06-29 Thread Klemens Nanni
On Mon, Jun 29, 2020 at 11:55:10PM +0900, YASUOKA Masahiko wrote:
> The function mask_addr() doesn't mask address for IPv4 and IPv6.  Does
> any address family other than IPv4 or IPv6 require #1142:1148?  The
> function seems to just trim the trailing zero.  Is it neccesaary?  And
> it causes the confusion on the kernel.  How about deleting
> mask_addr()?
> 
> The diff following also fixes the problem.
Removing it breaks IPv4 default routes:

# ifconfig lo1 rdomain 1 127.1.1.1
# ./obj/route -nT1 add 0.0.0.0/0 127.1.1.1
add net 0.0.0.0/0: gateway 127.1.1.1: Invalid argument
# route -nT1 add 0.0.0.0/0 127.1.1.1  
add net 0.0.0.0/0: gateway 127.1.1.1



Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote:
> On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote:
> > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote:
> > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote:
> > > > if_clone_create() has the races caused by context switch.
> > > 
> > > Can you share a backtrace of such race?  Where does the kernel panic?
> > >
> > 
> > This diff was inspired by thread [1]. As I explained [2] here is 3
> > issues that cause panics produced by command below:
> > 
> >  cut begin 
> > for i in 1 2 3; do while true; do ifconfig bridge0 create& \
> > ifconfig bridge0 destroy& done& done
> >  cut end 
> 
> Thanks, I couldn't reproduce it on any of the machines I tried.  Did you
> managed to reproduce it with other pseudo-devices or just with bridge0?
> 
> > My system was stable with the last diff I did for thread [1]. But since
> > this final diff [3] which include fixes for tun(4) is quick and dirty
> > and not for commit I decided to make the diff to fix the races caused by
> > if_clone_create() at first.
> > 
> > I included screenshot with panic.
> 
> Thanks, interesting that the corruption happens on a list that should be
> initialized.  Does that mean the context switch on Thread 1 is happening
> before if_attach_common() is called?
> 
> You said your previous email that there's a context switch.  Do you know
> when it happens?  You could see that in ddb by looking at the backtrace
> of the other CPU.
> 
> Is the context switch leading to the race common to all pseudo-drivers
> or is it in the bridge(4) driver?
> 
> Regarding your solution, do I understand correctly that the goal is to
> serialize all if_clone_create()?  Is it really needed to remember which
> unit is being currently created or can't we just serialize all of them?
> 
> The fact that a lock is not held over the cloning operation is imho
> positive.
> 

I reworked tool for reproduce. Now I avoided fork()/exec() route and it
takes couple of minutes to take panic on 4 cores. I attached some
screenshots with panics caused by various pseudo-interfaces but my
previous mail was banned. I will try to attach them with separate mails.

I hope anyone else will try it. Now switch(4) is the bast way to
reproduce.

 cut begin 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct ifreq ifr;

static void *clone_create(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFCREATE, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

static void *clone_destroy(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFDESTROY, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t thr;
int i;

if(argc!=2){
fprintf(stderr, "usage: %s ifname\n", getprogname());
return 1;
}

if(getuid()!=0){
fprintf(stderr, "should be root\n");
return 1;
}

memset(, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

for(i=0; i<8*4; ++i){
if(pthread_create(, NULL, clone_create, NULL)!=0)
errx(1, "pthread_create(clone_create)");
}

clone_destroy(NULL);

return 0;
}

 cut end 



Re: Stuck in Needbuf state, trying to understand (6.7)

2020-06-29 Thread sven falempin
On Mon, Jun 29, 2020 at 12:12 PM Bob Beck  wrote:

>
> > Awesome, thanks!
> >
> > I will test that, ASAP,
> > do not hesitate to slay dragon,
> > i heard the bathing in the blood pool is good for the skin
> >
> > Little concern, I did the test without the MFS and ran into issues ,
> > anyway i get back to you (or list ?) when i have test report with patched
> > kernel
>
> Yes, howver, you didn't tell my what options you had on the filesystem
> mounted
> when you did the test without MFS, because it matters. If you had your
> filesystem
> mounted ASYNC it would have exhibited the same behavoir.  the issue is due
> to the
> async mount, which MFS does by default, not strictly to do with MFS.
>
>
tmpfs was just not mounted so it was the option of the underlying /home

/dev/sd0d on /home type ffs (local, nodev, nosuid)

So far the above patch improve the situation drastically

I will now perform a test in the original device.


Use klist_invalidate() in knote_processexit()

2020-06-29 Thread Visa Hankala
knote_processexit() uses knote_remove() to clear any remaining knotes
from >ps_klist when process `pr' exits. All EVFILT_PROC knotes are
removed by the KNOTE(>ps_klist, NOTE_EXIT) call. However, ps_klist
can contain EVFILT_SIGNAL knotes after the KNOTE().

To reserve knote_remove() for the kqueue subsystem's internal use, the
diff below makes knote_processexit() use klist_invalidate() instead.
For now, knote_remove() and klist_invalidate() are quite similar.
However, more differences will arise with fine-grained locking, and
knote_remove() will no longer be appropriate for knote_processexit().

To keep the existing behaviour with EVFILT_SIGNAL knotes, the diff
adjusts klist_invalidate() to drop non-fd-based knotes instead of
activating them with an EOF condition. This overloading of FILTEROP_ISFD
should be fine because EVFILT_PROC, EVFILT_SIGNAL and EVFILT_TIMER
are the only filter types that are not fd-based; EVFILT_PROC and
EVFILT_SIGNAL use the same knote list, whereas EVFILT_TIMER does not
use a knote list. The existing uses of klist_invalidate() should not
be affected.

OK?

Index: kern/kern_event.c
===
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.140
diff -u -p -r1.140 kern_event.c
--- kern/kern_event.c   22 Jun 2020 13:14:32 -  1.140
+++ kern/kern_event.c   29 Jun 2020 16:10:14 -
@@ -1336,10 +1336,12 @@ knote_processexit(struct proc *p)
 {
struct process *pr = p->p_p;
 
+   KASSERT(p == curproc);
+
KNOTE(>ps_klist, NOTE_EXIT);
 
/* remove other knotes hanging off the process */
-   knote_remove(p, >ps_klist.kl_list);
+   klist_invalidate(>ps_klist);
 }
 
 void
@@ -1446,6 +1448,7 @@ void
 klist_invalidate(struct klist *list)
 {
struct knote *kn;
+   struct proc *p = curproc;
int s;
 
/*
@@ -1460,10 +1463,15 @@ klist_invalidate(struct klist *list)
continue;
splx(s);
kn->kn_fop->f_detach(kn);
-   kn->kn_fop = _filtops;
-   knote_activate(kn);
-   s = splhigh();
-   knote_release(kn);
+   if (kn->kn_fop->f_flags & FILTEROP_ISFD) {
+   kn->kn_fop = _filtops;
+   knote_activate(kn);
+   s = splhigh();
+   knote_release(kn);
+   } else {
+   knote_drop(kn, p);
+   s = splhigh();
+   }
}
splx(s);
 }



Re: Stuck in Needbuf state, trying to understand (6.7)

2020-06-29 Thread Bob Beck


> Awesome, thanks!
> 
> I will test that, ASAP,
> do not hesitate to slay dragon,
> i heard the bathing in the blood pool is good for the skin
> 
> Little concern, I did the test without the MFS and ran into issues ,
> anyway i get back to you (or list ?) when i have test report with patched
> kernel

Yes, howver, you didn't tell my what options you had on the filesystem mounted
when you did the test without MFS, because it matters. If you had your 
filesystem
mounted ASYNC it would have exhibited the same behavoir.  the issue is due to 
the
async mount, which MFS does by default, not strictly to do with MFS. 


> 
> Again thanks for helping.
> 
> -- 
> --
> -
> Knowing is not enough; we must apply. Willing is not enough; we must do



Re: Stuck in Needbuf state, trying to understand (6.7)

2020-06-29 Thread sven falempin
On Mon, Jun 29, 2020 at 11:44 AM Bob Beck  wrote:

> On Sun, Jun 28, 2020 at 12:18:06PM -0400, sven falempin wrote:
> > On Sun, Jun 28, 2020 at 2:40 AM Bryan Linton  wrote:
> >
> > > On 2020-06-27 19:29:31, Bob Beck  wrote:
> > > >
> > > > No.
> > > >
> > > > I know *exactly* what needbuf is but to attempt to diagnose what your
> > > > problem is we need exact details. especially:
> > > >
> > > > 1) The configuration of your system including all the details of the
> > > filesystems
> > > > you have mounted, all options used, etc.
> > > >
> > > > 2) The script you are using to generate the problem (Not a
> paraphrasing
> > > of what
> > > > you think the script does) What filesystems it is using.
> > > >
> > >
> > > Not the OP, but this problem sounds almost exactly like the bug I
> > > reported last year.
> > >
> > > There is a detailed list of steps I used to reproduce the bug in
> > > the following bug report.
> > >
> > > https://marc.info/?l=openbsd-bugs=156412299418191
> > >
> > > I was even able to bisect and identify the commit which first
> > > caused the breakage for me.
> > >
> > >
> > > ---8<---
> > >
> > > CVSROOT:/cvs
> > > Module name:src
> > > Changes by: b...@cvs.openbsd.org2019/05/08 06:40:57
> > >
> > > Modified files:
> > > sys/kern   : vfs_bio.c vfs_biomem.c
> > >
> > > Log message:
> > > Modify the buffer cache to always flip recovered DMA buffers high.
> > >
> > > This also modifies the backoff logic to only back off what is requested
> > > and not a "mimimum" amount. Tested by me, benno@, tedu@ anda ports
> build
> > > by naddy@.
> > >
> > > ok tedu@
> > >
> > > ---8<---
> > >
> > > However, I have since migrated away from using vnd(4)s since I was
> > > able to find other solutions that worked for my use cases.  So I
> > > may not be able to provide much additional information other than
> > > what is contained in the above bug report.
> > >
> > > --
> > > Bryan
> > >
> > > >
> > > >
> > >
> > >
> > Reproduction of BUG.
> >
> >
> > # optional
> > mkdir /tmpfs
> > mount_mfs -o rw -s 2500M swap /tmpfs # i mounted through fstab so this
> line
> > is not tested
> > #the bug
> > /bin/dd if=/dev/zero of=/tmpfs/img.dd count=0 bs=1 seek=25
> > vnconfig vnd3 /tmpfs/img.dd
> > printf "a a\n\n\n\nw\nq\n" | disklabel -E vnd3
> > newfs vnd3a
> > mount /dev/vnd3a /mnt
> > cd /tmp && ftp https://cdn.openbsd.org/pub/OpenBSD/6.7/amd64/base67.tgz
> > cd /mnt
> > #will occur here (the mkdir was ub beedbuf state for a while ...
> > for v in 1 2 3 4 5 6 7 8 9; do mkdir /tmp/$v; tar xzvf /tmp/base67.tgz -C
> > /mnt/$v; done
> >
> > Ready to test patches.
> >
> >
>
> So, your problem is that you have your vnd created in an mfs
> filesystem, when I run your test with the vnd backed by a regular
> filesystem (withe softdep even) it works fine.
>
> The trouble happens when your VND has buffers cached in it's
> "filesystem" but then is not flushing them out to the underlyin file
> (vnode) that you have your vnd backed by.  On normal filesystems this
> works fine, since vnd tells the lower layer to not cache the writes
> and to do them syncrhonously, to avoid an explosion of delayed writes
> and dependencies of buffers.
>
> The problem happens when we convert syncryonous bwrites to
> asynchronous bdwrites if the fileystem is mounted ASYNC, which,
> curiously, MFS always is (I don't know why, it doesn't really make any
> sense, and I might even look at changing that) All the writes you do
> end up being delayed anc chewing up more buffer space. And they are
> all tied to one vnode (your image).  once you exhaust the buffer
> space, the cleaner runs, but as you have noticed can't clean out your
> vnode until the syncer runs (every 60 seconds).  This is why your
> thing "takes a long time", and things stall in need buffer. softdep
> has deep dark voodoo in it to avoid this problem and therefore when I
> use a softdep filesystem instead of an ASYNC filesystem it works.
>
> Anyway, what's below fixes your issue on my machine. I'm not sure I'm
> happy that it's the final fix but it does fix it. there are many other
> dragons lurking in here.
>
> Index: sys/kern/vfs_bio.c
> ===
> RCS file: /cvs/src/sys/kern/vfs_bio.c,v
> retrieving revision 1.200
> diff -u -p -u -p -r1.200 vfs_bio.c
> --- sys/kern/vfs_bio.c  29 Apr 2020 02:25:48 -  1.200
> +++ sys/kern/vfs_bio.c  29 Jun 2020 15:18:21 -
> @@ -706,8 +706,14 @@ bwrite(struct buf *bp)
>  */
> async = ISSET(bp->b_flags, B_ASYNC);
> if (!async && mp && ISSET(mp->mnt_flag, MNT_ASYNC)) {
> -   bdwrite(bp);
> -   return (0);
> +   /*
> +* Don't convert writes from VND on async filesystems
> +* that already have delayed writes in the upper layer.
> +*/
> +   if (!ISSET(bp->b_flags, B_NOCACHE)) {
> +   bdwrite(bp);
> + 

Re: Stuck in Needbuf state, trying to understand (6.7)

2020-06-29 Thread Bob Beck
On Sun, Jun 28, 2020 at 12:18:06PM -0400, sven falempin wrote:
> On Sun, Jun 28, 2020 at 2:40 AM Bryan Linton  wrote:
> 
> > On 2020-06-27 19:29:31, Bob Beck  wrote:
> > >
> > > No.
> > >
> > > I know *exactly* what needbuf is but to attempt to diagnose what your
> > > problem is we need exact details. especially:
> > >
> > > 1) The configuration of your system including all the details of the
> > filesystems
> > > you have mounted, all options used, etc.
> > >
> > > 2) The script you are using to generate the problem (Not a paraphrasing
> > of what
> > > you think the script does) What filesystems it is using.
> > >
> >
> > Not the OP, but this problem sounds almost exactly like the bug I
> > reported last year.
> >
> > There is a detailed list of steps I used to reproduce the bug in
> > the following bug report.
> >
> > https://marc.info/?l=openbsd-bugs=156412299418191
> >
> > I was even able to bisect and identify the commit which first
> > caused the breakage for me.
> >
> >
> > ---8<---
> >
> > CVSROOT:/cvs
> > Module name:src
> > Changes by: b...@cvs.openbsd.org2019/05/08 06:40:57
> >
> > Modified files:
> > sys/kern   : vfs_bio.c vfs_biomem.c
> >
> > Log message:
> > Modify the buffer cache to always flip recovered DMA buffers high.
> >
> > This also modifies the backoff logic to only back off what is requested
> > and not a "mimimum" amount. Tested by me, benno@, tedu@ anda ports build
> > by naddy@.
> >
> > ok tedu@
> >
> > ---8<---
> >
> > However, I have since migrated away from using vnd(4)s since I was
> > able to find other solutions that worked for my use cases.  So I
> > may not be able to provide much additional information other than
> > what is contained in the above bug report.
> >
> > --
> > Bryan
> >
> > >
> > >
> >
> >
> Reproduction of BUG.
> 
> 
> # optional
> mkdir /tmpfs
> mount_mfs -o rw -s 2500M swap /tmpfs # i mounted through fstab so this line
> is not tested
> #the bug
> /bin/dd if=/dev/zero of=/tmpfs/img.dd count=0 bs=1 seek=25
> vnconfig vnd3 /tmpfs/img.dd
> printf "a a\n\n\n\nw\nq\n" | disklabel -E vnd3
> newfs vnd3a
> mount /dev/vnd3a /mnt
> cd /tmp && ftp https://cdn.openbsd.org/pub/OpenBSD/6.7/amd64/base67.tgz
> cd /mnt
> #will occur here (the mkdir was ub beedbuf state for a while ...
> for v in 1 2 3 4 5 6 7 8 9; do mkdir /tmp/$v; tar xzvf /tmp/base67.tgz -C
> /mnt/$v; done
> 
> Ready to test patches.
> 
> 

So, your problem is that you have your vnd created in an mfs
filesystem, when I run your test with the vnd backed by a regular
filesystem (withe softdep even) it works fine. 

The trouble happens when your VND has buffers cached in it's
"filesystem" but then is not flushing them out to the underlyin file
(vnode) that you have your vnd backed by.  On normal filesystems this
works fine, since vnd tells the lower layer to not cache the writes
and to do them syncrhonously, to avoid an explosion of delayed writes
and dependencies of buffers. 

The problem happens when we convert syncryonous bwrites to
asynchronous bdwrites if the fileystem is mounted ASYNC, which,
curiously, MFS always is (I don't know why, it doesn't really make any
sense, and I might even look at changing that) All the writes you do
end up being delayed anc chewing up more buffer space. And they are
all tied to one vnode (your image).  once you exhaust the buffer
space, the cleaner runs, but as you have noticed can't clean out your
vnode until the syncer runs (every 60 seconds).  This is why your
thing "takes a long time", and things stall in need buffer. softdep
has deep dark voodoo in it to avoid this problem and therefore when I
use a softdep filesystem instead of an ASYNC filesystem it works. 

Anyway, what's below fixes your issue on my machine. I'm not sure I'm
happy that it's the final fix but it does fix it. there are many other
dragons lurking in here.

Index: sys/kern/vfs_bio.c
===
RCS file: /cvs/src/sys/kern/vfs_bio.c,v
retrieving revision 1.200
diff -u -p -u -p -r1.200 vfs_bio.c
--- sys/kern/vfs_bio.c  29 Apr 2020 02:25:48 -  1.200
+++ sys/kern/vfs_bio.c  29 Jun 2020 15:18:21 -
@@ -706,8 +706,14 @@ bwrite(struct buf *bp)
 */
async = ISSET(bp->b_flags, B_ASYNC);
if (!async && mp && ISSET(mp->mnt_flag, MNT_ASYNC)) {
-   bdwrite(bp);
-   return (0);
+   /*
+* Don't convert writes from VND on async filesystems
+* that already have delayed writes in the upper layer.
+*/
+   if (!ISSET(bp->b_flags, B_NOCACHE)) {
+   bdwrite(bp);
+   return (0);
+   }
}
 
/*



Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
On Mon, 29 Jun 2020 18:45:07 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Mon, 29 Jun 2020 10:12:23 +0200
> Martin Pieuchot  wrote:
>> On 28/06/20(Sun) 20:41, YASUOKA Masahiko wrote:
>>> Hi,
>>> 
>>> When "::/0" is used as "default",
>>> 
>>>   # route add ::/0 fe80::1%em0
>>>   add net ::/0: gateway fe80::1%em0: Invalid argument
>>> 
>>> route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
>>> for "::/0", but rtable_satoplen() refuses it.  I think it should be
>>> accepted.
>> 
>> rtable_satoplen() is used in many places, not just in the socket parsing
>> code used by route(8).  I don't know what side effects can be introduced
>> by this change.
>> 
>> Why is IPv6 different from IPv4 when it comes to the default route?
> 
> Diferent functions are used.  route(8) uses inet_makenetandmask() to
> create a sockaddr for IPv4 prefix length and uses prefixlen() for IPv6
> prefix length.  "/0" results:
> 
> IPv4
>   { .len = 1, .family = 0, ... }
> IPv6 
>   { .len = 2, .family = AF_INET6, ... }

I'm sorry this is not correct.  It is actually

IPv6 
  { .len = 28, .family = AF_INET6, ... }

> Next, route(8) uses mask_addr() to trim the tailing zeros.
> 
> 1129 void
> 1130 mask_addr(union sockunion *addr, union sockunion *mask, int which)
> 1131 {
> 1132 int olen = mask->sa.sa_len;
> 1133 char *cp1 = olen + (char *)mask, *cp2;
> 1134 
> 1135 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
> 1136 if (*--cp1 != '\0') {
> 1137 mask->sa.sa_len = 1 + cp1 - (char *)mask;
> 1138 break;
> 1139 }
> 
> See #1135 carefully.  As the results, the sockaddrs become:
> 
> IPv4
>   { .len = 0, .family = 0, ... }
> IPv6
>   { .len = 2, .family = AF_INET6, ... }

I'm start wondering what the mask_addr() is for.

   1123 void
   1124 mask_addr(union sockunion *addr, union sockunion *mask, int which)
   1125 {
   1126 int olen = mask->sa.sa_len;
   1127 char *cp1 = olen + (char *)mask, *cp2;
   1128 
   1129 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
   1130 if (*--cp1 != '\0') {
   1131 mask->sa.sa_len = 1 + cp1 - (char *)mask;
   1132 break;
   1133 }
   1134 if ((rtm_addrs & which) == 0)
   1135 return;
   1136 switch (addr->sa.sa_family) {
   1137 case AF_INET:
   1138 case AF_INET6:
   1139 case AF_UNSPEC:
   1140 return;
   1141 }
   1142 cp1 = mask->sa.sa_len + 1 + (char *)addr;
   1143 cp2 = addr->sa.sa_len + 1 + (char *)addr;
   1144 while (cp2 > cp1)
   1145 *--cp2 = '\0';
   1146 cp2 = mask->sa.sa_len + 1 + (char *)mask;
   1147 while (cp1 > addr->sa.sa_data)
   1148 *--cp1 &= *--cp2;
   1149 }

The function mask_addr() doesn't mask address for IPv4 and IPv6.  Does
any address family other than IPv4 or IPv6 require #1142:1148?  The
function seems to just trim the trailing zero.  Is it neccesaary?  And
it causes the confusion on the kernel.  How about deleting
mask_addr()?

The diff following also fixes the problem.

diff --git a/sbin/route/route.c b/sbin/route/route.c
index 9e43d8e89b6..87f26e5c1e7 100644
--- a/sbin/route/route.c
+++ b/sbin/route/route.c
@@ -107,7 +107,6 @@ void print_rtmsg(struct rt_msghdr *, int);
 voidpmsg_common(struct rt_msghdr *);
 voidpmsg_addrs(char *, int);
 voidbprintf(FILE *, int, char *);
-voidmask_addr(union sockunion *, union sockunion *, int);
 int getaddr(int, int, char *, struct hostent **);
 voidgetmplslabel(char *, int);
 int rtmsg(int, int, int, uint8_t);
@@ -1088,8 +1087,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
rtm.rtm_mpls = mpls_flags;
rtm.rtm_hdrlen = sizeof(rtm);
 
-   if (rtm_addrs & RTA_NETMASK)
-   mask_addr(_dst, _mask, RTA_DST);
/* store addresses in ascending order of RTA values */
NEXTADDR(RTA_DST, so_dst);
NEXTADDR(RTA_GATEWAY, so_gate);
@@ -1120,34 +1117,6 @@ rtmsg(int cmd, int flags, int fmask, uint8_t prio)
return (0);
 }
 
-void
-mask_addr(union sockunion *addr, union sockunion *mask, int which)
-{
-   int olen = mask->sa.sa_len;
-   char *cp1 = olen + (char *)mask, *cp2;
-
-   for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
-   if (*--cp1 != '\0') {
-   mask->sa.sa_len = 1 + cp1 - (char *)mask;
-   break;
-   }
-   if ((rtm_addrs & which) == 0)
-   return;
-   switch (addr->sa.sa_family) {
-   case AF_INET:
-   case AF_INET6:
-   case AF_UNSPEC:
-   return;
-   }
-   cp1 = mask->sa.sa_len + 1 + (char *)addr;
-   cp2 = addr->sa.sa_len + 1 + (char *)addr;
-   while (cp2 > cp1)
-   *--cp2 = '\0';
-   cp2 = mask->sa.sa_len + 1 + (char 

Re: fix races in if_clone_create()

2020-06-29 Thread Hrvoje Popovski
On 29.6.2020. 10:59, Vitaliy Makkoveev wrote:
> I reworked tool for reproduce. Now I avoided fork()/exec() route and it
> takes couple of minutes to take panic on 4 cores. Also some screenshots
> attached.
> 
> I hope anyone else will try it.

Hi,

i'm getting panic quite fast :)
i will leave box in ddb if more information is needed

r620-1# ./a.out bridge0
panic: kernel diagnostic assertion "TAILQ_EMPTY(>if_addrhooks)"
failed: file "/sys/net/if.c", line 1168
Stopped at  db_enter+0x10:  popq%rbp
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
 475311   7753   1000 0x3  00  ifconfig
*128110   3280  0 0x3  01K a.out
  86419   3280  0 0x3  0x4004  a.out
 352360   3280  0 0x3  0x4003  a.out
 309715   3280  0 0x3  0x4005  a.out
 268210   3280  0 0x3  0x4002  a.out
db_enter() at db_enter+0x10
panic(81df42d3) at panic+0x128
__assert(81e5d55e,81e5b1fa,490,81e408d9) at
__assert+0x2b
if_detach(81169000) at if_detach+0x45f
bridge_clone_destroy(81169000) at bridge_clone_destroy+0x17b
ifioctl(fd839209c828,80206979,8000248fa980,800024902618) at
ifioctl+0x1c2
soo_ioctl(fd83b04b34c8,80206979,8000248fa980,800024902618)
at soo_ioctl+0x171
sys_ioctl(800024902618,8000248faa90,8000248faaf0) at
sys_ioctl+0x2df
syscall(8000248fab60) at syscall+0x389
Xsyscall() at Xsyscall+0x128
end of kernel
end trace frame: 0x7f7d3600, count: 5
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb{1}>



Re: 11n Tx aggregation for iwm(4)

2020-06-29 Thread Sebastian Benoit
Sebastian Benoit(be...@openbsd.org) on 2020.06.29 16:18:03 +0200:
> Stefan Sperling(s...@stsp.name) on 2020.06.26 14:45:53 +0200:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> > 
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> > 
> > If Tx aggregation is active then netstat will show a non-zero output block 
> > ack
> > agreement counter:
> > 
> > $ netstat -W iwm0 | grep 'output block'
> > 3 new output block ack agreements
> > 0 output block ack agreements timed out
> 
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
> iwm0: hw rev 0x230, fw ver 34.0.1, address 9c:da:3e:6f:51:a4
> 
> With intel firmware updated: intel-firmware-20200520v0->20200609v0: ok

He, i meant to say with "iwm-firmware-20191022p1" as before, but copied the
wrong line and then wrote the correct thing for it :)



Re: 11n Tx aggregation for iwm(4)

2020-06-29 Thread Sebastian Benoit
Stefan Sperling(s...@stsp.name) on 2020.06.26 14:45:53 +0200:
> This patch adds support for 11n Tx aggregation to iwm(4).
> 
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
> 
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
> 
> $ netstat -W iwm0 | grep 'output block'
> 3 new output block ack agreements
>   0 output block ack agreements timed out

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
iwm0: hw rev 0x230, fw ver 34.0.1, address 9c:da:3e:6f:51:a4

With intel firmware updated: intel-firmware-20200520v0->20200609v0: ok

With the patch i get a speedup from ca 25 MBit/s to ca. 35 MBit/s.

However i do not see any 'new output block ack agreements'.

0 new output block ack agreements
0 output block ack agreements timed out

This is with two TP-Link APs (Archer A7 or something like that).

/Benno



Re: fsck_ffs: faster with lots of cylinder groups

2020-06-29 Thread Otto Moerbeek
On Sun, Jun 21, 2020 at 03:35:21PM +0200, Otto Moerbeek wrote:

> Hi,
> 
> both phase 1 and phase 5 need cylinder group metadata.  This diff
> keeps the cg data read in phase 1 in memory to be used by phase 5 if
> possible. From FreeBSD. 
> 
>   -Otto
> 
> On an empty 30T fileystem:
> 
> $ time obj/fsck_ffs -f /dev/sd3a
> 2m44.10s real 0m13.21s user 0m07.38s system
> 
> $ time doas obj/fsck_ffs -f /dev/sd3a
> 1m32.81s real 0m12.86s user 0m05.25s system
> 
> The difference will be less if a fileystem is filled up, but still nice.

Any takers?

-Otto

> 
> Index: fsck.h
> ===
> RCS file: /cvs/src/sbin/fsck_ffs/fsck.h,v
> retrieving revision 1.32
> diff -u -p -r1.32 fsck.h
> --- fsck.h5 Jan 2018 09:33:47 -   1.32
> +++ fsck.h21 Jun 2020 12:48:50 -
> @@ -136,7 +136,6 @@ struct bufarea {
>  struct bufarea bufhead;  /* head of list of other blks in 
> filesys */
>  struct bufarea sblk; /* file system superblock */
>  struct bufarea asblk;/* alternate file system superblock */
> -struct bufarea cgblk;/* cylinder group blocks */
>  struct bufarea *pdirbp;  /* current directory contents */
>  struct bufarea *pbp; /* current inode block */
>  struct bufarea *getdatablk(daddr_t, long);
> @@ -148,9 +147,7 @@ struct bufarea *getdatablk(daddr_t, long
>   (bp)->b_flags = 0;
>  
>  #define  sbdirty()   sblk.b_dirty = 1
> -#define  cgdirty()   cgblk.b_dirty = 1
>  #define  sblock  (*sblk.b_un.b_fs)
> -#define  cgrp(*cgblk.b_un.b_cg)
>  
>  enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE};
>  
> @@ -275,9 +272,13 @@ struct ufs2_dinode ufs2_zino;
>  #define  FOUND   0x10
>  
>  union dinode *ginode(ino_t);
> +struct bufarea *cglookup(u_int cg);
>  struct inoinfo *getinoinfo(ino_t);
>  void getblk(struct bufarea *, daddr_t, long);
>  ino_t allocino(ino_t, int);
> +void *Malloc(size_t);
> +void *Calloc(size_t, size_t);
> +void *Reallocarray(void *, size_t, size_t);
>  
>  int  (*info_fn)(char *, size_t);
>  char *info_filesys;
> Index: inode.c
> ===
> RCS file: /cvs/src/sbin/fsck_ffs/inode.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 inode.c
> --- inode.c   16 Sep 2018 02:43:11 -  1.49
> +++ inode.c   21 Jun 2020 12:48:50 -
> @@ -370,7 +370,7 @@ setinodebuf(ino_t inum)
>   partialsize = inobufsize;
>   }
>   if (inodebuf == NULL &&
> - (inodebuf = malloc((unsigned)inobufsize)) == NULL)
> + (inodebuf = Malloc((unsigned)inobufsize)) == NULL)
>   errexit("Cannot allocate space for inode buffer\n");
>  }
>  
> @@ -401,7 +401,7 @@ cacheino(union dinode *dp, ino_t inumber
>   blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
>   if (blks > NDADDR)
>   blks = NDADDR + NIADDR;
> - inp = malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
> + inp = Malloc(sizeof(*inp) + (blks ? blks - 1 : 0) * sizeof(daddr_t));
>   if (inp == NULL)
>   errexit("cannot allocate memory for inode cache\n");
>   inpp = [inumber % numdirs];
> @@ -423,10 +423,10 @@ cacheino(union dinode *dp, ino_t inumber
>   inp->i_blks[NDADDR + i] = DIP(dp, di_ib[i]);
>   if (inplast == listmax) {
>   newlistmax = listmax + 100;
> - newinpsort = reallocarray(inpsort,
> + newinpsort = Reallocarray(inpsort,
>   (unsigned)newlistmax, sizeof(struct inoinfo *));
>   if (newinpsort == NULL)
> - errexit("cannot increase directory list");
> + errexit("cannot increase directory list\n");
>   inpsort = newinpsort;
>   listmax = newlistmax;
>   }
> @@ -582,7 +582,8 @@ allocino(ino_t request, int type)
>  {
>   ino_t ino;
>   union dinode *dp;
> - struct cg *cgp = 
> + struct bufarea *cgbp;
> + struct cg *cgp;
>   int cg;
>   time_t t;
>   struct inostat *info;
> @@ -602,7 +603,7 @@ allocino(ino_t request, int type)
>   unsigned long newalloced, i;
>   newalloced = MINIMUM(sblock.fs_ipg,
>   MAXIMUM(2 * inostathead[cg].il_numalloced, 10));
> - info = calloc(newalloced, sizeof(struct inostat));
> + info = Calloc(newalloced, sizeof(struct inostat));
>   if (info == NULL) {
>   pwarn("cannot alloc %zu bytes to extend inoinfo\n",
>   sizeof(struct inostat) * newalloced);
> @@ -619,7 +620,8 @@ allocino(ino_t request, int type)
>   inostathead[cg].il_numalloced = newalloced;
>   info = inoinfo(ino);
>   }
> - getblk(, cgtod(, cg), sblock.fs_cgsize);
> + cgbp = cglookup(cg);
> + cgp = cgbp->b_un.b_cg;
>   if 

Re: 11n Tx aggregation for iwm(4)

2020-06-29 Thread Uwe Werler
Woahhh,

was also trying 5GHz (and tcpbench against one of our bsd servers in DMZ):

469651560 bytes sent over 85.291 seconds
bandwidth min/avg/max/std-dev = 3.475/43.927/87.071/29.809 Mbps


6 new output block ack agreements
0 output block ack agreements timed out

(Tomorrow @work I will test against our new APs. My AP @home is a Technicolor 
MediaAccess TG789vac).

mbk Uwe

On 29 Jun 09:48, Uwe Werler wrote:
> Hi Stefan,
> 
> for me the patch works in mode 11n:
> 
> before (OpenBSD 6.7-current (GENERIC.MP) #304: Fri Jun 26 02:08:50 MDT 2020)
> bandwidth min/avg/max/std-dev = 2.354/12.319/15.391/3.850 Mbps
> 
> with patch (OpenBSD 6.7-current (GENERIC.MP) #0: Mon Jun 29 09:35:24 GMT 2020)
> bandwidth min/avg/max/std-dev = 12.174/31.411/57.746/15.154 Mbps
> 
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
> iwm0: hw rev 0x230, fw ver 34.0.1, address 60:f6:77:bc:3a:04
> 
> (mode 11g: bandwidth min/avg/max/std-dev = 0.620/0.844/1.101/0.153 Mbps)
> 
> mbk Uwe
> 
> 
> On 26 Jun 14:45, Stefan Sperling wrote:
> > This patch adds support for 11n Tx aggregation to iwm(4).
> > 
> > Please help with testing if you can by running the patch and using wifi
> > as usual. Nothing should change, except that Tx speed may potentially
> > improve. If you have time to run before/after performance measurements with
> > tcpbench or such, that would be nice. But it's not required for testing.
> > 
> > If Tx aggregation is active then netstat will show a non-zero output block 
> > ack
> > agreement counter:
> > 
> > $ netstat -W iwm0 | grep 'output block'
> > 3 new output block ack agreements
> > 0 output block ack agreements timed out
> > 
> > It would be great to get at least one test for all the chipsets the driver
> > supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> > The behaviour of the access point also matters a great deal. It won't
> > hurt to test the same chipset against several different access points.
> > 
> > I have tested this version on 8265 only so far. I've run older revisions
> > of this patch on 7265 so I'm confident that this chip will work, too.
> > So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> > mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
> > 
> > diff refs/heads/master refs/heads/txagg
> > blob - 3a75d07a60a7eb4c66540474e47aeffd7a85250a
> > blob + 853bdd1290ad509f5fce7b5bf20550f458a2b460
> > --- sys/dev/pci/if_iwm.c
> > +++ sys/dev/pci/if_iwm.c
> > @@ -144,6 +144,8 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include  /* for SEQ_LT */
> > +#undef DPRINTF /* defined in ieee80211_priv.h */
> >  
> >  #define DEVNAME(_s)((_s)->sc_dev.dv_xname)
> >  
> > @@ -299,7 +301,8 @@ int iwm_nic_rx_mq_init(struct iwm_softc *);
> >  intiwm_nic_tx_init(struct iwm_softc *);
> >  intiwm_nic_init(struct iwm_softc *);
> >  intiwm_enable_ac_txq(struct iwm_softc *, int, int);
> > -intiwm_enable_txq(struct iwm_softc *, int, int, int);
> > +intiwm_enable_txq(struct iwm_softc *, int, int, int, int, uint8_t,
> > +   uint16_t);
> >  intiwm_post_alive(struct iwm_softc *);
> >  struct iwm_phy_db_entry *iwm_phy_db_get_section(struct iwm_softc *, 
> > uint16_t,
> > uint16_t);
> > @@ -334,12 +337,12 @@ void  iwm_ampdu_rx_stop(struct ieee80211com *, struct 
> > i
> > uint8_t);
> >  void   iwm_sta_rx_agg(struct iwm_softc *, struct ieee80211_node *, 
> > uint8_t,
> > uint16_t, uint16_t, int);
> > -#ifdef notyet
> > +void   iwm_sta_tx_agg(struct iwm_softc *, struct ieee80211_node *, 
> > uint8_t,
> > +   uint16_t, uint16_t, int);
> >  intiwm_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node 
> > *,
> > uint8_t);
> >  void   iwm_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node 
> > *,
> > uint8_t);
> > -#endif
> >  void   iwm_ba_task(void *);
> >  
> >  intiwm_parse_nvm_data(struct iwm_softc *, const uint16_t *,
> > @@ -372,14 +375,25 @@ int   iwm_rxmq_get_signal_strength(struct iwm_softc 
> > *, s
> >  void   iwm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *,
> > struct iwm_rx_data *);
> >  intiwm_get_noise(const struct iwm_statistics_rx_non_phy *);
> > +void   iwm_txq_advance(struct iwm_softc *, struct iwm_tx_ring *, int);
> > +void   iwm_ampdu_tx_done(struct iwm_softc *, struct iwm_cmd_header *,
> > +   struct iwm_node *, struct iwm_tx_ring *, uint32_t, uint8_t,
> > +   uint8_t, uint16_t, int, struct iwm_agg_tx_status *);
> >  intiwm_ccmp_decap(struct iwm_softc *, struct mbuf *,
> > struct ieee80211_node *);
> >  void   iwm_rx_frame(struct iwm_softc *, struct mbuf *, int, uint32_t, 
> > int, int,
> > uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
> > -void   iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
> > -   struct 

Re: 11n Tx aggregation for iwm(4)

2020-06-29 Thread Uwe Werler
Hi Stefan,

for me the patch works in mode 11n:

before (OpenBSD 6.7-current (GENERIC.MP) #304: Fri Jun 26 02:08:50 MDT 2020)
bandwidth min/avg/max/std-dev = 2.354/12.319/15.391/3.850 Mbps

with patch (OpenBSD 6.7-current (GENERIC.MP) #0: Mon Jun 29 09:35:24 GMT 2020)
bandwidth min/avg/max/std-dev = 12.174/31.411/57.746/15.154 Mbps

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
iwm0: hw rev 0x230, fw ver 34.0.1, address 60:f6:77:bc:3a:04

(mode 11g: bandwidth min/avg/max/std-dev = 0.620/0.844/1.101/0.153 Mbps)

mbk Uwe


On 26 Jun 14:45, Stefan Sperling wrote:
> This patch adds support for 11n Tx aggregation to iwm(4).
> 
> Please help with testing if you can by running the patch and using wifi
> as usual. Nothing should change, except that Tx speed may potentially
> improve. If you have time to run before/after performance measurements with
> tcpbench or such, that would be nice. But it's not required for testing.
> 
> If Tx aggregation is active then netstat will show a non-zero output block ack
> agreement counter:
> 
> $ netstat -W iwm0 | grep 'output block'
> 3 new output block ack agreements
>   0 output block ack agreements timed out
> 
> It would be great to get at least one test for all the chipsets the driver
> supports: 7260, 7265, 3160, 3165, 3168, 8260, 8265, 9260, 9560
> The behaviour of the access point also matters a great deal. It won't
> hurt to test the same chipset against several different access points.
> 
> I have tested this version on 8265 only so far. I've run older revisions
> of this patch on 7265 so I'm confident that this chip will work, too.
> So far, the APs I have tested against are athn(4) in 11a mode and in 11n
> mode with the 'nomimo' nwflag, and a Sagemcom 11ac AP. All on 5Ghz channels.
> 
> diff refs/heads/master refs/heads/txagg
> blob - 3a75d07a60a7eb4c66540474e47aeffd7a85250a
> blob + 853bdd1290ad509f5fce7b5bf20550f458a2b460
> --- sys/dev/pci/if_iwm.c
> +++ sys/dev/pci/if_iwm.c
> @@ -144,6 +144,8 @@
>  #include 
>  #include 
>  #include 
> +#include  /* for SEQ_LT */
> +#undef DPRINTF /* defined in ieee80211_priv.h */
>  
>  #define DEVNAME(_s)  ((_s)->sc_dev.dv_xname)
>  
> @@ -299,7 +301,8 @@ int   iwm_nic_rx_mq_init(struct iwm_softc *);
>  int  iwm_nic_tx_init(struct iwm_softc *);
>  int  iwm_nic_init(struct iwm_softc *);
>  int  iwm_enable_ac_txq(struct iwm_softc *, int, int);
> -int  iwm_enable_txq(struct iwm_softc *, int, int, int);
> +int  iwm_enable_txq(struct iwm_softc *, int, int, int, int, uint8_t,
> + uint16_t);
>  int  iwm_post_alive(struct iwm_softc *);
>  struct iwm_phy_db_entry *iwm_phy_db_get_section(struct iwm_softc *, uint16_t,
>   uint16_t);
> @@ -334,12 +337,12 @@ voidiwm_ampdu_rx_stop(struct ieee80211com *, struct 
> i
>   uint8_t);
>  void iwm_sta_rx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
>   uint16_t, uint16_t, int);
> -#ifdef notyet
> +void iwm_sta_tx_agg(struct iwm_softc *, struct ieee80211_node *, uint8_t,
> + uint16_t, uint16_t, int);
>  int  iwm_ampdu_tx_start(struct ieee80211com *, struct ieee80211_node *,
>   uint8_t);
>  void iwm_ampdu_tx_stop(struct ieee80211com *, struct ieee80211_node *,
>   uint8_t);
> -#endif
>  void iwm_ba_task(void *);
>  
>  int  iwm_parse_nvm_data(struct iwm_softc *, const uint16_t *,
> @@ -372,14 +375,25 @@ int iwm_rxmq_get_signal_strength(struct iwm_softc 
> *, s
>  void iwm_rx_rx_phy_cmd(struct iwm_softc *, struct iwm_rx_packet *,
>   struct iwm_rx_data *);
>  int  iwm_get_noise(const struct iwm_statistics_rx_non_phy *);
> +void iwm_txq_advance(struct iwm_softc *, struct iwm_tx_ring *, int);
> +void iwm_ampdu_tx_done(struct iwm_softc *, struct iwm_cmd_header *,
> + struct iwm_node *, struct iwm_tx_ring *, uint32_t, uint8_t,
> + uint8_t, uint16_t, int, struct iwm_agg_tx_status *);
>  int  iwm_ccmp_decap(struct iwm_softc *, struct mbuf *,
>   struct ieee80211_node *);
>  void iwm_rx_frame(struct iwm_softc *, struct mbuf *, int, uint32_t, int, int,
>   uint32_t, struct ieee80211_rxinfo *, struct mbuf_list *);
> -void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_rx_packet *,
> - struct iwm_node *, int, int);
> +void iwm_rx_tx_cmd_single(struct iwm_softc *, struct iwm_tx_resp *,
> + struct iwm_node *, int, int, int);
> +void iwm_txd_done(struct iwm_softc *, struct iwm_tx_data *);
>  void iwm_rx_tx_cmd(struct iwm_softc *, struct iwm_rx_packet *,
>   struct iwm_rx_data *);
> +void iwm_clear_oactive(struct iwm_softc *, struct iwm_tx_ring *);
> +void iwm_mira_choose(struct iwm_softc *, struct ieee80211_node *);
> +void iwm_ampdu_rate_control(struct iwm_softc *, struct ieee80211_node *,
> + struct iwm_tx_ring *, int, uint16_t, uint16_t);
> +void iwm_rx_ba(struct iwm_softc *, struct iwm_rx_packet *,
> + struct iwm_rx_data *);
>  void iwm_rx_bmiss(struct iwm_softc *, struct iwm_rx_packet *,
>   

Re: route add ::/0 ...

2020-06-29 Thread YASUOKA Masahiko
Hi,

On Mon, 29 Jun 2020 10:12:23 +0200
Martin Pieuchot  wrote:
> On 28/06/20(Sun) 20:41, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> When "::/0" is used as "default",
>> 
>>   # route add ::/0 fe80::1%em0
>>   add net ::/0: gateway fe80::1%em0: Invalid argument
>> 
>> route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
>> for "::/0", but rtable_satoplen() refuses it.  I think it should be
>> accepted.
> 
> rtable_satoplen() is used in many places, not just in the socket parsing
> code used by route(8).  I don't know what side effects can be introduced
> by this change.
> 
> Why is IPv6 different from IPv4 when it comes to the default route?

Diferent functions are used.  route(8) uses inet_makenetandmask() to
create a sockaddr for IPv4 prefix length and uses prefixlen() for IPv6
prefix length.  "/0" results:

IPv4
  { .len = 1, .family = 0, ... }
IPv6 
  { .len = 2, .family = AF_INET6, ... }

Next, route(8) uses mask_addr() to trim the tailing zeros.

1129 void
1130 mask_addr(union sockunion *addr, union sockunion *mask, int which)
1131 {
1132 int olen = mask->sa.sa_len;
1133 char *cp1 = olen + (char *)mask, *cp2;
1134 
1135 for (mask->sa.sa_len = 0; cp1 > (char *)mask; )
1136 if (*--cp1 != '\0') {
1137 mask->sa.sa_len = 1 + cp1 - (char *)mask;
1138 break;
1139 }

See #1135 carefully.  As the results, the sockaddrs become:

IPv4
  { .len = 0, .family = 0, ... }
IPv6
  { .len = 2, .family = AF_INET6, ... }

Yes, we can fix IPv6 case to have .len = 0 as well.

But I thought kernel should accept both cases, since the
representation for IPv6 didn't seem so bad for me.

> Shouldn't we change route(8) to have a `sa_len' of 0?
> 
> That would make the following true:
> 
> mlen = mask->sa_len;
> 
>   /* Default route */
>   if (mlen == 0)
>   return (0)
> 
>> Allow sockaddr for prefix length be trimmed before the key(address)
>> field.  Actually "route" command trims at the address family field for
>> "::/0"
>> 
>> Index: sys/net/rtable.c
>> ===
>> RCS file: /cvs/src/sys/net/rtable.c,v
>> retrieving revision 1.69
>> diff -u -p -r1.69 rtable.c
>> --- sys/net/rtable.c 21 Jun 2019 17:11:42 -  1.69
>> +++ sys/net/rtable.c 28 Jun 2020 11:30:54 -
>> @@ -887,8 +887,8 @@ rtable_satoplen(sa_family_t af, struct s
>>  
>>  ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
>>  ep = (uint8_t *)((uint8_t *)mask) + mlen;
>> -if (ap > ep)
>> -return (-1);
>> +if (ap >= ep)
>> +return (0);
> 
> That means the kernel now silently ignore sockaddr short `sa_len'. Are
> they supposed to be supported or are they symptoms of bugs?

I have missed rtable_satoplen() is used by other functions.

> 
>>  /* Trim trailing zeroes. */
>>  while (ap < ep && ep[-1] == 0)
> 



Re: route add ::/0 ...

2020-06-29 Thread Martin Pieuchot
On 28/06/20(Sun) 20:41, YASUOKA Masahiko wrote:
> Hi,
> 
> When "::/0" is used as "default",
> 
>   # route add ::/0 fe80::1%em0
>   add net ::/0: gateway fe80::1%em0: Invalid argument
> 
> route command trims the sockaddr to { .len = 2, .family = AF_INET6 }
> for "::/0", but rtable_satoplen() refuses it.  I think it should be
> accepted.

rtable_satoplen() is used in many places, not just in the socket parsing
code used by route(8).  I don't know what side effects can be introduced
by this change.

Why is IPv6 different from IPv4 when it comes to the default route?
Shouldn't we change route(8) to have a `sa_len' of 0?  That would make
the following true:

mlen = mask->sa_len;

/* Default route */
if (mlen == 0)
return (0)

> Allow sockaddr for prefix length be trimmed before the key(address)
> field.  Actually "route" command trims at the address family field for
> "::/0"
> 
> Index: sys/net/rtable.c
> ===
> RCS file: /cvs/src/sys/net/rtable.c,v
> retrieving revision 1.69
> diff -u -p -r1.69 rtable.c
> --- sys/net/rtable.c  21 Jun 2019 17:11:42 -  1.69
> +++ sys/net/rtable.c  28 Jun 2020 11:30:54 -
> @@ -887,8 +887,8 @@ rtable_satoplen(sa_family_t af, struct s
>  
>   ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
>   ep = (uint8_t *)((uint8_t *)mask) + mlen;
> - if (ap > ep)
> - return (-1);
> + if (ap >= ep)
> + return (0);

That means the kernel now silently ignore sockaddr short `sa_len'. Are
they supposed to be supported or are they symptoms of bugs?

>   /* Trim trailing zeroes. */
>   while (ap < ep && ep[-1] == 0)



Re: OpenBSD.calendar patch

2020-06-29 Thread Jason McIntyre
On Sat, Jun 27, 2020 at 11:56:49AM -0700, jungle boogie wrote:
> Hi Friends,
> 
> Here's a small patch to the OpenBSD.calendar. I didn't want to spend too 
> much time on this until I find out if it would be accepted.
> 

morning.

i think if such a file is worth having, it's worth updating.

having said that, i'm counting myself out from looking after this
one - hopefully someone else will pick it up.

jmc

> Here's my changes:
> 
> 
> --- /usr/share/calendar/calendar.openbsd  Fri Jun 26 21:01:56 2020
> +++ calendar.openbsd  Sat Jun 27 01:37:40 2020
> @@ -10,15 +10,19 @@
>   Jan 06  IPF gets integrated into the OpenBSD kernel, 1996
>   Jan 06  NRL IPv6 addition to OpenBSD, 1999
>   Jan 09  n2k10: Network hackathon, Melbourne, Australia, 17 developers, 
> 2010
> +Jan 12   u2k20: Uckermark hackathon, Urckermark, Germany, 14 developers, 
> 2020
>   Jan 13  n2k13: Network hackathon, Dunedin, New Zealand, 17 developers, 
> 2013
> +Jan 17   a2k19: Antipodean hackathon, Wellington, New Zealand, 18 
> developers, 2019
>   Jan 18  n2k14: Mini-hackathon, Dunedin, New Zealand, 15 developers, 2014
>   Jan 20  Bind 9 goes into the tree, 2003
> +Jan 20   a2k20: Antipodean hackathon, Hobart, Tasmania, 17 developers, 
> 2020
>   Jan 26  Anoncvs service inaugurated, 1996
>   Jan 26  n2k9: Network hackathon, Basel, Switzerland, 19 developers, 2009
>   Jan 27  OpenBSD/amd64 port is added, from NetBSD, 2004
>   Jan 29  "second anoncvs server is 100 miles from the first", 1996
>   Jan 31  OpenBSD/cats port is added, from NetBSD, 2004
>   Feb 03  Describe the ports mechanism [in OpenBSD], 1997
> +Feb 05   a2k18: Dunedin, New Zeland, 19 developers, 2018
>   Feb 13  Unpatented fast block cipher for new password hashing, 1997
>   Feb 14  GNU RCS expired from source tree, replaced with OpenRCS, 2007
>   Feb 19  IPsec package by John Ioannidis and Angelos D. Keromytis, 1997
> @@ -27,6 +31,7 @@
>   Feb 28  Cryptographic services framework in OpenBSD, 2000
>   Mar 09  Support for the VAX architecture removed, 2016
>   Mar 10  OpenBSD/WWW translation started -- German, Spanish, Dutch, 2000
> +Mar 28   t2k19: Taipei mini hackathon, Taipei, Taiwan, 16 developers, 
> 2019
>   Apr 01  OpenBSD/hppa64 port is added, 2005
>   Apr 01  k2k11: Kernel hackathon, Hafnarfjordur, Iceland, 15 developers, 
> 2011
>   Apr 10  f2k7: First filesystem hackathon, Vienna, Austria, 14 
> developers, 2007
> @@ -40,10 +45,12 @@
>   Apr 27  i386/PAE work integrated, 2006
>   May 01  OpenBSD 3.3 released, exploiting W^X, 2003
>   May 05  n2k8: Network hackathon, Ito, Japan, 18 developers, 2008
> +May 07   g2k19: General hackathon, Ottawa, Canada, 43 developers, 2019
>   May 08  c2k3 General hackathon, Calgary, Alberta, 51 developers, 2003
>   May 09  First commit to OpenBSD stable branch, OPENBSD_2_7, 2000
>   May 09  OpenBSD/aviion port is added, 2006
>   May 19  OpenBSD 2.3 released, including "ports" system, 1998
> +May 19   OpenBSD 6.7 released, 48th release, 2020
>   May 21  c2k5: General hackathon, Calgary, Alberta, 60 developers, 2005
>   May 21  c2k6: General hackathon, Calgary, Alberta, 47 developers, 2006
>   May 24  OpenBSD gets a trunk(4), 2005
> @@ -57,6 +64,7 @@
>   Jun 04  c99: First hackathon (IPsec), Calgary, Alberta, 10 developers, 
> 1999
>   Jun 04  c2k2: General hackathon, Calgary, Alberta, 42 developers, 2002
>   Jun 06  c2k8: General hackathon, Edmonton, Alberta, 55 developers, 2008
> +Jun 21   WireGuard imported into kernel, 2020
>   Jun 14  r2k6: First network hackathon, Hamburg, Germany, 6 developers, 
> 2006
>   Jun 15  OpenBSD 2.7 released, including OpenSSH, 2000
>   Jun 15  c2k: First general hackathon, Calgary, Alberta, 18 developers, 
> 2000
> @@ -70,6 +78,7 @@
>   Jul 02  c2k11: General hackathon, Edmonton, Alberta, Canada, 36 
> developers, 2011
>   Jul 07  g2k12: General hackathon, Budapest, Hungary, 41 developers, 2012
>   Jul 08  g2k14: General hackathon, Ljubljana, Slovenia, 49 developers, 
> 2014
> +Jul 08   g2k18: General hackathon, Ljubljana, Slovenia, 39 developers, 
> 2018
>   Jul 11  OpenBSD goes wireless w/ if_wi addition, 1999
>   Jul 23  OpenBSD goes multimedia with Brooktree 848 support, 1998
>   Jul 24  Non-executable stack on most architectures, 2002
> @@ -83,6 +92,7 @@
>   Aug 28  k2k6: IPsec hackathon, Schloss Kransberg, Germany, 14 
> developers, 2006
>   Sep 01  Support for the sparc (32bit) architecture removed, 2016
>   Sep 03  Support for the zaurus architecture removed, 2016
> +Sep 06   n2k18: Network hackathon, Usti nad Labem, Czech Republic, 11 
> developers, 2018
>   Sep 16  s2k11: General hackathon, Ljubljana, Slovenia, 25 developers, 
> 2011
>   Sep 17  n2k12: Network hackathon, Starnberg, Germany, 23 developers, 
> 2012
>   Sep 19  j2k10: Mini-hackathon, Sakae