Re: pcb mutex userland

2022-03-18 Thread Alexander Bluhm
On Thu, Mar 17, 2022 at 12:47:15AM +0100, Alexander Bluhm wrote:
> My previous atempt to add a mutex to in_pcb.h was reverted as it
> broke userland build.

This diff passes make release and regress test.

I would like to commit it to proceed with pcb locking.
Generally fixing sysctl includes is out of my scope.

ok?

bluhm

Index: sys/netinet/in_pcb.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.125
diff -u -p -r1.125 in_pcb.h
--- sys/netinet/in_pcb.h14 Mar 2022 22:38:43 -  1.125
+++ sys/netinet/in_pcb.h18 Mar 2022 16:17:54 -
@@ -65,6 +65,7 @@
 #define _NETINET_IN_PCB_H_
 
 #include 
+#include 
 #include 
 #include 
 #include 



Re: refcount btrace

2022-03-18 Thread Alexander Bluhm
On Thu, Mar 17, 2022 at 07:25:27AM +, Visa Hankala wrote:
> On Thu, Mar 17, 2022 at 12:42:13AM +0100, Alexander Bluhm wrote:
> > I would like to use btrace to debug refernce counting.  The idea
> > is to a a tracepoint for every type of refcnt we have.  When it
> > changes, print the actual object, the current counter and the change
> > value.
> 
> > Do we want that feature?
> 
> I am against this in its current form. The code would become more
> complex, and the trace points can affect timing. There is a risk that
> the kernel behaves slightly differently when dt has been compiled in.

On our main architectures dt(4) is in GENERIC.  I see your timing
point for uvm structures.

What do you think about this?  The check starts with a
__predict_false(index > 0) in #define DT_INDEX_ENTER.  The r_traceidx
is very likely in the same cache line as r_refs.  So the additional
overhead of the branch should be small compared to the atomic
operation.  The __predict_false(dt_tracing) might take longer as
it is a global variable.

Default is not to trace refcnt.  But I would like to have it for
network objects.  For sending network packets the additional branch
instruction depending on a global variable does not count.

bluhm

Index: dev/dt/dt_prov_static.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dt_prov_static.c,v
retrieving revision 1.13
diff -u -p -r1.13 dt_prov_static.c
--- dev/dt/dt_prov_static.c 17 Mar 2022 14:53:59 -  1.13
+++ dev/dt/dt_prov_static.c 18 Mar 2022 20:35:02 -
@@ -2,6 +2,7 @@
 
 /*
  * Copyright (c) 2019 Martin Pieuchot 
+ * Copyright (c) 2022 Alexander Bluhm 
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -87,6 +88,12 @@ DT_STATIC_PROBE1(smr, barrier_exit, "int
 DT_STATIC_PROBE0(smr, wakeup);
 DT_STATIC_PROBE2(smr, thread, "uint64_t", "uint64_t");
 
+/*
+ * reference counting
+ */
+DT_STATIC_PROBE0(refcnt, none);
+DT_STATIC_PROBE3(refcnt, inpcb, "void *", "int", "int");
+DT_STATIC_PROBE3(refcnt, tdb, "void *", "int", "int");
 
 /*
  * List of all static probes
@@ -127,15 +134,24 @@ struct dt_probe *const dtps_static[] = {
&_DT_STATIC_P(smr, barrier_exit),
&_DT_STATIC_P(smr, wakeup),
&_DT_STATIC_P(smr, thread),
+   /* refcnt */
+   &_DT_STATIC_P(refcnt, none),
+   &_DT_STATIC_P(refcnt, inpcb),
+   &_DT_STATIC_P(refcnt, tdb),
 };
 
+struct dt_probe *const *dtps_index_refcnt;
+
 int
 dt_prov_static_init(void)
 {
int i;
 
-   for (i = 0; i < nitems(dtps_static); i++)
+   for (i = 0; i < nitems(dtps_static); i++) {
+   if (dtps_static[i] == &_DT_STATIC_P(refcnt, none))
+   dtps_index_refcnt = _static[i];
dt_dev_register_probe(dtps_static[i]);
+   }
 
return i;
 }
Index: dev/dt/dtvar.h
===
RCS file: /data/mirror/openbsd/cvs/src/sys/dev/dt/dtvar.h,v
retrieving revision 1.13
diff -u -p -r1.13 dtvar.h
--- dev/dt/dtvar.h  27 Feb 2022 10:14:01 -  1.13
+++ dev/dt/dtvar.h  18 Mar 2022 20:58:28 -
@@ -2,6 +2,7 @@
 
 /*
  * Copyright (c) 2019 Martin Pieuchot 
+ * Copyright (c) 2022 Alexander Bluhm 
  *
  * Permission to use, copy, modify, and distribute this software for any
  * purpose with or without fee is hereby granted, provided that the above
@@ -313,11 +314,30 @@ extern volatile uint32_t  dt_tracing; /* 
 #defineDT_STATIC_ENTER(func, name, args...) do {   
\
extern struct dt_probe _DT_STATIC_P(func, name);\
struct dt_probe *dtp = &_DT_STATIC_P(func, name);   \
-   struct dt_provider *dtpv = dtp->dtp_prov;   \
\
if (__predict_false(dt_tracing) &&  \
__predict_false(dtp->dtp_recording)) {  \
+   struct dt_provider *dtpv = dtp->dtp_prov;   \
+   \
dtpv->dtpv_enter(dtpv, dtp, args);  \
+   }   \
+} while (0)
+
+#define _DT_INDEX_P(func)  (dtps_index_##func)
+
+#define DT_INDEX_ENTER(func, index, args...) do {  \
+   extern struct dt_probe **_DT_INDEX_P(func); \
+   \
+   if (__predict_false(index > 0) &&   \
+   __predict_false(dt_tracing) &&  \
+   __predict_true(_DT_INDEX_P(func) != NULL)) {\
+   struct dt_probe *dtp = _DT_INDEX_P(func)[index];\
+ 

Re: initial 11ac support for iwm(4)

2022-03-18 Thread Stefan Sperling
On Fri, Mar 18, 2022 at 01:39:01PM +0100, Marcus MERIGHI wrote:
> Hello, 
> 
> s...@stsp.name (Stefan Sperling), 2022.03.17 (Thu) 21:09 (CET):
> > On Thu, Mar 17, 2022 at 07:02:06PM +0100, Marcus MERIGHI wrote:
> > > s...@stsp.name (Stefan Sperling), 2022.03.16 (Wed) 16:11 (CET):
> > > > This patch adds initial 11ac support to the iwm(4) driver.
> > > > It allows use of 80 MHz channels and VHT MCS.
> > > [...]
> > > > So far, I have tested this successfully on iwm(4) 8265.
> > > 
> > > With this patch a "larger" file transfer will reliably panic my machine.
> > > "large" means even small files. 
> > > ICMP and NTP (and probably DNS) seem to work, though.
> > > I've tried it six times, it always crashed.
> > > 
> > > This is with:
> > > iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 
> > > 0x88, msi
> > > iwm0: hw rev 0x230, fw ver 36.ca7b901d.0, address 00:28:f8:xx:yy:zz
> > 
> > If you have not done so yet, could you please try the revised
> > version of the patch which I posted here?
> > https://marc.info/?l=openbsd-tech=164746924001340=2
> > I believe this should fix your issue.
> > 
> > Below is the difference between the two versions, in case it is easier
> > for you to apply the fix on top of the first version of the patch.
> 
> this did the trick. Thank you and sorry for the noise.

Not worries at all, this was informative. You have a fairly unique test case.
The bug triggered because your AP is using 11ac without MIMO, while everyone
else is testing on a MIMO-capable AP.



Re: initial 11ac support for iwm(4)

2022-03-18 Thread Stefan Sperling
On Fri, Mar 18, 2022 at 05:25:42AM +0100, Landry Breuil wrote:
> interestingly, when associated over ac to the livebox the background
> scans only shows the 5ghz channels from both APs, but when im associated
> to my regular AP the background scans shows both 2ghz and 5ghz chans.
> not sure that matters.

Thanks for pointing this out!
Background scan should indeed still report APs on all channels.
I will try to find a fix for this. It can be fixed independently from
the iwm 11ac patch. This will likely already affect iwx(4) as well.



Re: pcb mutex userland

2022-03-18 Thread Mark Kettenis
> Date: Thu, 17 Mar 2022 15:14:51 +0100
> From: Alexander Bluhm 
> 
> On Thu, Mar 17, 2022 at 02:09:39PM +0100, Mark Kettenis wrote:
> > I fear the fundamental problem is that we should not expose data
> > structures internal to the kernel to userland.  What I don't
> > understand though is how that happens.  The sysctl code doesn't seem
> > to export "struct inpcb" instances directly, but instead it exports
> > selected members through "struct kinfo_file".  So why is "struct
> > inpcb" exposed to userland at all?
> 
> A few tools use it.  One thing is post mortem analysis of kernel
> core dumps.  Sometimes I get dumps sent from customers.  They don't
> have WITNESS.
> 
> Some traditional network debugging tools use these strutures.
> 
> lib/libkvm
> sbin/sysctl
> usr.bin/netstat
> usr.bin/tcpbench
> usr.sbin/trpt
> 
> As you need sysctl kern.allowkmem=1 for them this is only useful
> on debugging machines.  Ob course they can be rewritten using sysctl.
> A drawback is that you have to write a lot of copy code and post
> mortem analysis code gets out of sync.  I see tools with -M -N
> breaking over time.

Well, maybe the libkvm case is fine for debugging tools.  You're
supposed to know what you're doing in that case.  And if you put the
mutex at the very end of the structure maybe you won't be utterly
confused if you try to run those tools on a WITNESS kernel.

However, for sysctl(8) we really should use explicitly defined export
data structures.  You listed sysctl(8) above together with the other
tools, but sysctl doesn't use libkvm.  Is the sysctl binary needlessly
pulling in ?

> The question is how to proceed.  For MP in the network stack I need
> mutex in structs.  And I don't want to rewrite all tools before
> making progress.

Well, the big question here is whether  is a
userland header or not.  If it is, we need to be careful about what it
exports and not add random kernel data structures into it without
proper #ifdef _KERNEL.  If it isn't, then your solution of including
 is fine, but we should make sure that only the libkvm
users include that header file.

> > > Index: sys/netinet/in_pcb.h
> > > ===
> > > RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
> > > retrieving revision 1.125
> > > diff -u -p -r1.125 in_pcb.h
> > > --- sys/netinet/in_pcb.h  14 Mar 2022 22:38:43 -  1.125
> > > +++ sys/netinet/in_pcb.h  17 Mar 2022 00:44:54 -
> > > @@ -65,6 +65,7 @@
> > >  #define _NETINET_IN_PCB_H_
> > >
> > >  #include 
> > > +#include 
> > >  #include 
> > >  #include 
> > >  #include 
> > > Index: sys/sys/mutex.h
> > > ===
> > > RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mutex.h,v
> > > retrieving revision 1.18
> > > diff -u -p -r1.18 mutex.h
> > > --- sys/sys/mutex.h   23 Apr 2019 13:35:12 -  1.18
> > > +++ sys/sys/mutex.h   17 Mar 2022 00:44:23 -
> > > @@ -48,6 +48,8 @@ struct mutex {
> > >  #endif
> > >  };
> > >
> > > +#ifdef _KERNEL
> > > +
> > >  /*
> > >   * To prevent lock ordering problems with the kernel lock, we need to
> > >   * make sure we block all interrupts that can grab the kernel lock.
> > > @@ -148,7 +150,7 @@ void  _mtx_init_flags(struct mutex *, int
> > >
> > >  #endif /* WITNESS */
> > >
> > > -#if defined(_KERNEL) && defined(DDB)
> > > +#ifdef DDB
> > >
> > >  struct db_mutex {
> > >   struct cpu_info *mtx_owner;
> > > @@ -160,6 +162,8 @@ struct db_mutex {
> > >  void db_mtx_enter(struct db_mutex *);
> > >  void db_mtx_leave(struct db_mutex *);
> > >
> > > -#endif /* _KERNEL && DDB */
> > > +#endif /* DDB */
> > > +
> > > +#endif /* _KERNEL */
> > >
> > >  #endif
> > >
> > >
> 



Re: pcb mutex userland

2022-03-18 Thread Mark Kettenis
> From: "Ted Unangst" 
> Date: Fri, 18 Mar 2022 05:04:23 -0400
> 
> On 2022-03-17, Alexander Bluhm wrote:
> > On Thu, Mar 17, 2022 at 01:07:12AM +0100, Mark Kettenis wrote:
> > > > Date: Thu, 17 Mar 2022 01:01:46 +0100 (CET)
> > > > From: Mark Kettenis 
> > > > 
> > > > > Date: Thu, 17 Mar 2022 00:47:15 +0100
> > > > > From: Alexander Bluhm 
> > > > > 
> > > > > Hi,
> > > > > 
> > > > > My previous atempt to add a mutex to in_pcb.h was reverted as it
> > > > > broke userland build.
> > > > > 
> > > > > Is the correct fix to include sys/mutex.h in every .c file that
> > > > > includes netinet/in_pcb.h ?  I made a release with it.
> > > > > Or should I include sys/mutex.h in netinet/in_pcb.h ?
> > > > 
> > > > Neither?
> > > > 
> > > > It makes no sense to export the kernel mutex stuff to userland.  Is
> > > > there a way to avoid doing that by adding a bit for #ifdef _KERNEL?
> > > ^
> > >   a bit more
> > 
> > My diff adds struct mutex to struct inpcbtable.  My later plan is
> > to add a mutex also to struct inpcb.
> > 
> > tcpbench uses libkvm to extract information from struct inpcbtable.
> > netstat does that for struct inpcb.  Also post mortem analysis from
> > a kernel core dump is possible.
> > 
> > I don't understand why userland must not know the size of struct
> > mutex when tools where written to analyze these structs.
> > 
> > Is there something special about struct mutex that should not shown
> > to userland?
> > 
> > Do you like this?  Different structs for kernel and userland.
> > I think this is confusing when used with libkvm.
> > 
> > struct inpcbtable {
> > TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
> > struct  inpcbhead *inpt_hashtbl;/* [t] local and foreign 
> > hash */
> > struct  inpcbhead *inpt_lhashtbl;   /* [t] local port hash */
> > SIPHASH_KEY inpt_key, inpt_lkey;/* [t] secrets for hashes */
> > u_long  inpt_mask, inpt_lmask;  /* [t] hash masks */
> > int inpt_count, inpt_size;  /* [t] queue count, hash 
> > size */
> > #ifdef _KERNEL
> > struct mutex inpt_mtx;  /* protect queue and hash */
> > #endif
> > };
> 
> What if you add an #else case?
> 
> #ifdef _KERNEL
> struct mutex inpt_mtx;
> #else
> char _mtxpad[8];
> #endif

Keeping the size in sync (and consistent across platforms!) will be a
nightmare.  So no.



Re: pcb mutex userland

2022-03-18 Thread Ted Unangst
On 2022-03-17, Alexander Bluhm wrote:
> On Thu, Mar 17, 2022 at 01:07:12AM +0100, Mark Kettenis wrote:
> > > Date: Thu, 17 Mar 2022 01:01:46 +0100 (CET)
> > > From: Mark Kettenis 
> > > 
> > > > Date: Thu, 17 Mar 2022 00:47:15 +0100
> > > > From: Alexander Bluhm 
> > > > 
> > > > Hi,
> > > > 
> > > > My previous atempt to add a mutex to in_pcb.h was reverted as it
> > > > broke userland build.
> > > > 
> > > > Is the correct fix to include sys/mutex.h in every .c file that
> > > > includes netinet/in_pcb.h ?  I made a release with it.
> > > > Or should I include sys/mutex.h in netinet/in_pcb.h ?
> > > 
> > > Neither?
> > > 
> > > It makes no sense to export the kernel mutex stuff to userland.  Is
> > > there a way to avoid doing that by adding a bit for #ifdef _KERNEL?
> > ^
> > a bit more
> 
> My diff adds struct mutex to struct inpcbtable.  My later plan is
> to add a mutex also to struct inpcb.
> 
> tcpbench uses libkvm to extract information from struct inpcbtable.
> netstat does that for struct inpcb.  Also post mortem analysis from
> a kernel core dump is possible.
> 
> I don't understand why userland must not know the size of struct
> mutex when tools where written to analyze these structs.
> 
> Is there something special about struct mutex that should not shown
> to userland?
> 
> Do you like this?  Different structs for kernel and userland.
> I think this is confusing when used with libkvm.
> 
> struct inpcbtable {
> TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
> struct  inpcbhead *inpt_hashtbl;/* [t] local and foreign hash 
> */
> struct  inpcbhead *inpt_lhashtbl;   /* [t] local port hash */
> SIPHASH_KEY inpt_key, inpt_lkey;/* [t] secrets for hashes */
> u_long  inpt_mask, inpt_lmask;  /* [t] hash masks */
> int inpt_count, inpt_size;  /* [t] queue count, hash size 
> */
> #ifdef _KERNEL
> struct mutex inpt_mtx;  /* protect queue and hash */
> #endif
> };

What if you add an #else case?

#ifdef _KERNEL
struct mutex inpt_mtx;
#else
char _mtxpad[8];
#endif



Re: refcount btrace

2022-03-18 Thread Visa Hankala
On Thu, Mar 17, 2022 at 06:16:51PM +0100, Alexander Bluhm wrote:
> On Thu, Mar 17, 2022 at 07:25:27AM +, Visa Hankala wrote:
> > On Thu, Mar 17, 2022 at 12:42:13AM +0100, Alexander Bluhm wrote:
> > > I would like to use btrace to debug refernce counting.  The idea
> > > is to a a tracepoint for every type of refcnt we have.  When it
> > > changes, print the actual object, the current counter and the change
> > > value.
> > 
> > > Do we want that feature?
> > 
> > I am against this in its current form. The code would become more
> > complex, and the trace points can affect timing. There is a risk that
> > the kernel behaves slightly differently when dt has been compiled in.
> 
> Can we get in this part then?
> 
> - Remove DIAGNOSTIC to keep similar in non DIAGNOSTIC case.
> - Rename refcnt to refs.  refcnt is the struct, refs contains the
>   r_refs value.
> - Add KASSERT(refs != ~0) in refcnt_finalize().
> - Always use u_int refs so I can insert my btrace diff easily.

I think this is fine.

OK visa@

> Index: kern/kern_synch.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.184
> diff -u -p -r1.184 kern_synch.c
> --- kern/kern_synch.c 16 Mar 2022 14:13:01 -  1.184
> +++ kern/kern_synch.c 17 Mar 2022 16:12:50 -
> @@ -810,25 +810,21 @@ refcnt_init(struct refcnt *r)
>  void
>  refcnt_take(struct refcnt *r)
>  {
> -#ifdef DIAGNOSTIC
> - u_int refcnt;
> + u_int refs;
>  
> - refcnt = atomic_inc_int_nv(>r_refs);
> - KASSERT(refcnt != 0);
> -#else
> - atomic_inc_int(>r_refs);
> -#endif
> + refs = atomic_inc_int_nv(>r_refs);
> + KASSERT(refs != 0);
> + (void)refs;
>  }
>  
>  int
>  refcnt_rele(struct refcnt *r)
>  {
> - u_int refcnt;
> + u_int refs;
>  
> - refcnt = atomic_dec_int_nv(>r_refs);
> - KASSERT(refcnt != ~0);
> -
> - return (refcnt == 0);
> + refs = atomic_dec_int_nv(>r_refs);
> + KASSERT(refs != ~0);
> + return (refs == 0);
>  }
>  
>  void
> @@ -842,26 +838,33 @@ void
>  refcnt_finalize(struct refcnt *r, const char *wmesg)
>  {
>   struct sleep_state sls;
> - u_int refcnt;
> + u_int refs;
>  
> - refcnt = atomic_dec_int_nv(>r_refs);
> - while (refcnt) {
> + refs = atomic_dec_int_nv(>r_refs);
> + KASSERT(refs != ~0);
> + while (refs) {
>   sleep_setup(, r, PWAIT, wmesg, 0);
> - refcnt = atomic_load_int(>r_refs);
> - sleep_finish(, refcnt);
> + refs = atomic_load_int(>r_refs);
> + sleep_finish(, refs);
>   }
>  }
>  
>  int
>  refcnt_shared(struct refcnt *r)
>  {
> - return (atomic_load_int(>r_refs) > 1);
> + u_int refs;
> +
> + refs = atomic_load_int(>r_refs);
> + return (refs > 1);
>  }
>  
>  unsigned int
>  refcnt_read(struct refcnt *r)
>  {
> - return (atomic_load_int(>r_refs));
> + u_int refs;
> +
> + refs = atomic_load_int(>r_refs);
> + return (refs);
>  }
>  
>  void
>