Re: SunBlade 100: X segfault with onboard ati rage adapter (machfb)

2021-12-05 Thread Theo de Raadt
Way faster if I put this in snaps.  Try again in around 4 hours.

Jonathan Gray  wrote:

> On Sun, Dec 05, 2021 at 04:54:28PM -0700, Ted Bullock wrote:
> > Hey folks,
> > 
> > Looking into another usability fault with the SunBlade 100. This time
> > with the onboard video adapter.  I'm seeing X segfault when starting up
> > using the default configuration and after a fresh install of -current.
> > 
> > Obviously old hardware here.  Though I wonder if other folks out there
> > might have similar vintage ati rage xl display adapters since I think it
> > was used extensively as an affordable video adapter for servers for many
> > years.  Or is this just a sparc64 bug.
> 
> This is likely related to the patch matthieu@ posted to tech
> for recent xserver breakage:
> 
> https://marc.info/?l=openbsd-tech=163873978109335=2
> 
> Index: hw/xfree86/modes/xf86Crtc.h
> ===
> RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/modes/xf86Crtc.h,v
> retrieving revision 1.15
> diff -u -p -u -r1.15 xf86Crtc.h
> --- hw/xfree86/modes/xf86Crtc.h   11 Nov 2021 09:10:04 -  1.15
> +++ hw/xfree86/modes/xf86Crtc.h   4 Dec 2021 17:50:33 -
> @@ -837,11 +837,11 @@ extern _X_EXPORT int xf86CrtcConfigPriva
>  static _X_INLINE xf86OutputPtr
>  xf86CompatOutput(ScrnInfoPtr pScrn)
>  {
> -xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn);
> +xf86CrtcConfigPtr config;
>  
>  if (xf86CrtcConfigPrivateIndex == -1)
> -return NULL;
> -
> +return NULL;
> +config = XF86_CRTC_CONFIG_PTR(pScrn);
>  if (config->compat_output < 0)
>  return NULL;
>  return config->output[config->compat_output];
> Index: hw/xfree86/modes/xf86Modes.c
> ===
> RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/modes/xf86Modes.c,v
> retrieving revision 1.12
> diff -u -p -u -r1.12 xf86Modes.c
> --- hw/xfree86/modes/xf86Modes.c  11 Nov 2021 09:03:08 -  1.12
> +++ hw/xfree86/modes/xf86Modes.c  5 Dec 2021 18:20:52 -
> @@ -803,10 +803,14 @@ xf86CVTMode(int HDisplay, int VDisplay, 
>  {
>  struct libxcvt_mode_info *libxcvt_mode_info;
>  DisplayModeRec *Mode = xnfcalloc(1, sizeof(DisplayModeRec));
> +char *tmp;
>  
>  libxcvt_mode_info =
>  libxcvt_gen_mode_info(HDisplay, VDisplay, VRefresh, Reduced, 
> Interlaced);
>  
> +XNFasprintf(, "%dx%d", HDisplay, VDisplay);
> +Mode->name = tmp;
> +
>  Mode->VDisplay   = libxcvt_mode_info->vdisplay;
>  Mode->HDisplay   = libxcvt_mode_info->hdisplay;
>  Mode->Clock  = libxcvt_mode_info->dot_clock;
> 
> > 
> > Xorg.0.log
> > 
> > [56.518] (--) Using wscons driver on /dev/ttyC0
> > [56.754] 
> > This is a pre-release version of the X server from The X.Org Foundation.
> > It is not supported in any way.
> > Bugs may be filed in the bugzilla at http://bugs.freedesktop.org/.
> > Select the "xorg" product for bugs you find in this release.
> > Before reporting bugs in pre-release versions please check the
> > latest version in the X.Org Foundation git repository.
> > See http://wiki.x.org/wiki/GitPage for git access instructions.
> > [56.758] 
> > X.Org X Server 1.21.1.1
> > X Protocol Version 11, Revision 0
> > [56.759] Current Operating System: OpenBSD spikard.my.domain 7.0 
> > GENERIC#1046 sparc64
> > [56.759]  
> > [56.759] Current version of pixman: 0.40.0
> > [56.759]Before reporting problems, check http://wiki.x.org
> > to make sure that you have the latest version.
> > [56.759] Markers: (--) probed, (**) from config file, (==) default 
> > setting,
> > (++) from command line, (!!) notice, (II) informational,
> > (WW) warning, (EE) error, (NI) not implemented, (??) unknown.
> > [56.763] (==) Log file: "/var/log/Xorg.0.log", Time: Sat Dec  4 
> > 17:23:03 2021
> > [56.819] (==) Using system config directory 
> > "/usr/X11R6/share/X11/xorg.conf.d"
> > [56.852] (==) No Layout section.  Using the first Screen section.
> > [56.861] (==) No screen section available. Using defaults.
> > [56.861] (**) |-->Screen "Default Screen Section" (0)
> > [56.861] (**) |   |-->Monitor ""
> > [56.906] (==) No monitor specified for screen "Default Screen Section".
> > Using a default monitor configuration.
> > [56.907] (==) Automatically adding devices
> > [56.907] (==) Automatically enabling devices
> > [56.907] (==) Not automatically adding GPU devices
> > [56.907] (==) Automatically binding GPU devices
> > [56.933] (==) Max clients allowed: 256, resource mask: 0x1f
> > [57.165] (==) FontPath set to:
> > /usr/X11R6/lib/X11/fonts/misc/,
> > /usr/X11R6/lib/X11/fonts/TTF/,
> > /usr/X11R6/lib/X11/fonts/OTF/,
> > /usr/X11R6/lib/X11/fonts/Type1/,
> > /usr/X11R6/lib/X11/fonts/100dpi/,
> > /usr/X11R6/lib/X11/fonts/75dpi/
> > [57.165] (==) ModulePath set 

Re: SunBlade 100: X segfault with onboard ati rage adapter (machfb)

2021-12-05 Thread Jonathan Gray
On Sun, Dec 05, 2021 at 04:54:28PM -0700, Ted Bullock wrote:
> Hey folks,
> 
> Looking into another usability fault with the SunBlade 100. This time
> with the onboard video adapter.  I'm seeing X segfault when starting up
> using the default configuration and after a fresh install of -current.
> 
> Obviously old hardware here.  Though I wonder if other folks out there
> might have similar vintage ati rage xl display adapters since I think it
> was used extensively as an affordable video adapter for servers for many
> years.  Or is this just a sparc64 bug.

This is likely related to the patch matthieu@ posted to tech
for recent xserver breakage:

https://marc.info/?l=openbsd-tech=163873978109335=2

Index: hw/xfree86/modes/xf86Crtc.h
===
RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/modes/xf86Crtc.h,v
retrieving revision 1.15
diff -u -p -u -r1.15 xf86Crtc.h
--- hw/xfree86/modes/xf86Crtc.h 11 Nov 2021 09:10:04 -  1.15
+++ hw/xfree86/modes/xf86Crtc.h 4 Dec 2021 17:50:33 -
@@ -837,11 +837,11 @@ extern _X_EXPORT int xf86CrtcConfigPriva
 static _X_INLINE xf86OutputPtr
 xf86CompatOutput(ScrnInfoPtr pScrn)
 {
-xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn);
+xf86CrtcConfigPtr config;
 
 if (xf86CrtcConfigPrivateIndex == -1)
-return NULL;
-
+return NULL;
+config = XF86_CRTC_CONFIG_PTR(pScrn);
 if (config->compat_output < 0)
 return NULL;
 return config->output[config->compat_output];
Index: hw/xfree86/modes/xf86Modes.c
===
RCS file: /cvs/OpenBSD/xenocara/xserver/hw/xfree86/modes/xf86Modes.c,v
retrieving revision 1.12
diff -u -p -u -r1.12 xf86Modes.c
--- hw/xfree86/modes/xf86Modes.c11 Nov 2021 09:03:08 -  1.12
+++ hw/xfree86/modes/xf86Modes.c5 Dec 2021 18:20:52 -
@@ -803,10 +803,14 @@ xf86CVTMode(int HDisplay, int VDisplay, 
 {
 struct libxcvt_mode_info *libxcvt_mode_info;
 DisplayModeRec *Mode = xnfcalloc(1, sizeof(DisplayModeRec));
+char *tmp;
 
 libxcvt_mode_info =
 libxcvt_gen_mode_info(HDisplay, VDisplay, VRefresh, Reduced, 
Interlaced);
 
+XNFasprintf(, "%dx%d", HDisplay, VDisplay);
+Mode->name = tmp;
+
 Mode->VDisplay   = libxcvt_mode_info->vdisplay;
 Mode->HDisplay   = libxcvt_mode_info->hdisplay;
 Mode->Clock  = libxcvt_mode_info->dot_clock;

> 
> Xorg.0.log
> 
> [56.518] (--) Using wscons driver on /dev/ttyC0
> [56.754] 
> This is a pre-release version of the X server from The X.Org Foundation.
> It is not supported in any way.
> Bugs may be filed in the bugzilla at http://bugs.freedesktop.org/.
> Select the "xorg" product for bugs you find in this release.
> Before reporting bugs in pre-release versions please check the
> latest version in the X.Org Foundation git repository.
> See http://wiki.x.org/wiki/GitPage for git access instructions.
> [56.758] 
> X.Org X Server 1.21.1.1
> X Protocol Version 11, Revision 0
> [56.759] Current Operating System: OpenBSD spikard.my.domain 7.0 
> GENERIC#1046 sparc64
> [56.759]  
> [56.759] Current version of pixman: 0.40.0
> [56.759]  Before reporting problems, check http://wiki.x.org
>   to make sure that you have the latest version.
> [56.759] Markers: (--) probed, (**) from config file, (==) default 
> setting,
>   (++) from command line, (!!) notice, (II) informational,
>   (WW) warning, (EE) error, (NI) not implemented, (??) unknown.
> [56.763] (==) Log file: "/var/log/Xorg.0.log", Time: Sat Dec  4 17:23:03 
> 2021
> [56.819] (==) Using system config directory 
> "/usr/X11R6/share/X11/xorg.conf.d"
> [56.852] (==) No Layout section.  Using the first Screen section.
> [56.861] (==) No screen section available. Using defaults.
> [56.861] (**) |-->Screen "Default Screen Section" (0)
> [56.861] (**) |   |-->Monitor ""
> [56.906] (==) No monitor specified for screen "Default Screen Section".
>   Using a default monitor configuration.
> [56.907] (==) Automatically adding devices
> [56.907] (==) Automatically enabling devices
> [56.907] (==) Not automatically adding GPU devices
> [56.907] (==) Automatically binding GPU devices
> [56.933] (==) Max clients allowed: 256, resource mask: 0x1f
> [57.165] (==) FontPath set to:
>   /usr/X11R6/lib/X11/fonts/misc/,
>   /usr/X11R6/lib/X11/fonts/TTF/,
>   /usr/X11R6/lib/X11/fonts/OTF/,
>   /usr/X11R6/lib/X11/fonts/Type1/,
>   /usr/X11R6/lib/X11/fonts/100dpi/,
>   /usr/X11R6/lib/X11/fonts/75dpi/
> [57.165] (==) ModulePath set to "/usr/X11R6/lib/modules"
> [57.165] (II) The server relies on wscons to provide the list of input 
> devices.
>   If no devices become available, reconfigure wscons or disable 
> AutoAddDevices.
> [57.177] (II) Loader magic: 0x49f41c0010
> [57.177] (II) Module ABI versions:
> [

SunBlade 100: X segfault with onboard ati rage adapter (machfb)

2021-12-05 Thread Ted Bullock
Hey folks,

Looking into another usability fault with the SunBlade 100. This time
with the onboard video adapter.  I'm seeing X segfault when starting up
using the default configuration and after a fresh install of -current.

Obviously old hardware here.  Though I wonder if other folks out there
might have similar vintage ati rage xl display adapters since I think it
was used extensively as an affordable video adapter for servers for many
years.  Or is this just a sparc64 bug.

Xorg.0.log

[56.518] (--) Using wscons driver on /dev/ttyC0
[56.754] 
This is a pre-release version of the X server from The X.Org Foundation.
It is not supported in any way.
Bugs may be filed in the bugzilla at http://bugs.freedesktop.org/.
Select the "xorg" product for bugs you find in this release.
Before reporting bugs in pre-release versions please check the
latest version in the X.Org Foundation git repository.
See http://wiki.x.org/wiki/GitPage for git access instructions.
[56.758] 
X.Org X Server 1.21.1.1
X Protocol Version 11, Revision 0
[56.759] Current Operating System: OpenBSD spikard.my.domain 7.0 
GENERIC#1046 sparc64
[56.759]  
[56.759] Current version of pixman: 0.40.0
[56.759]Before reporting problems, check http://wiki.x.org
to make sure that you have the latest version.
[56.759] Markers: (--) probed, (**) from config file, (==) default setting,
(++) from command line, (!!) notice, (II) informational,
(WW) warning, (EE) error, (NI) not implemented, (??) unknown.
[56.763] (==) Log file: "/var/log/Xorg.0.log", Time: Sat Dec  4 17:23:03 
2021
[56.819] (==) Using system config directory 
"/usr/X11R6/share/X11/xorg.conf.d"
[56.852] (==) No Layout section.  Using the first Screen section.
[56.861] (==) No screen section available. Using defaults.
[56.861] (**) |-->Screen "Default Screen Section" (0)
[56.861] (**) |   |-->Monitor ""
[56.906] (==) No monitor specified for screen "Default Screen Section".
Using a default monitor configuration.
[56.907] (==) Automatically adding devices
[56.907] (==) Automatically enabling devices
[56.907] (==) Not automatically adding GPU devices
[56.907] (==) Automatically binding GPU devices
[56.933] (==) Max clients allowed: 256, resource mask: 0x1f
[57.165] (==) FontPath set to:
/usr/X11R6/lib/X11/fonts/misc/,
/usr/X11R6/lib/X11/fonts/TTF/,
/usr/X11R6/lib/X11/fonts/OTF/,
/usr/X11R6/lib/X11/fonts/Type1/,
/usr/X11R6/lib/X11/fonts/100dpi/,
/usr/X11R6/lib/X11/fonts/75dpi/
[57.165] (==) ModulePath set to "/usr/X11R6/lib/modules"
[57.165] (II) The server relies on wscons to provide the list of input 
devices.
If no devices become available, reconfigure wscons or disable 
AutoAddDevices.
[57.177] (II) Loader magic: 0x49f41c0010
[57.177] (II) Module ABI versions:
[57.177]X.Org ANSI C Emulation: 0.4
[57.177]X.Org Video Driver: 25.2
[57.180]X.Org XInput driver : 24.4
[57.180]X.Org Server Extension : 10.0
[57.250] (--) PCI:*(0@0:19:0) 1002:4752:: rev 39, Mem @ 
0x0300/16777216, 0x00426000/4096, I/O @ 0x0b00/256, BIOS @ 
0x/131072
[57.252] (II) LoadModule: "glx"
[57.303] (II) Loading /usr/X11R6/lib/modules/extensions/libglx.so
[58.058] (II) Module glx: vendor="X.Org Foundation"
[58.058]compiled for 1.21.1.1, module version = 1.0.0
[58.058]ABI class: X.Org Server Extension, version 10.0
[58.071] (==) Matched ati as autoconfigured driver 0
[58.071] (==) Assigned the driver to the xf86ConfigLayout
[58.072] (II) LoadModule: "ati"
[58.085] (II) Loading /usr/X11R6/lib/modules/drivers/ati_drv.so
[58.114] (II) Module ati: vendor="X.Org Foundation"
[58.114]compiled for 1.21.1.1, module version = 19.1.0
[58.114]Module class: X.Org Video Driver
[58.114]ABI class: X.Org Video Driver, version 25.2
[58.115] (II) LoadModule: "mach64"
[58.118] (II) Loading /usr/X11R6/lib/modules/drivers/mach64_drv.so
[58.172] (II) Module mach64: vendor="X.Org Foundation"
[58.172]compiled for 1.21.1.1, module version = 6.9.6
[58.172]Module class: X.Org Video Driver
[58.172]ABI class: X.Org Video Driver, version 25.2
[58.174] (II) MACH64: Driver for ATI Mach64 chipsets
[58.191] (WW) VGA arbiter: cannot open kernel arbiter, no multi-card support
[58.204] (II) MACH64(0): Creating default Display subsection in Screen 
section
"Default Screen Section" for depth/fbbpp 24/32
[58.205] (==) MACH64(0): Depth 24, (--) framebuffer bpp 32
[58.206] (==) MACH64(0): Using EXA acceleration architecture
[58.207] (II) MACH64: Mach64 in slot 0:19:0 detected.
[58.226] (II) MACH64(0): BIOS Data:  BIOSSize=0x, ROMTable=0x.
[58.226] (II) MACH64(0): BIOS Data:  ClockTable=0x, 
FrequencyTable=0x.
[58.226] (II) MACH64(0): BIOS Data:  

Re: panic: ieee80211_set_link_state() calls rtm_80211info() from timeout context

2021-12-05 Thread Vitaliy Makkoveev
> On 5 Dec 2021, at 20:05, Scott Cheloha  wrote:
> 
> Suppose the ic_bgscan_timeout timeout is running at the moment we're
> running ieee80211_ifdetach().  Ignore the kernel lock for the moment,
> think about the future.
> 
> If we delete the task before we delete the timeout and the timeout
> then adds the task back onto the task queue, what happens?
> 
> My guess is you need to ensure the timeout is no longer running
> *before* you delete the task.  Can you do timeout_del_barrier()
> here?  See the attached patch.

This timeout_del_barrier(9) doesn’t make sense. You also need to
wait ieee80211_rtm_80211info_task() to be accomplished and
taskq_del_barrier(9) should be used instead of task_del(9).

I doubt this code will be the same when unlocking started.

> 
> $ dmesg | grep iwm0 | tail -n2
> iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
> iwm0: hw rev 0x230, fw ver 36.ca7b901d.0, address 98:3b:8f:ef:6b:ef
> 
> Unsure how `route monitor` exercises this path, but I've left it
> running, too.

You have at least one PF_ROUTE socket. Otherwise route_input()
performs drain run without any solock() call.

> 
> Index: ieee80211.c
> ===
> RCS file: /cvs/src/sys/net80211/ieee80211.c,v
> retrieving revision 1.85
> diff -u -p -r1.85 ieee80211.c
> --- ieee80211.c   11 Oct 2021 09:01:06 -  1.85
> +++ ieee80211.c   5 Dec 2021 17:01:51 -
> @@ -193,6 +193,7 @@ ieee80211_ifattach(struct ifnet *ifp)
>   if_addgroup(ifp, "wlan");
>   ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY;
> 
> + task_set(>ic_rtm_80211info_task, ieee80211_rtm_80211info_task, ic);
>   ieee80211_set_link_state(ic, LINK_STATE_DOWN);
> 
>   timeout_set(>ic_bgscan_timeout, ieee80211_bgscan_timeout, ifp);
> @@ -203,7 +204,8 @@ ieee80211_ifdetach(struct ifnet *ifp)
> {
>   struct ieee80211com *ic = (void *)ifp;
> 
> - timeout_del(>ic_bgscan_timeout);
> + timeout_del_barrier(>ic_bgscan_timeout);
> + task_del(systq, >ic_rtm_80211info_task);
> 
>   /*
>* Undo pseudo-driver changes. Pseudo-driver detach hooks could




Re: panic: ieee80211_set_link_state() calls rtm_80211info() from timeout context

2021-12-05 Thread Stefan Sperling
On Sun, Dec 05, 2021 at 11:05:32AM -0600, Scott Cheloha wrote:
> > diff 0b61c8235787960f0010ef627ea5b2c6309a81f0 
> > de98c050ea709bdb8e26be40ab0cc82ef9afed80
> > blob - 7bb68194dd78417b06c59f81d1ebbff4165203d8
> > blob + 5b9a969258074fde29e21a33ac035cf170ec3b03
> > --- sys/net80211/ieee80211.c
> > +++ sys/net80211/ieee80211.c
> > @@ -193,6 +193,7 @@ ieee80211_ifattach(struct ifnet *ifp)
> > if_addgroup(ifp, "wlan");
> > ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY;
> >  
> > +   task_set(>ic_rtm_80211info_task, ieee80211_rtm_80211info_task, ic);
> > ieee80211_set_link_state(ic, LINK_STATE_DOWN);
> >  
> > timeout_set(>ic_bgscan_timeout, ieee80211_bgscan_timeout, ifp);
> > @@ -203,6 +204,7 @@ ieee80211_ifdetach(struct ifnet *ifp)
> >  {
> > struct ieee80211com *ic = (void *)ifp;
> >  
> > +   task_del(systq, >ic_rtm_80211info_task);
> > timeout_del(>ic_bgscan_timeout);
> 
> Suppose the ic_bgscan_timeout timeout is running at the moment we're
> running ieee80211_ifdetach().  Ignore the kernel lock for the moment,
> think about the future.

There are many more places in the wireless stack that do such things.
But I am not interested in doing MP work on wireless myself. That is
simply asking too much on top of everything else I do.

> If we delete the task before we delete the timeout and the timeout
> then adds the task back onto the task queue, what happens?
> 
> My guess is you need to ensure the timeout is no longer running
> *before* you delete the task.  Can you do timeout_del_barrier()
> here?  See the attached patch.

Yes, sure we can. There are dozens of other timeouts in the wireless
stack and drivers, so your patch is a small step on a very long road.

> > /*
> > blob - 447a2676bfb250b7f917206549679d6ae68de1f6
> > blob + 7e10fc1336067542c13d5607602e658ce2b3926b
> > --- sys/net80211/ieee80211_proto.c
> > +++ sys/net80211/ieee80211_proto.c
> > @@ -1288,6 +1288,31 @@ justcleanup:
> >  }
> >  
> >  void
> > +ieee80211_rtm_80211info_task(void *arg)
> > +{
> > +   struct ieee80211com *ic = arg;
> > +   struct ifnet *ifp = >ic_if;
> > +   struct if_ieee80211_data ifie;
> > +   int s = splnet();
> > +
> > +   if (LINK_STATE_IS_UP(ifp->if_link_state)) {
> 
> Does this mean userspace can "miss" state transitions if the task runs
> and the state has already changed back to not-up?
> 
> I don't know whether this would matter in practice, but it would be a
> behavior change.

If this task doesn't find link state up for some reason then it is
running in an unexpected situation. What else should it do?

I think it makes sense for a scheduled task to check that its precondition
still applies. We have had several bugs in iwm(4) where tasks did their
thing even though their reason for being scheduled no longer applied.
In that case we ended up sending firmware commands that appeared our
of order from the device's point of view. I don't know if this really
matters here, it will depend on what userland expects.
 
> Unsure how `route monitor` exercises this path, but I've left it
> running, too.

That was just to see whether the routing message is still being generated.



Re: panic: ieee80211_set_link_state() calls rtm_80211info() from timeout context

2021-12-05 Thread Scott Cheloha
On Sat, Dec 04, 2021 at 08:40:42PM +0100, Stefan Sperling wrote:
> On Sat, Dec 04, 2021 at 09:32:40PM +0300, Vitaliy Makkoveev wrote:
> > I think rtm_80211info() could follow the if_link_state_change()
> > way and use task for that.
> 
> Indeed. I did not realize that if_link_state_change() schedules a task.
> 
> This means ieee80211_set_link_state() is already deferring some of its
> work to a task.

Probably should have mentioned this in my original mail.

> The patch below defers sending the 80211info message to a task as well.
> 
> I am keeping things simple for now and use systq (kernel-locked) instead
> of copying the argument-passing approach used by if_link_state_change().

All timeouts run under the kernel lock, so this is appropriate.

> Tested on iwm(4) 8265 with 'route monitor'.
> 
> ok?

I like the direction.  I have a few concerns noted below.

> diff 0b61c8235787960f0010ef627ea5b2c6309a81f0 
> de98c050ea709bdb8e26be40ab0cc82ef9afed80
> blob - 7bb68194dd78417b06c59f81d1ebbff4165203d8
> blob + 5b9a969258074fde29e21a33ac035cf170ec3b03
> --- sys/net80211/ieee80211.c
> +++ sys/net80211/ieee80211.c
> @@ -193,6 +193,7 @@ ieee80211_ifattach(struct ifnet *ifp)
>   if_addgroup(ifp, "wlan");
>   ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY;
>  
> + task_set(>ic_rtm_80211info_task, ieee80211_rtm_80211info_task, ic);
>   ieee80211_set_link_state(ic, LINK_STATE_DOWN);
>  
>   timeout_set(>ic_bgscan_timeout, ieee80211_bgscan_timeout, ifp);
> @@ -203,6 +204,7 @@ ieee80211_ifdetach(struct ifnet *ifp)
>  {
>   struct ieee80211com *ic = (void *)ifp;
>  
> + task_del(systq, >ic_rtm_80211info_task);
>   timeout_del(>ic_bgscan_timeout);

Suppose the ic_bgscan_timeout timeout is running at the moment we're
running ieee80211_ifdetach().  Ignore the kernel lock for the moment,
think about the future.

If we delete the task before we delete the timeout and the timeout
then adds the task back onto the task queue, what happens?

My guess is you need to ensure the timeout is no longer running
*before* you delete the task.  Can you do timeout_del_barrier()
here?  See the attached patch.

>  
>   /*
> blob - 447a2676bfb250b7f917206549679d6ae68de1f6
> blob + 7e10fc1336067542c13d5607602e658ce2b3926b
> --- sys/net80211/ieee80211_proto.c
> +++ sys/net80211/ieee80211_proto.c
> @@ -1288,6 +1288,31 @@ justcleanup:
>  }
>  
>  void
> +ieee80211_rtm_80211info_task(void *arg)
> +{
> + struct ieee80211com *ic = arg;
> + struct ifnet *ifp = >ic_if;
> + struct if_ieee80211_data ifie;
> + int s = splnet();
> +
> + if (LINK_STATE_IS_UP(ifp->if_link_state)) {

Does this mean userspace can "miss" state transitions if the task runs
and the state has already changed back to not-up?

I don't know whether this would matter in practice, but it would be a
behavior change.

--

The rest of the patch looks good, running with it now on this:

$ dmesg | grep iwm0 | tail -n2
iwm0 at pci1 dev 0 function 0 "Intel Dual Band Wireless-AC 8265" rev 0x78, msi
iwm0: hw rev 0x230, fw ver 36.ca7b901d.0, address 98:3b:8f:ef:6b:ef

Unsure how `route monitor` exercises this path, but I've left it
running, too.

Index: ieee80211.c
===
RCS file: /cvs/src/sys/net80211/ieee80211.c,v
retrieving revision 1.85
diff -u -p -r1.85 ieee80211.c
--- ieee80211.c 11 Oct 2021 09:01:06 -  1.85
+++ ieee80211.c 5 Dec 2021 17:01:51 -
@@ -193,6 +193,7 @@ ieee80211_ifattach(struct ifnet *ifp)
if_addgroup(ifp, "wlan");
ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY;
 
+   task_set(>ic_rtm_80211info_task, ieee80211_rtm_80211info_task, ic);
ieee80211_set_link_state(ic, LINK_STATE_DOWN);
 
timeout_set(>ic_bgscan_timeout, ieee80211_bgscan_timeout, ifp);
@@ -203,7 +204,8 @@ ieee80211_ifdetach(struct ifnet *ifp)
 {
struct ieee80211com *ic = (void *)ifp;
 
-   timeout_del(>ic_bgscan_timeout);
+   timeout_del_barrier(>ic_bgscan_timeout);
+   task_del(systq, >ic_rtm_80211info_task);
 
/*
 * Undo pseudo-driver changes. Pseudo-driver detach hooks could



Re: panic: ieee80211_set_link_state() calls rtm_80211info() from timeout context

2021-12-05 Thread Florian Obser


reads OK 

On 2021-12-04 20:40 +01, Stefan Sperling  wrote:
> On Sat, Dec 04, 2021 at 09:32:40PM +0300, Vitaliy Makkoveev wrote:
>> I think rtm_80211info() could follow the if_link_state_change()
>> way and use task for that.
>
> Indeed. I did not realize that if_link_state_change() schedules a task.
>
> This means ieee80211_set_link_state() is already deferring some of its
> work to a task. The patch below defers sending the 80211info message
> to a task as well.
>
> I am keeping things simple for now and use systq (kernel-locked) instead
> of copying the argument-passing approach used by if_link_state_change().
>
> Tested on iwm(4) 8265 with 'route monitor'.
>
> ok?
>
> diff 0b61c8235787960f0010ef627ea5b2c6309a81f0 
> de98c050ea709bdb8e26be40ab0cc82ef9afed80
> blob - 7bb68194dd78417b06c59f81d1ebbff4165203d8
> blob + 5b9a969258074fde29e21a33ac035cf170ec3b03
> --- sys/net80211/ieee80211.c
> +++ sys/net80211/ieee80211.c
> @@ -193,6 +193,7 @@ ieee80211_ifattach(struct ifnet *ifp)
>   if_addgroup(ifp, "wlan");
>   ifp->if_priority = IF_WIRELESS_DEFAULT_PRIORITY;
>  
> + task_set(>ic_rtm_80211info_task, ieee80211_rtm_80211info_task, ic);
>   ieee80211_set_link_state(ic, LINK_STATE_DOWN);
>  
>   timeout_set(>ic_bgscan_timeout, ieee80211_bgscan_timeout, ifp);
> @@ -203,6 +204,7 @@ ieee80211_ifdetach(struct ifnet *ifp)
>  {
>   struct ieee80211com *ic = (void *)ifp;
>  
> + task_del(systq, >ic_rtm_80211info_task);
>   timeout_del(>ic_bgscan_timeout);
>  
>   /*
> blob - 447a2676bfb250b7f917206549679d6ae68de1f6
> blob + 7e10fc1336067542c13d5607602e658ce2b3926b
> --- sys/net80211/ieee80211_proto.c
> +++ sys/net80211/ieee80211_proto.c
> @@ -1288,6 +1288,31 @@ justcleanup:
>  }
>  
>  void
> +ieee80211_rtm_80211info_task(void *arg)
> +{
> + struct ieee80211com *ic = arg;
> + struct ifnet *ifp = >ic_if;
> + struct if_ieee80211_data ifie;
> + int s = splnet();
> +
> + if (LINK_STATE_IS_UP(ifp->if_link_state)) {
> + memset(, 0, sizeof(ifie));
> + ifie.ifie_nwid_len = ic->ic_bss->ni_esslen;
> + memcpy(ifie.ifie_nwid, ic->ic_bss->ni_essid,
> + sizeof(ifie.ifie_nwid));
> + memcpy(ifie.ifie_addr, ic->ic_bss->ni_bssid,
> + sizeof(ifie.ifie_addr));
> + ifie.ifie_channel = ieee80211_chan2ieee(ic,
> + ic->ic_bss->ni_chan);
> + ifie.ifie_flags = ic->ic_flags;
> + ifie.ifie_xflags = ic->ic_xflags;
> + rtm_80211info(>ic_if, );
> + }
> +
> + splx(s);
> +}
> +
> +void
>  ieee80211_set_link_state(struct ieee80211com *ic, int nstate)
>  {
>   struct ifnet *ifp = >ic_if;
> @@ -1307,20 +1332,8 @@ ieee80211_set_link_state(struct ieee80211com *ic, int 
>   }
>   if (nstate != ifp->if_link_state) {
>   ifp->if_link_state = nstate;
> - if (LINK_STATE_IS_UP(nstate)) {
> - struct if_ieee80211_data ifie;
> - memset(, 0, sizeof(ifie));
> - ifie.ifie_nwid_len = ic->ic_bss->ni_esslen;
> - memcpy(ifie.ifie_nwid, ic->ic_bss->ni_essid,
> - sizeof(ifie.ifie_nwid));
> - memcpy(ifie.ifie_addr, ic->ic_bss->ni_bssid,
> - sizeof(ifie.ifie_addr));
> - ifie.ifie_channel = ieee80211_chan2ieee(ic,
> - ic->ic_bss->ni_chan);
> - ifie.ifie_flags = ic->ic_flags;
> - ifie.ifie_xflags = ic->ic_xflags;
> - rtm_80211info(>ic_if, );
> - }
> + if (LINK_STATE_IS_UP(nstate))
> + task_add(systq, >ic_rtm_80211info_task);
>   if_link_state_change(ifp);
>   }
>  }
> blob - 7208e5dc0be1983a31d7f74142e87581bea95d13
> blob + ce6d1ad91f391cb16c7f0bbfa79210401d5dc7eb
> --- sys/net80211/ieee80211_proto.h
> +++ sys/net80211/ieee80211_proto.h
> @@ -63,6 +63,7 @@ extern  void ieee80211_proto_detach(struct ifnet *);
>  struct ieee80211_node;
>  struct ieee80211_rxinfo;
>  struct ieee80211_rsnparams;
> +extern   void ieee80211_rtm_80211info_task(void *);
>  extern   void ieee80211_set_link_state(struct ieee80211com *, int);
>  extern   u_int ieee80211_get_hdrlen(const struct ieee80211_frame *);
>  extern   int ieee80211_classify(struct ieee80211com *, struct mbuf *);
> blob - dd17ed76031db17bd86cd75a5c1eec659dbd3f30
> blob + 641419331ed3f65fbc9272c055948438b28f1025
> --- sys/net80211/ieee80211_var.h
> +++ sys/net80211/ieee80211_var.h
> @@ -306,6 +306,7 @@ struct ieee80211com {
>   struct timeout  ic_inact_timeout; /* node inactivity timeout */
>   struct timeout  ic_node_cache_timeout;
>  #endif
> + struct task ic_rtm_80211info_task;
>   int ic_des_esslen;
>   u_int8_tic_des_essid[IEEE80211_NWID_LEN];
>   struct ieee80211_channel