Re: Xorg vs wifi scanning
On Mon, Feb 06, 2017 at 03:54:56PM +0100, Martin Pieuchot wrote: > On 06/02/17(Mon) 12:27, Martin Pieuchot wrote: > > Paul and Antoine reported that, since the NET_LOCK() went in, doing a > > channel scan with iwm(4) and iwn(4) freezes X. > > > > This is a deadlock due to the fact that wireless drivers sleep during > > a long time holding the NET_LOCK() while the X server tries to > > communicate with unix sockets, that still need the NET_LOCK(). > > > > The obvious solution to this problem is to not hold the NET_LOCK() for > > unix socket related operations. We're working on that. > > > > However since hardware ioctl(2) can sleep there's no problem to release > > the NET_LOCK() there. This is true for all ioctls that are not > > modifying any stack state (address, multicast group, etc). Hence the > > diff below that should fix the problem in a generic way. > > tb@ found that another code path needs to be unlocked. Diff below does > that. These ioctl(2)s are only messing at the driver level, so it is > safe to drop the NET_LOCK() there as well. > > ok? OK pirofti@. Thanks! > > Index: netinet/in.c > === > RCS file: /cvs/src/sys/netinet/in.c,v > retrieving revision 1.133 > diff -u -p -r1.133 in.c > --- netinet/in.c 20 Dec 2016 12:35:38 - 1.133 > +++ netinet/in.c 6 Feb 2017 13:43:06 - > @@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc > default: > if (ifp->if_ioctl == NULL) > return (EOPNOTSUPP); > - return ((*ifp->if_ioctl)(ifp, cmd, data)); > + /* XXXSMP breaks atomicity */ > + rw_exit_write(); > + error = ((*ifp->if_ioctl)(ifp, cmd, data)); > + rw_enter_write(); > + return (error); > } > return (0); > } > Index: netinet6/in6.c > === > RCS file: /cvs/src/sys/netinet6/in6.c,v > retrieving revision 1.197 > diff -u -p -r1.197 in6.c > --- netinet6/in6.c5 Feb 2017 16:04:14 - 1.197 > +++ netinet6/in6.c6 Feb 2017 13:43:06 - > @@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru > struct in6_ifaddr *ia6 = NULL; > struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; > struct sockaddr_in6 *sa6; > + int error; > > if (ifp == NULL) > return (EOPNOTSUPP); > @@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru > break; > > default: > - if (ifp == NULL || ifp->if_ioctl == 0) > + if (ifp->if_ioctl == NULL) > return (EOPNOTSUPP); > - return ((*ifp->if_ioctl)(ifp, cmd, data)); > + /* XXXSMP breaks atomicity */ > + rw_exit_write(); > + error = ((*ifp->if_ioctl)(ifp, cmd, data)); > + rw_enter_write(); > + return (error); > } > > return (0); > Index: dev/pci/if_iwm.c > === > RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v > retrieving revision 1.162 > diff -u -p -r1.162 if_iwm.c > --- dev/pci/if_iwm.c 4 Feb 2017 19:20:59 - 1.162 > +++ dev/pci/if_iwm.c 6 Feb 2017 14:52:24 - > @@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, > struct ifreq *ifr; > int s, err = 0; > > - /* XXXSMP breaks atomicity */ > - rw_exit_write(); > - > /* >* Prevent processes from entering this function while another >* process is tsleep'ing in it. >*/ > err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR); > - if (err) { > - rw_enter_write(); > + if (err) > return err; > - } > > s = splnet(); > > @@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, > > splx(s); > rw_exit(>ioctl_rwl); > - rw_enter_write(); > > return err; > } > Index: net/if.c > === > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.485 > diff -u -p -r1.485 if.c > --- net/if.c 1 Feb 2017 02:02:01 - 1.485 > +++ net/if.c 6 Feb 2017 14:27:47 - > @@ -2029,7 +2029,10 @@ ifioctl(struct socket *so, u_long cmd, c > case SIOCGIFPARENT: > if (ifp->if_ioctl == 0) > return (EOPNOTSUPP); > + /* XXXSMP breaks atomicity */ > + rw_exit_write(); > error = (*ifp->if_ioctl)(ifp, cmd, data); > + rw_enter_write(); > break; > > case SIOCGIFDESCR:
Re: Xorg vs wifi scanning
On Mon, Feb 06, 2017 at 03:54:56PM +0100, Martin Pieuchot wrote: > tb@ found that another code path needs to be unlocked. Diff below does > that. These ioctl(2)s are only messing at the driver level, so it is > safe to drop the NET_LOCK() there as well. > > ok? I am happy with this. OK. And thank you for taking care of this problem!
Re: Xorg vs wifi scanning
On 06/02/17(Mon) 12:27, Martin Pieuchot wrote: > Paul and Antoine reported that, since the NET_LOCK() went in, doing a > channel scan with iwm(4) and iwn(4) freezes X. > > This is a deadlock due to the fact that wireless drivers sleep during > a long time holding the NET_LOCK() while the X server tries to > communicate with unix sockets, that still need the NET_LOCK(). > > The obvious solution to this problem is to not hold the NET_LOCK() for > unix socket related operations. We're working on that. > > However since hardware ioctl(2) can sleep there's no problem to release > the NET_LOCK() there. This is true for all ioctls that are not > modifying any stack state (address, multicast group, etc). Hence the > diff below that should fix the problem in a generic way. tb@ found that another code path needs to be unlocked. Diff below does that. These ioctl(2)s are only messing at the driver level, so it is safe to drop the NET_LOCK() there as well. ok? Index: netinet/in.c === RCS file: /cvs/src/sys/netinet/in.c,v retrieving revision 1.133 diff -u -p -r1.133 in.c --- netinet/in.c20 Dec 2016 12:35:38 - 1.133 +++ netinet/in.c6 Feb 2017 13:43:06 - @@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc default: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); - return ((*ifp->if_ioctl)(ifp, cmd, data)); + /* XXXSMP breaks atomicity */ + rw_exit_write(); + error = ((*ifp->if_ioctl)(ifp, cmd, data)); + rw_enter_write(); + return (error); } return (0); } Index: netinet6/in6.c === RCS file: /cvs/src/sys/netinet6/in6.c,v retrieving revision 1.197 diff -u -p -r1.197 in6.c --- netinet6/in6.c 5 Feb 2017 16:04:14 - 1.197 +++ netinet6/in6.c 6 Feb 2017 13:43:06 - @@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru struct in6_ifaddr *ia6 = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; + int error; if (ifp == NULL) return (EOPNOTSUPP); @@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru break; default: - if (ifp == NULL || ifp->if_ioctl == 0) + if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); - return ((*ifp->if_ioctl)(ifp, cmd, data)); + /* XXXSMP breaks atomicity */ + rw_exit_write(); + error = ((*ifp->if_ioctl)(ifp, cmd, data)); + rw_enter_write(); + return (error); } return (0); Index: dev/pci/if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.162 diff -u -p -r1.162 if_iwm.c --- dev/pci/if_iwm.c4 Feb 2017 19:20:59 - 1.162 +++ dev/pci/if_iwm.c6 Feb 2017 14:52:24 - @@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, struct ifreq *ifr; int s, err = 0; - /* XXXSMP breaks atomicity */ - rw_exit_write(); - /* * Prevent processes from entering this function while another * process is tsleep'ing in it. */ err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR); - if (err) { - rw_enter_write(); + if (err) return err; - } s = splnet(); @@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, splx(s); rw_exit(>ioctl_rwl); - rw_enter_write(); return err; } Index: net/if.c === RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.485 diff -u -p -r1.485 if.c --- net/if.c1 Feb 2017 02:02:01 - 1.485 +++ net/if.c6 Feb 2017 14:27:47 - @@ -2029,7 +2029,10 @@ ifioctl(struct socket *so, u_long cmd, c case SIOCGIFPARENT: if (ifp->if_ioctl == 0) return (EOPNOTSUPP); + /* XXXSMP breaks atomicity */ + rw_exit_write(); error = (*ifp->if_ioctl)(ifp, cmd, data); + rw_enter_write(); break; case SIOCGIFDESCR:
Re: Xorg vs wifi scanning
On 06/02/17(Mon) 12:59, Stefan Sperling wrote: > On Mon, Feb 06, 2017 at 12:44:52PM +0100, Martin Pieuchot wrote: > > On 06/02/17(Mon) 12:39, Theo Buehler wrote: > > > This fixes the issue on my iwn0, but re-introduces the problem on my iwm0. > > > > That means another ioctl(2) triggers the problem with iwm(4). It would > > help if you could figure out which one. > > Probably SIOCS80211SCAN. Your diff doesn't touch ieee80211_ioctl(). My diff does touch ieee80211_ioctl(). Like that: ifioctl() -> pr_usrreq(PRU_CONTROL -> in_control( -> iwm_ioctl( -> ieee80211_ioctl(). Or do you know another path to reach this ioctl handler?
Re: Xorg vs wifi scanning
On Mon, Feb 06, 2017 at 12:44:52PM +0100, Martin Pieuchot wrote: > On 06/02/17(Mon) 12:39, Theo Buehler wrote: > > This fixes the issue on my iwn0, but re-introduces the problem on my iwm0. > > That means another ioctl(2) triggers the problem with iwm(4). It would > help if you could figure out which one. Probably SIOCS80211SCAN. Your diff doesn't touch ieee80211_ioctl().
Re: Xorg vs wifi scanning
On 06/02/17(Mon) 12:39, Theo Buehler wrote: > On Mon, Feb 06, 2017 at 12:27:15PM +0100, Martin Pieuchot wrote: > > Paul and Antoine reported that, since the NET_LOCK() went in, doing a > > channel scan with iwm(4) and iwn(4) freezes X. > > > > This is a deadlock due to the fact that wireless drivers sleep during > > a long time holding the NET_LOCK() while the X server tries to > > communicate with unix sockets, that still need the NET_LOCK(). > > > > The obvious solution to this problem is to not hold the NET_LOCK() for > > unix socket related operations. We're working on that. > > > > However since hardware ioctl(2) can sleep there's no problem to release > > the NET_LOCK() there. This is true for all ioctls that are not > > modifying any stack state (address, multicast group, etc). Hence the > > diff below that should fix the problem in a generic way. > > > > ok? > > This fixes the issue on my iwn0, but re-introduces the problem on my iwm0. That means another ioctl(2) triggers the problem with iwm(4). It would help if you could figure out which one. > Tested with > > iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300" rev 0x35: msi, > MIMO 3T3R, MoW > > and > > iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi > iwm0: hw rev 0x210, fw ver 16.242414.0 >
Re: Xorg vs wifi scanning
On Mon, Feb 06, 2017 at 12:27:15PM +0100, Martin Pieuchot wrote: > Paul and Antoine reported that, since the NET_LOCK() went in, doing a > channel scan with iwm(4) and iwn(4) freezes X. > > This is a deadlock due to the fact that wireless drivers sleep during > a long time holding the NET_LOCK() while the X server tries to > communicate with unix sockets, that still need the NET_LOCK(). > > The obvious solution to this problem is to not hold the NET_LOCK() for > unix socket related operations. We're working on that. > > However since hardware ioctl(2) can sleep there's no problem to release > the NET_LOCK() there. This is true for all ioctls that are not > modifying any stack state (address, multicast group, etc). Hence the > diff below that should fix the problem in a generic way. > > ok? This fixes the issue on my iwn0, but re-introduces the problem on my iwm0. Tested with iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300" rev 0x35: msi, MIMO 3T3R, MoW and iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi iwm0: hw rev 0x210, fw ver 16.242414.0
Xorg vs wifi scanning
Paul and Antoine reported that, since the NET_LOCK() went in, doing a channel scan with iwm(4) and iwn(4) freezes X. This is a deadlock due to the fact that wireless drivers sleep during a long time holding the NET_LOCK() while the X server tries to communicate with unix sockets, that still need the NET_LOCK(). The obvious solution to this problem is to not hold the NET_LOCK() for unix socket related operations. We're working on that. However since hardware ioctl(2) can sleep there's no problem to release the NET_LOCK() there. This is true for all ioctls that are not modifying any stack state (address, multicast group, etc). Hence the diff below that should fix the problem in a generic way. ok? Index: netinet/in.c === RCS file: /cvs/src/sys/netinet/in.c,v retrieving revision 1.133 diff -u -p -r1.133 in.c --- netinet/in.c20 Dec 2016 12:35:38 - 1.133 +++ netinet/in.c6 Feb 2017 11:15:45 - @@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc default: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); - return ((*ifp->if_ioctl)(ifp, cmd, data)); + /* XXXSMP breaks atomicity */ + rw_exit_write(); + error = ((*ifp->if_ioctl)(ifp, cmd, data)); + rw_enter_write(); + return (error); } return (0); } Index: netinet6/in6.c === RCS file: /cvs/src/sys/netinet6/in6.c,v retrieving revision 1.197 diff -u -p -r1.197 in6.c --- netinet6/in6.c 5 Feb 2017 16:04:14 - 1.197 +++ netinet6/in6.c 6 Feb 2017 11:16:09 - @@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru struct in6_ifaddr *ia6 = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; + int error; if (ifp == NULL) return (EOPNOTSUPP); @@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru break; default: - if (ifp == NULL || ifp->if_ioctl == 0) + if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); - return ((*ifp->if_ioctl)(ifp, cmd, data)); + /* XXXSMP breaks atomicity */ + rw_exit_write(); + error = ((*ifp->if_ioctl)(ifp, cmd, data)); + rw_enter_write(); + return (error); } return (0); Index: dev/pci/if_iwm.c === RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v retrieving revision 1.162 diff -u -p -r1.162 if_iwm.c --- dev/pci/if_iwm.c4 Feb 2017 19:20:59 - 1.162 +++ dev/pci/if_iwm.c6 Feb 2017 11:15:56 - @@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, struct ifreq *ifr; int s, err = 0; - /* XXXSMP breaks atomicity */ - rw_exit_write(); - /* * Prevent processes from entering this function while another * process is tsleep'ing in it. */ err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR); - if (err) { - rw_enter_write(); + if (err) return err; - } s = splnet(); @@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd, splx(s); rw_exit(>ioctl_rwl); - rw_enter_write(); return err; }