Re: Xorg vs wifi scanning

2017-02-07 Thread Paul Irofti
On Mon, Feb 06, 2017 at 03:54:56PM +0100, Martin Pieuchot wrote:
> On 06/02/17(Mon) 12:27, Martin Pieuchot wrote:
> > Paul and Antoine reported that, since the NET_LOCK() went in, doing a
> > channel scan with iwm(4) and iwn(4) freezes X.
> > 
> > This is a deadlock due to the fact that wireless drivers sleep during
> > a long time holding the NET_LOCK() while the X server tries to
> > communicate with unix sockets, that still need the NET_LOCK().
> > 
> > The obvious solution to this problem is to not hold the NET_LOCK() for
> > unix socket related operations.  We're working on that.
> > 
> > However since hardware ioctl(2) can sleep there's no problem to release
> > the NET_LOCK() there.  This is true for all ioctls that are not
> > modifying any stack state (address, multicast group, etc).  Hence the
> > diff below that should fix the problem in a generic way.
> 
> tb@ found that another code path needs to be unlocked.  Diff below does
> that.  These ioctl(2)s are only messing at the driver level, so it is
> safe to drop the NET_LOCK() there as well.
> 
> ok?

OK pirofti@. Thanks!

> 
> Index: netinet/in.c
> ===
> RCS file: /cvs/src/sys/netinet/in.c,v
> retrieving revision 1.133
> diff -u -p -r1.133 in.c
> --- netinet/in.c  20 Dec 2016 12:35:38 -  1.133
> +++ netinet/in.c  6 Feb 2017 13:43:06 -
> @@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc
>   default:
>   if (ifp->if_ioctl == NULL)
>   return (EOPNOTSUPP);
> - return ((*ifp->if_ioctl)(ifp, cmd, data));
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
> + error = ((*ifp->if_ioctl)(ifp, cmd, data));
> + rw_enter_write();
> + return (error);
>   }
>   return (0);
>  }
> Index: netinet6/in6.c
> ===
> RCS file: /cvs/src/sys/netinet6/in6.c,v
> retrieving revision 1.197
> diff -u -p -r1.197 in6.c
> --- netinet6/in6.c5 Feb 2017 16:04:14 -   1.197
> +++ netinet6/in6.c6 Feb 2017 13:43:06 -
> @@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   struct  in6_ifaddr *ia6 = NULL;
>   struct  in6_aliasreq *ifra = (struct in6_aliasreq *)data;
>   struct sockaddr_in6 *sa6;
> + int error;
>  
>   if (ifp == NULL)
>   return (EOPNOTSUPP);
> @@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru
>   break;
>  
>   default:
> - if (ifp == NULL || ifp->if_ioctl == 0)
> + if (ifp->if_ioctl == NULL)
>   return (EOPNOTSUPP);
> - return ((*ifp->if_ioctl)(ifp, cmd, data));
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
> + error = ((*ifp->if_ioctl)(ifp, cmd, data));
> + rw_enter_write();
> + return (error);
>   }
>  
>   return (0);
> Index: dev/pci/if_iwm.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
> retrieving revision 1.162
> diff -u -p -r1.162 if_iwm.c
> --- dev/pci/if_iwm.c  4 Feb 2017 19:20:59 -   1.162
> +++ dev/pci/if_iwm.c  6 Feb 2017 14:52:24 -
> @@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
>   struct ifreq *ifr;
>   int s, err = 0;
>  
> - /* XXXSMP breaks atomicity */
> - rw_exit_write();
> -
>   /*
>* Prevent processes from entering this function while another
>* process is tsleep'ing in it.
>*/
>   err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR);
> - if (err) {
> - rw_enter_write();
> + if (err)
>   return err;
> - }
>  
>   s = splnet();
>  
> @@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
>  
>   splx(s);
>   rw_exit(>ioctl_rwl);
> - rw_enter_write();
>  
>   return err;
>  }
> Index: net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.485
> diff -u -p -r1.485 if.c
> --- net/if.c  1 Feb 2017 02:02:01 -   1.485
> +++ net/if.c  6 Feb 2017 14:27:47 -
> @@ -2029,7 +2029,10 @@ ifioctl(struct socket *so, u_long cmd, c
>   case SIOCGIFPARENT:
>   if (ifp->if_ioctl == 0)
>   return (EOPNOTSUPP);
> + /* XXXSMP breaks atomicity */
> + rw_exit_write();
>   error = (*ifp->if_ioctl)(ifp, cmd, data);
> + rw_enter_write();
>   break;
>  
>   case SIOCGIFDESCR:



Re: Xorg vs wifi scanning

2017-02-06 Thread Stefan Sperling
On Mon, Feb 06, 2017 at 03:54:56PM +0100, Martin Pieuchot wrote:
> tb@ found that another code path needs to be unlocked.  Diff below does
> that.  These ioctl(2)s are only messing at the driver level, so it is
> safe to drop the NET_LOCK() there as well.
> 
> ok?

I am happy with this. OK.

And thank you for taking care of this problem!



Re: Xorg vs wifi scanning

2017-02-06 Thread Martin Pieuchot
On 06/02/17(Mon) 12:27, Martin Pieuchot wrote:
> Paul and Antoine reported that, since the NET_LOCK() went in, doing a
> channel scan with iwm(4) and iwn(4) freezes X.
> 
> This is a deadlock due to the fact that wireless drivers sleep during
> a long time holding the NET_LOCK() while the X server tries to
> communicate with unix sockets, that still need the NET_LOCK().
> 
> The obvious solution to this problem is to not hold the NET_LOCK() for
> unix socket related operations.  We're working on that.
> 
> However since hardware ioctl(2) can sleep there's no problem to release
> the NET_LOCK() there.  This is true for all ioctls that are not
> modifying any stack state (address, multicast group, etc).  Hence the
> diff below that should fix the problem in a generic way.

tb@ found that another code path needs to be unlocked.  Diff below does
that.  These ioctl(2)s are only messing at the driver level, so it is
safe to drop the NET_LOCK() there as well.

ok?

Index: netinet/in.c
===
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.133
diff -u -p -r1.133 in.c
--- netinet/in.c20 Dec 2016 12:35:38 -  1.133
+++ netinet/in.c6 Feb 2017 13:43:06 -
@@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc
default:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
-   return ((*ifp->if_ioctl)(ifp, cmd, data));
+   /* XXXSMP breaks atomicity */
+   rw_exit_write();
+   error = ((*ifp->if_ioctl)(ifp, cmd, data));
+   rw_enter_write();
+   return (error);
}
return (0);
 }
Index: netinet6/in6.c
===
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.197
diff -u -p -r1.197 in6.c
--- netinet6/in6.c  5 Feb 2017 16:04:14 -   1.197
+++ netinet6/in6.c  6 Feb 2017 13:43:06 -
@@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru
struct  in6_ifaddr *ia6 = NULL;
struct  in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+   int error;
 
if (ifp == NULL)
return (EOPNOTSUPP);
@@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru
break;
 
default:
-   if (ifp == NULL || ifp->if_ioctl == 0)
+   if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
-   return ((*ifp->if_ioctl)(ifp, cmd, data));
+   /* XXXSMP breaks atomicity */
+   rw_exit_write();
+   error = ((*ifp->if_ioctl)(ifp, cmd, data));
+   rw_enter_write();
+   return (error);
}
 
return (0);
Index: dev/pci/if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.162
diff -u -p -r1.162 if_iwm.c
--- dev/pci/if_iwm.c4 Feb 2017 19:20:59 -   1.162
+++ dev/pci/if_iwm.c6 Feb 2017 14:52:24 -
@@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
struct ifreq *ifr;
int s, err = 0;
 
-   /* XXXSMP breaks atomicity */
-   rw_exit_write();
-
/*
 * Prevent processes from entering this function while another
 * process is tsleep'ing in it.
 */
err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR);
-   if (err) {
-   rw_enter_write();
+   if (err)
return err;
-   }
 
s = splnet();
 
@@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
 
splx(s);
rw_exit(>ioctl_rwl);
-   rw_enter_write();
 
return err;
 }
Index: net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.485
diff -u -p -r1.485 if.c
--- net/if.c1 Feb 2017 02:02:01 -   1.485
+++ net/if.c6 Feb 2017 14:27:47 -
@@ -2029,7 +2029,10 @@ ifioctl(struct socket *so, u_long cmd, c
case SIOCGIFPARENT:
if (ifp->if_ioctl == 0)
return (EOPNOTSUPP);
+   /* XXXSMP breaks atomicity */
+   rw_exit_write();
error = (*ifp->if_ioctl)(ifp, cmd, data);
+   rw_enter_write();
break;
 
case SIOCGIFDESCR:



Re: Xorg vs wifi scanning

2017-02-06 Thread Martin Pieuchot
On 06/02/17(Mon) 12:59, Stefan Sperling wrote:
> On Mon, Feb 06, 2017 at 12:44:52PM +0100, Martin Pieuchot wrote:
> > On 06/02/17(Mon) 12:39, Theo Buehler wrote:
> > > This fixes the issue on my iwn0, but re-introduces the problem on my iwm0.
> > 
> > That means another ioctl(2) triggers the problem with iwm(4).  It would
> > help if you could figure out which one.
> 
> Probably SIOCS80211SCAN. Your diff doesn't touch ieee80211_ioctl().

My diff does touch ieee80211_ioctl().  Like that:

ifioctl() ->
  pr_usrreq(PRU_CONTROL ->
in_control( ->
  iwm_ioctl( ->
ieee80211_ioctl().

Or do you know another path to reach this ioctl handler?



Re: Xorg vs wifi scanning

2017-02-06 Thread Stefan Sperling
On Mon, Feb 06, 2017 at 12:44:52PM +0100, Martin Pieuchot wrote:
> On 06/02/17(Mon) 12:39, Theo Buehler wrote:
> > This fixes the issue on my iwn0, but re-introduces the problem on my iwm0.
> 
> That means another ioctl(2) triggers the problem with iwm(4).  It would
> help if you could figure out which one.

Probably SIOCS80211SCAN. Your diff doesn't touch ieee80211_ioctl().



Re: Xorg vs wifi scanning

2017-02-06 Thread Martin Pieuchot
On 06/02/17(Mon) 12:39, Theo Buehler wrote:
> On Mon, Feb 06, 2017 at 12:27:15PM +0100, Martin Pieuchot wrote:
> > Paul and Antoine reported that, since the NET_LOCK() went in, doing a
> > channel scan with iwm(4) and iwn(4) freezes X.
> > 
> > This is a deadlock due to the fact that wireless drivers sleep during
> > a long time holding the NET_LOCK() while the X server tries to
> > communicate with unix sockets, that still need the NET_LOCK().
> > 
> > The obvious solution to this problem is to not hold the NET_LOCK() for
> > unix socket related operations.  We're working on that.
> > 
> > However since hardware ioctl(2) can sleep there's no problem to release
> > the NET_LOCK() there.  This is true for all ioctls that are not
> > modifying any stack state (address, multicast group, etc).  Hence the
> > diff below that should fix the problem in a generic way.
> > 
> > ok?
> 
> This fixes the issue on my iwn0, but re-introduces the problem on my iwm0.

That means another ioctl(2) triggers the problem with iwm(4).  It would
help if you could figure out which one.

> Tested with
> 
> iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300" rev 0x35: msi, 
> MIMO 3T3R, MoW
> 
> and
> 
> iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi
> iwm0: hw rev 0x210, fw ver 16.242414.0
> 



Re: Xorg vs wifi scanning

2017-02-06 Thread Theo Buehler
On Mon, Feb 06, 2017 at 12:27:15PM +0100, Martin Pieuchot wrote:
> Paul and Antoine reported that, since the NET_LOCK() went in, doing a
> channel scan with iwm(4) and iwn(4) freezes X.
> 
> This is a deadlock due to the fact that wireless drivers sleep during
> a long time holding the NET_LOCK() while the X server tries to
> communicate with unix sockets, that still need the NET_LOCK().
> 
> The obvious solution to this problem is to not hold the NET_LOCK() for
> unix socket related operations.  We're working on that.
> 
> However since hardware ioctl(2) can sleep there's no problem to release
> the NET_LOCK() there.  This is true for all ioctls that are not
> modifying any stack state (address, multicast group, etc).  Hence the
> diff below that should fix the problem in a generic way.
> 
> ok?

This fixes the issue on my iwn0, but re-introduces the problem on my iwm0.

Tested with

iwn0 at pci2 dev 0 function 0 "Intel Centrino Ultimate-N 6300" rev 0x35: msi, 
MIMO 3T3R, MoW

and

iwm0 at pci2 dev 0 function 0 "Intel Dual Band Wireless AC 7265" rev 0x59, msi
iwm0: hw rev 0x210, fw ver 16.242414.0



Xorg vs wifi scanning

2017-02-06 Thread Martin Pieuchot
Paul and Antoine reported that, since the NET_LOCK() went in, doing a
channel scan with iwm(4) and iwn(4) freezes X.

This is a deadlock due to the fact that wireless drivers sleep during
a long time holding the NET_LOCK() while the X server tries to
communicate with unix sockets, that still need the NET_LOCK().

The obvious solution to this problem is to not hold the NET_LOCK() for
unix socket related operations.  We're working on that.

However since hardware ioctl(2) can sleep there's no problem to release
the NET_LOCK() there.  This is true for all ioctls that are not
modifying any stack state (address, multicast group, etc).  Hence the
diff below that should fix the problem in a generic way.

ok?

Index: netinet/in.c
===
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.133
diff -u -p -r1.133 in.c
--- netinet/in.c20 Dec 2016 12:35:38 -  1.133
+++ netinet/in.c6 Feb 2017 11:15:45 -
@@ -390,7 +390,11 @@ in_ioctl(u_long cmd, caddr_t data, struc
default:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
-   return ((*ifp->if_ioctl)(ifp, cmd, data));
+   /* XXXSMP breaks atomicity */
+   rw_exit_write();
+   error = ((*ifp->if_ioctl)(ifp, cmd, data));
+   rw_enter_write();
+   return (error);
}
return (0);
 }
Index: netinet6/in6.c
===
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.197
diff -u -p -r1.197 in6.c
--- netinet6/in6.c  5 Feb 2017 16:04:14 -   1.197
+++ netinet6/in6.c  6 Feb 2017 11:16:09 -
@@ -190,6 +190,7 @@ in6_ioctl(u_long cmd, caddr_t data, stru
struct  in6_ifaddr *ia6 = NULL;
struct  in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
+   int error;
 
if (ifp == NULL)
return (EOPNOTSUPP);
@@ -469,9 +470,13 @@ in6_ioctl(u_long cmd, caddr_t data, stru
break;
 
default:
-   if (ifp == NULL || ifp->if_ioctl == 0)
+   if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
-   return ((*ifp->if_ioctl)(ifp, cmd, data));
+   /* XXXSMP breaks atomicity */
+   rw_exit_write();
+   error = ((*ifp->if_ioctl)(ifp, cmd, data));
+   rw_enter_write();
+   return (error);
}
 
return (0);
Index: dev/pci/if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.162
diff -u -p -r1.162 if_iwm.c
--- dev/pci/if_iwm.c4 Feb 2017 19:20:59 -   1.162
+++ dev/pci/if_iwm.c6 Feb 2017 11:15:56 -
@@ -6133,18 +6133,13 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
struct ifreq *ifr;
int s, err = 0;
 
-   /* XXXSMP breaks atomicity */
-   rw_exit_write();
-
/*
 * Prevent processes from entering this function while another
 * process is tsleep'ing in it.
 */
err = rw_enter(>ioctl_rwl, RW_WRITE | RW_INTR);
-   if (err) {
-   rw_enter_write();
+   if (err)
return err;
-   }
 
s = splnet();
 
@@ -6190,7 +6185,6 @@ iwm_ioctl(struct ifnet *ifp, u_long cmd,
 
splx(s);
rw_exit(>ioctl_rwl);
-   rw_enter_write();
 
return err;
 }