The existing variations of the NET_LOCK() macros are confusing. We're
now all aware of this fact. So let's keep them simple to prevent future
mistakes :)
The diff below reduces the current set of methods to the following:
NET_LOCK()/NET_UNLOCK()
NET_ASSERT_LOCKED()
NET_ASSERT_UNLOCKED()
On top of those, two extra methods are provided for a *very specific*
purpose:
NET_RLOCK_IN_IOCTL()
NET_RUNLOCK_IN_IOCTL()
"IN_IOCTL()" mean they should only be used in ioctl(2) context ;) The
purpose is to keep previous behavior where read-only ioctl(2) dot not
wait for packet processing to finish.
To achieve this NET_LOCK() and NET_UNLOCK() now contain some magic to
ensure the only packet processing thread grabbing a read-lock is the
softnet thread. In other words, one doesn't need to be aware of all
this magic, just imagine that the Network Stack is big-locked and use
NET_LOCK()/NET_UNLOCK() everywhere.
If you think this is an improvement. I'd like to know if there is a
better way to implement the magic in NET_LOCK(). String comparison is
not great ;)
Comments? What do you think about this? I'm interested to hear all of
you, 'case I'd like to know how does everyone perceive this complexity
related to locks.
Thanks for reading.
PS: macros have been converted to functions to avoid pulling more
headers in <sys/systm.h>, the magic requires <sys/proc.h>.
Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.603
diff -u -p -r1.603 if.c
--- net/if.c 12 Apr 2020 07:04:03 -0000 1.603
+++ net/if.c 12 Apr 2020 12:58:10 -0000
@@ -250,6 +250,60 @@ struct task if_input_task_locked = TASK_
struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
/*
+ * Network stack data structures are, unless stated otherwise, protected
+ * by the NET_LOCK(). It's a single non-recursive lock for the whole
+ * subsystem.
+ */
+void
+NET_LOCK(void)
+{
+ struct proc *p = curproc;
+
+ /*
+ * The "softnet" thread should be the only thread processing
+ * packets without holding an exclusive lock. This is done
+ * to allow read-only ioctl(2) to not block.
+ */
+ if ((p->p_flag & P_SYSTEM) &&
+ (strncmp(p->p_p->ps_comm, "softnet", 7) == 0))
+ rw_enter_read(&netlock);
+ else
+ rw_enter_write(&netlock);
+}
+
+void
+NET_UNLOCK(void)
+{
+ struct proc *p = curproc;
+
+ if ((p->p_flag & P_SYSTEM) &&
+ (strncmp(p->p_p->ps_comm, "softnet", 7) == 0))
+ rw_exit_read(&netlock);
+ else
+ rw_exit_write(&netlock);
+}
+
+/*
+ * Reader version of NET_LOCK() to be used in ioctl(2) path only.
+ *
+ * Can be grabbed instead of the exclusive version when no field
+ * protected by the NET_LOCK() is modified by the ioctl(2).
+ */
+void
+NET_RLOCK_IN_IOCTL(void)
+{
+ KASSERT((curproc->p_flag & P_SYSTEM) == 0);
+ rw_enter_read(&netlock);
+}
+
+void
+NET_RUNLOCK_IN_IOCTL(void)
+{
+ KASSERT((curproc->p_flag & P_SYSTEM) == 0);
+ rw_exit_read(&netlock);
+}
+
+/*
* Network interface utility routines.
*/
void
@@ -936,7 +990,7 @@ if_input_process(struct ifnet *ifp, stru
*
* Since we have a NET_LOCK() we also use it to serialize access
* to PF globals, pipex globals, unicast and multicast addresses
- * lists.
+ * lists and the socket layer.
*/
NET_LOCK();
while ((m = ml_dequeue(ml)) != NULL)
@@ -2338,27 +2392,27 @@ ifioctl_get(u_long cmd, caddr_t data)
switch(cmd) {
case SIOCGIFCONF:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = ifconf(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCIFGCLONERS:
error = if_clone_list((struct if_clonereq *)data);
return (error);
case SIOCGIFGMEMB:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgroupmembers(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCGIFGATTR:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgroupattribs(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
case SIOCGIFGLIST:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
error = if_getgrouplist(data);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
@@ -2366,7 +2420,7 @@ ifioctl_get(u_long cmd, caddr_t data)
if (ifp == NULL)
return (ENXIO);
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
switch(cmd) {
case SIOCGIFFLAGS:
@@ -2434,7 +2488,7 @@ ifioctl_get(u_long cmd, caddr_t data)
panic("invalid ioctl %lu", cmd);
}
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: netinet/in.c
===================================================================
RCS file: /cvs/src/sys/netinet/in.c,v
retrieving revision 1.169
diff -u -p -r1.169 in.c
--- netinet/in.c 15 Mar 2020 05:34:13 -0000 1.169
+++ netinet/in.c 12 Apr 2020 12:58:10 -0000
@@ -569,7 +569,7 @@ in_ioctl_get(u_long cmd, caddr_t data, s
return (error);
}
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family != AF_INET)
@@ -620,7 +620,7 @@ in_ioctl_get(u_long cmd, caddr_t data, s
}
err:
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: netinet6/in6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6.c,v
retrieving revision 1.235
diff -u -p -r1.235 in6.c
--- netinet6/in6.c 15 Mar 2020 05:34:14 -0000 1.235
+++ netinet6/in6.c 12 Apr 2020 12:58:10 -0000
@@ -412,7 +412,7 @@ in6_ioctl_get(u_long cmd, caddr_t data,
return (error);
}
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
if (sa6 != NULL) {
error = in6_check_embed_scope(sa6, ifp->if_index);
@@ -506,7 +506,7 @@ in6_ioctl_get(u_long cmd, caddr_t data,
}
err:
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (error);
}
Index: netinet6/nd6.c
===================================================================
RCS file: /cvs/src/sys/netinet6/nd6.c,v
retrieving revision 1.229
diff -u -p -r1.229 nd6.c
--- netinet6/nd6.c 29 Nov 2019 16:41:01 -0000 1.229
+++ netinet6/nd6.c 12 Apr 2020 12:58:11 -0000
@@ -1021,9 +1021,9 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
switch (cmd) {
case SIOCGIFINFO_IN6:
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
ndi->ndi = *ND_IFINFO(ifp);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (0);
case SIOCGNBRINFO_IN6:
{
@@ -1031,7 +1031,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
struct in6_addr nb_addr = nbi->addr; /* make local for safety */
time_t expire;
- NET_RLOCK();
+ NET_RLOCK_IN_IOCTL();
/*
* XXX: KAME specific hack for scoped addresses
* XXXX: for other scopes than link-local?
@@ -1048,7 +1048,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
if (rt == NULL ||
(ln = (struct llinfo_nd6 *)rt->rt_llinfo) == NULL) {
rtfree(rt);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (EINVAL);
}
expire = ln->ln_rt->rt_expire;
@@ -1063,7 +1063,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru
nbi->expire = expire;
rtfree(rt);
- NET_RUNLOCK();
+ NET_RUNLOCK_IN_IOCTL();
return (0);
}
}
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.145
diff -u -p -r1.145 systm.h
--- sys/systm.h 20 Mar 2020 03:37:08 -0000 1.145
+++ sys/systm.h 12 Apr 2020 12:58:11 -0000
@@ -315,37 +315,24 @@ int uiomove(void *, size_t, struct uio *
extern struct rwlock netlock;
-#define NET_LOCK() NET_WLOCK()
-#define NET_UNLOCK() NET_WUNLOCK()
-#define NET_ASSERT_UNLOCKED() NET_ASSERT_WUNLOCKED()
-
-
-#define NET_WLOCK() do { rw_enter_write(&netlock); } while (0)
-#define NET_WUNLOCK() do { rw_exit_write(&netlock); } while (0)
-
-#define NET_ASSERT_WLOCKED()
\
-do { \
- int _s = rw_status(&netlock); \
- if ((splassert_ctl > 0) && (_s != RW_WRITE)) \
- splassert_fail(RW_WRITE, _s, __func__); \
-} while (0)
-
-#define NET_ASSERT_WUNLOCKED()
\
+#define NET_ASSERT_UNLOCKED()
\
do { \
int _s = rw_status(&netlock); \
if ((splassert_ctl > 0) && (_s == RW_WRITE)) \
splassert_fail(0, RW_WRITE, __func__); \
} while (0)
-#define NET_RLOCK() do { rw_enter_read(&netlock); } while (0)
-#define NET_RUNLOCK() do { rw_exit_read(&netlock); } while (0)
-
#define NET_ASSERT_LOCKED()
\
do { \
int _s = rw_status(&netlock); \
if ((splassert_ctl > 0) && (_s != RW_WRITE && _s != RW_READ)) \
splassert_fail(RW_READ, _s, __func__); \
} while (0)
+
+void NET_LOCK(void);
+void NET_UNLOCK(void);
+void NET_RLOCK_IN_IOCTL(void);
+void NET_RUNLOCK_IN_IOCTL(void);
__returns_twice int setjmp(label_t *);
__dead void longjmp(label_t *);