Module Name: src
Committed By: ozaki-r
Date: Wed Mar 1 09:09:37 UTC 2017
Modified Files:
src/sys/netinet6: mld6.c
Log Message:
Make IPv6 multicast MP-safe partially
To complete the task, we need to make users of IPv6 multicast MP-safe, for
example socket/PCB and CARP.
To generate a diff of this commit:
cvs rdiff -u -r1.84 -r1.85 src/sys/netinet6/mld6.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/netinet6/mld6.c
diff -u src/sys/netinet6/mld6.c:1.84 src/sys/netinet6/mld6.c:1.85
--- src/sys/netinet6/mld6.c:1.84 Wed Mar 1 08:54:12 2017
+++ src/sys/netinet6/mld6.c Wed Mar 1 09:09:37 2017
@@ -1,4 +1,4 @@
-/* $NetBSD: mld6.c,v 1.84 2017/03/01 08:54:12 ozaki-r Exp $ */
+/* $NetBSD: mld6.c,v 1.85 2017/03/01 09:09:37 ozaki-r Exp $ */
/* $KAME: mld6.c,v 1.25 2001/01/16 14:14:18 itojun Exp $ */
/*
@@ -102,7 +102,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.84 2017/03/01 08:54:12 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.85 2017/03/01 09:09:37 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -119,6 +119,7 @@ __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.8
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/cprng.h>
+#include <sys/rwlock.h>
#include <net/if.h>
@@ -135,6 +136,8 @@ __KERNEL_RCSID(0, "$NetBSD: mld6.c,v 1.8
#include <net/net_osdep.h>
+static krwlock_t in6_multilock __cacheline_aligned;
+
/*
* Protocol constants
*/
@@ -157,6 +160,10 @@ static void mld_starttimer(struct in6_mu
static void mld_stoptimer(struct in6_multi *);
static u_long mld_timerresid(struct in6_multi *);
+static void in6m_ref(struct in6_multi *);
+static void in6m_unref(struct in6_multi *);
+static void in6m_destroy(struct in6_multi *);
+
void
mld_init(void)
{
@@ -178,6 +185,8 @@ mld_init(void)
/* We will specify the hoplimit by a multicast option. */
ip6_opts.ip6po_hlim = -1;
ip6_opts.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
+
+ rw_init(&in6_multilock);
}
static void
@@ -185,6 +194,7 @@ mld_starttimer(struct in6_multi *in6m)
{
struct timeval now;
+ KASSERT(rw_write_held(&in6_multilock));
KASSERT(in6m->in6m_timer != IN6M_TIMER_UNDEF);
microtime(&now);
@@ -200,13 +210,27 @@ mld_starttimer(struct in6_multi *in6m)
callout_schedule(&in6m->in6m_timer_ch, in6m->in6m_timer);
}
+/*
+ * mld_stoptimer releases in6_multilock when calling callout_halt.
+ * The caller must ensure in6m won't be freed while releasing the lock.
+ */
static void
mld_stoptimer(struct in6_multi *in6m)
{
+
+ KASSERT(rw_write_held(&in6_multilock));
+
if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
return;
- callout_stop(&in6m->in6m_timer_ch);
+ rw_exit(&in6_multilock);
+
+ if (mutex_owned(softnet_lock))
+ callout_halt(&in6m->in6m_timer_ch, softnet_lock);
+ else
+ callout_halt(&in6m->in6m_timer_ch, NULL);
+
+ rw_enter(&in6_multilock, RW_WRITER);
in6m->in6m_timer = IN6M_TIMER_UNDEF;
}
@@ -216,10 +240,13 @@ mld_timeo(void *arg)
{
struct in6_multi *in6m = arg;
- /* XXX NOMPSAFE still need softnet_lock */
+ KASSERT(in6m->in6m_refcount > 0);
+
+#ifndef NET_MPSAFE
mutex_enter(softnet_lock);
KERNEL_LOCK(1, NULL);
-
+#endif
+ rw_enter(&in6_multilock, RW_WRITER);
if (in6m->in6m_timer == IN6M_TIMER_UNDEF)
goto out;
@@ -235,8 +262,13 @@ mld_timeo(void *arg)
}
out:
+ rw_exit(&in6_multilock);
+#ifndef NET_MPSAFE
KERNEL_UNLOCK_ONE(NULL);
mutex_exit(softnet_lock);
+#else
+ return;
+#endif
}
static u_long
@@ -268,6 +300,8 @@ mld_start_listening(struct in6_multi *in
{
struct in6_addr all_in6;
+ KASSERT(rw_write_held(&in6_multilock));
+
/*
* RFC2710 page 10:
* The node never sends a Report or Done for the link-scope all-nodes
@@ -300,6 +334,8 @@ mld_stop_listening(struct in6_multi *in6
{
struct in6_addr allnode, allrouter;
+ KASSERT(rw_lock_held(&in6_multilock));
+
allnode = in6addr_linklocal_allnodes;
if (in6_setscope(&allnode, in6m->in6m_ifp, NULL)) {
/* XXX: this should not happen! */
@@ -395,6 +431,8 @@ mld_input(struct mbuf *m, int off)
*/
switch (mldh->mld_type) {
case MLD_LISTENER_QUERY: {
+ struct in6_multi *next;
+
if (ifp->if_flags & IFF_LOOPBACK)
break;
@@ -420,7 +458,17 @@ mld_input(struct mbuf *m, int off)
*/
timer = ntohs(mldh->mld_maxdelay);
- LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
+ rw_enter(&in6_multilock, RW_WRITER);
+ /*
+ * mld_stoptimer and mld_sendpkt release in6_multilock
+ * temporarily, so we have to prevent in6m from being freed
+ * while releasing the lock by having an extra reference to it.
+ *
+ * Also in6_purge_multi might remove items from the list of the
+ * ifp while releasing the lock. Fortunately in6_purge_multi is
+ * never executed as long as we have a psref of the ifp.
+ */
+ LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, &all_in6) ||
IPV6_ADDR_MC_SCOPE(&in6m->in6m_addr) <
IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -434,10 +482,14 @@ mld_input(struct mbuf *m, int off)
continue;
if (timer == 0) {
+ in6m_ref(in6m);
+
/* send a report immediately */
mld_stoptimer(in6m);
mld_sendpkt(in6m, MLD_LISTENER_REPORT, NULL);
in6m->in6m_state = MLD_IREPORTEDLAST;
+
+ in6m_unref(in6m); /* May free in6m */
} else if (in6m->in6m_timer == IN6M_TIMER_UNDEF ||
mld_timerresid(in6m) > timer) {
in6m->in6m_timer =
@@ -445,6 +497,7 @@ mld_input(struct mbuf *m, int off)
mld_starttimer(in6m);
}
}
+ rw_exit(&in6_multilock);
break;
}
@@ -468,11 +521,16 @@ mld_input(struct mbuf *m, int off)
* If we belong to the group being reported, stop
* our timer for that group.
*/
+ rw_enter(&in6_multilock, RW_WRITER);
in6m = in6_lookup_multi(&mld_addr, ifp);
if (in6m) {
+ in6m_ref(in6m);
mld_stoptimer(in6m); /* transit to idle state */
in6m->in6m_state = MLD_OTHERLISTENER; /* clear flag */
+ in6m_unref(in6m);
+ in6m = NULL; /* in6m might be freed */
}
+ rw_exit(&in6_multilock);
break;
default: /* this is impossible */
#if 0
@@ -492,6 +550,11 @@ out_nodrop:
m_put_rcvif_psref(ifp, &psref);
}
+/*
+ * XXX mld_sendpkt must be called with in6_multilock held and
+ * will release in6_multilock before calling ip6_output and
+ * returning to avoid locking against myself in ip6_output.
+ */
static void
mld_sendpkt(struct in6_multi *in6m, int type,
const struct in6_addr *dst)
@@ -506,6 +569,8 @@ mld_sendpkt(struct in6_multi *in6m, int
struct psref psref;
int bound;
+ KASSERT(rw_write_held(&in6_multilock));
+
/*
* At first, find a link local address on the outgoing interface
* to use as the source address of the MLD packet.
@@ -570,8 +635,13 @@ mld_sendpkt(struct in6_multi *in6m, int
break;
}
+ /* XXX we cannot call ip6_output with holding in6_multilock */
+ rw_exit(&in6_multilock);
+
ip6_output(mh, &ip6_opts, NULL, ia ? 0 : IPV6_UNSPECSRC,
&im6o, NULL, NULL);
+
+ rw_enter(&in6_multilock, RW_WRITER);
}
static struct mld_hdr *
@@ -623,6 +693,23 @@ mld_allocbuf(struct mbuf **mh, int len,
return mldh;
}
+static void
+in6m_ref(struct in6_multi *in6m)
+{
+
+ KASSERT(rw_write_held(&in6_multilock));
+ in6m->in6m_refcount++;
+}
+
+static void
+in6m_unref(struct in6_multi *in6m)
+{
+
+ KASSERT(rw_write_held(&in6_multilock));
+ if (--in6m->in6m_refcount == 0)
+ in6m_destroy(in6m);
+}
+
/*
* Add an address to the list of IP6 multicast addresses for a given interface.
*/
@@ -632,10 +719,10 @@ in6_addmulti(struct in6_addr *maddr6, st
{
struct sockaddr_in6 sin6;
struct in6_multi *in6m;
- int s = splsoftnet();
*errorp = 0;
+ rw_enter(&in6_multilock, RW_WRITER);
/*
* See if address already in list.
*/
@@ -653,9 +740,8 @@ in6_addmulti(struct in6_addr *maddr6, st
in6m = (struct in6_multi *)
malloc(sizeof(*in6m), M_IPMADDR, M_NOWAIT|M_ZERO);
if (in6m == NULL) {
- splx(s);
*errorp = ENOBUFS;
- return (NULL);
+ goto out;
}
in6m->in6m_addr = *maddr6;
@@ -665,7 +751,6 @@ in6_addmulti(struct in6_addr *maddr6, st
callout_init(&in6m->in6m_timer_ch, CALLOUT_MPSAFE);
callout_setfunc(&in6m->in6m_timer_ch, mld_timeo, in6m);
- /* FIXME NOMPSAFE: need to lock */
LIST_INSERT_HEAD(&ifp->if_multiaddrs, in6m, in6m_entry);
/*
@@ -678,17 +763,15 @@ in6_addmulti(struct in6_addr *maddr6, st
callout_destroy(&in6m->in6m_timer_ch);
LIST_REMOVE(in6m, in6m_entry);
free(in6m, M_IPMADDR);
- splx(s);
- return (NULL);
+ in6m = NULL;
+ goto out;
}
in6m->in6m_timer = timer;
if (in6m->in6m_timer > 0) {
in6m->in6m_state = MLD_REPORTPENDING;
mld_starttimer(in6m);
-
- splx(s);
- return (in6m);
+ goto out;
}
/*
@@ -697,69 +780,82 @@ in6_addmulti(struct in6_addr *maddr6, st
*/
mld_start_listening(in6m);
}
- splx(s);
- return (in6m);
+out:
+ rw_exit(&in6_multilock);
+ return in6m;
}
-/*
- * Delete a multicast address record.
- */
-void
-in6_delmulti(struct in6_multi *in6m)
+static void
+in6m_destroy(struct in6_multi *in6m)
{
- struct sockaddr_in6 sin6;
struct in6_ifaddr *ia;
- int s = splsoftnet();
-
- mld_stoptimer(in6m);
+ struct sockaddr_in6 sin6;
+ int s;
- if (--in6m->in6m_refcount == 0) {
- int _s;
+ KASSERT(rw_write_held(&in6_multilock));
+ KASSERT(in6m->in6m_refcount == 0);
- /*
- * No remaining claims to this record; let MLD6 know
- * that we are leaving the multicast group.
- */
- mld_stop_listening(in6m);
+ /*
+ * No remaining claims to this record; let MLD6 know
+ * that we are leaving the multicast group.
+ */
+ mld_stop_listening(in6m);
- /*
- * Unlink from list.
- */
- /* FIXME NOMPSAFE: need to lock */
- LIST_REMOVE(in6m, in6m_entry);
+ /*
+ * Unlink from list.
+ */
+ LIST_REMOVE(in6m, in6m_entry);
- /*
- * Delete all references of this multicasting group from
- * the membership arrays
- */
- _s = pserialize_read_enter();
- IN6_ADDRLIST_READER_FOREACH(ia) {
- struct in6_multi_mship *imm;
- LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
- if (imm->i6mm_maddr == in6m)
- imm->i6mm_maddr = NULL;
- }
+ /*
+ * Delete all references of this multicasting group from
+ * the membership arrays
+ */
+ s = pserialize_read_enter();
+ IN6_ADDRLIST_READER_FOREACH(ia) {
+ struct in6_multi_mship *imm;
+ LIST_FOREACH(imm, &ia->ia6_memberships, i6mm_chain) {
+ if (imm->i6mm_maddr == in6m)
+ imm->i6mm_maddr = NULL;
}
- pserialize_read_exit(_s);
+ }
+ pserialize_read_exit(s);
- /*
- * Notify the network driver to update its multicast
- * reception filter.
- */
- sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
- if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
+ /*
+ * Notify the network driver to update its multicast
+ * reception filter.
+ */
+ sockaddr_in6_init(&sin6, &in6m->in6m_addr, 0, 0, 0);
+ if_mcast_op(in6m->in6m_ifp, SIOCDELMULTI, sin6tosa(&sin6));
- /* Tell mld_timeo we're halting the timer */
- in6m->in6m_timer = IN6M_TIMER_UNDEF;
- if (mutex_owned(softnet_lock))
- callout_halt(&in6m->in6m_timer_ch, softnet_lock);
- else
- callout_halt(&in6m->in6m_timer_ch, NULL);
- callout_destroy(&in6m->in6m_timer_ch);
+ /* Tell mld_timeo we're halting the timer */
+ in6m->in6m_timer = IN6M_TIMER_UNDEF;
+ if (mutex_owned(softnet_lock))
+ callout_halt(&in6m->in6m_timer_ch, softnet_lock);
+ else
+ callout_halt(&in6m->in6m_timer_ch, NULL);
+ callout_destroy(&in6m->in6m_timer_ch);
- free(in6m, M_IPMADDR);
- }
- splx(s);
+ free(in6m, M_IPMADDR);
+}
+
+/*
+ * Delete a multicast address record.
+ */
+void
+in6_delmulti(struct in6_multi *in6m)
+{
+
+ KASSERT(in6m->in6m_refcount > 0);
+
+ rw_enter(&in6_multilock, RW_WRITER);
+ /*
+ * The caller should have a reference to in6m. So we don't need to care
+ * of releasing the lock in mld_stoptimer.
+ */
+ mld_stoptimer(in6m);
+ if (--in6m->in6m_refcount == 0)
+ in6m_destroy(in6m);
+ rw_exit(&in6_multilock);
}
/*
@@ -772,7 +868,8 @@ in6_lookup_multi(const struct in6_addr *
{
struct in6_multi *in6m;
- /* XXX NOMPSAFE */
+ KASSERT(rw_lock_held(&in6_multilock));
+
LIST_FOREACH(in6m, &ifp->if_multiaddrs, in6m_entry) {
if (IN6_ARE_ADDR_EQUAL(&in6m->in6m_addr, addr))
break;
@@ -785,7 +882,9 @@ in6_multi_group(const struct in6_addr *a
{
bool ingroup;
+ rw_enter(&in6_multilock, RW_READER);
ingroup = in6_lookup_multi(addr, ifp) != NULL;
+ rw_exit(&in6_multilock);
return ingroup;
}
@@ -798,10 +897,18 @@ in6_purge_multi(struct ifnet *ifp)
{
struct in6_multi *in6m, *next;
- /* XXX NOMPSAFE */
+ rw_enter(&in6_multilock, RW_WRITER);
LIST_FOREACH_SAFE(in6m, &ifp->if_multiaddrs, in6m_entry, next) {
- in6_delmulti(in6m);
+ /*
+ * Normally multicast addresses are already purged at this
+ * point. Remaining references aren't accessible via ifp,
+ * so what we can do here is to prevent ifp from being
+ * accessed via in6m by removing it from the list of ifp.
+ */
+ mld_stoptimer(in6m);
+ LIST_REMOVE(in6m, in6m_entry);
}
+ rw_exit(&in6_multilock);
}
struct in6_multi_mship *
@@ -865,10 +972,13 @@ in6_multicast_sysctl(SYSCTLFN_ARGS)
if (namelen != 1)
return EINVAL;
+ rw_enter(&in6_multilock, RW_READER);
+
bound = curlwp_bind();
ifp = if_get_byindex(name[0], &psref);
if (ifp == NULL) {
curlwp_bindx(bound);
+ rw_exit(&in6_multilock);
return ENODEV;
}
@@ -884,6 +994,7 @@ in6_multicast_sysctl(SYSCTLFN_ARGS)
pserialize_read_exit(s);
if_put(ifp, &psref);
curlwp_bindx(bound);
+ rw_exit(&in6_multilock);
return 0;
}
@@ -938,6 +1049,7 @@ done:
ifa_release(ifa, &psref_ia);
if_put(ifp, &psref);
curlwp_bindx(bound);
+ rw_exit(&in6_multilock);
*oldlenp = written;
return error;
}