Hello,
I'm not sure I got everything right in Calgary, so this patch should roughly
illustrate how I think we should start moving forward to make PF MULTIPROCESSOR
friendly. It's quite likely my proposal/way is completely off; I'll be happy if
you bring me back down to earth.
The brief summary of what the patch is trying to achieve is as follows:
the patch trades all splsoftnet() calls for KERNEL_LOCK() when PF gets
compiled with the MULTIPROCESSOR option on.
If the MULTIPROCESSOR option is off, the compiler produces a PF which still
uses splsoftnet.
To achieve this the patch introduces the macros PF_LOCK()/PF_UNLOCK(),
which expand to KERNEL_LOCK()/KERNEL_UNLOCK() when MULTIPROCESSOR is
on.
On the other hand, if MULTIPROCESSOR is off, the PF_*LOCK() macros become
splsoftnet()/splx().
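Roughly, the two expansions look like this (a condensed sketch; the real
definitions are in the pfvar.h hunk below):

	#ifdef MULTIPROCESSOR
	#define PF_LOCK(_s_)	KERNEL_LOCK()
	#define PF_UNLOCK(_s_)	KERNEL_UNLOCK()
	#else
	#define PF_LOCK(_s_)	do { _s_ = splsoftnet(); } while (0)
	#define PF_UNLOCK(_s_)	splx(_s_)
	#endif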
Skip to =breakage= if you don't care about details/future plans. Currently PF
must synchronize all of these:
- packets, which are running through pf_test(). The IP stack already
serializes calls to pf_test() (there is at most one pf_test()
instance running at any time)
- ioctl() operations on the PF driver, with packets and with each other
(it looks like there may be multiple processes reading the state table;
those are allowed to run in parallel). To serialize ioctl() operations
with each other PF uses pf_consistency_lock (which is an RW-lock).
If a particular ioctl() operation must be synchronized with packets it
must grab splsoftnet (see the sketch right after this list).
- the purge thread, which expires states. The purge thread must grab both
pf_consistency_lock and splsoftnet.
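For reference, this is the pattern pfioctl() uses today; it shows up as the
removed lines in the pf_ioctl.c hunk below:

	if (flags & FWRITE)
		rw_enter_write(&pf_consistency_lock);
	else
		rw_enter_read(&pf_consistency_lock);
	s = splsoftnet();
	/* ... handle the ioctl command ... */
	splx(s);
	if (flags & FWRITE)
		rw_exit_write(&pf_consistency_lock);
	else
		rw_exit_read(&pf_consistency_lock);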
The desired state is to break the giant pf_consistency_lock into a few more
RW-locks, which will protect the various data structures PF keeps. Those
RW-locks will also synchronize packets. The list of locks I have in mind
(sketched as declarations right after the list) is as follows:
- pf_state_rw
- pf_anchors_rw (packets don't need to grab it as they grab rw-locks
bound to individual rulesets)
- pf_tables_rw (packets don't need to grab it as they grab rw-locks
bound to table instances).
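As plain declarations these might look something like the following; the names
are hypothetical and the patch below does not introduce them yet:

	/* hypothetical future per-subsystem locks, not part of this patch */
	struct rwlock pf_state_rw = RWLOCK_INITIALIZER("pfstates");
	struct rwlock pf_anchors_rw = RWLOCK_INITIALIZER("pfanchors");
	struct rwlock pf_tables_rw = RWLOCK_INITIALIZER("pftables");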
The first major milestone in this effort is to introduce pf_state_rw. The patch
I'm proposing here buys us enough freedom to relatively safely decompose
pf_consistency_lock and let pf_test() run in parallel for packets.
=breakage=
The proposed patch breaks the 'return-*' actions when PF gets compiled with
MULTIPROCESSOR on. I think it is unsafe to call the icmp_error*() functions
while holding the KERNEL_LOCK(). And it is risky to give up the KERNEL_LOCK(),
execute a send operation on the response packet, and re-grab the KERNEL_LOCK(),
as we would arrive in a different world (different in the sense that the
pointers we remember might be invalid now). To fix that we must introduce
reference counting for objects, so it will become safe to drop and re-grab
the KERNEL_LOCK() while holding a reference.
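A rough sketch of where this is heading; pf_state_ref()/pf_state_unref() are
hypothetical helpers which do not exist yet:

	/*
	 * Hypothetical: pin the state with a reference, so dropping the
	 * KERNEL_LOCK() around the send cannot leave us with a stale pointer.
	 */
	pf_state_ref(s);
	KERNEL_UNLOCK();
	icmp_error(m0, type, code, 0, 0);	/* send outside the lock */
	KERNEL_LOCK();
	pf_state_unref(s);	/* state may go away once the ref is dropped */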
The problem has been solved for the pf_route*() functions, so PBR works in
MULTIPROCESSOR-friendly PF.
My patch does not touch if_pfsync.c at all. pfsync support in
MULTIPROCESSOR PF will have to come in a later phase. You should consider it
broken in the MULTIPROCESSOR version.
There should be no breakage in PF for the GENERIC kernel.
regards
sasha
--------8<----------------8<---------------8<-----------------8<--------
Index: pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.936
diff -u -p -r1.936 pf.c
--- pf.c 19 Aug 2015 21:22:41 -0000 1.936
+++ pf.c 26 Aug 2015 14:11:17 -0000
@@ -906,7 +906,7 @@ int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
struct pf_state_key **sks, struct pf_state *s)
{
- splsoftassert(IPL_SOFTNET);
+ PF_ASSERT_LOCKED(nothing);
s->kif = kif;
if (*skw == *sks) {
@@ -1150,12 +1150,13 @@ pf_state_export(struct pfsync_state *sp,
void
pf_purge_thread(void *v)
{
- int nloops = 0, s;
+ int nloops = 0;
+ PF_LOCK_INSTANCE(s);
for (;;) {
tsleep(pf_purge_thread, PWAIT, "pftm", 1 * hz);
- s = splsoftnet();
+ PF_LOCK(s);
/* process a fraction of the state table every second */
pf_purge_expired_states(1 + (pf_status.states
@@ -1168,7 +1169,7 @@ pf_purge_thread(void *v)
nloops = 0;
}
- splx(s);
+ PF_UNLOCK(s);
}
}
@@ -1259,7 +1260,7 @@ pf_src_tree_remove_state(struct pf_state
void
pf_unlink_state(struct pf_state *cur)
{
- splsoftassert(IPL_SOFTNET);
+ PF_ASSERT_LOCKED(nothing);
/* handle load balancing related tasks */
pf_postprocess_addr(cur);
@@ -1294,7 +1295,7 @@ pf_free_state(struct pf_state *cur)
{
struct pf_rule_item *ri;
- splsoftassert(IPL_SOFTNET);
+ PF_ASSERT_LOCKED(nothing);
#if NPFSYNC > 0
if (pfsync_state_in_use(cur))
@@ -2414,6 +2415,13 @@ pf_send_tcp(const struct pf_rule *r, sa_
memcpy((opt + 2), &mss, 2);
}
+ /*
+ * XXX: I think we should not call ip*_output() while holding the
+ * KERNEL_LOCK(). We have to figure out a way around this...
+ */
+#ifdef MULTIPROCESSOR
+ m_freem(m);
+#else /* !MULTIPROCESSOR */
switch (af) {
case AF_INET:
ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
@@ -2424,6 +2432,7 @@ pf_send_tcp(const struct pf_rule *r, sa_
break;
#endif /* INET6 */
}
+#endif /* MULTIPROCESSOR */
}
void
@@ -2442,6 +2451,13 @@ pf_send_icmp(struct mbuf *m, u_int8_t ty
if (r && r->qid)
m0->m_pkthdr.pf.qid = r->qid;
+ /*
+ * XXX: I think we should not call icmp_error*() while holding the
+ * KERNEL_LOCK(). We have to figure out a way around this...
+ */
+#ifdef MULTIPROCESSOR
+ m_freem(m0);
+#else /* !MULTIPROCESSOR */
switch (af) {
case AF_INET:
icmp_error(m0, type, code, 0, 0);
@@ -2452,6 +2468,7 @@ pf_send_icmp(struct mbuf *m, u_int8_t ty
break;
#endif /* INET6 */
}
+#endif /* MULTIPROCESSOR */
}
/*
@@ -5482,21 +5499,27 @@ pf_route(struct mbuf **m, struct pf_rule
if ((*m)->m_pkthdr.pf.routed++ > 3) {
m0 = *m;
*m = NULL;
+ KERNEL_UNLOCK();
goto bad;
}
if (r->rt == PF_DUPTO) {
- if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+ if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
+ KERNEL_UNLOCK();
return;
+ }
} else {
- if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+ if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+ KERNEL_UNLOCK();
return;
+ }
m0 = *m;
}
if (m0->m_len < sizeof(struct ip)) {
DPFPRINTF(LOG_ERR,
"pf_route: m0->m_len < sizeof(struct ip)");
+ KERNEL_UNLOCK();
goto bad;
}
@@ -5513,6 +5536,7 @@ pf_route(struct mbuf **m, struct pf_rule
rt = rtalloc(sintosa(dst), RT_REPORT|RT_RESOLVE, rtableid);
if (rt == NULL) {
ipstat.ips_noroute++;
+ KERNEL_UNLOCK();
goto bad;
}
@@ -5531,6 +5555,7 @@ pf_route(struct mbuf **m, struct pf_rule
&naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
DPFPRINTF(LOG_ERR,
"pf_route: pf_map_addr() failed.");
+ KERNEL_UNLOCK();
goto bad;
}
@@ -5545,15 +5570,20 @@ pf_route(struct mbuf **m, struct pf_rule
ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
}
}
- if (ifp == NULL)
+
+ KERNEL_UNLOCK();
+
+ if (ifp == NULL) {
goto bad;
+ }
if (oifp != ifp) {
if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
goto bad;
- else if (m0 == NULL)
+ else if (m0 == NULL) {
goto done;
+ }
if (m0->m_len < sizeof(struct ip)) {
DPFPRINTF(LOG_ERR,
"pf_route: m0->m_len < sizeof(struct ip)");
@@ -5642,21 +5672,27 @@ pf_route6(struct mbuf **m, struct pf_rul
if ((*m)->m_pkthdr.pf.routed++ > 3) {
m0 = *m;
*m = NULL;
+ KERNEL_UNLOCK();
goto bad;
}
if (r->rt == PF_DUPTO) {
- if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL)
+ if ((m0 = m_copym2(*m, 0, M_COPYALL, M_NOWAIT)) == NULL) {
+ KERNEL_UNLOCK();
return;
+ }
} else {
- if ((r->rt == PF_REPLYTO) == (r->direction == dir))
+ if ((r->rt == PF_REPLYTO) == (r->direction == dir)) {
+ KERNEL_UNLOCK();
return;
+ }
m0 = *m;
}
if (m0->m_len < sizeof(struct ip6_hdr)) {
DPFPRINTF(LOG_ERR,
"pf_route6: m0->m_len < sizeof(struct ip6_hdr)");
+ KERNEL_UNLOCK();
goto bad;
}
ip6 = mtod(m0, struct ip6_hdr *);
@@ -5669,6 +5705,7 @@ pf_route6(struct mbuf **m, struct pf_rul
if (!r->rt) {
m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
+ KERNEL_UNLOCK();
ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL);
return;
}
@@ -5679,6 +5716,7 @@ pf_route6(struct mbuf **m, struct pf_rul
&naddr, NULL, sns, &r->route, PF_SN_ROUTE)) {
DPFPRINTF(LOG_ERR,
"pf_route6: pf_map_addr() failed.");
+ KERNEL_UNLOCK();
goto bad;
}
if (!PF_AZERO(&naddr, AF_INET6))
@@ -5691,6 +5729,9 @@ pf_route6(struct mbuf **m, struct pf_rul
&s->rt_addr, AF_INET6);
ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL;
}
+
+ KERNEL_UNLOCK();
+
if (ifp == NULL)
goto bad;
@@ -6349,6 +6390,8 @@ pf_test(sa_family_t af, int fwdir, struc
return (PF_PASS);
}
+ KERNEL_LOCK();
+
action = pf_setup_pdesc(&pd, &pdhdrs, af, dir, kif, *m0, &reason);
if (action != PF_PASS) {
#if NPFLOG > 0
@@ -6589,6 +6632,7 @@ done:
case PF_DEFER:
*m0 = NULL;
action = PF_PASS;
+ KERNEL_UNLOCK();
break;
case PF_DIVERT:
switch (pd.af) {
@@ -6604,6 +6648,7 @@ done:
break;
#endif /* INET6 */
}
+ KERNEL_UNLOCK();
action = PF_PASS;
break;
#ifdef INET6
@@ -6614,10 +6659,20 @@ done:
action = PF_DROP;
break;
}
- if (pd.naf == AF_INET)
+ switch (pd.naf) {
+ case AF_INET:
pf_route(&pd.m, r, dir, kif->pfik_ifp, s);
- if (pd.naf == AF_INET6)
+ break;
+ case AF_INET6:
pf_route6(&pd.m, r, dir, kif->pfik_ifp, s);
+ break;
+ default:
+ /*
+ * The pf_route*() cases above unlock the kernel lock for us; the default case must do it here.
+ */
+ KERNEL_UNLOCK();
+ m_freem(pd.m);
+ }
*m0 = NULL;
action = PF_PASS;
break;
@@ -6634,7 +6689,15 @@ done:
pf_route6(m0, r, pd.dir, pd.kif->pfik_ifp, s);
break;
#endif /* INET6 */
+ default:
+ /*
+ * The pf_route*() calls above unlock the kernel
+ * lock for us; the default case must do it here.
+ */
+ KERNEL_UNLOCK();
}
+ } else {
+ KERNEL_UNLOCK();
}
break;
}
Index: pf_if.c
===================================================================
RCS file: /cvs/src/sys/net/pf_if.c,v
retrieving revision 1.79
diff -u -p -r1.79 pf_if.c
--- pf_if.c 21 Jul 2015 02:32:04 -0000 1.79
+++ pf_if.c 26 Aug 2015 14:11:17 -0000
@@ -216,10 +216,10 @@ void
pfi_attach_ifnet(struct ifnet *ifp)
{
struct pfi_kif *kif;
- int s;
+ PF_LOCK_INSTANCE(s);
pfi_initialize();
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
if ((kif = pfi_kif_get(ifp->if_xname)) == NULL)
panic("pfi_kif_get failed");
@@ -234,19 +234,19 @@ pfi_attach_ifnet(struct ifnet *ifp)
pfi_kif_update(kif);
- splx(s);
+ PF_UNLOCK(s);
}
void
pfi_detach_ifnet(struct ifnet *ifp)
{
- int s;
struct pfi_kif *kif;
+ PF_LOCK_INSTANCE(s);
if ((kif = (struct pfi_kif *)ifp->if_pf_kif) == NULL)
return;
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
hook_disestablish(ifp->if_addrhooks, kif->pfik_ah_cookie);
pfi_kif_update(kif);
@@ -260,17 +260,17 @@ pfi_detach_ifnet(struct ifnet *ifp)
ifp->if_pf_kif = NULL;
pfi_kif_unref(kif, PFI_KIF_REF_NONE);
- splx(s);
+ PF_UNLOCK(s);
}
void
pfi_attach_ifgroup(struct ifg_group *ifg)
{
struct pfi_kif *kif;
- int s;
+ PF_LOCK_INSTANCE(s);
pfi_initialize();
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
if ((kif = pfi_kif_get(ifg->ifg_group)) == NULL)
panic("pfi_kif_get failed");
@@ -278,41 +278,41 @@ pfi_attach_ifgroup(struct ifg_group *ifg
kif->pfik_group = ifg;
ifg->ifg_pf_kif = (caddr_t)kif;
- splx(s);
+ PF_UNLOCK(s);
}
void
pfi_detach_ifgroup(struct ifg_group *ifg)
{
- int s;
struct pfi_kif *kif;
+ PF_LOCK_INSTANCE(s);
if ((kif = (struct pfi_kif *)ifg->ifg_pf_kif) == NULL)
return;
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
kif->pfik_group = NULL;
ifg->ifg_pf_kif = NULL;
pfi_kif_unref(kif, PFI_KIF_REF_NONE);
- splx(s);
+ PF_UNLOCK(s);
}
void
pfi_group_change(const char *group)
{
struct pfi_kif *kif;
- int s;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
if ((kif = pfi_kif_get(group)) == NULL)
panic("pfi_kif_get failed");
pfi_kif_update(kif);
- splx(s);
+ PF_UNLOCK(s);
}
int
@@ -354,7 +354,8 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
struct pfi_dynaddr *dyn;
char tblname[PF_TABLE_NAME_SIZE];
struct pf_ruleset *ruleset = NULL;
- int s, rv = 0;
+ int rv = 0;
+ PF_LOCK_INSTANCE(s);
if (aw->type != PF_ADDR_DYNIFTL)
return (0);
@@ -362,7 +363,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
== NULL)
return (1);
- s = splsoftnet();
+ PF_LOCK(s);
if (!strcmp(aw->v.ifname, "self"))
dyn->pfid_kif = pfi_kif_get(IFG_ALL);
else
@@ -405,7 +406,7 @@ pfi_dynaddr_setup(struct pf_addr_wrap *a
TAILQ_INSERT_TAIL(&dyn->pfid_kif->pfik_dynaddrs, dyn, entry);
aw->p.dyn = dyn;
pfi_kif_update(dyn->pfid_kif);
- splx(s);
+ PF_UNLOCK(s);
return (0);
_bad:
@@ -416,7 +417,7 @@ _bad:
if (dyn->pfid_kif != NULL)
pfi_kif_unref(dyn->pfid_kif, PFI_KIF_REF_RULE);
pool_put(&pfi_addr_pl, dyn);
- splx(s);
+ PF_UNLOCK(s);
return (rv);
}
@@ -587,13 +588,13 @@ pfi_address_add(struct sockaddr *sa, sa_
void
pfi_dynaddr_remove(struct pf_addr_wrap *aw)
{
- int s;
+ PF_LOCK_INSTANCE(s);
if (aw->type != PF_ADDR_DYNIFTL || aw->p.dyn == NULL ||
aw->p.dyn->pfid_kif == NULL || aw->p.dyn->pfid_kt == NULL)
return;
- s = splsoftnet();
+ PF_LOCK(s);
TAILQ_REMOVE(&aw->p.dyn->pfid_kif->pfik_dynaddrs, aw->p.dyn, entry);
pfi_kif_unref(aw->p.dyn->pfid_kif, PFI_KIF_REF_RULE);
aw->p.dyn->pfid_kif = NULL;
@@ -601,7 +602,7 @@ pfi_dynaddr_remove(struct pf_addr_wrap *
aw->p.dyn->pfid_kt = NULL;
pool_put(&pfi_addr_pl, aw->p.dyn);
aw->p.dyn = NULL;
- splx(s);
+ PF_UNLOCK(s);
}
void
@@ -616,13 +617,13 @@ pfi_dynaddr_copyout(struct pf_addr_wrap
void
pfi_kifaddr_update(void *v)
{
- int s;
struct pfi_kif *kif = (struct pfi_kif *)v;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
pfi_update++;
pfi_kif_update(kif);
- splx(s);
+ PF_UNLOCK(s);
}
int
@@ -638,23 +639,24 @@ pfi_update_status(const char *name, stru
struct pfi_kif_cmp key;
struct ifg_member p_member, *ifgm;
TAILQ_HEAD(, ifg_member) ifg_members;
- int i, j, k, s;
+ int i, j, k;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
if (*name == '\0' && pfs == NULL) {
RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
bzero(p->pfik_packets, sizeof(p->pfik_packets));
bzero(p->pfik_bytes, sizeof(p->pfik_bytes));
p->pfik_tzero = time_second;
}
- splx(s);
+ PF_UNLOCK(s);
return;
}
strlcpy(key.pfik_name, name, sizeof(key.pfik_name));
p = RB_FIND(pfi_ifhead, &pfi_ifs, (struct pfi_kif *)&key);
if (p == NULL) {
- splx(s);
+ PF_UNLOCK(s);
return;
}
if (p->pfik_group != NULL) {
@@ -692,16 +694,17 @@ pfi_update_status(const char *name, stru
p->pfik_bytes[i][j][k];
}
}
- splx(s);
+ PF_UNLOCK(s);
}
int
pfi_get_ifaces(const char *name, struct pfi_kif *buf, int *size)
{
struct pfi_kif *p, *nextp;
- int s, n = 0;
+ int n = 0;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
for (p = RB_MIN(pfi_ifhead, &pfi_ifs); p; p = nextp) {
nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
if (pfi_skip_if(name, p))
@@ -712,14 +715,14 @@ pfi_get_ifaces(const char *name, struct
pfi_kif_ref(p, PFI_KIF_REF_RULE);
if (copyout(p, buf++, sizeof(*buf))) {
pfi_kif_unref(p, PFI_KIF_REF_RULE);
- splx(s);
+ PF_UNLOCK(s);
return (EFAULT);
}
nextp = RB_NEXT(pfi_ifhead, &pfi_ifs, p);
pfi_kif_unref(p, PFI_KIF_REF_RULE);
}
}
- splx(s);
+ PF_UNLOCK(s);
*size = n;
return (0);
}
@@ -750,15 +753,15 @@ int
pfi_set_flags(const char *name, int flags)
{
struct pfi_kif *p;
- int s;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
if (pfi_skip_if(name, p))
continue;
p->pfik_flags_new = p->pfik_flags | flags;
}
- splx(s);
+ PF_UNLOCK(s);
return (0);
}
@@ -766,15 +769,15 @@ int
pfi_clear_flags(const char *name, int flags)
{
struct pfi_kif *p;
- int s;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
RB_FOREACH(p, pfi_ifhead, &pfi_ifs) {
if (pfi_skip_if(name, p))
continue;
p->pfik_flags_new = p->pfik_flags & ~flags;
}
- splx(s);
+ PF_UNLOCK(s);
return (0);
}
@@ -782,12 +785,12 @@ void
pfi_xcommit(void)
{
struct pfi_kif *p;
- int s;
+ PF_LOCK_INSTANCE(s);
- s = splsoftnet();
+ PF_LOCK(s);
RB_FOREACH(p, pfi_ifhead, &pfi_ifs)
p->pfik_flags = p->pfik_flags_new;
- splx(s);
+ PF_UNLOCK(s);
}
/* from pf_print_state.c */
Index: pf_ioctl.c
===================================================================
RCS file: /cvs/src/sys/net/pf_ioctl.c,v
retrieving revision 1.289
diff -u -p -r1.289 pf_ioctl.c
--- pf_ioctl.c 21 Jul 2015 02:32:04 -0000 1.289
+++ pf_ioctl.c 26 Aug 2015 14:11:18 -0000
@@ -689,8 +689,9 @@ pf_commit_rules(u_int32_t ticket, char *
struct pf_ruleset *rs;
struct pf_rule *rule, **old_array;
struct pf_rulequeue *old_rules;
- int s, error;
+ int error;
u_int32_t old_rcount;
+ PF_LOCK_INSTANCE(s);
rs = pf_find_ruleset(anchor);
if (rs == NULL || !rs->rules.inactive.open ||
@@ -705,7 +706,7 @@ pf_commit_rules(u_int32_t ticket, char *
}
/* Swap rules, keep the old. */
- s = splsoftnet();
+ PF_LOCK(s);
old_rules = rs->rules.active.ptr;
old_rcount = rs->rules.active.rcount;
old_array = rs->rules.active.ptr_array;
@@ -730,7 +731,7 @@ pf_commit_rules(u_int32_t ticket, char *
rs->rules.inactive.rcount = 0;
rs->rules.inactive.open = 0;
pf_remove_if_empty_ruleset(rs);
- splx(s);
+ PF_UNLOCK(s);
/* queue defs only in the main ruleset */
if (anchor[0])
@@ -804,11 +805,34 @@ pf_addr_copyout(struct pf_addr_wrap *add
pf_rtlabel_copyout(addr);
}
+/*
+ * This macro defines a list of case statements for ioctl commands which must
+ * not be covered by the PF global locks, such as KERNEL_LOCK(), splsoftnet
+ * and pf_consistency_lock.
+ *
+ * For example, if we want to exclude the ioctl commands DIOCGETSTATE,
+ * DIOCGETSTATES, DIOCNATLOOK, DIOCGETSRCNODES, DIOCCLRSRCNODES, DIOCXBEGIN,
+ * DIOCXROLLBACK and DIOCXCOMMIT, then we define the macro as follows:
+ * case DIOCGETSTATE: \
+ * case DIOCGETSTATES: \
+ * case DIOCNATLOOK: \
+ * case DIOCGETSRCNODES: \
+ * case DIOCCLRSRCNODES: \
+ * case DIOCXBEGIN: \
+ * case DIOCXROLLBACK: \
+ * case DIOCXCOMMIT: \
+ * break;
+ *
+ * The macro is a temporary hack; as soon as all of PF has been made MP
+ * friendly, the macro will be removed.
+ */
+#define PF_NO_GLOBAL_LOCK /* is empty now */
+
int
pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
{
- int s;
int error = 0;
+ PF_LOCK_INSTANCE(s);
/* XXX keep in sync with switch() below */
if (securelevel > 1)
@@ -906,12 +930,22 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
return (EACCES);
}
- if (flags & FWRITE)
- rw_enter_write(&pf_consistency_lock);
- else
- rw_enter_read(&pf_consistency_lock);
+ /*
+ * Select the ioctl commands which have to be guarded by the giant lock.
+ * The giant lock here is either KERNEL_LOCK() (when PF gets compiled with
+ * the MULTIPROCESSOR option) or splsoftnet (when PF is
+ * part of a non-MP kernel).
+ */
+ switch (cmd) {
+ PF_NO_GLOBAL_LOCK
+ default:
+ if (flags & FWRITE)
+ rw_enter_write(&pf_consistency_lock);
+ else
+ rw_enter_read(&pf_consistency_lock);
+ PF_LOCK(s);
+ }
- s = splsoftnet();
switch (cmd) {
case DIOCSTART:
@@ -2305,11 +2339,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
break;
}
fail:
- splx(s);
- if (flags & FWRITE)
- rw_exit_write(&pf_consistency_lock);
- else
- rw_exit_read(&pf_consistency_lock);
+ switch (cmd) {
+ PF_NO_GLOBAL_LOCK
+ default:
+ if (flags & FWRITE)
+ rw_exit_write(&pf_consistency_lock);
+ else
+ rw_exit_read(&pf_consistency_lock);
+ PF_UNLOCK(s);
+ }
+
return (error);
}
Index: pf_table.c
===================================================================
RCS file: /cvs/src/sys/net/pf_table.c,v
retrieving revision 1.113
diff -u -p -r1.113 pf_table.c
--- pf_table.c 20 Jul 2015 18:42:08 -0000 1.113
+++ pf_table.c 26 Aug 2015 14:11:18 -0000
@@ -791,7 +791,7 @@ pfr_lookup_addr(struct pfr_ktable *kt, s
union sockaddr_union sa, mask;
struct radix_node_head *head;
struct pfr_kentry *ke;
- int s;
+ PF_LOCK_INSTANCE(s);
bzero(&sa, sizeof(sa));
switch (ad->pfra_af) {
@@ -810,9 +810,9 @@ pfr_lookup_addr(struct pfr_ktable *kt, s
}
if (ADDR_NETWORK(ad)) {
pfr_prepare_network(&mask, ad->pfra_af, ad->pfra_net);
- s = splsoftnet(); /* rn_lookup makes use of globals */
+ PF_LOCK(s); /* rn_lookup makes use of globals */
ke = (struct pfr_kentry *)rn_lookup(&sa, &mask, head);
- splx(s);
+ PF_UNLOCK(s);
} else {
ke = (struct pfr_kentry *)rn_match(&sa, head);
if (exact && ke && KENTRY_NETWORK(ke))
@@ -990,17 +990,17 @@ void
pfr_clstats_kentries(struct pfr_kentryworkq *workq, time_t tzero, int
negchange)
{
struct pfr_kentry *p;
- int s;
+ PF_LOCK_INSTANCE(s);
SLIST_FOREACH(p, workq, pfrke_workq) {
- s = splsoftnet();
+ PF_LOCK(s);
if (negchange)
p->pfrke_flags ^= PFRKE_FLAG_NOT;
if (p->pfrke_counters) {
pool_put(&pfr_kcounters_pl, p->pfrke_counters);
p->pfrke_counters = NULL;
}
- splx(s);
+ PF_UNLOCK(s);
p->pfrke_tzero = tzero;
}
}
@@ -1061,7 +1061,7 @@ pfr_route_kentry(struct pfr_ktable *kt,
union sockaddr_union mask;
struct radix_node *rn;
struct radix_node_head *head;
- int s;
+ PF_LOCK_INSTANCE(s);
bzero(ke->pfrke_node, sizeof(ke->pfrke_node));
switch (ke->pfrke_af) {
@@ -1077,13 +1077,13 @@ pfr_route_kentry(struct pfr_ktable *kt,
unhandled_af(ke->pfrke_af);
}
- s = splsoftnet();
+ PF_LOCK(s);
if (KENTRY_NETWORK(ke)) {
pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
rn = rn_addroute(&ke->pfrke_sa, &mask, head, ke->pfrke_node, 0);
} else
rn = rn_addroute(&ke->pfrke_sa, NULL, head, ke->pfrke_node, 0);
- splx(s);
+ PF_UNLOCK(s);
return (rn == NULL ? -1 : 0);
}
@@ -1094,7 +1094,7 @@ pfr_unroute_kentry(struct pfr_ktable *kt
union sockaddr_union mask;
struct radix_node *rn;
struct radix_node_head *head;
- int s;
+ PF_LOCK_INSTANCE(s);
switch (ke->pfrke_af) {
case AF_INET:
@@ -1109,13 +1109,13 @@ pfr_unroute_kentry(struct pfr_ktable *kt
unhandled_af(ke->pfrke_af);
}
- s = splsoftnet();
+ PF_LOCK(s);
if (KENTRY_NETWORK(ke)) {
pfr_prepare_network(&mask, ke->pfrke_af, ke->pfrke_net);
rn = rn_delete(&ke->pfrke_sa, &mask, head, NULL);
} else
rn = rn_delete(&ke->pfrke_sa, NULL, head, NULL);
- splx(s);
+ PF_UNLOCK(s);
if (rn == NULL) {
DPFPRINTF(LOG_ERR, "pfr_unroute_kentry: delete failed.\n");
@@ -1170,7 +1170,8 @@ pfr_walktree(struct radix_node *rn, void
{
struct pfr_kentry *ke = (struct pfr_kentry *)rn;
struct pfr_walktree *w = arg;
- int s, flags = w->pfrw_flags;
+ int flags = w->pfrw_flags;
+ PF_LOCK_INSTANCE(s);
switch (w->pfrw_op) {
case PFRW_MARK:
@@ -1200,7 +1201,7 @@ pfr_walktree(struct radix_node *rn, void
pfr_copyout_addr(&as.pfras_a, ke);
- s = splsoftnet();
+ PF_LOCK(s);
if (ke->pfrke_counters) {
bcopy(ke->pfrke_counters->pfrkc_packets,
as.pfras_packets, sizeof(as.pfras_packets));
@@ -1212,7 +1213,7 @@ pfr_walktree(struct radix_node *rn, void
bzero(as.pfras_bytes, sizeof(as.pfras_bytes));
as.pfras_a.pfra_fback = PFR_FB_NOCOUNT;
}
- splx(s);
+ PF_UNLOCK(s);
as.pfras_tzero = ke->pfrke_tzero;
if (COPYOUT(&as, w->pfrw_astats, sizeof(as), flags))
@@ -1447,8 +1448,9 @@ pfr_get_tstats(struct pfr_table *filter,
{
struct pfr_ktable *p;
struct pfr_ktableworkq workq;
- int s, n, nn;
+ int n, nn;
time_t tzero = time_second;
+ PF_LOCK_INSTANCE(s);
/* XXX PFR_FLAG_CLSTATS disabled */
ACCEPT_FLAGS(flags, PFR_FLAG_ALLRSETS);
@@ -1467,12 +1469,12 @@ pfr_get_tstats(struct pfr_table *filter,
continue;
if (n-- <= 0)
continue;
- s = splsoftnet();
+ PF_LOCK(s);
if (COPYOUT(&p->pfrkt_ts, tbl++, sizeof(*tbl), flags)) {
- splx(s);
+ PF_UNLOCK(s);
return (EFAULT);
}
- splx(s);
+ PF_UNLOCK(s);
SLIST_INSERT_HEAD(&workq, p, pfrkt_workq);
}
if (flags & PFR_FLAG_CLSTATS)
@@ -1992,17 +1994,17 @@ void
pfr_clstats_ktable(struct pfr_ktable *kt, time_t tzero, int recurse)
{
struct pfr_kentryworkq addrq;
- int s;
+ PF_LOCK_INSTANCE(s);
if (recurse) {
pfr_enqueue_addrs(kt, &addrq, NULL, 0);
pfr_clstats_kentries(&addrq, tzero, 0);
}
- s = splsoftnet();
+ PF_LOCK(s);
bzero(kt->pfrkt_packets, sizeof(kt->pfrkt_packets));
bzero(kt->pfrkt_bytes, sizeof(kt->pfrkt_bytes));
kt->pfrkt_match = kt->pfrkt_nomatch = 0;
- splx(s);
+ PF_UNLOCK(s);
kt->pfrkt_tzero = tzero;
}
@@ -2523,13 +2525,13 @@ void
pfr_dynaddr_update(struct pfr_ktable *kt, struct pfi_dynaddr *dyn)
{
struct pfr_walktree w;
- int s;
+ PF_LOCK_INSTANCE(s);
bzero(&w, sizeof(w));
w.pfrw_op = PFRW_DYNADDR_UPDATE;
w.pfrw_dyn = dyn;
- s = splsoftnet();
+ PF_LOCK(s);
dyn->pfid_acnt4 = 0;
dyn->pfid_acnt6 = 0;
switch (dyn->pfid_af) {
@@ -2548,7 +2550,7 @@ pfr_dynaddr_update(struct pfr_ktable *kt
default:
unhandled_af(dyn->pfid_af);
}
- splx(s);
+ PF_UNLOCK(s);
}
void
Index: pfvar.h
===================================================================
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.420
diff -u -p -r1.420 pfvar.h
--- pfvar.h 19 Aug 2015 21:22:41 -0000 1.420
+++ pfvar.h 26 Aug 2015 14:11:18 -0000
@@ -1909,5 +1909,46 @@ void pf_cksum(struct pf_pdesc *,
stru
#endif /* _KERNEL */
+/*
+ * The PF_LOCK_* macros are a compatibility bridge between MP and non-MP PF.
+ * The idea is to use KERNEL_LOCK()/KERNEL_UNLOCK() when PF gets compiled with
+ * MULTIPROCESSOR support enabled. If the MULTIPROCESSOR compile time option
+ * is not present, the macros will use splsoftnet()/splx().
+ *
+ * This should buy us enough lock freedom to deploy a phased approach to
+ * giant lock dismantling in the PF code.
+ */
+#ifdef MULTIPROCESSOR
+#define PF_LOCK_INSTANCE(_s_) ((void)0)
+
+#define PF_LOCK(_s_) do { \
+ KERNEL_LOCK(); \
+ } while (0)
+
+#define PF_UNLOCK(_s_) do { \
+ KERNEL_ASSERT_LOCKED(); \
+ KERNEL_UNLOCK(); \
+ } while (0)
+
+#define PF_ASSERT_LOCKED(_s_) KERNEL_ASSERT_LOCKED()
+
+#define PF_ASSERT_UNLOCKED(_s_) KERNEL_ASSERT_UNLOCKED()
+
+#else /* !MULTIPROCESSOR */
+
+#define PF_LOCK_INSTANCE(_s_) int _s_
+
+#define PF_LOCK(_s_) do { \
+ _s_ = splsoftnet(); \
+ } while (0)
+
+#define PF_UNLOCK(_s_) do { \
+ splx(_s_); \
+ } while (0)
+
+#define PF_ASSERT_LOCKED(_s_) ((void)0)
+#define PF_ASSERT_UNLOCKED(_s_) ((void)0)
+
+#endif /* MULTIPROCESSOR */
#endif /* _NET_PFVAR_H_ */