On 01/06/17(Thu) 09:32, Alexandr Nedvedicky wrote:
> [...]
> > 
> > Where do you want to define this WITH_PF_LOCK?
> > 
> 
>     I currently add
> 
> 	option WITH_PF_LOCK
> 
>     to sys/arch/amd64/compile/GENERIC.MP to build PF with PF_LOCK.
>     But there should be a better place in the tree. Perhaps sys/conf/GENERIC?
I don't think it matters.  We still need more diffs to be able to test
that.  Now I'd really like it if you could commit this so we can continue
improving it in-tree.

ok mpi@

> diff -r ccb9f01e56a7 .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ .hgtags	Thu Jun 01 09:29:15 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r ccb9f01e56a7 src/sys/conf/GENERIC
> --- src/sys/conf/GENERIC	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/conf/GENERIC	Thu Jun 01 09:29:15 2017 +0200
> @@ -16,7 +16,7 @@ option	ACCOUNTING	# acct(2) process acc
>  option	KMEMSTATS	# collect malloc(9) statistics
>  option	PTRACE		# ptrace(2) system call
>  #option	WITNESS		# witness(4) lock checker
> -#option PF_LOCK		# build with pf lock support
> +#option WITH_PF_LOCK	# PF lock support
>  
>  #option	KVA_GUARDPAGES	# slow virtual address recycling (+ guarding)
>  option	POOL_DEBUG	# pool corruption detection
> diff -r ccb9f01e56a7 src/sys/net/pf.c
> --- src/sys/net/pf.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -923,7 +923,7 @@ int
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
>  {
>  	struct pf_rule	*r;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
>  
>  		NET_LOCK(s);
>  
> +		PF_LOCK();
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
>  		    / pf_default_rule.timeout[PFTM_INTERVAL]));
>  
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +		PF_UNLOCK();
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own lock.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
>  
> @@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void)
>  {
>  	struct pf_src_node	*cur, *next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
>  {
>  	struct pf_rule_item	*ri;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  #if NPFSYNC > 0
>  	if (pfsync_state_in_use(cur))
> @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
>  	static struct pf_state	*cur = NULL;
>  	struct pf_state		*next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	while (maxcheck--) {
>  		/* wrap to start of list when we hit the end */
> @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  	case IPPROTO_TCP:
>  		sport = pd->hdr.tcp.th_sport;
>  		dport = pd->hdr.tcp.th_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = pd->hdr.udp.uh_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &udbtable;
>  		break;
>  	default:
> @@ -6678,6 +6685,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  	/* if packet sits in reassembly queue, return without error */
>  	if (pd.m == NULL)
>  		return PF_PASS;
> +
>  	if (action != PF_PASS) {
>  #if NPFLOG > 0
>  		pd.pflog |= PF_LOG_FORCE;
> @@ -6697,6 +6705,9 @@ pf_test(sa_family_t af, int fwdir, struc
>  	}
>  	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
>  
> +	/* lock the lookup/write section of pf_test() */
> +	PF_LOCK();
> +
>  	switch (pd.virtual_proto) {
>  
>  	case PF_VPROTO_FRAGMENT: {
> @@ -6716,7 +6727,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv6 packet with ICMPv4 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6741,7 +6752,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv4 packet with ICMPv6 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6766,7 +6777,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  				pqid = 1;
>  			action = pf_normalize_tcp(&pd);
>  			if (action == PF_DROP)
> -				goto done;
> +				goto unlock;
>  		}
>  		action = pf_test_state(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6793,6 +6804,15 @@ pf_test(sa_family_t af, int fwdir, struc
>  		break;
>  	}
>  
> +unlock:
> +	PF_UNLOCK();
> +
> +	/*
> +	 * At the moment, we rely on NET_LOCK() to prevent removal of items
> +	 * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll
> +	 * have to be refcounted when NET_LOCK() is gone.
> +	 */
> +
>  done:
>  	if (action != PF_DROP) {
>  		if (s) {
> @@ -6996,6 +7016,7 @@ done:
>  	}
>  
>  	*m0 = pd.m;
> +
>  	return (action);
>  }
>  
> diff -r ccb9f01e56a7 src/sys/net/pf_ioctl.c
> --- src/sys/net/pf_ioctl.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf_ioctl.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -129,6 +129,10 @@ struct {
>  TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
>  	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
>  
> +#ifdef WITH_PF_LOCK
> +struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
> +#endif /* WITH_PF_LOCK */
> +
>  #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
>  #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
>  #endif
> @@ -998,6 +1002,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  	}
>  
>  	NET_LOCK(s);
> +	PF_LOCK();
>  	switch (cmd) {
>  
>  	case DIOCSTART:
> @@ -2437,6 +2442,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  		break;
>  	}
>  fail:
> +	PF_UNLOCK();
>  	NET_UNLOCK(s);
>  	return (error);
>  }
> diff -r ccb9f01e56a7 src/sys/net/pf_norm.c
> --- src/sys/net/pf_norm.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf_norm.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -39,6 +39,7 @@
>  #include <sys/time.h>
>  #include <sys/pool.h>
>  #include <sys/syslog.h>
> +#include <sys/mutex.h>
>  
>  #include <net/if.h>
>  #include <net/if_var.h>
> @@ -135,6 +136,18 @@ struct pool	 pf_frent_pl, pf_frag_pl;
>  struct pool		 pf_state_scrub_pl;
>  int			 pf_nfrents;
>  
> +#ifdef WITH_PF_LOCK
> +struct mutex		 pf_frag_mtx;
> +
> +#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
> +#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
> +#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
> +#else /* !WITH_PF_LOCK */
> +#define PF_FRAG_LOCK_INIT()	(void)(0)
> +#define PF_FRAG_LOCK()		(void)(0)
> +#define PF_FRAG_UNLOCK()	(void)(0)
> +#endif /* WITH_PF_LOCK */
> +
>  void
>  pf_normalize_init(void)
>  {
> @@ -149,6 +162,8 @@ pf_normalize_init(void)
>  	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
>  
>  	TAILQ_INIT(&pf_fragqueue);
> +
> +	PF_FRAG_LOCK_INIT();
>  }
>  
>  static __inline int
> @@ -176,15 +191,18 @@ pf_purge_expired_fragments(void)
>  	struct pf_fragment	*frag;
>  	int32_t			 expire;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_UNLOCKED();
>  
>  	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
> +
> +	PF_FRAG_LOCK();
>  	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
>  		if (frag->fr_timeout > expire)
>  			break;
>  		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
>  		pf_free_fragment(frag);
>  	}
> +	PF_FRAG_UNLOCK();
>  }
>  
>  /*
> @@ -533,14 +551,19 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	key.fr_id = ip->ip_id;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	frent = TAILQ_FIRST(&frag->fr_queue);
> @@ -564,6 +587,7 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
>  
>  	if (hdrlen + total > IP_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip->ip_len = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
>  		return (PF_DROP);
>  	}
>  
> +	PF_FRAG_UNLOCK();
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
>  	return (PF_PASS);
>  }
> @@ -610,14 +635,19 @@ pf_reassemble6(struct mbuf **m0, struct
>  	key.fr_id = fraghdr->ip6f_ident;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	extoff = frent->fe_extoff;
> @@ -671,17 +701,20 @@ pf_reassemble6(struct mbuf **m0, struct
>  	ip6->ip6_nxt = proto;
>  
>  	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip6->ip6_plen = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
>  		return (PF_DROP);
>  	}
> +	PF_FRAG_UNLOCK();
>  
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
>  	return (PF_PASS);
>  
>  fail:
> +	PF_FRAG_UNLOCK();
>  	REASON_SET(reason, PFRES_MEMORY);
>  	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
>  	return (PF_DROP);
> diff -r ccb9f01e56a7 src/sys/net/pfvar_priv.h
> --- src/sys/net/pfvar_priv.h	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pfvar_priv.h	Thu Jun 01 09:29:15 2017 +0200
> @@ -37,6 +37,10 @@
>  
>  #ifdef _KERNEL
>  
> +#include <sys/rwlock.h>
> +
> +extern struct rwlock	pf_lock;
> +
>  struct pf_pdesc {
>  	struct {
>  		int	 done;
> @@ -94,6 +98,36 @@ struct pf_pdesc {
>  	} hdr;
>  };
>  
> +#ifdef WITH_PF_LOCK
> +extern struct rwlock	pf_lock;
> +
> +#define PF_LOCK()	do {			\
> +		NET_ASSERT_LOCKED();		\
> +		rw_enter_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_UNLOCK()	do {			\
> +		PF_ASSERT_LOCKED();		\
> +		rw_exit_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_ASSERT_LOCKED()	do {			\
> +		if (rw_status(&pf_lock) != RW_WRITE)	\
> +			splassert_fail(RW_WRITE,	\
> +			    rw_status(&pf_lock),__func__);\
> +	} while (0)
> +
> +#define PF_ASSERT_UNLOCKED()	do {			\
> +		if (rw_status(&pf_lock) == RW_WRITE)	\
> +			splassert_fail(0, rw_status(&pf_lock), __func__);\
> +	} while (0)
> +#else /* !WITH_PF_LOCK */
> +#define PF_LOCK()		(void)(0)
> +#define PF_UNLOCK()		(void)(0)
> +#define PF_ASSERT_LOCKED()	(void)(0)
> +#define PF_ASSERT_UNLOCKED()	(void)(0)
> +#endif /* WITH_PF_LOCK */
> +
>  #endif /* _KERNEL */
>  
>  #endif /* _NET_PFVAR_PRIV_H_ */
> --------8<---------------8<---------------8<------------------8<--------
> 
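
(Not part of the review, just an aside for anyone who wants to poke at the
locking pattern outside the kernel tree: the heart of the diff is that
PF_LOCK()/PF_UNLOCK() and the PF_FRAG_* macros compile to real lock
operations only when WITH_PF_LOCK is defined, and to (void)(0) no-ops
otherwise, so a kernel built without the option behaves exactly as before.
Here is a minimal userland sketch of that build-time switch, with pthreads
standing in for the kernel rwlock; every name in it, WITH_DEMO_LOCK,
DEMO_LOCK, demo.c, is made up for illustration and is not the kernel API.)

--------8<---------------8<---------------8<------------------8<--------
/*
 * demo.c: userland sketch of the compile-time optional lock pattern.
 *
 *	cc -DWITH_DEMO_LOCK -o demo demo.c -lpthread	(locking build)
 *	cc -o demo demo.c				(no-op build)
 *
 * Caller code is identical in both builds, just like pf.c with and
 * without WITH_PF_LOCK.
 */
#include <pthread.h>
#include <stdio.h>

#ifdef WITH_DEMO_LOCK
static pthread_rwlock_t demo_lock = PTHREAD_RWLOCK_INITIALIZER;

#define DEMO_LOCK()	pthread_rwlock_wrlock(&demo_lock)
#define DEMO_UNLOCK()	pthread_rwlock_unlock(&demo_lock)
#else /* !WITH_DEMO_LOCK */
#define DEMO_LOCK()	(void)(0)
#define DEMO_UNLOCK()	(void)(0)
#endif /* WITH_DEMO_LOCK */

static int shared_state;

int
main(void)
{
	/* the critical section reads the same whether or not it locks */
	DEMO_LOCK();
	shared_state++;
	DEMO_UNLOCK();

	printf("shared_state: %d\n", shared_state);
	return (0);
}
--------8<---------------8<---------------8<------------------8<--------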