On 01/06/17(Thu) 09:32, Alexandr Nedvedicky wrote:
> [...]
> > 
> > Where do you want to define this WITH_PF_LOCK?
> > 
> 
>     I currently add
> 
> 	option WITH_PF_LOCK
> 
>     to sys/arch/amd64/compile/GENERIC.MP to build PF with PF_LOCK.
>     But there should be a better place in the tree. Perhaps sys/conf/GENERIC?
I don't think it matters.  We still need more diffs to be able to test
that.  Now I'd really like it if you could commit this so we can continue
improving it in-tree.

ok mpi@

> diff -r ccb9f01e56a7 .hgtags
> --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
> +++ .hgtags	Thu Jun 01 09:29:15 2017 +0200
> @@ -0,0 +1,1 @@
> +d545881e2652dbc0c057691a39a095bce92f441f pf-lock.baseline
> diff -r ccb9f01e56a7 src/sys/conf/GENERIC
> --- src/sys/conf/GENERIC	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/conf/GENERIC	Thu Jun 01 09:29:15 2017 +0200
> @@ -16,7 +16,7 @@ option	ACCOUNTING	# acct(2) process acc
>  option	KMEMSTATS	# collect malloc(9) statistics
>  option	PTRACE		# ptrace(2) system call
>  #option	WITNESS		# witness(4) lock checker
> -#option PF_LOCK		# build with pf lock support
> +#option WITH_PF_LOCK	# PF lock support
>  
>  #option	KVA_GUARDPAGES	# slow virtual address recycling (+ guarding)
>  option	POOL_DEBUG	# pool corruption detection
> diff -r ccb9f01e56a7 src/sys/net/pf.c
> --- src/sys/net/pf.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -923,7 +923,7 @@ int
>  pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
>      struct pf_state_key **sks, struct pf_state *s)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	s->kif = kif;
>  	if (*skw == *sks) {
> @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
>  {
>  	struct pf_rule	*r;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	if (SLIST_EMPTY(&pf_rule_gcl))
>  		return;
> @@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
>  
>  		NET_LOCK(s);
>  
> +		PF_LOCK();
>  		/* process a fraction of the state table every second */
>  		pf_purge_expired_states(1 + (pf_status.states
>  		    / pf_default_rule.timeout[PFTM_INTERVAL]));
>  
>  		/* purge other expired types every PFTM_INTERVAL seconds */
>  		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> -			pf_purge_expired_fragments();
>  			pf_purge_expired_src_nodes(0);
>  			pf_purge_expired_rules();
> +		}
> +		PF_UNLOCK();
> +		/*
> +		 * Fragments don't require PF_LOCK(), they use their own lock.
> +		 */
> +		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
> +			pf_purge_expired_fragments();
>  			nloops = 0;
>  		}
>  
> @@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void)
>  {
>  	struct pf_src_node	*cur, *next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>  		next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
> @@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state
>  void
>  pf_remove_state(struct pf_state *cur)
>  {
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	/* handle load balancing related tasks */
>  	pf_postprocess_addr(cur);
> @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
>  {
>  	struct pf_rule_item	*ri;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  #if NPFSYNC > 0
>  	if (pfsync_state_in_use(cur))
> @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxche
>  	static struct pf_state	*cur = NULL;
>  	struct pf_state		*next;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_LOCKED();
>  
>  	while (maxcheck--) {
>  		/* wrap to start of list when we hit the end */
> @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
>  	case IPPROTO_TCP:
>  		sport = pd->hdr.tcp.th_sport;
>  		dport = pd->hdr.tcp.th_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &tcbtable;
>  		break;
>  	case IPPROTO_UDP:
>  		sport = pd->hdr.udp.uh_sport;
>  		dport = pd->hdr.udp.uh_dport;
> -		NET_ASSERT_LOCKED();
> +		PF_ASSERT_LOCKED();
>  		tb = &udbtable;
>  		break;
>  	default:
> @@ -6678,6 +6685,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  	/* if packet sits in reassembly queue, return without error */
>  	if (pd.m == NULL)
>  		return PF_PASS;
> +
>  	if (action != PF_PASS) {
>  #if NPFLOG > 0
>  		pd.pflog |= PF_LOG_FORCE;
> @@ -6697,6 +6705,9 @@ pf_test(sa_family_t af, int fwdir, struc
>  	}
>  	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
>  
> +	/* lock the lookup/write section of pf_test() */
> +	PF_LOCK();
> +
>  	switch (pd.virtual_proto) {
>  
>  	case PF_VPROTO_FRAGMENT: {
> @@ -6716,7 +6727,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv6 packet with ICMPv4 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6741,7 +6752,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  			REASON_SET(&reason, PFRES_NORM);
>  			DPFPRINTF(LOG_NOTICE,
>  			    "dropping IPv4 packet with ICMPv6 payload");
> -			goto done;
> +			goto unlock;
>  		}
>  		action = pf_test_state_icmp(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6766,7 +6777,7 @@ pf_test(sa_family_t af, int fwdir, struc
>  				pqid = 1;
>  			action = pf_normalize_tcp(&pd);
>  			if (action == PF_DROP)
> -				goto done;
> +				goto unlock;
>  		}
>  		action = pf_test_state(&pd, &s, &reason);
>  		if (action == PF_PASS || action == PF_AFRT) {
> @@ -6793,6 +6804,15 @@ pf_test(sa_family_t af, int fwdir, struc
>  		break;
>  	}
>  
> +unlock:
> +	PF_UNLOCK();
> +
> +	/*
> +	 * At the moment, we rely on NET_LOCK() to prevent removal of items
> +	 * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll
> +	 * have to be refcounted when NET_LOCK() is gone.
> +	 */
> +
>  done:
>  	if (action != PF_DROP) {
>  		if (s) {
> @@ -6996,6 +7016,7 @@ done:
>  	}
>  
>  	*m0 = pd.m;
> +
>  	return (action);
>  }
>  
> diff -r ccb9f01e56a7 src/sys/net/pf_ioctl.c
> --- src/sys/net/pf_ioctl.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf_ioctl.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -129,6 +129,10 @@ struct {
>  TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
>  	pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
>  
> +#ifdef WITH_PF_LOCK
> +struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
> +#endif /* WITH_PF_LOCK */
> +
>  #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
>  #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
>  #endif
> @@ -998,6 +1002,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  	}
>  
>  	NET_LOCK(s);
> +	PF_LOCK();
>  	switch (cmd) {
>  
>  	case DIOCSTART:
> @@ -2437,6 +2442,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>  		break;
>  	}
>  fail:
> +	PF_UNLOCK();
>  	NET_UNLOCK(s);
>  	return (error);
>  }
> diff -r ccb9f01e56a7 src/sys/net/pf_norm.c
> --- src/sys/net/pf_norm.c	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pf_norm.c	Thu Jun 01 09:29:15 2017 +0200
> @@ -39,6 +39,7 @@
>  #include <sys/time.h>
>  #include <sys/pool.h>
>  #include <sys/syslog.h>
> +#include <sys/mutex.h>
>  
>  #include <net/if.h>
>  #include <net/if_var.h>
> @@ -135,6 +136,18 @@ struct pool	 pf_frent_pl, pf_frag_pl;
>  struct pool		 pf_state_scrub_pl;
>  int			 pf_nfrents;
>  
> +#ifdef WITH_PF_LOCK
> +struct mutex		 pf_frag_mtx;
> +
> +#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
> +#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
> +#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
> +#else /* !WITH_PF_LOCK */
> +#define PF_FRAG_LOCK_INIT()	(void)(0)
> +#define PF_FRAG_LOCK()		(void)(0)
> +#define PF_FRAG_UNLOCK()	(void)(0)
> +#endif /* WITH_PF_LOCK */
> +
>  void
>  pf_normalize_init(void)
>  {
> @@ -149,6 +162,8 @@ pf_normalize_init(void)
>  	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
>  
>  	TAILQ_INIT(&pf_fragqueue);
> +
> +	PF_FRAG_LOCK_INIT();
>  }
>  
>  static __inline int
> @@ -176,15 +191,18 @@ pf_purge_expired_fragments(void)
>  	struct pf_fragment	*frag;
>  	int32_t			 expire;
>  
> -	NET_ASSERT_LOCKED();
> +	PF_ASSERT_UNLOCKED();
>  
>  	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
> +
> +	PF_FRAG_LOCK();
>  	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
>  		if (frag->fr_timeout > expire)
>  			break;
>  		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
>  		pf_free_fragment(frag);
>  	}
> +	PF_FRAG_UNLOCK();
>  }
>  
>  /*
> @@ -533,14 +551,19 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	key.fr_id = ip->ip_id;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	frent = TAILQ_FIRST(&frag->fr_queue);
> @@ -564,6 +587,7 @@ pf_reassemble(struct mbuf **m0, int dir,
>  	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
>  
>  	if (hdrlen + total > IP_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip->ip_len = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
>  		return (PF_DROP);
>  	}
>  
> +	PF_FRAG_UNLOCK();
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
>  	return (PF_PASS);
>  }
> @@ -610,14 +635,19 @@ pf_reassemble6(struct mbuf **m0, struct
>  	key.fr_id = fraghdr->ip6f_ident;
>  	key.fr_direction = dir;
>  
> -	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
> +	PF_FRAG_LOCK();
> +	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_DROP);
> +	}
>  
>  	/* The mbuf is part of the fragment entry, no direct free or access */
>  	m = *m0 = NULL;
>  
> -	if (!pf_isfull_fragment(frag))
> +	if (!pf_isfull_fragment(frag)) {
> +		PF_FRAG_UNLOCK();
>  		return (PF_PASS);	/* drop because *m0 is NULL, no error */
> +	}
>  
>  	/* We have all the data */
>  	extoff = frent->fe_extoff;
> @@ -671,17 +701,20 @@ pf_reassemble6(struct mbuf **m0, struct
>  	ip6->ip6_nxt = proto;
>  
>  	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
> +		PF_FRAG_UNLOCK();
>  		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
>  		ip6->ip6_plen = 0;
>  		REASON_SET(reason, PFRES_SHORT);
>  		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
>  		return (PF_DROP);
>  	}
> +	PF_FRAG_UNLOCK();
>  
>  	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
>  	return (PF_PASS);
>  
>  fail:
> +	PF_FRAG_UNLOCK();
>  	REASON_SET(reason, PFRES_MEMORY);
>  	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
>  	return (PF_DROP);
> diff -r ccb9f01e56a7 src/sys/net/pfvar_priv.h
> --- src/sys/net/pfvar_priv.h	Thu Jun 01 09:19:56 2017 +0200
> +++ src/sys/net/pfvar_priv.h	Thu Jun 01 09:29:15 2017 +0200
> @@ -37,6 +37,10 @@
>  
>  #ifdef _KERNEL
>  
> +#include <sys/rwlock.h>
> +
> +extern struct rwlock	pf_lock;
> +
>  struct pf_pdesc {
>  	struct {
>  		int	 done;
> @@ -94,6 +98,36 @@ struct pf_pdesc {
>  	} hdr;
>  };
>  
> +#ifdef WITH_PF_LOCK
> +extern struct rwlock	pf_lock;
> +
> +#define PF_LOCK()	do {			\
> +		NET_ASSERT_LOCKED();		\
> +		rw_enter_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_UNLOCK()	do {			\
> +		PF_ASSERT_LOCKED();		\
> +		rw_exit_write(&pf_lock);	\
> +	} while (0)
> +
> +#define PF_ASSERT_LOCKED()	do {			\
> +		if (rw_status(&pf_lock) != RW_WRITE)	\
> +			splassert_fail(RW_WRITE,	\
> +			    rw_status(&pf_lock),__func__);\
> +	} while (0)
> +
> +#define PF_ASSERT_UNLOCKED()	do {			\
> +		if (rw_status(&pf_lock) == RW_WRITE)	\
> +			splassert_fail(0, rw_status(&pf_lock), __func__);\
> +	} while (0)
> +#else /* !WITH_PF_LOCK */
> +#define PF_LOCK()		(void)(0)
> +#define PF_UNLOCK()		(void)(0)
> +#define PF_ASSERT_LOCKED()	(void)(0)
> +#define PF_ASSERT_UNLOCKED()	(void)(0)
> +#endif /* WITH_PF_LOCK */
> +
>  #endif /* _KERNEL */
>  
>  #endif /* _NET_PFVAR_PRIV_H_ */
> --------8<---------------8<---------------8<------------------8<--------
> 
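
(Not part of the review, just an aside for anyone who wants to poke at the
locking pattern outside the kernel tree: the heart of the diff is that
PF_LOCK()/PF_UNLOCK() and the PF_FRAG_* macros compile to real lock
operations only when WITH_PF_LOCK is defined, and to (void)(0) no-ops
otherwise, so a kernel built without the option behaves exactly as before.
Here is a minimal userland sketch of that build-time switch, with pthreads
standing in for the kernel rwlock; every name in it, WITH_DEMO_LOCK,
DEMO_LOCK, demo.c, is made up for illustration and is not the kernel API.)

--------8<---------------8<---------------8<------------------8<--------
/*
 * demo.c: userland sketch of the compile-time optional lock pattern.
 *
 *	cc -DWITH_DEMO_LOCK -o demo demo.c -lpthread	(locking build)
 *	cc -o demo demo.c				(no-op build)
 *
 * Caller code is identical in both builds, just like pf.c with and
 * without WITH_PF_LOCK.
 */
#include <pthread.h>
#include <stdio.h>

#ifdef WITH_DEMO_LOCK
static pthread_rwlock_t demo_lock = PTHREAD_RWLOCK_INITIALIZER;

#define DEMO_LOCK()	pthread_rwlock_wrlock(&demo_lock)
#define DEMO_UNLOCK()	pthread_rwlock_unlock(&demo_lock)
#else /* !WITH_DEMO_LOCK */
#define DEMO_LOCK()	(void)(0)
#define DEMO_UNLOCK()	(void)(0)
#endif /* WITH_DEMO_LOCK */

static int shared_state;

int
main(void)
{
	/* the critical section reads the same whether or not it locks */
	DEMO_LOCK();
	shared_state++;
	DEMO_UNLOCK();

	printf("shared_state: %d\n", shared_state);
	return (0);
}
--------8<---------------8<---------------8<------------------8<--------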