Author: kp
Date: Thu Dec  6 19:27:15 2018
New Revision: 341646
URL: https://svnweb.freebsd.org/changeset/base/341646

Log:
  pfsync: Performance improvement
  
  pfsync code is called for every new state, state update and state
  deletion in pf. While pf itself can operate on multiple states at the
  same time (on different cores, assuming the states hash to a different
  hashrow), pfsync only had a single lock.
  This greatly reduced throughput on multicore systems.
  
  Address this by splitting the pfsync queues into buckets, based on the
  state id. This ensures that updates for a given connection always end up
  in the same bucket, which allows pfsync to still collapse multiple
  updates into one, while allowing multiple cores to proceed at the same
  time.
  
  The number of buckets is tunable, but defaults to 2 x number of cpus.
  Benchmarking has shown improvement, depending on hardware and setup, from ~30%
  to ~100%.
  
  MFC after:    1 week
  Sponsored by: Orange Business Services
  Differential Revision:        https://reviews.freebsd.org/D18373

Modified:
  head/share/man/man4/pfsync.4
  head/sys/netpfil/pf/if_pfsync.c

Modified: head/share/man/man4/pfsync.4
==============================================================================
--- head/share/man/man4/pfsync.4        Thu Dec  6 19:20:00 2018        
(r341645)
+++ head/share/man/man4/pfsync.4        Thu Dec  6 19:27:15 2018        
(r341646)
@@ -26,7 +26,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd August 18, 2017
+.Dd December 6, 2018
 .Dt PFSYNC 4
 .Os
 .Sh NAME
@@ -130,6 +130,13 @@ See
 .Xr carp 4
 for more information.
 Default value is 240.
+.It Va net.pfsync.pfsync_buckets
+The number of
+.Nm
+buckets.
+This affects the performance and memory tradeoff.
+Defaults to twice the number of CPUs.
+Change only if benchmarks show this helps on your workload.
 .El
 .Sh EXAMPLES
 .Nm

Modified: head/sys/netpfil/pf/if_pfsync.c
==============================================================================
--- head/sys/netpfil/pf/if_pfsync.c     Thu Dec  6 19:20:00 2018        
(r341645)
+++ head/sys/netpfil/pf/if_pfsync.c     Thu Dec  6 19:27:15 2018        
(r341646)
@@ -77,6 +77,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/priv.h>
 #include <sys/protosw.h>
+#include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/sockio.h>
 #include <sys/sysctl.h>
@@ -106,6 +107,8 @@ __FBSDID("$FreeBSD$");
        sizeof(struct pfsync_header) + \
        sizeof(struct pfsync_subheader) )
 
+struct pfsync_bucket;
+
 struct pfsync_pkt {
        struct ip *ip;
        struct in_addr src;
@@ -164,7 +167,7 @@ static struct pfsync_q pfsync_qs[] = {
 };
 
 static void    pfsync_q_ins(struct pf_state *, int, bool);
-static void    pfsync_q_del(struct pf_state *, bool);
+static void    pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *);
 
 static void    pfsync_update_state(struct pf_state *);
 
@@ -183,6 +186,28 @@ struct pfsync_deferral {
        struct mbuf                     *pd_m;
 };
 
+struct pfsync_sofct;
+
+struct pfsync_bucket
+{
+       int                     b_id;
+       struct pfsync_softc     *b_sc;
+       struct mtx              b_mtx;
+       struct callout          b_tmo;
+       int                     b_flags;
+#define        PFSYNCF_BUCKET_PUSH     0x00000001
+
+       size_t                  b_len;
+       TAILQ_HEAD(, pf_state)                  b_qs[PFSYNC_S_COUNT];
+       TAILQ_HEAD(, pfsync_upd_req_item)       b_upd_req_list;
+       TAILQ_HEAD(, pfsync_deferral)           b_deferrals;
+       u_int                   b_deferred;
+       void                    *b_plus;
+       size_t                  b_pluslen;
+
+       struct  ifaltq b_snd;
+};
+
 struct pfsync_softc {
        /* Configuration */
        struct ifnet            *sc_ifp;
@@ -192,20 +217,12 @@ struct pfsync_softc {
        uint32_t                sc_flags;
 #define        PFSYNCF_OK              0x00000001
 #define        PFSYNCF_DEFER           0x00000002
-#define        PFSYNCF_PUSH            0x00000004
        uint8_t                 sc_maxupdates;
        struct ip               sc_template;
-       struct callout          sc_tmo;
        struct mtx              sc_mtx;
 
        /* Queued data */
-       size_t                  sc_len;
-       TAILQ_HEAD(, pf_state)                  sc_qs[PFSYNC_S_COUNT];
-       TAILQ_HEAD(, pfsync_upd_req_item)       sc_upd_req_list;
-       TAILQ_HEAD(, pfsync_deferral)           sc_deferrals;
-       u_int                   sc_deferred;
-       void                    *sc_plus;
-       size_t                  sc_pluslen;
+       struct pfsync_bucket    *sc_buckets;
 
        /* Bulk update info */
        struct mtx              sc_bulk_mtx;
@@ -223,6 +240,10 @@ struct pfsync_softc {
 #define        PFSYNC_UNLOCK(sc)       mtx_unlock(&(sc)->sc_mtx)
 #define        PFSYNC_LOCK_ASSERT(sc)  mtx_assert(&(sc)->sc_mtx, MA_OWNED)
 
+#define PFSYNC_BUCKET_LOCK(b)          mtx_lock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_UNLOCK(b)                mtx_unlock(&(b)->b_mtx)
+#define PFSYNC_BUCKET_LOCK_ASSERT(b)   mtx_assert(&(b)->b_mtx, MA_OWNED)
+
 #define        PFSYNC_BLOCK(sc)        mtx_lock(&(sc)->sc_bulk_mtx)
 #define        PFSYNC_BUNLOCK(sc)      mtx_unlock(&(sc)->sc_bulk_mtx)
 #define        PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED)
@@ -239,7 +260,8 @@ VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKE
 #define        V_pfsync_carp_adj       VNET(pfsync_carp_adj)
 
 static void    pfsync_timeout(void *);
-static void    pfsync_push(struct pfsync_softc *);
+static void    pfsync_push(struct pfsync_bucket *);
+static void    pfsync_push_all(struct pfsync_softc *);
 static void    pfsyncintr(void *);
 static int     pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *,
                    void *);
@@ -249,12 +271,16 @@ static void       pfsync_pointers_uninit(void);
 static int     pfsync_init(void);
 static void    pfsync_uninit(void);
 
+static unsigned long pfsync_buckets;
+
 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC");
 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(pfsyncstats), pfsyncstats,
     "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)");
 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW,
     &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor 
adjustment");
+SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN,
+    &pfsync_buckets, 0, "Number of pfsync hash buckets");
 
 static int     pfsync_clone_create(struct if_clone *, int, caddr_t);
 static void    pfsync_clone_destroy(struct ifnet *);
@@ -270,10 +296,10 @@ static void       pfsync_undefer_state(struct pf_state *, 
in
 static void    pfsync_defer_tmo(void *);
 
 static void    pfsync_request_update(u_int32_t, u_int64_t);
-static void    pfsync_update_state_req(struct pf_state *);
+static bool    pfsync_update_state_req(struct pf_state *);
 
 static void    pfsync_drop(struct pfsync_softc *);
-static void    pfsync_sendout(int);
+static void    pfsync_sendout(int, int);
 static void    pfsync_send_plus(void *, size_t);
 
 static void    pfsync_bulk_start(void);
@@ -285,7 +311,10 @@ static void        pfsync_detach_ifnet(struct ifnet *);
 #ifdef IPSEC
 static void    pfsync_update_net_tdb(struct pfsync_tdb *);
 #endif
+static struct pfsync_bucket    *pfsync_get_bucket(struct pfsync_softc *,
+                   struct pf_state *);
 
+
 #define PFSYNC_MAX_BULKTRIES   12
 
 VNET_DEFINE(struct if_clone *, pfsync_cloner);
@@ -296,21 +325,16 @@ pfsync_clone_create(struct if_clone *ifc, int unit, ca
 {
        struct pfsync_softc *sc;
        struct ifnet *ifp;
-       int q;
+       struct pfsync_bucket *b;
+       int c, q;
 
        if (unit != 0)
                return (EINVAL);
 
-       sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
-       sc->sc_flags |= PFSYNCF_OK;
+       if (! pfsync_buckets)
+               pfsync_buckets = mp_ncpus * 2;
 
-       for (q = 0; q < PFSYNC_S_COUNT; q++)
-               TAILQ_INIT(&sc->sc_qs[q]);
-
-       TAILQ_INIT(&sc->sc_upd_req_list);
-       TAILQ_INIT(&sc->sc_deferrals);
-
-       sc->sc_len = PFSYNC_MINPKT;
+       sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO);
        sc->sc_maxupdates = 128;
 
        ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC);
@@ -323,12 +347,10 @@ pfsync_clone_create(struct if_clone *ifc, int unit, ca
        ifp->if_ioctl = pfsyncioctl;
        ifp->if_output = pfsyncoutput;
        ifp->if_type = IFT_PFSYNC;
-       ifp->if_snd.ifq_maxlen = ifqmaxlen;
        ifp->if_hdrlen = sizeof(struct pfsync_header);
        ifp->if_mtu = ETHERMTU;
        mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF);
        mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF);
-       callout_init(&sc->sc_tmo, 1);
        callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0);
        callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0);
 
@@ -336,6 +358,27 @@ pfsync_clone_create(struct if_clone *ifc, int unit, ca
 
        bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
 
+       sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets),
+           M_PFSYNC, M_ZERO | M_WAITOK);
+       for (c = 0; c < pfsync_buckets; c++) {
+               b = &sc->sc_buckets[c];
+               mtx_init(&b->b_mtx, pfsyncname, NULL, MTX_DEF);
+
+               b->b_id = c;
+               b->b_sc = sc;
+               b->b_len = PFSYNC_MINPKT;
+
+               for (q = 0; q < PFSYNC_S_COUNT; q++)
+                       TAILQ_INIT(&b->b_qs[q]);
+
+               TAILQ_INIT(&b->b_upd_req_list);
+               TAILQ_INIT(&b->b_deferrals);
+
+               callout_init(&b->b_tmo, 1);
+
+               b->b_snd.ifq_maxlen = ifqmaxlen;
+       }
+
        V_pfsyncif = sc;
 
        return (0);
@@ -345,29 +388,36 @@ static void
 pfsync_clone_destroy(struct ifnet *ifp)
 {
        struct pfsync_softc *sc = ifp->if_softc;
+       struct pfsync_bucket *b;
+       int c;
 
-       /*
-        * At this stage, everything should have already been
-        * cleared by pfsync_uninit(), and we have only to
-        * drain callouts.
-        */
-       while (sc->sc_deferred > 0) {
-               struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals);
+       for (c = 0; c < pfsync_buckets; c++) {
+               b = &sc->sc_buckets[c];
+               /*
+                * At this stage, everything should have already been
+                * cleared by pfsync_uninit(), and we have only to
+                * drain callouts.
+                */
+               while (b->b_deferred > 0) {
+                       struct pfsync_deferral *pd =
+                           TAILQ_FIRST(&b->b_deferrals);
 
-               TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-               sc->sc_deferred--;
-               if (callout_stop(&pd->pd_tmo) > 0) {
-                       pf_release_state(pd->pd_st);
-                       m_freem(pd->pd_m);
-                       free(pd, M_PFSYNC);
-               } else {
-                       pd->pd_refs++;
-                       callout_drain(&pd->pd_tmo);
-                       free(pd, M_PFSYNC);
+                       TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+                       b->b_deferred--;
+                       if (callout_stop(&pd->pd_tmo) > 0) {
+                               pf_release_state(pd->pd_st);
+                               m_freem(pd->pd_m);
+                               free(pd, M_PFSYNC);
+                       } else {
+                               pd->pd_refs++;
+                               callout_drain(&pd->pd_tmo);
+                               free(pd, M_PFSYNC);
+                       }
                }
+
+               callout_drain(&b->b_tmo);
        }
 
-       callout_drain(&sc->sc_tmo);
        callout_drain(&sc->sc_bulkfail_tmo);
        callout_drain(&sc->sc_bulk_tmo);
 
@@ -383,6 +433,8 @@ pfsync_clone_destroy(struct ifnet *ifp)
                pfsync_multicast_cleanup(sc);
        mtx_destroy(&sc->sc_mtx);
        mtx_destroy(&sc->sc_bulk_mtx);
+
+       free(sc->sc_buckets, M_PFSYNC);
        free(sc, M_PFSYNC);
 
        V_pfsyncif = NULL;
@@ -546,7 +598,7 @@ pfsync_state_import(struct pfsync_state *sp, u_int8_t 
                st->state_flags &= ~PFSTATE_NOSYNC;
                if (st->state_flags & PFSTATE_ACK) {
                        pfsync_q_ins(st, PFSYNC_S_IACK, true);
-                       pfsync_push(sc);
+                       pfsync_push_all(sc);
                }
        }
        st->state_flags &= ~PFSTATE_ACK;
@@ -785,9 +837,7 @@ pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m,
                        continue;
 
                if (st->state_flags & PFSTATE_ACK) {
-                       PFSYNC_LOCK(V_pfsyncif);
                        pfsync_undefer_state(st, 0);
-                       PFSYNC_UNLOCK(V_pfsyncif);
                }
                PF_STATE_UNLOCK(st);
        }
@@ -876,9 +926,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, 
                }
 
                if (st->state_flags & PFSTATE_ACK) {
-                       PFSYNC_LOCK(sc);
                        pfsync_undefer_state(st, 1);
-                       PFSYNC_UNLOCK(sc);
                }
 
                if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -912,9 +960,7 @@ pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, 
 
                        pfsync_update_state(st);
                        PF_STATE_UNLOCK(st);
-                       PFSYNC_LOCK(sc);
-                       pfsync_push(sc);
-                       PFSYNC_UNLOCK(sc);
+                       pfsync_push_all(sc);
                        continue;
                }
                PF_STATE_UNLOCK(st);
@@ -960,16 +1006,14 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m
                st = pf_find_state_byid(up->id, up->creatorid);
                if (st == NULL) {
                        /* We don't have this state. Ask for it. */
-                       PFSYNC_LOCK(sc);
+                       PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
                        pfsync_request_update(up->creatorid, up->id);
-                       PFSYNC_UNLOCK(sc);
+                       PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
                        continue;
                }
 
                if (st->state_flags & PFSTATE_ACK) {
-                       PFSYNC_LOCK(sc);
                        pfsync_undefer_state(st, 1);
-                       PFSYNC_UNLOCK(sc);
                }
 
                if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
@@ -1003,9 +1047,7 @@ pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m
 
                        pfsync_update_state(st);
                        PF_STATE_UNLOCK(st);
-                       PFSYNC_LOCK(sc);
-                       pfsync_push(sc);
-                       PFSYNC_UNLOCK(sc);
+                       pfsync_push_all(sc);
                        continue;
                }
                PF_STATE_UNLOCK(st);
@@ -1283,6 +1325,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t dat
        struct ifreq *ifr = (struct ifreq *)data;
        struct pfsyncreq pfsyncr;
        int error;
+       int c;
 
        switch (cmd) {
        case SIOCSIFFLAGS:
@@ -1303,10 +1346,12 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t dat
                    ifr->ifr_mtu > sc->sc_sync_if->if_mtu)
                        return (EINVAL);
                if (ifr->ifr_mtu < ifp->if_mtu) {
-                       PFSYNC_LOCK(sc);
-                       if (sc->sc_len > PFSYNC_MINPKT)
-                               pfsync_sendout(1);
-                       PFSYNC_UNLOCK(sc);
+                       for (c = 0; c < pfsync_buckets; c++) {
+                               PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+                               if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT)
+                                       pfsync_sendout(1, c);
+                               PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+                       }
                }
                ifp->if_mtu = ifr->ifr_mtu;
                break;
@@ -1379,12 +1424,16 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t dat
                        break;
                }
 
-               if (sc->sc_len > PFSYNC_MINPKT &&
-                   (sifp->if_mtu < sc->sc_ifp->if_mtu ||
-                   (sc->sc_sync_if != NULL &&
-                   sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
-                   sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
-                       pfsync_sendout(1);
+               for (c = 0; c < pfsync_buckets; c++) {
+                       PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]);
+                       if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT &&
+                           (sifp->if_mtu < sc->sc_ifp->if_mtu ||
+                           (sc->sc_sync_if != NULL &&
+                           sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
+                           sifp->if_mtu < MCLBYTES - sizeof(struct ip)))
+                               pfsync_sendout(1, c);
+                       PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]);
+               }
 
                if (imo->imo_membership)
                        pfsync_multicast_cleanup(sc);
@@ -1421,8 +1470,10 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t dat
                sc->sc_flags &= ~PFSYNCF_OK;
                if (V_pf_status.debug >= PF_DEBUG_MISC)
                        printf("pfsync: requesting bulk update\n");
-               pfsync_request_update(0, 0);
                PFSYNC_UNLOCK(sc);
+               PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]);
+               pfsync_request_update(0, 0);
+               PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]);
                PFSYNC_BLOCK(sc);
                sc->sc_ureq_sent = time_uptime;
                callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail,
@@ -1483,33 +1534,37 @@ pfsync_drop(struct pfsync_softc *sc)
 {
        struct pf_state *st, *next;
        struct pfsync_upd_req_item *ur;
-       int q;
+       struct pfsync_bucket *b;
+       int c, q;
 
-       for (q = 0; q < PFSYNC_S_COUNT; q++) {
-               if (TAILQ_EMPTY(&sc->sc_qs[q]))
-                       continue;
+       for (c = 0; c < pfsync_buckets; c++) {
+               b = &sc->sc_buckets[c];
+               for (q = 0; q < PFSYNC_S_COUNT; q++) {
+                       if (TAILQ_EMPTY(&b->b_qs[q]))
+                               continue;
 
-               TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) {
-                       KASSERT(st->sync_state == q,
-                               ("%s: st->sync_state == q",
-                                       __func__));
-                       st->sync_state = PFSYNC_S_NONE;
-                       pf_release_state(st);
+                       TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, next) {
+                               KASSERT(st->sync_state == q,
+                                       ("%s: st->sync_state == q",
+                                               __func__));
+                               st->sync_state = PFSYNC_S_NONE;
+                               pf_release_state(st);
+                       }
+                       TAILQ_INIT(&b->b_qs[q]);
                }
-               TAILQ_INIT(&sc->sc_qs[q]);
-       }
 
-       while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
-               TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
-               free(ur, M_PFSYNC);
-       }
+               while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+                       TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
+                       free(ur, M_PFSYNC);
+               }
 
-       sc->sc_plus = NULL;
-       sc->sc_len = PFSYNC_MINPKT;
+               b->b_len = PFSYNC_MINPKT;
+               b->b_plus = NULL;
+       }
 }
 
 static void
-pfsync_sendout(int schedswi)
+pfsync_sendout(int schedswi, int c)
 {
        struct pfsync_softc *sc = V_pfsyncif;
        struct ifnet *ifp = sc->sc_ifp;
@@ -1519,27 +1574,28 @@ pfsync_sendout(int schedswi)
        struct pfsync_subheader *subh;
        struct pf_state *st, *st_next;
        struct pfsync_upd_req_item *ur;
+       struct pfsync_bucket *b = &sc->sc_buckets[c];
        int offset;
        int q, count = 0;
 
        KASSERT(sc != NULL, ("%s: null sc", __func__));
-       KASSERT(sc->sc_len > PFSYNC_MINPKT,
-           ("%s: sc_len %zu", __func__, sc->sc_len));
-       PFSYNC_LOCK_ASSERT(sc);
+       KASSERT(b->b_len > PFSYNC_MINPKT,
+           ("%s: sc_len %zu", __func__, b->b_len));
+       PFSYNC_BUCKET_LOCK_ASSERT(b);
 
        if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) {
                pfsync_drop(sc);
                return;
        }
 
-       m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR);
+       m = m_get2(max_linkhdr + b->b_len, M_NOWAIT, MT_DATA, M_PKTHDR);
        if (m == NULL) {
                if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
                V_pfsyncstats.pfsyncs_onomem++;
                return;
        }
        m->m_data += max_linkhdr;
-       m->m_len = m->m_pkthdr.len = sc->sc_len;
+       m->m_len = m->m_pkthdr.len = b->b_len;
 
        /* build the ip header */
        ip = (struct ip *)m->m_data;
@@ -1555,19 +1611,19 @@ pfsync_sendout(int schedswi)
        offset += sizeof(*ph);
 
        ph->version = PFSYNC_VERSION;
-       ph->len = htons(sc->sc_len - sizeof(*ip));
+       ph->len = htons(b->b_len - sizeof(*ip));
        bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
 
        /* walk the queues */
        for (q = 0; q < PFSYNC_S_COUNT; q++) {
-               if (TAILQ_EMPTY(&sc->sc_qs[q]))
+               if (TAILQ_EMPTY(&b->b_qs[q]))
                        continue;
 
                subh = (struct pfsync_subheader *)(m->m_data + offset);
                offset += sizeof(*subh);
 
                count = 0;
-               TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) {
+               TAILQ_FOREACH_SAFE(st, &b->b_qs[q], sync_list, st_next) {
                        KASSERT(st->sync_state == q,
                                ("%s: st->sync_state == q",
                                        __func__));
@@ -1581,7 +1637,7 @@ pfsync_sendout(int schedswi)
                        pf_release_state(st);
                        count++;
                }
-               TAILQ_INIT(&sc->sc_qs[q]);
+               TAILQ_INIT(&b->b_qs[q]);
 
                bzero(subh, sizeof(*subh));
                subh->action = pfsync_qs[q].action;
@@ -1589,13 +1645,13 @@ pfsync_sendout(int schedswi)
                V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count;
        }
 
-       if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
+       if (!TAILQ_EMPTY(&b->b_upd_req_list)) {
                subh = (struct pfsync_subheader *)(m->m_data + offset);
                offset += sizeof(*subh);
 
                count = 0;
-               while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
-                       TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
+               while ((ur = TAILQ_FIRST(&b->b_upd_req_list)) != NULL) {
+                       TAILQ_REMOVE(&b->b_upd_req_list, ur, ur_entry);
 
                        bcopy(&ur->ur_msg, m->m_data + offset,
                            sizeof(ur->ur_msg));
@@ -1611,11 +1667,11 @@ pfsync_sendout(int schedswi)
        }
 
        /* has someone built a custom region for us to add? */
-       if (sc->sc_plus != NULL) {
-               bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
-               offset += sc->sc_pluslen;
+       if (b->b_plus != NULL) {
+               bcopy(b->b_plus, m->m_data + offset, b->b_pluslen);
+               offset += b->b_pluslen;
 
-               sc->sc_plus = NULL;
+               b->b_plus = NULL;
        }
 
        subh = (struct pfsync_subheader *)(m->m_data + offset);
@@ -1629,24 +1685,24 @@ pfsync_sendout(int schedswi)
        /* we're done, let's put it on the wire */
        if (ifp->if_bpf) {
                m->m_data += sizeof(*ip);
-               m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
+               m->m_len = m->m_pkthdr.len = b->b_len - sizeof(*ip);
                BPF_MTAP(ifp, m);
                m->m_data -= sizeof(*ip);
-               m->m_len = m->m_pkthdr.len = sc->sc_len;
+               m->m_len = m->m_pkthdr.len = b->b_len;
        }
 
        if (sc->sc_sync_if == NULL) {
-               sc->sc_len = PFSYNC_MINPKT;
+               b->b_len = PFSYNC_MINPKT;
                m_freem(m);
                return;
        }
 
        if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1);
        if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
-       sc->sc_len = PFSYNC_MINPKT;
+       b->b_len = PFSYNC_MINPKT;
 
-       if (!_IF_QFULL(&sc->sc_ifp->if_snd))
-               _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
+       if (!_IF_QFULL(&b->b_snd))
+               _IF_ENQUEUE(&b->b_snd, m);
        else {
                m_freem(m);
                if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1);
@@ -1659,6 +1715,7 @@ static void
 pfsync_insert_state(struct pf_state *st)
 {
        struct pfsync_softc *sc = V_pfsyncif;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
        if (st->state_flags & PFSTATE_NOSYNC)
                return;
@@ -1672,12 +1729,12 @@ pfsync_insert_state(struct pf_state *st)
        KASSERT(st->sync_state == PFSYNC_S_NONE,
                ("%s: st->sync_state %u", __func__, st->sync_state));
 
-       PFSYNC_LOCK(sc);
-       if (sc->sc_len == PFSYNC_MINPKT)
-               callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+       PFSYNC_BUCKET_LOCK(b);
+       if (b->b_len == PFSYNC_MINPKT)
+               callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
 
        pfsync_q_ins(st, PFSYNC_S_INS, true);
-       PFSYNC_UNLOCK(sc);
+       PFSYNC_BUCKET_UNLOCK(b);
 
        st->sync_updates = 0;
 }
@@ -1687,6 +1744,7 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
 {
        struct pfsync_softc *sc = V_pfsyncif;
        struct pfsync_deferral *pd;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
        if (m->m_flags & (M_BCAST|M_MCAST))
                return (0);
@@ -1699,13 +1757,13 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
                return (0);
        }
 
-        if (sc->sc_deferred >= 128)
-               pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
+       if (b->b_deferred >= 128)
+               pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0);
 
        pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT);
        if (pd == NULL)
                return (0);
-       sc->sc_deferred++;
+       b->b_deferred++;
 
        m->m_flags |= M_SKIP_FIREWALL;
        st->state_flags |= PFSTATE_ACK;
@@ -1716,11 +1774,11 @@ pfsync_defer(struct pf_state *st, struct mbuf *m)
        pf_ref_state(st);
        pd->pd_m = m;
 
-       TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
-       callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED);
+       TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry);
+       callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED);
        callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd);
 
-       pfsync_push(sc);
+       pfsync_push(b);
 
        return (1);
 }
@@ -1731,11 +1789,12 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop)
        struct pfsync_softc *sc = pd->pd_sc;
        struct mbuf *m = pd->pd_m;
        struct pf_state *st = pd->pd_st;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
-       PFSYNC_LOCK_ASSERT(sc);
+       PFSYNC_BUCKET_LOCK_ASSERT(b);
 
-       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-       sc->sc_deferred--;
+       TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+       b->b_deferred--;
        pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
        free(pd, M_PFSYNC);
        pf_release_state(st);
@@ -1743,8 +1802,8 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop)
        if (drop)
                m_freem(m);
        else {
-               _IF_ENQUEUE(&sc->sc_ifp->if_snd, m);
-               pfsync_push(sc);
+               _IF_ENQUEUE(&b->b_snd, m);
+               pfsync_push(b);
        }
 }
 
@@ -1755,13 +1814,14 @@ pfsync_defer_tmo(void *arg)
        struct pfsync_softc *sc = pd->pd_sc;
        struct mbuf *m = pd->pd_m;
        struct pf_state *st = pd->pd_st;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
-       PFSYNC_LOCK_ASSERT(sc);
+       PFSYNC_BUCKET_LOCK_ASSERT(b);
 
        CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
 
-       TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
-       sc->sc_deferred--;
+       TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
+       b->b_deferred--;
        pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */
        if (pd->pd_refs == 0)
                free(pd, M_PFSYNC);
@@ -1779,40 +1839,52 @@ pfsync_undefer_state(struct pf_state *st, int drop)
 {
        struct pfsync_softc *sc = V_pfsyncif;
        struct pfsync_deferral *pd;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
-       PFSYNC_LOCK_ASSERT(sc);
+       PFSYNC_BUCKET_LOCK(b);
 
-       TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
+       TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) {
                 if (pd->pd_st == st) {
                        if (callout_stop(&pd->pd_tmo) > 0)
                                pfsync_undefer(pd, drop);
+
+                       PFSYNC_BUCKET_UNLOCK(b);
                        return;
                }
        }
+       PFSYNC_BUCKET_UNLOCK(b);
 
        panic("%s: unable to find deferred state", __func__);
 }
 
+static struct pfsync_bucket*
+pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st)
+{
+       int c = PF_IDHASH(st) % pfsync_buckets;
+       return &sc->sc_buckets[c];
+}
+
 static void
 pfsync_update_state(struct pf_state *st)
 {
        struct pfsync_softc *sc = V_pfsyncif;
        bool sync = false, ref = true;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
        PF_STATE_LOCK_ASSERT(st);
-       PFSYNC_LOCK(sc);
+       PFSYNC_BUCKET_LOCK(b);
 
        if (st->state_flags & PFSTATE_ACK)
                pfsync_undefer_state(st, 0);
        if (st->state_flags & PFSTATE_NOSYNC) {
                if (st->sync_state != PFSYNC_S_NONE)
-                       pfsync_q_del(st, true);
-               PFSYNC_UNLOCK(sc);
+                       pfsync_q_del(st, true, b);
+               PFSYNC_BUCKET_UNLOCK(b);
                return;
        }
 
-       if (sc->sc_len == PFSYNC_MINPKT)
-               callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+       if (b->b_len == PFSYNC_MINPKT)
+               callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
 
        switch (st->sync_state) {
        case PFSYNC_S_UPD_C:
@@ -1828,7 +1900,7 @@ pfsync_update_state(struct pf_state *st)
                break;
 
        case PFSYNC_S_IACK:
-               pfsync_q_del(st, false);
+               pfsync_q_del(st, false, b);
                ref = false;
                /* FALLTHROUGH */
 
@@ -1842,26 +1914,27 @@ pfsync_update_state(struct pf_state *st)
        }
 
        if (sync || (time_uptime - st->pfsync_time) < 2)
-               pfsync_push(sc);
+               pfsync_push(b);
 
-       PFSYNC_UNLOCK(sc);
+       PFSYNC_BUCKET_UNLOCK(b);
 }
 
 static void
 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
 {
        struct pfsync_softc *sc = V_pfsyncif;
+       struct pfsync_bucket *b = &sc->sc_buckets[0];
        struct pfsync_upd_req_item *item;
        size_t nlen = sizeof(struct pfsync_upd_req);
 
-       PFSYNC_LOCK_ASSERT(sc);
+       PFSYNC_BUCKET_LOCK_ASSERT(b);
 
        /*
         * This code does a bit to prevent multiple update requests for the
         * same state being generated. It searches current subheader queue,
         * but it doesn't lookup into queue of already packed datagrams.
         */
-       TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry)
+       TAILQ_FOREACH(item, &b->b_upd_req_list, ur_entry)
                if (item->ur_msg.id == id &&
                    item->ur_msg.creatorid == creatorid)
                        return;
@@ -1873,46 +1946,47 @@ pfsync_request_update(u_int32_t creatorid, u_int64_t i
        item->ur_msg.id = id;
        item->ur_msg.creatorid = creatorid;
 
-       if (TAILQ_EMPTY(&sc->sc_upd_req_list))
+       if (TAILQ_EMPTY(&b->b_upd_req_list))
                nlen += sizeof(struct pfsync_subheader);
 
-       if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
-               pfsync_sendout(1);
+       if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+               pfsync_sendout(1, 0);
 
                nlen = sizeof(struct pfsync_subheader) +
                    sizeof(struct pfsync_upd_req);
        }
 
-       TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
-       sc->sc_len += nlen;
+       TAILQ_INSERT_TAIL(&b->b_upd_req_list, item, ur_entry);
+       b->b_len += nlen;
 }
 
-static void
+static bool
 pfsync_update_state_req(struct pf_state *st)
 {
        struct pfsync_softc *sc = V_pfsyncif;
-       bool ref = true;
+       bool ref = true, full = false;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
        PF_STATE_LOCK_ASSERT(st);
-       PFSYNC_LOCK(sc);
+       PFSYNC_BUCKET_LOCK(b);
 
        if (st->state_flags & PFSTATE_NOSYNC) {
                if (st->sync_state != PFSYNC_S_NONE)
-                       pfsync_q_del(st, true);
-               PFSYNC_UNLOCK(sc);
-               return;
+                       pfsync_q_del(st, true, b);
+               PFSYNC_BUCKET_UNLOCK(b);
+               return (full);
        }
 
        switch (st->sync_state) {
        case PFSYNC_S_UPD_C:
        case PFSYNC_S_IACK:
-               pfsync_q_del(st, false);
+               pfsync_q_del(st, false, b);
                ref = false;
                /* FALLTHROUGH */
 
        case PFSYNC_S_NONE:
                pfsync_q_ins(st, PFSYNC_S_UPD, ref);
-               pfsync_push(sc);
+               pfsync_push(b);
                break;
 
        case PFSYNC_S_INS:
@@ -1925,38 +1999,44 @@ pfsync_update_state_req(struct pf_state *st)
                panic("%s: unexpected sync state %d", __func__, st->sync_state);
        }
 
-       PFSYNC_UNLOCK(sc);
+       if ((sc->sc_ifp->if_mtu - b->b_len) < sizeof(struct pfsync_state))
+               full = true;
+
+       PFSYNC_BUCKET_UNLOCK(b);
+
+       return (full);
 }
 
 static void
 pfsync_delete_state(struct pf_state *st)
 {
        struct pfsync_softc *sc = V_pfsyncif;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
        bool ref = true;
 
-       PFSYNC_LOCK(sc);
+       PFSYNC_BUCKET_LOCK(b);
        if (st->state_flags & PFSTATE_ACK)
                pfsync_undefer_state(st, 1);
        if (st->state_flags & PFSTATE_NOSYNC) {
                if (st->sync_state != PFSYNC_S_NONE)
-                       pfsync_q_del(st, true);
-               PFSYNC_UNLOCK(sc);
+                       pfsync_q_del(st, true, b);
+               PFSYNC_BUCKET_UNLOCK(b);
                return;
        }
 
-       if (sc->sc_len == PFSYNC_MINPKT)
-               callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif);
+       if (b->b_len == PFSYNC_MINPKT)
+               callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b);
 
        switch (st->sync_state) {
        case PFSYNC_S_INS:
                /* We never got to tell the world so just forget about it. */
-               pfsync_q_del(st, true);
+               pfsync_q_del(st, true, b);
                break;
 
        case PFSYNC_S_UPD_C:
        case PFSYNC_S_UPD:
        case PFSYNC_S_IACK:
-               pfsync_q_del(st, false);
+               pfsync_q_del(st, false, b);
                ref = false;
                /* FALLTHROUGH */
 
@@ -1968,13 +2048,12 @@ pfsync_delete_state(struct pf_state *st)
                panic("%s: unexpected sync state %d", __func__, st->sync_state);
        }
 
-       PFSYNC_UNLOCK(sc);
+       PFSYNC_BUCKET_UNLOCK(b);
 }
 
 static void
 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
 {
-       struct pfsync_softc *sc = V_pfsyncif;
        struct {
                struct pfsync_subheader subh;
                struct pfsync_clr clr;
@@ -1989,9 +2068,7 @@ pfsync_clear_states(u_int32_t creatorid, const char *i
        strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
        r.clr.creatorid = creatorid;
 
-       PFSYNC_LOCK(sc);
        pfsync_send_plus(&r, sizeof(r));
-       PFSYNC_UNLOCK(sc);
 }
 
 static void
@@ -1999,48 +2076,48 @@ pfsync_q_ins(struct pf_state *st, int q, bool ref)
 {
        struct pfsync_softc *sc = V_pfsyncif;
        size_t nlen = pfsync_qs[q].len;
+       struct pfsync_bucket *b = pfsync_get_bucket(sc, st);
 
-       PFSYNC_LOCK_ASSERT(sc);
+       PFSYNC_BUCKET_LOCK_ASSERT(b);
 
        KASSERT(st->sync_state == PFSYNC_S_NONE,
                ("%s: st->sync_state %u", __func__, st->sync_state));
-       KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
-           sc->sc_len));
+       KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu",
+           b->b_len));
 
-       if (TAILQ_EMPTY(&sc->sc_qs[q]))
+       if (TAILQ_EMPTY(&b->b_qs[q]))
                nlen += sizeof(struct pfsync_subheader);
 
-       if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) {
-               pfsync_sendout(1);
+       if (b->b_len + nlen > sc->sc_ifp->if_mtu) {
+               pfsync_sendout(1, b->b_id);
 
                nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
        }
 
-       sc->sc_len += nlen;
-       TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
+       b->b_len += nlen;
+       TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list);
        st->sync_state = q;
        if (ref)
                pf_ref_state(st);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to