i started looking at locking on interface send queues (which use
struct ifqueue) and got distracted cos the same data structure is
used for the stack input queues.

the stack input queues don't need the pri or hfsc queueing bits, they're
just used as a fifo, so they can largely get directly replaced with
mbuf_queues. however, ifqueues grew a congestion thing cos of a
smart tweak made to pf.

if a softint input queue gets full, we consider it congested and
then set things up so pf will continue to match packets to states
but stop doing ruleset evaluation.

i want to remove the congestion stuff in ifqueue, but i don't want
to remove the pf functionality. my attempt at this is below.

instead of having pf check the ip or ip6 input queues and handling
them differently, this sets up a global congestion marker. if you
hit queue full on an input queue, you say the system is congested.
this means we drop ruleset evaluation on both v4 and v6 at the same
time, but still maintain state matching.

the old congestion implementation relied on malloc, which is both
unreliable and not mpsafe which gets in the way of us moving the
stack forward. pf reaching around to quickly fumble with the ifqs
is something we can do without too.

i tried to make it a bit more symmetrical in terms of where the
code is.

anyway, i never hit qfull on softint queues so i don't know how to
test this. can i get reviews and tests please? i'm not going to be
committing anything that depends on this before 5.7, so there's no
urgency to the testing. some feedback on potential flaws in the new
semantics would be appreciated though.

Index: net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.319
diff -u -p -r1.319 if.c
--- net/if.c    9 Feb 2015 03:09:57 -0000       1.319
+++ net/if.c    10 Feb 2015 05:06:21 -0000
@@ -139,7 +139,6 @@ int if_setgroupattribs(caddr_t);
 int    if_clone_list(struct if_clonereq *);
 struct if_clone        *if_clone_lookup(const char *, int *);
 
-void   if_congestion_clear(void *);
 int    if_group_egress_build(void);
 
 void   if_link_state_change_task(void *);
@@ -155,6 +154,7 @@ int if_cloners_count;
 struct timeout net_tick_to;
 void   net_tick(void *);
 int    net_livelocked(void);
+int    ifq_congestion;
 
 /*
  * Network interface utility routines.
@@ -787,33 +787,22 @@ if_clone_list(struct if_clonereq *ifcr)
 }
 
 /*
- * set queue congestion marker and register timeout to clear it
+ * set queue congestion marker
  */
 void
-if_congestion(struct ifqueue *ifq)
+if_congestion(void)
 {
-       /* Not currently needed, all callers check this */
-       if (ifq->ifq_congestion)
-               return;
+       extern int ticks;
 
-       ifq->ifq_congestion = malloc(sizeof(struct timeout), M_TEMP, M_NOWAIT);
-       if (ifq->ifq_congestion == NULL)
-               return;
-       timeout_set(ifq->ifq_congestion, if_congestion_clear, ifq);
-       timeout_add(ifq->ifq_congestion, hz / 100);
+       ifq_congestion = ticks;
 }
 
-/*
- * clear the congestion flag
- */
-void
-if_congestion_clear(void *arg)
+int
+if_congested(void)
 {
-       struct ifqueue *ifq = arg;
-       struct timeout *to = ifq->ifq_congestion;
+       extern int ticks;
 
-       ifq->ifq_congestion = NULL;
-       free(to, M_TEMP, sizeof(*to));
+       return (ticks - ifq_congestion <= (hz / 100));
 }
 
 #define        equal(a1, a2)   \
Index: net/if.h
===================================================================
RCS file: /cvs/src/sys/net/if.h,v
retrieving revision 1.160
diff -u -p -r1.160 if.h
--- net/if.h    8 Feb 2015 06:00:52 -0000       1.160
+++ net/if.h    10 Feb 2015 05:06:21 -0000
@@ -463,6 +463,8 @@ void        if_group_routechange(struct sockadd
 struct ifnet *ifunit(const char *);
 struct ifnet *if_get(unsigned int);
 void   ifnewlladdr(struct ifnet *);
+void   if_congestion(void);
+int    if_congested(void);
 
 #endif /* _KERNEL */
 
Index: net/if_ppp.c
===================================================================
RCS file: /cvs/src/sys/net/if_ppp.c,v
retrieving revision 1.80
diff -u -p -r1.80 if_ppp.c
--- net/if_ppp.c        19 Dec 2014 17:14:39 -0000      1.80
+++ net/if_ppp.c        10 Feb 2015 05:06:21 -0000
@@ -1485,8 +1485,7 @@ ppp_inproc(struct ppp_softc *sc, struct 
        if (sc->sc_flags & SC_DEBUG)
            printf("%s: input queue full\n", ifp->if_xname);
        ifp->if_iqdrops++;
-       if (!inq->ifq_congestion)
-               if_congestion(inq);
+       if_congestion();
        goto bad;
     }
     IF_ENQUEUE(inq, m);
Index: net/if_spppsubr.c
===================================================================
RCS file: /cvs/src/sys/net/if_spppsubr.c,v
retrieving revision 1.130
diff -u -p -r1.130 if_spppsubr.c
--- net/if_spppsubr.c   27 Jan 2015 03:17:36 -0000      1.130
+++ net/if_spppsubr.c   10 Feb 2015 05:06:21 -0000
@@ -614,8 +614,7 @@ sppp_input(struct ifnet *ifp, struct mbu
                if (debug)
                        log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n",
                                SPP_ARGS(ifp));
-               if (!inq->ifq_congestion)
-                       if_congestion(inq);
+               if_congestion();
                goto drop;
        }
        IF_ENQUEUE(inq, m);
Index: net/if_tun.c
===================================================================
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.131
diff -u -p -r1.131 if_tun.c
--- net/if_tun.c        21 Jan 2015 02:23:14 -0000      1.131
+++ net/if_tun.c        10 Feb 2015 05:06:21 -0000
@@ -907,8 +907,7 @@ tunwrite(dev_t dev, struct uio *uio, int
                splx(s);
                ifp->if_collisions++;
                m_freem(top);
-               if (!ifq->ifq_congestion)
-                       if_congestion(ifq);
+               if_congestion();
                return (ENOBUFS);
        }
        IF_ENQUEUE(ifq, top);
Index: net/if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.20
diff -u -p -r1.20 if_var.h
--- net/if_var.h        9 Feb 2015 03:09:57 -0000       1.20
+++ net/if_var.h        10 Feb 2015 05:06:21 -0000
@@ -108,7 +108,6 @@ struct      ifqueue {
        int                      ifq_maxlen;
        int                      ifq_drops;
        struct hfsc_if          *ifq_hfsc;
-       struct timeout          *ifq_congestion;
 };
 
 /*
@@ -320,8 +319,7 @@ do {                                                        
                \
        if (IF_QFULL(ifq)) {                                            \
                IF_DROP(ifq);                                           \
                m_freem(m);                                             \
-               if (!(ifq)->ifq_congestion)                             \
-                       if_congestion(ifq);                             \
+               if_congestion();                                        \
        } else                                                          \
                IF_ENQUEUE(ifq, m);                                     \
 } while (/* CONSTCOND */0)
@@ -423,7 +421,6 @@ void        if_clone_detach(struct if_clone *);
 int    if_clone_create(const char *);
 int    if_clone_destroy(const char *);
 
-void   if_congestion(struct ifqueue *);
 int     sysctl_ifq(int *, u_int, void *, size_t *, void *, size_t,
            struct ifqueue *);
 
Index: net/pf.c
===================================================================
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.902
diff -u -p -r1.902 pf.c
--- net/pf.c    9 Feb 2015 19:14:48 -0000       1.902
+++ net/pf.c    10 Feb 2015 05:06:21 -0000
@@ -222,7 +222,6 @@ int                  pf_compare_state_keys(struct pf_s
 struct pf_state                *pf_find_state(struct pfi_kif *,
                            struct pf_state_key_cmp *, u_int, struct mbuf *);
 int                     pf_src_connlimit(struct pf_state **);
-int                     pf_check_congestion(struct ifqueue *);
 int                     pf_match_rcvif(struct mbuf *, struct pf_rule *);
 void                    pf_step_into_anchor(int *, struct pf_ruleset **,
                            struct pf_rule **, struct pf_rule **);
@@ -3072,7 +3071,6 @@ pf_test_rule(struct pf_pdesc *pd, struct
        struct tcphdr           *th = pd->hdr.tcp;
        struct pf_state_key     *skw = NULL, *sks = NULL;
        struct pf_rule_actions   act;
-       struct ifqueue          *ifq = &ipintrq;
        u_short                  reason;
        int                      rewrite = 0;
        int                      tag = -1;
@@ -3087,12 +3085,7 @@ pf_test_rule(struct pf_pdesc *pd, struct
        act.rtableid = pd->rdomain;
        SLIST_INIT(&rules);
 
-#ifdef INET6
-       if (pd->af == AF_INET6)
-               ifq = &ip6intrq;
-#endif
-
-       if (pd->dir == PF_IN && pf_check_congestion(ifq)) {
+       if (pd->dir == PF_IN && if_congested()) {
                REASON_SET(&reason, PFRES_CONGEST);
                return (PF_DROP);
        }
@@ -6628,15 +6621,6 @@ done:
        }
 
        return (action);
-}
-
-int
-pf_check_congestion(struct ifqueue *ifq)
-{
-       if (ifq->ifq_congestion)
-               return (1);
-       else
-               return (0);
 }
 
 void
Index: net/pipex.c
===================================================================
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.65
diff -u -p -r1.65 pipex.c
--- net/pipex.c 19 Dec 2014 17:14:40 -0000      1.65
+++ net/pipex.c 10 Feb 2015 05:06:21 -0000
@@ -1227,8 +1227,7 @@ pipex_ip_input(struct mbuf *m0, struct p
        if (IF_QFULL(&ipintrq)) {
                IF_DROP(&ipintrq);
                ifp->if_collisions++;
-               if (!ipintrq.ifq_congestion)
-                       if_congestion(&ipintrq);
+               if_congestion();
                splx(s);
                goto drop;
        }
@@ -1302,8 +1301,7 @@ pipex_ip6_input(struct mbuf *m0, struct 
        if (IF_QFULL(&ip6intrq)) {
                IF_DROP(&ip6intrq);
                ifp->if_collisions++;
-               if (!ip6intrq.ifq_congestion)
-                       if_congestion(&ip6intrq);
+               if_congestion();
                splx(s);
                goto drop;
        }

Reply via email to