I started looking at locking on interface send queues (which use struct ifqueue) and got distracted because the same data structure is used for the stack input queues.
The stack input queues don't need the pri or hfsc queueing bits; they're just used as a fifo, so they can largely get directly replaced with mbuf_queues. However, ifqueues grew a congestion thing because of a smart tweak made to pf: if a softint input queue gets full, we consider it congested and then set things up so pf will continue to match packets to states but stops doing ruleset evaluation. I want to remove the congestion stuff in ifqueue, but I don't want to remove the pf functionality. My attempt at this is below. Instead of having pf check the ip or ip6 input queues and handling them differently, this sets up a global congestion marker. If you hit queue full on an input queue, you say the system is congested. This means we drop ruleset evaluation on both v4 and v6 at the same time, but still maintain state matching. The old congestion implementation relied on malloc, which is both unreliable and not mpsafe, which gets in the way of us moving the stack forward. pf reaching around to quickly fumble with the ifqs is something we can do without too. I tried to make it a bit more symmetrical in terms of where the code is. Anyway, I never hit qfull on softint queues so I don't know how to test this. Can I get reviews and tests please? I'm not going to be committing anything that depends on this before 5.7, so there's no urgency to the testing. Some feedback on potential flaws in the new semantics would be appreciated though. 
Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.319 diff -u -p -r1.319 if.c --- net/if.c 9 Feb 2015 03:09:57 -0000 1.319 +++ net/if.c 10 Feb 2015 05:06:21 -0000 @@ -139,7 +139,6 @@ int if_setgroupattribs(caddr_t); int if_clone_list(struct if_clonereq *); struct if_clone *if_clone_lookup(const char *, int *); -void if_congestion_clear(void *); int if_group_egress_build(void); void if_link_state_change_task(void *); @@ -155,6 +154,7 @@ int if_cloners_count; struct timeout net_tick_to; void net_tick(void *); int net_livelocked(void); +int ifq_congestion; /* * Network interface utility routines. @@ -787,33 +787,22 @@ if_clone_list(struct if_clonereq *ifcr) } /* - * set queue congestion marker and register timeout to clear it + * set queue congestion marker */ void -if_congestion(struct ifqueue *ifq) +if_congestion(void) { - /* Not currently needed, all callers check this */ - if (ifq->ifq_congestion) - return; + extern int ticks; - ifq->ifq_congestion = malloc(sizeof(struct timeout), M_TEMP, M_NOWAIT); - if (ifq->ifq_congestion == NULL) - return; - timeout_set(ifq->ifq_congestion, if_congestion_clear, ifq); - timeout_add(ifq->ifq_congestion, hz / 100); + ifq_congestion = ticks; } -/* - * clear the congestion flag - */ -void -if_congestion_clear(void *arg) +int +if_congested(void) { - struct ifqueue *ifq = arg; - struct timeout *to = ifq->ifq_congestion; + extern int ticks; - ifq->ifq_congestion = NULL; - free(to, M_TEMP, sizeof(*to)); + return (ticks - ifq_congestion <= (hz / 100)); } #define equal(a1, a2) \ Index: net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.160 diff -u -p -r1.160 if.h --- net/if.h 8 Feb 2015 06:00:52 -0000 1.160 +++ net/if.h 10 Feb 2015 05:06:21 -0000 @@ -463,6 +463,8 @@ void if_group_routechange(struct sockadd struct ifnet *ifunit(const char *); struct ifnet 
*if_get(unsigned int); void ifnewlladdr(struct ifnet *); +void if_congestion(void); +int if_congested(void); #endif /* _KERNEL */ Index: net/if_ppp.c =================================================================== RCS file: /cvs/src/sys/net/if_ppp.c,v retrieving revision 1.80 diff -u -p -r1.80 if_ppp.c --- net/if_ppp.c 19 Dec 2014 17:14:39 -0000 1.80 +++ net/if_ppp.c 10 Feb 2015 05:06:21 -0000 @@ -1485,8 +1485,7 @@ ppp_inproc(struct ppp_softc *sc, struct if (sc->sc_flags & SC_DEBUG) printf("%s: input queue full\n", ifp->if_xname); ifp->if_iqdrops++; - if (!inq->ifq_congestion) - if_congestion(inq); + if_congestion(); goto bad; } IF_ENQUEUE(inq, m); Index: net/if_spppsubr.c =================================================================== RCS file: /cvs/src/sys/net/if_spppsubr.c,v retrieving revision 1.130 diff -u -p -r1.130 if_spppsubr.c --- net/if_spppsubr.c 27 Jan 2015 03:17:36 -0000 1.130 +++ net/if_spppsubr.c 10 Feb 2015 05:06:21 -0000 @@ -614,8 +614,7 @@ sppp_input(struct ifnet *ifp, struct mbu if (debug) log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n", SPP_ARGS(ifp)); - if (!inq->ifq_congestion) - if_congestion(inq); + if_congestion(); goto drop; } IF_ENQUEUE(inq, m); Index: net/if_tun.c =================================================================== RCS file: /cvs/src/sys/net/if_tun.c,v retrieving revision 1.131 diff -u -p -r1.131 if_tun.c --- net/if_tun.c 21 Jan 2015 02:23:14 -0000 1.131 +++ net/if_tun.c 10 Feb 2015 05:06:21 -0000 @@ -907,8 +907,7 @@ tunwrite(dev_t dev, struct uio *uio, int splx(s); ifp->if_collisions++; m_freem(top); - if (!ifq->ifq_congestion) - if_congestion(ifq); + if_congestion(); return (ENOBUFS); } IF_ENQUEUE(ifq, top); Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.20 diff -u -p -r1.20 if_var.h --- net/if_var.h 9 Feb 2015 03:09:57 -0000 1.20 +++ net/if_var.h 10 Feb 2015 05:06:21 -0000 @@ -108,7 +108,6 @@ struct ifqueue { 
int ifq_maxlen; int ifq_drops; struct hfsc_if *ifq_hfsc; - struct timeout *ifq_congestion; }; /* @@ -320,8 +319,7 @@ do { \ if (IF_QFULL(ifq)) { \ IF_DROP(ifq); \ m_freem(m); \ - if (!(ifq)->ifq_congestion) \ - if_congestion(ifq); \ + if_congestion(); \ } else \ IF_ENQUEUE(ifq, m); \ } while (/* CONSTCOND */0) @@ -423,7 +421,6 @@ void if_clone_detach(struct if_clone *); int if_clone_create(const char *); int if_clone_destroy(const char *); -void if_congestion(struct ifqueue *); int sysctl_ifq(int *, u_int, void *, size_t *, void *, size_t, struct ifqueue *); Index: net/pf.c =================================================================== RCS file: /cvs/src/sys/net/pf.c,v retrieving revision 1.902 diff -u -p -r1.902 pf.c --- net/pf.c 9 Feb 2015 19:14:48 -0000 1.902 +++ net/pf.c 10 Feb 2015 05:06:21 -0000 @@ -222,7 +222,6 @@ int pf_compare_state_keys(struct pf_s struct pf_state *pf_find_state(struct pfi_kif *, struct pf_state_key_cmp *, u_int, struct mbuf *); int pf_src_connlimit(struct pf_state **); -int pf_check_congestion(struct ifqueue *); int pf_match_rcvif(struct mbuf *, struct pf_rule *); void pf_step_into_anchor(int *, struct pf_ruleset **, struct pf_rule **, struct pf_rule **); @@ -3072,7 +3071,6 @@ pf_test_rule(struct pf_pdesc *pd, struct struct tcphdr *th = pd->hdr.tcp; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule_actions act; - struct ifqueue *ifq = &ipintrq; u_short reason; int rewrite = 0; int tag = -1; @@ -3087,12 +3085,7 @@ pf_test_rule(struct pf_pdesc *pd, struct act.rtableid = pd->rdomain; SLIST_INIT(&rules); -#ifdef INET6 - if (pd->af == AF_INET6) - ifq = &ip6intrq; -#endif - - if (pd->dir == PF_IN && pf_check_congestion(ifq)) { + if (pd->dir == PF_IN && if_congested()) { REASON_SET(&reason, PFRES_CONGEST); return (PF_DROP); } @@ -6628,15 +6621,6 @@ done: } return (action); -} - -int -pf_check_congestion(struct ifqueue *ifq) -{ - if (ifq->ifq_congestion) - return (1); - else - return (0); } void Index: net/pipex.c 
=================================================================== RCS file: /cvs/src/sys/net/pipex.c,v retrieving revision 1.65 diff -u -p -r1.65 pipex.c --- net/pipex.c 19 Dec 2014 17:14:40 -0000 1.65 +++ net/pipex.c 10 Feb 2015 05:06:21 -0000 @@ -1227,8 +1227,7 @@ pipex_ip_input(struct mbuf *m0, struct p if (IF_QFULL(&ipintrq)) { IF_DROP(&ipintrq); ifp->if_collisions++; - if (!ipintrq.ifq_congestion) - if_congestion(&ipintrq); + if_congestion(); splx(s); goto drop; } @@ -1302,8 +1301,7 @@ pipex_ip6_input(struct mbuf *m0, struct if (IF_QFULL(&ip6intrq)) { IF_DROP(&ip6intrq); ifp->if_collisions++; - if (!ip6intrq.ifq_congestion) - if_congestion(&ip6intrq); + if_congestion(); splx(s); goto drop; }