This is a first stab at HFSC and FQ-CoDel integration, done by extending the PF queueing operations (pfq_ops) interface. With this, FQ-CoDel can be attached directly to an interface as well as serve as a replacement for the HFSC queue to improve its characteristics. In essence, in many setups (a router behind a modem) FQ-CoDel can benefit immensely from HFSC. The pf.conf grammar is simply a "queue" statement with an additional "flows" parameter, e.g.:
queue rootq on em0 bandwidth 10M flows 1000 default The default queue limit (50) is inherited from HFSC and can be adjusted for FQ-CoDel with the same 'qlimit' keyword: queue rootq on em0 bandwidth 10M flows 1000 qlimit 300 default There's however a limitation: the 'min' keyword specifying reserved bandwidth requires knowing which packet will be dequeued next ahead of time and thus is not supported with the "flows" specification. (At least for now). The polishing is still in progress, but I'd like to continue in tree if that's possible. I won't mind if someone does an independent test of course. --- sbin/pfctl/parse.y | 10 --- sbin/pfctl/pfctl_parser.c | 5 +- sbin/pfctl/pfctl_queue.c | 3 +- sys/conf/files | 4 +- sys/net/fq_codel.c | 146 ++++++++++++++++++++++++++++--------- sys/net/hfsc.c | 180 +++++++++++++++++++++++++++++++++++++--------- sys/net/pf_ioctl.c | 13 +++- sys/net/pfvar.h | 18 +++-- usr.bin/systat/pftop.c | 19 +++-- 9 files changed, 301 insertions(+), 97 deletions(-) diff --git sbin/pfctl/parse.y sbin/pfctl/parse.y index 63aaafeeea5..47deb3db3d8 100644 --- sbin/pfctl/parse.y +++ sbin/pfctl/parse.y @@ -1326,15 +1326,10 @@ queue_opts_l : queue_opts_l queue_opt queue_opt : BANDWIDTH scspec optscs { if (queue_opts.marker & QOM_BWSPEC) { yyerror("bandwidth cannot be respecified"); YYERROR; } - if (queue_opts.marker & QOM_FLOWS) { - yyerror("bandwidth cannot be specified for " - "a flow queue"); - YYERROR; - } queue_opts.marker |= QOM_BWSPEC; queue_opts.linkshare = $2; queue_opts.realtime= $3.realtime; queue_opts.upperlimit = $3.upperlimit; } @@ -1369,15 +1364,10 @@ queue_opt : BANDWIDTH scspec optscs { | FLOWS NUMBER { if (queue_opts.marker & QOM_FLOWS) { yyerror("number of flows cannot be respecified"); YYERROR; } - if (queue_opts.marker & QOM_BWSPEC) { - yyerror("bandwidth cannot be specified for " - "a flow queue"); - YYERROR; - } if ($2 < 1 || $2 > 32767) { yyerror("number of flows out of range: " "max 32767"); YYERROR; } diff --git 
sbin/pfctl/pfctl_parser.c sbin/pfctl/pfctl_parser.c index a69acb2e5b2..d9f63da99b0 100644 --- sbin/pfctl/pfctl_parser.c +++ sbin/pfctl/pfctl_parser.c @@ -1199,21 +1199,22 @@ print_queuespec(struct pf_queuespec *q) printf("queue %s", q->qname); if (q->parent[0]) printf(" parent %s", q->parent); else if (q->ifname[0]) printf(" on %s", q->ifname); - if (q->flags & PFQS_FLOWQUEUE) { + if (q->flowqueue.flows > 0) { printf(" flows %u", q->flowqueue.flows); if (q->flowqueue.quantum > 0) printf(" quantum %u", q->flowqueue.quantum); if (q->flowqueue.interval > 0) printf(" interval %ums", q->flowqueue.interval / 1000000); if (q->flowqueue.target > 0) printf(" target %ums", q->flowqueue.target / 1000000); - } else { + } + if (q->linkshare.m1.absolute || q->linkshare.m2.absolute) { print_scspec(" bandwidth ", &q->linkshare); print_scspec(", min ", &q->realtime); print_scspec(", max ", &q->upperlimit); } if (q->flags & PFQS_DEFAULT) diff --git sbin/pfctl/pfctl_queue.c sbin/pfctl/pfctl_queue.c index feeeba33f8d..0d1abce36c6 100644 --- sbin/pfctl/pfctl_queue.c +++ sbin/pfctl/pfctl_queue.c @@ -210,11 +210,12 @@ pfctl_print_queue_nodestat(int dev, const struct pfctl_queue_node *node) "dropped pkts: %6llu bytes: %6llu ]\n", (unsigned long long)stats->xmit_cnt.packets, (unsigned long long)stats->xmit_cnt.bytes, (unsigned long long)stats->drop_cnt.packets, (unsigned long long)stats->drop_cnt.bytes); - if (node->qs.flags & PFQS_FLOWQUEUE) { + if ((node->qs.flags & PFQS_FLOWQUEUE) && node->qs.parent_qid == 0 && + !(node->qs.flags & PFQS_DEFAULT)) { double avg = 0, dev = 0; if (fqstats->flows > 0) { avg = (double)fqstats->delaysum / (double)fqstats->flows; diff --git sys/conf/files sys/conf/files index af7b8ba7912..a499f6163d3 100644 --- sys/conf/files +++ sys/conf/files @@ -570,11 +570,12 @@ file net/pf_ruleset.c pf file net/pf_ioctl.c pf file net/pf_table.c pf file net/pf_osfp.c pf file net/pf_if.c pf file net/pf_lb.c pf -file net/hfsc.c ifnet +file net/hfsc.c pf +file net/fq_codel.c 
pf pseudo-device pflog: ifnet file net/if_pflog.c pflog needs-flag pseudo-device pfsync: ifnet file net/if_pfsync.c pfsync needs-flag @@ -761,11 +762,10 @@ file tmpfs/tmpfs_vnops.c tmpfs file tmpfs/tmpfs_specops.c tmpfs file tmpfs/tmpfs_fifoops.c tmpfs & fifo file net/art.c art file net/bpf.c bpfilter needs-count file net/bpf_filter.c bpfilter -file net/fq_codel.c pf file net/if.c file net/ifq.c file net/if_ethersubr.c ether needs-flag file net/if_etherip.c etherip needs-flag file net/if_spppsubr.c sppp diff --git sys/net/fq_codel.c sys/net/fq_codel.c index 8e9e065843a..8fa5ea85f78 100644 --- sys/net/fq_codel.c +++ sys/net/fq_codel.c @@ -95,10 +95,13 @@ SIMPLEQ_HEAD(flowq, flow); struct fqcodel { struct flowq newq; struct flowq oldq; struct flow *flows; + unsigned int qlength; + + struct ifnet *ifp; struct codel_params cparams; unsigned int nflows; unsigned int qlimit; @@ -113,45 +116,61 @@ struct fqcodel { }; unsigned int fqcodel_idx(unsigned int, const struct mbuf *); void *fqcodel_alloc(unsigned int, void *); void fqcodel_free(unsigned int, void *); -struct mbuf *fqcodel_enq(struct ifqueue *, struct mbuf *); -struct mbuf *fqcodel_deq_begin(struct ifqueue *, void **); -void fqcodel_deq_commit(struct ifqueue *, struct mbuf *, void *); -void fqcodel_purge(struct ifqueue *, struct mbuf_list *); +struct mbuf *fqcodel_if_enq(struct ifqueue *, struct mbuf *); +struct mbuf *fqcodel_if_deq_begin(struct ifqueue *, void **); +void fqcodel_if_deq_commit(struct ifqueue *, struct mbuf *, void *); +void fqcodel_if_purge(struct ifqueue *, struct mbuf_list *); + +struct mbuf *fqcodel_enq(struct fqcodel *, struct mbuf *); +struct mbuf *fqcodel_deq_begin(struct fqcodel *, void **, + struct mbuf_list *); +void fqcodel_deq_commit(struct fqcodel *, struct mbuf *, void *); +void fqcodel_purge(struct fqcodel *, struct mbuf_list *); /* * ifqueue glue. 
*/ static const struct ifq_ops fqcodel_ops = { fqcodel_idx, - fqcodel_enq, - fqcodel_deq_begin, - fqcodel_deq_commit, - fqcodel_purge, + fqcodel_if_enq, + fqcodel_if_deq_begin, + fqcodel_if_deq_commit, + fqcodel_if_purge, fqcodel_alloc, - fqcodel_free, + fqcodel_free }; const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops; void *fqcodel_pf_alloc(struct ifnet *); int fqcodel_pf_addqueue(void *, struct pf_queuespec *); void fqcodel_pf_free(void *); int fqcodel_pf_qstats(struct pf_queuespec *, void *, int *); +unsigned int fqcodel_pf_qlength(void *); +struct mbuf * fqcodel_pf_enqueue(void *, struct mbuf *); +struct mbuf * fqcodel_pf_deq_begin(void *, void **); +void fqcodel_pf_deq_commit(void *, struct mbuf *, void *); +void fqcodel_pf_purge(void *, struct mbuf_list *); /* * pf queue glue. */ static const struct pfq_ops fqcodel_pf_ops = { fqcodel_pf_alloc, fqcodel_pf_addqueue, fqcodel_pf_free, - fqcodel_pf_qstats + fqcodel_pf_qstats, + fqcodel_pf_qlength, + fqcodel_pf_enqueue, + fqcodel_pf_deq_begin, + fqcodel_pf_deq_commit, + fqcodel_pf_purge }; const struct pfq_ops * const pfq_fqcodel_ops = &fqcodel_pf_ops; /* Default aggregate queue depth */ @@ -511,13 +530,12 @@ classify_flow(struct fqcodel *fqc, struct mbuf *m) return (&fqc->flows[index]); } struct mbuf * -fqcodel_enq(struct ifqueue *ifq, struct mbuf *m) +fqcodel_enq(struct fqcodel *fqc, struct mbuf *m) { - struct fqcodel *fqc = ifq->ifq_q; struct flow *flow; unsigned int backlog = 0; int64_t now; int i; @@ -525,10 +543,11 @@ fqcodel_enq(struct ifqueue *ifq, struct mbuf *m) if (flow == NULL) return (m); codel_gettime(&now); codel_enqueue(&flow->cd, now, m); + fqc->qlength++; if (!flow->active) { SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry); flow->deficit = fqc->quantum; flow->active = 1; @@ -538,11 +557,11 @@ fqcodel_enq(struct ifqueue *ifq, struct mbuf *m) /* * Check the limit for all queues and remove a packet * from the longest one. 
*/ - if (ifq_len(ifq) >= fqcodel_qlimit) { + if (fqc->qlength >= fqcodel_qlimit) { for (i = 0; i < fqc->nflows; i++) { if (codel_backlog(&fqc->flows[i].cd) > backlog) { flow = &fqc->flows[i]; backlog = codel_backlog(&flow->cd); } @@ -615,32 +634,28 @@ next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq) return (first_flow(fqc, fq)); } struct mbuf * -fqcodel_deq_begin(struct ifqueue *ifq, void **cookiep) +fqcodel_deq_begin(struct fqcodel *fqc, void **cookiep, + struct mbuf_list *free_ml) { - struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); - struct ifnet *ifp = ifq->ifq_if; - struct fqcodel *fqc = ifq->ifq_q; struct flowq *fq; struct flow *flow; struct mbuf *m; int64_t now; if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0) - fqc->quantum = ifp->if_mtu + max_linkhdr; + fqc->quantum = fqc->ifp->if_mtu + max_linkhdr; codel_gettime(&now); for (flow = first_flow(fqc, &fq); flow != NULL; flow = next_flow(fqc, flow, &fq)) { - m = codel_dequeue(&flow->cd, &fqc->cparams, now, &free_ml, + m = codel_dequeue(&flow->cd, &fqc->cparams, now, free_ml, &fqc->drop_cnt.packets, &fqc->drop_cnt.bytes); - ifq_mfreeml(ifq, &free_ml); - if (m != NULL) { flow->deficit -= m->m_pkthdr.len; DPRINTF("%s: flow %u deficit %d\n", __func__, flow->id, flow->deficit); *cookiep = flow; @@ -650,50 +665,80 @@ fqcodel_deq_begin(struct ifqueue *ifq, void **cookiep) return (NULL); } void -fqcodel_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) +fqcodel_deq_commit(struct fqcodel *fqc, struct mbuf *m, void *cookie) { - struct fqcodel *fqc = ifq->ifq_q; struct flow *flow = cookie; + KASSERT(fqc->qlength > 0); + fqc->qlength--; + fqc->xmit_cnt.packets++; fqc->xmit_cnt.bytes += m->m_pkthdr.len; (void)codel_commit(&flow->cd, m); } void -fqcodel_purge(struct ifqueue *ifq, struct mbuf_list *ml) +fqcodel_purge(struct fqcodel *fqc, struct mbuf_list *ml) { - struct fqcodel *fqc = ifq->ifq_q; unsigned int i; for (i = 0; i < fqc->nflows; i++) codel_purge(&fqc->flows[i].cd, ml); } +struct mbuf 
* +fqcodel_if_enq(struct ifqueue *ifq, struct mbuf *m) +{ + return fqcodel_enq(ifq->ifq_q, m); +} + +struct mbuf * +fqcodel_if_deq_begin(struct ifqueue *ifq, void **cookiep) +{ + struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); + struct mbuf *m; + + m = fqcodel_deq_begin(ifq->ifq_q, cookiep, &free_ml); + ifq_mfreeml(ifq, &free_ml); + return (m); +} + +void +fqcodel_if_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) +{ + return fqcodel_deq_commit(ifq->ifq_q, m, cookie); +} + +void +fqcodel_if_purge(struct ifqueue *ifq, struct mbuf_list *ml) +{ + return fqcodel_purge(ifq->ifq_q, ml); +} + void * fqcodel_pf_alloc(struct ifnet *ifp) { struct fqcodel *fqc; fqc = malloc(sizeof(struct fqcodel), M_DEVBUF, M_WAITOK | M_ZERO); + SIMPLEQ_INIT(&fqc->newq); + SIMPLEQ_INIT(&fqc->oldq); + return (fqc); } int fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs) { struct ifnet *ifp = qs->kif->pfik_ifp; struct fqcodel *fqc = arg; - if (qs->parent_qid != 0) - return (EINVAL); - if (qs->flowqueue.flows == 0 || qs->flowqueue.flows > M_FLOWID_MASK) return (EINVAL); fqc->nflows = qs->flowqueue.flows; fqc->quantum = qs->flowqueue.quantum; @@ -719,10 +764,12 @@ fqcodel_pf_addqueue(void *arg, struct pf_queuespec *qs) for (i = 0; i < fqc->nflows; i++) fqc->flows[i].id = i; } #endif + fqc->ifp = ifp; + DPRINTF("fq-codel on %s: %d queues %d deep, quantum %d target %llums " "interval %llums\n", ifp->if_xname, fqc->nflows, fqc->qlimit, fqc->quantum, fqc->cparams.target / 1000000, fqc->cparams.interval / 1000000); @@ -789,24 +836,55 @@ fqcodel_pf_qstats(struct pf_queuespec *qs, void *ubuf, int *nbytes) *nbytes = sizeof(stats); return (0); } unsigned int +fqcodel_pf_qlength(void *fqc) +{ + return ((struct fqcodel *)fqc)->qlength; +} + +struct mbuf * +fqcodel_pf_enqueue(void *fqc, struct mbuf *m) +{ + return fqcodel_enq(fqc, m); +} + +struct mbuf * +fqcodel_pf_deq_begin(void *fqc, void **cookiep) +{ + struct mbuf_list free_ml = MBUF_LIST_INITIALIZER(); + struct mbuf *m; + + m = 
fqcodel_deq_begin(fqc, cookiep, &free_ml); + ml_purge(&free_ml); + return (m); +} + +void +fqcodel_pf_deq_commit(void *fqc, struct mbuf *m, void *cookie) +{ + return fqcodel_deq_commit(fqc, m, cookie); +} + +void +fqcodel_pf_purge(void *fqc, struct mbuf_list *ml) +{ + return fqcodel_purge(fqc, ml); +} + +unsigned int fqcodel_idx(unsigned int nqueues, const struct mbuf *m) { return (0); } void * fqcodel_alloc(unsigned int idx, void *arg) { - struct fqcodel *fqc = arg; - - SIMPLEQ_INIT(&fqc->newq); - SIMPLEQ_INIT(&fqc->oldq); - - return (fqc); + /* Allocation is done in fqcodel_pf_alloc */ + return (arg); } void fqcodel_free(unsigned int idx, void *arg) { diff --git sys/net/hfsc.c sys/net/hfsc.c index 8fddbe5d933..03a3c256cb6 100644 --- sys/net/hfsc.c +++ sys/net/hfsc.c @@ -58,13 +58,10 @@ #include <netinet/in.h> #include <net/pfvar.h> #include <net/hfsc.h> -/* need to provide dummies for hfsc-less kernels to reduce the if.h horror */ -#include "pf.h" -#if NPF > 0 /* * kernel internal service curve representation * coordinates are given by 64 bit unsigned integers. * x-axis: unit is clock count. for the intel x86 architecture, * the raw Pentium TSC (Timestamp Counter) value is used. 
@@ -124,12 +121,14 @@ struct hfsc_class { struct hfsc_class *cl_parent; /* parent class */ struct hfsc_class *cl_siblings; /* sibling classes */ struct hfsc_class *cl_children; /* child classes */ struct hfsc_classq cl_q; /* class queue structure */ -/* struct red *cl_red;*/ /* RED state */ - struct altq_pktattr *cl_pktattr; /* saved header used by ECN */ + + const struct pfq_ops *cl_qops; /* queue manager */ + void *cl_qdata; /* queue manager data */ + void *cl_cookie; /* queue manager cookie */ u_int64_t cl_total; /* total work in bytes */ u_int64_t cl_cumul; /* cumulative work in bytes done by real-time criteria */ u_int64_t cl_d; /* deadline */ @@ -285,20 +284,66 @@ const struct ifq_ops * const ifq_hfsc_ops = &hfsc_ops; void *hfsc_pf_alloc(struct ifnet *); int hfsc_pf_addqueue(void *, struct pf_queuespec *); void hfsc_pf_free(void *); int hfsc_pf_qstats(struct pf_queuespec *, void *, int *); +unsigned int hfsc_pf_qlength(void *); +struct mbuf * hfsc_pf_enqueue(void *, struct mbuf *); +struct mbuf * hfsc_pf_deq_begin(void *, void **); +void hfsc_pf_deq_commit(void *, struct mbuf *, void *); +void hfsc_pf_purge(void *, struct mbuf_list *); const struct pfq_ops hfsc_pf_ops = { hfsc_pf_alloc, hfsc_pf_addqueue, hfsc_pf_free, - hfsc_pf_qstats + hfsc_pf_qstats, + hfsc_pf_qlength, + hfsc_pf_enqueue, + hfsc_pf_deq_begin, + hfsc_pf_deq_commit, + hfsc_pf_purge }; const struct pfq_ops * const pfq_hfsc_ops = &hfsc_pf_ops; +/* + * shortcuts for repeated use + */ +static inline unsigned int +hfsc_class_qlength(struct hfsc_class *cl) +{ + return cl->cl_qops->pfq_qlength(cl->cl_qdata); +} + +static inline struct mbuf * +hfsc_class_enqueue(struct hfsc_class *cl, struct mbuf *m) +{ + return cl->cl_qops->pfq_enqueue(cl->cl_qdata, m); +} + +static inline struct mbuf * +hfsc_class_deq_begin(struct hfsc_class *cl) +{ + return cl->cl_qops->pfq_deq_begin(cl->cl_qdata, &cl->cl_cookie); +} + +static inline void +hfsc_class_deq_commit(struct hfsc_class *cl, struct mbuf *m) +{ + return 
cl->cl_qops->pfq_deq_commit(cl->cl_qdata, m, cl->cl_cookie); +} + +static inline void +hfsc_class_purge(struct hfsc_class *cl, struct mbuf_list *ml) +{ + /* Some H-FSC classes do not have a queue */ + if (cl->cl_qops == NULL) + return; + return cl->cl_qops->pfq_purge(cl->cl_qdata, ml); +} + u_int64_t hfsc_microuptime(void) { struct timeval tv; @@ -363,10 +408,11 @@ int hfsc_pf_addqueue(void *arg, struct pf_queuespec *q) { struct hfsc_if *hif = arg; struct hfsc_class *cl, *parent; struct hfsc_sc rtsc, lssc, ulsc; + int error = 0; KASSERT(hif != NULL); if (q->parent_qid == 0 && hif->hif_rootclass == NULL) { parent = hfsc_class_create(hif, NULL, NULL, NULL, NULL, @@ -403,10 +449,41 @@ hfsc_pf_addqueue(void *arg, struct pf_queuespec *q) cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc, parent, q->qlimit, q->flags, q->qid); if (cl == NULL) return (ENOMEM); + if (cl->cl_rsc != NULL) { + cl->cl_qops = &hfsc_pf_ops; + cl->cl_qdata = &cl->cl_q; + return (0); + } + + /* Attach a queue manager if specified */ + cl->cl_qops = pf_queue_manager(q); + /* Realtime class cannot be used with an external queue manager */ + if (cl->cl_qops == NULL || cl->cl_rsc != NULL) { + cl->cl_qops = pfq_hfsc_ops; + cl->cl_qdata = &cl->cl_q; + } else { + cl->cl_qdata = cl->cl_qops->pfq_alloc(q->kif->pfik_ifp); + if (cl->cl_qdata == NULL) { + cl->cl_qops = NULL; + hfsc_class_destroy(hif, cl); + return (ENOMEM); + } + error = cl->cl_qops->pfq_addqueue(cl->cl_qdata, q); + if (error) { + cl->cl_qops->pfq_free(cl->cl_qdata); + cl->cl_qops = NULL; + hfsc_class_destroy(hif, cl); + return (error); + } + } + + KASSERT(cl->cl_qops != NULL); + KASSERT(cl->cl_qdata != NULL); + return (0); } int hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes) @@ -443,13 +520,56 @@ hfsc_pf_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes) } void hfsc_pf_free(void *arg) { - struct hfsc_if *hif = arg; + /* nothing to do here */ +} - hfsc_free(0, hif); +unsigned int +hfsc_pf_qlength(void *xcq) +{ + struct 
hfsc_classq *cq = xcq; + + return ml_len(&cq->q); +} + +struct mbuf * +hfsc_pf_enqueue(void *xcq, struct mbuf *m) +{ + struct hfsc_classq *cq = xcq; + + if (ml_len(&cq->q) >= cq->qlimit) + return (m); + + ml_enqueue(&cq->q, m); + m->m_pkthdr.pf.prio = IFQ_MAXPRIO; + return (NULL); +} + +struct mbuf * +hfsc_pf_deq_begin(void *xcq, void **cookiep) +{ + struct hfsc_classq *cq = xcq; + + return MBUF_LIST_FIRST(&cq->q); +} + +void +hfsc_pf_deq_commit(void *xcq, struct mbuf *m, void *cookie) +{ + struct hfsc_classq *cq = xcq; + + ml_dequeue(&cq->q); +} + +void +hfsc_pf_purge(void *xcq, struct mbuf_list *ml) +{ + struct hfsc_classq *cq = xcq; + + ml_enlist(ml, &cq->q); } unsigned int hfsc_idx(unsigned int nqueues, const struct mbuf *m) { @@ -649,10 +769,14 @@ hfsc_class_destroy(struct hfsc_if *hif, struct hfsc_class *cl) if (cl == hif->hif_rootclass) hif->hif_rootclass = NULL; if (cl == hif->hif_defaultclass) hif->hif_defaultclass = NULL; + /* Free external queue manager resources */ + if (cl->cl_qops) + cl->cl_qops->pfq_free(cl->cl_qdata); + if (cl->cl_usc != NULL) pool_put(&hfsc_internal_sc_pl, cl->cl_usc); if (cl->cl_fsc != NULL) pool_put(&hfsc_internal_sc_pl, cl->cl_fsc); if (cl->cl_rsc != NULL) @@ -689,30 +813,28 @@ hfsc_nextclass(struct hfsc_class *cl) struct mbuf * hfsc_enq(struct ifqueue *ifq, struct mbuf *m) { struct hfsc_if *hif = ifq->ifq_q; struct hfsc_class *cl; + struct mbuf *dm; if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL || cl->cl_children != NULL) { cl = hif->hif_defaultclass; if (cl == NULL) return (m); - cl->cl_pktattr = NULL; } - if (ml_len(&cl->cl_q.q) >= cl->cl_q.qlimit) { + dm = hfsc_class_enqueue(cl, m); + if (dm != NULL) { /* drop occurred. mbuf needs to be freed */ - PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len); - return (m); + PKTCNTR_INC(&cl->cl_stats.drop_cnt, dm->m_pkthdr.len); + return (dm); } - ml_enqueue(&cl->cl_q.q, m); - m->m_pkthdr.pf.prio = IFQ_MAXPRIO; - /* successfully queued. 
*/ - if (ml_len(&cl->cl_q.q) == 1) + if (hfsc_class_qlength(cl) == 1) hfsc_set_active(hif, cl, m->m_pkthdr.len); return (NULL); } @@ -757,11 +879,11 @@ hfsc_deq_begin(struct ifqueue *ifq, void **cookiep) /* XXX HRTIMER plan hfsc_deferred precisely here. */ if (cl == NULL) return (NULL); } - m = MBUF_LIST_FIRST(&cl->cl_q.q); + m = hfsc_class_deq_begin(cl); KASSERT(m != NULL); hif->hif_microtime = cur_time; *cookiep = cl; return (m); @@ -778,23 +900,27 @@ hfsc_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie) /* check if the class was scheduled by real-time criteria */ if (cl->cl_rsc != NULL) realtime = (cl->cl_e <= cur_time); - m0 = ml_dequeue(&cl->cl_q.q); - KASSERT(m == m0); + hfsc_class_deq_commit(cl, m); PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len); hfsc_update_vf(cl, m->m_pkthdr.len, cur_time); if (realtime) cl->cl_cumul += m->m_pkthdr.len; - if (ml_len(&cl->cl_q.q) > 0) { + if (hfsc_class_qlength(cl) > 0) { + /* + * Realtime queue needs to look into the future and make + * calculations based on that. This is the reason it can't + * be used with an external queue manager. 
+ */ if (cl->cl_rsc != NULL) { /* update ed */ - m0 = MBUF_LIST_FIRST(&cl->cl_q.q); + m0 = hfsc_class_deq_begin(cl); next_len = m0->m_pkthdr.len; if (realtime) hfsc_update_ed(hif, cl, next_len); else @@ -826,20 +952,11 @@ hfsc_deferred(void *arg) } void hfsc_cl_purge(struct hfsc_if *hif, struct hfsc_class *cl, struct mbuf_list *ml) { - struct mbuf *m; - - if (ml_empty(&cl->cl_q.q)) - return; - - MBUF_LIST_FOREACH(&cl->cl_q.q, m) - PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len); - - ml_enlist(ml, &cl->cl_q.q); - + hfsc_class_purge(cl, ml); hfsc_update_vf(cl, 0, 0); /* remove cl from the actlist */ hfsc_set_passive(hif, cl); } void @@ -1595,6 +1712,5 @@ hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle) if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle) return (cl); return (NULL); } -#endif diff --git sys/net/pf_ioctl.c sys/net/pf_ioctl.c index 7cb7b92ed8a..19da2beaae3 100644 --- sys/net/pf_ioctl.c +++ sys/net/pf_ioctl.c @@ -596,11 +596,11 @@ pf_create_queues(void) continue; qif = malloc(sizeof(*qif), M_TEMP, M_WAITOK); qif->ifp = ifp; - if (q->flags & PFQS_FLOWQUEUE) { + if ((q->flags & PFQS_FLOWQUEUE) && !(q->flags & PFQS_DEFAULT)) { qif->ifqops = ifq_fqcodel_ops; qif->pfqops = pfq_fqcodel_ops; } else { qif->ifqops = ifq_hfsc_ops; qif->pfqops = pfq_hfsc_ops; @@ -688,10 +688,18 @@ pf_commit_queues(void) pf_free_queues(pf_queues_inactive); return (0); } +const struct pfq_ops * +pf_queue_manager(struct pf_queuespec *q) +{ + if (q->flags & PFQS_FLOWQUEUE) + return pfq_fqcodel_ops; + return (/* pfq_default_ops */ NULL); +} + #define PF_MD5_UPD(st, elm) \ MD5Update(ctx, (u_int8_t *) &(st)->elm, sizeof((st)->elm)) #define PF_MD5_UPD_STR(st, elm) \ MD5Update(ctx, (u_int8_t *) (st)->elm, strlen((st)->elm)) @@ -1086,11 +1094,12 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (qs == NULL) { error = EBUSY; break; } bcopy(qs, &pq->queue, sizeof(pq->queue)); - if (qs->flags & PFQS_FLOWQUEUE) + if ((qs->flags & 
PFQS_FLOWQUEUE) && qs->parent_qid == 0 && + !(qs->flags & PFQS_DEFAULT)) error = pfq_fqcodel_ops->pfq_qstats(qs, pq->buf, &nbytes); else error = pfq_hfsc_ops->pfq_qstats(qs, pq->buf, &nbytes); diff --git sys/net/pfvar.h sys/net/pfvar.h index e2f01130746..14563382060 100644 --- sys/net/pfvar.h +++ sys/net/pfvar.h @@ -45,10 +45,11 @@ #include <net/radix.h> #include <net/route.h> struct ip; struct ip6_hdr; +struct mbuf_list; #define PF_TCPS_PROXY_SRC ((TCP_NSTATES)+0) #define PF_TCPS_PROXY_DST ((TCP_NSTATES)+1) #define PF_MD5_DIGEST_LENGTH 16 @@ -1355,14 +1356,20 @@ struct hfsc_opts { u_int ulsc_m2; int flags; }; struct pfq_ops { - void *(*pfq_alloc)(struct ifnet *); - int (*pfq_addqueue)(void *, struct pf_queuespec *); - void (*pfq_free)(void *); - int (*pfq_qstats)(struct pf_queuespec *, void *, int *); + void * (*pfq_alloc)(struct ifnet *); + int (*pfq_addqueue)(void *, struct pf_queuespec *); + void (*pfq_free)(void *); + int (*pfq_qstats)(struct pf_queuespec *, void *, int *); + /* Queue manager ops */ + unsigned int (*pfq_qlength)(void *); + struct mbuf * (*pfq_enqueue)(void *, struct mbuf *); + struct mbuf * (*pfq_deq_begin)(void *, void **); + void (*pfq_deq_commit)(void *, struct mbuf *, void *); + void (*pfq_purge)(void *, struct mbuf_list *); }; struct pf_tagname { TAILQ_ENTRY(pf_tagname) entries; char name[PF_TAG_NAME_SIZE]; @@ -1804,10 +1811,13 @@ void pf_tag_ref(u_int16_t); void pf_tag_unref(u_int16_t); void pf_tag_packet(struct mbuf *, int, int); int pf_addr_compare(struct pf_addr *, struct pf_addr *, sa_family_t); +const struct pfq_ops + *pf_queue_manager(struct pf_queuespec *); + extern struct pf_status pf_status; extern struct pool pf_frent_pl, pf_frag_pl; struct pf_pool_limit { void *pp; diff --git usr.bin/systat/pftop.c usr.bin/systat/pftop.c index 93f43999749..0a9c2d51230 100644 --- usr.bin/systat/pftop.c +++ usr.bin/systat/pftop.c @@ -1621,27 +1621,26 @@ print_queue_node(struct pfctl_queue_node *node) tbprintf(" on %s ", node->qs.ifname); 
print_fld_tb(FLD_QUEUE); // XXX: missing min, max, burst tb_start(); - if (node->qs.flags & PFQS_FLOWQUEUE) { + rate = node->qs.linkshare.m2.absolute; + for (i = 0; rate > 9999 && i <= 3; i++) { + rtmp = rate / 1000; + if (rtmp <= 9999) + rtmp += (rate % 1000) / 500; + rate = rtmp; + } + if (rate == 0 && (node->qs.flags & PFQS_FLOWQUEUE)) { /* * XXX We're abusing the fact that 'flows' in * the fqcodel_stats structure is at the same * spot as the 'period' in hfsc_class_stats. */ tbprintf("%u", node->qstats.data.period); - } else { - rate = node->qs.linkshare.m2.absolute; - for (i = 0; rate > 9999 && i <= 3; i++) { - rtmp = rate / 1000; - if (rtmp <= 9999) - rtmp += (rate % 1000) / 500; - rate = rtmp; - } + } else tbprintf("%u%c", rate, unit[i]); - } print_fld_tb(FLD_BANDW); print_fld_str(FLD_SCHED, node->qs.flags & PFQS_FLOWQUEUE ? "flow" : "fifo"); -- 2.13.0