This is an implementation of the FQ-CoDel algorithm that relies
on Pf for configuration and for providing flow information via
its stateful inspection.

The purpose of FQ-CoDel is to provide fair sharing of bandwidth
between simultaneous connections (flows in FQ-CoDel terminology)
of different types in the presence of a network bottleneck, while
keeping latency low at the same time.

It achieves that by combining two algorithms: a modified Deficit
Round Robin (DRR++) scheduler services a pre-defined number of
flow queues with a specified quantum of service, giving all
queues a fair chance to send their data.  Individual queues are
managed with the CoDel algorithm, which controls the time packets
spend in the queue and actively drops packets that stay enqueued
for too long.
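
To make the scheduler part concrete, here is a condensed sketch of
the flow selection; the real first_flow() in the diff below also
re-scans both flow lists, but the deficit accounting is the same:

	/*
	 * Service new flows first; a flow that has exhausted its
	 * deficit gets a fresh quantum and is rotated to the tail
	 * of the old flows list before we look again.
	 */
	fq = !SIMPLEQ_EMPTY(&fqc->newq) ? &fqc->newq : &fqc->oldq;
	while ((flow = SIMPLEQ_FIRST(fq)) != NULL) {
		if (flow->deficit <= 0) {
			flow->deficit += fqc->quantum;
			SIMPLEQ_REMOVE_HEAD(fq, flowentry);
			SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
			    flowentry);
		} else
			break;	/* this flow has credit */
	}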

There's a lot that can be said about FQ-CoDel, but I'd like to
point curious readers to these two articles, which in my opinion
contain the best critique:
1) http://www.sciencedirect.com/science/article/pii/S1389128615002479
2) https://riteproject.files.wordpress.com/2015/12/p555-grigorescu.pdf

In my personal opinion, this comes in handy on a PPPoE router or
the like, where it provides a much better multi-user networking
experience by preventing different traffic flows from
cannibalizing each other.

A few implementation notes:

1) instead of hashing the packet contents we rely on the flow id
provided by Pf (a short sketch follows these notes).  First of
all, the OpenBSD queueing subsystem receives encapsulated layer 2
packets, and digging into the packet for TCP port numbers would
require massive and invasive procedures.  As for the potential
for hash collision attacks: since the number of buckets stays the
same and is a small number, finding a hash collision would be
easy regardless of the hash function used.

2) microuptime is expensive, but this is not a big deal for
APU2-like hardware, even on a 1Gbit link with a 100Mbit
bottleneck.

3) we do not perform the inverse square root computation at
runtime, but instead provide a pre-computed table of the first
399 values for the default interval (100ms).  If the interval
value is adjusted, we recompute the whole table for this instance
of fq-codel by scaling the values up (therefore the interval
cannot be smaller than 100ms); see the sketch after these notes.

4) this implementation uses the parameters from Linux, which are
also suggested by the RFC draft, but in reality there's not much
difference in behavior compared to FreeBSD, for instance.

5) dropping a packet during the enqueue is somewhat expensive,
and the Linux folks have made it much more aggressive.  This is
something I have a patch for as well, but it's not included here
since it requires changes to ifq.c.
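
To make note 1 concrete, the classification is just a lookup of
the flow id that Pf records in the packet header; this is what
classify_flow() in the diff below does:

	/* Use the Pf-provided flow id when it's valid, otherwise
	 * pick a random bucket; the modulo keeps the index within
	 * the fixed number of flow queues. */
	if (m->m_pkthdr.ph_flowid & M_FLOWID_VALID)
		index = (m->m_pkthdr.ph_flowid & M_FLOWID_MASK) %
		    fqc->nflows;
	else
		index = arc4random_uniform(fqc->nflows);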
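
And for note 3, the control law schedules the next drop at
interval/sqrt(number of drops) by indexing the pre-computed table;
codel_initparams() rescales the table in fixed point when a
non-default interval is configured:

	/* next drop time: interval / sqrt(drops), via the table */
	idx = min(cd->drops, nitems(codel_intervals) - 1);
	itv.tv_sec = 0;
	itv.tv_usec = cp->intervals[idx];
	timeradd(rts, &itv, &cd->next);

	/* rescale once by interval/100ms, in x1000 fixed point */
	mult = (interval * 1000) / codel_intervals[0];
	for (i = 0; i < nitems(codel_intervals); i++)
		cp->intervals[i] = (codel_intervals[i] * mult) / 1000;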

This is not hooked up to the build yet, as it lacks the Pf bits
and requires an ifq_free diff from dlg@.

OK?

diff --git sys/net/fq_codel.c sys/net/fq_codel.c
new file mode 100644
index 00000000000..96a7cd3beaa
--- /dev/null
+++ sys/net/fq_codel.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2017 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Codel - The Controlled-Delay Active Queue Management algorithm
+ * IETF draft-ietf-aqm-codel-07
+ *
+ * Based on the algorithm by Kathleen Nichols and Van Jacobson with
+ * improvements from Dave Taht and Eric Dumazet.
+ */
+
+/*
+ * The FlowQueue-CoDel Packet Scheduler and Active Queue Management
+ * IETF draft-ietf-aqm-fq-codel-06
+ *
+ * Based on the implementation by Rasool Al-Saadi, Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/pfvar.h>
+#include <net/fq_codel.h>
+
+/* #define FQCODEL_DEBUG 1 */
+
+#ifdef FQCODEL_DEBUG
+#define DPRINTF(x...)          printf(x)
+#else
+#define DPRINTF(x...)
+#endif
+
+struct codel {
+       struct mbuf_list q;
+
+       unsigned int     dropping:1;    /* Dropping state */
+       unsigned int     backlog:31;    /* Number of bytes in the queue */
+
+       unsigned short   drops;         /* Free running counter of drops */
+       unsigned short   ldrops;        /* Value from the previous run */
+
+       struct timeval   start;         /* The moment queue was above target */
+       struct timeval   next;          /* Next interval */
+};
+
+struct codel_params {
+       struct timeval   target;
+       struct timeval   interval;
+       int              quantum;
+
+       uint32_t        *intervals;
+};
+
+void            codel_initparams(struct codel_params *, unsigned int,
+                   unsigned int, int);
+void            codel_freeparams(struct codel_params *);
+void            codel_gettime(struct timeval *);
+unsigned int    codel_backlog(struct codel *);
+unsigned int    codel_qlength(struct codel *);
+void            codel_enqueue(struct codel *, struct timeval *,
+                   struct mbuf *);
+struct mbuf    *codel_dequeue(struct codel *, struct codel_params *,
+                   struct timeval *, struct mbuf_list *, unsigned int *,
+                   unsigned int *);
+struct mbuf    *codel_commit(struct codel *, struct mbuf *);
+void            codel_purge(struct codel *, struct mbuf_list *ml);
+
+struct flow {
+       struct codel             cd;
+       int                      active:1;
+       int                      deficit:31;
+#ifdef FQCODEL_DEBUG
+       uint16_t                 id;
+#endif
+       SIMPLEQ_ENTRY(flow)      flowentry;
+};
+SIMPLEQ_HEAD(flowq, flow);
+
+struct fqcodel {
+       struct flowq             newq;
+       struct flowq             oldq;
+
+       struct flow             *flows;
+
+       struct codel_params      cparams;
+
+       unsigned int             nflows;
+       unsigned int             qlimit;
+       int                      quantum;
+
+       unsigned int             flags;
+#define FQCF_FIXED_QUANTUM       0x1
+
+       /* stats */
+       struct fqcodel_pktcntr   xmit_cnt;
+       struct fqcodel_pktcntr   drop_cnt;
+};
+
+unsigned int    fqcodel_idx(unsigned int, const struct mbuf *);
+void           *fqcodel_alloc(unsigned int, void *);
+void            fqcodel_free(unsigned int, void *);
+struct mbuf    *fqcodel_enq(struct ifqueue *, struct mbuf *);
+struct mbuf    *fqcodel_deq_begin(struct ifqueue *, void **);
+void            fqcodel_deq_commit(struct ifqueue *, struct mbuf *, void *);
+void            fqcodel_purge(struct ifqueue *, struct mbuf_list *);
+
+/*
+ * ifqueue glue.
+ */
+
+static const struct ifq_ops fqcodel_ops = {
+       fqcodel_idx,
+       fqcodel_enq,
+       fqcodel_deq_begin,
+       fqcodel_deq_commit,
+       fqcodel_purge,
+       fqcodel_alloc,
+       fqcodel_free,
+};
+
+const struct ifq_ops * const ifq_fqcodel_ops = &fqcodel_ops;
+
+/* Default aggregate queue depth */
+static const unsigned int fqcodel_qlimit = 1024;
+
+/* Packet drop threshold */
+static const unsigned int fqcodel_threshold = 64;
+
+/*
+ * CoDel implementation
+ */
+
+/* Delay target, 5ms */
+static const unsigned int codel_target = 5000;
+
+/* Grace period after last drop, 8 * 100ms RTT */
+static const struct timeval codel_grace = { 1, 600000 };
+
+/* First 399 "100ms / sqrt(x)" intervals, in microseconds */
+static const uint32_t codel_intervals[] = {
+       100000, 70711, 57735, 50000, 44721, 40825, 37796, 35355, 33333, 31623,
+        30151, 28868, 27735, 26726, 25820, 25000, 24254, 23570, 22942, 22361,
+        21822, 21320, 20851, 20412, 20000, 19612, 19245, 18898, 18570, 18257,
+        17961, 17678, 17408, 17150, 16903, 16667, 16440, 16222, 16013, 15811,
+        15617, 15430, 15250, 15076, 14907, 14744, 14586, 14434, 14286, 14142,
+        14003, 13868, 13736, 13608, 13484, 13363, 13245, 13131, 13019, 12910,
+        12804, 12700, 12599, 12500, 12403, 12309, 12217, 12127, 12039, 11952,
+        11868, 11785, 11704, 11625, 11547, 11471, 11396, 11323, 11251, 11180,
+        11111, 11043, 10976, 10911, 10847, 10783, 10721, 10660, 10600, 10541,
+        10483, 10426, 10370, 10314, 10260, 10206, 10153, 10102, 10050, 10000,
+         9950,  9901,  9853,  9806,  9759,  9713,  9667,  9623,  9578,  9535,
+         9492,  9449,  9407,  9366,  9325,  9285,  9245,  9206,  9167,  9129,
+         9091,  9054,  9017,  8980,  8944,  8909,  8874,  8839,  8805,  8771,
+         8737,  8704,  8671,  8639,  8607,  8575,  8544,  8513,  8482,  8452,
+         8422,  8392,  8362,  8333,  8305,  8276,  8248,  8220,  8192,  8165,
+         8138,  8111,  8085,  8058,  8032,  8006,  7981,  7956,  7931,  7906,
+         7881,  7857,  7833,  7809,  7785,  7762,  7738,  7715,  7692,  7670,
+         7647,  7625,  7603,  7581,  7559,  7538,  7516,  7495,  7474,  7454,
+         7433,  7412,  7392,  7372,  7352,  7332,  7313,  7293,  7274,  7255,
+         7236,  7217,  7198,  7180,  7161,  7143,  7125,  7107,  7089,  7071,
+         7053,  7036,  7019,  7001,  6984,  6967,  6950,  6934,  6917,  6901,
+         6884,  6868,  6852,  6836,  6820,  6804,  6788,  6773,  6757,  6742,
+         6727,  6712,  6696,  6682,  6667,  6652,  6637,  6623,  6608,  6594,
+         6580,  6565,  6551,  6537,  6523,  6509,  6496,  6482,  6468,  6455,
+         6442,  6428,  6415,  6402,  6389,  6376,  6363,  6350,  6337,  6325,
+         6312,  6299,  6287,  6275,  6262,  6250,  6238,  6226,  6214,  6202,
+         6190,  6178,  6166,  6155,  6143,  6131,  6120,  6108,  6097,  6086,
+         6075,  6063,  6052,  6041,  6030,  6019,  6008,  5998,  5987,  5976,
+         5965,  5955,  5944,  5934,  5923,  5913,  5903,  5893,  5882,  5872,
+         5862,  5852,  5842,  5832,  5822,  5812,  5803,  5793,  5783,  5774,
+         5764,  5754,  5745,  5735,  5726,  5717,  5707,  5698,  5689,  5680,
+         5670,  5661,  5652,  5643,  5634,  5625,  5617,  5608,  5599,  5590,
+         5581,  5573,  5564,  5556,  5547,  5538,  5530,  5522,  5513,  5505,
+         5496,  5488,  5480,  5472,  5464,  5455,  5447,  5439,  5431,  5423,
+         5415,  5407,  5399,  5392,  5384,  5376,  5368,  5361,  5353,  5345,
+         5338,  5330,  5322,  5315,  5307,  5300,  5293,  5285,  5278,  5270,
+         5263,  5256,  5249,  5241,  5234,  5227,  5220,  5213,  5206,  5199,
+         5192,  5185,  5178,  5171,  5164,  5157,  5150,  5143,  5137,  5130,
+         5123,  5116,  5110,  5103,  5096,  5090,  5083,  5077,  5070,  5064,
+         5057,  5051,  5044,  5038,  5032,  5025,  5019,  5013,  5006
+};
+
+/* Convert microsecond counter to a timeval */
+static inline void
+timerset(struct timeval *tv, unsigned int us)
+{
+       tv->tv_sec = 0;
+       tv->tv_usec = us;
+       while (tv->tv_usec >= 1000000) {
+               tv->tv_sec++;
+               tv->tv_usec -= 1000000;
+       }
+}
+
+void
+codel_initparams(struct codel_params *cp, unsigned int target,
+    unsigned int interval, int quantum)
+{
+       uint64_t mult;
+       unsigned int i;
+
+       /*
+        * Update tracking intervals according to the user-supplied value
+        */
+       if (interval > codel_intervals[0]) {
+               /* Select either specified target or 10% of an interval */
+               timerset(&cp->target, MAX(target, interval / 10));
+               timerset(&cp->interval, interval);
+
+               /* convert from timeval to microseconds */
+               mult = cp->interval.tv_sec * 1000000 + cp->interval.tv_usec;
+               /* scale up */
+               mult *= 1000;
+               /* divide by the original interval */
+               mult /= codel_intervals[0];
+
+               /* Prepare table of intervals */
+               cp->intervals = mallocarray(nitems(codel_intervals),
+                   sizeof(codel_intervals[0]), M_DEVBUF, M_WAITOK | M_ZERO);
+               for (i = 0; i < nitems(codel_intervals); i++)
+                       cp->intervals[i] = (codel_intervals[i] * mult) / 1000;
+       } else {
+               timerset(&cp->target, MAX(target, codel_target));
+               timerset(&cp->interval, codel_intervals[0]);
+               cp->intervals = (uint32_t *)codel_intervals;
+       }
+
+       cp->quantum = quantum;
+}
+
+void
+codel_freeparams(struct codel_params *cp)
+{
+       if (cp->intervals != codel_intervals)
+               free(cp->intervals, M_DEVBUF, nitems(codel_intervals) *
+                   sizeof(codel_intervals[0]));
+       cp->intervals = NULL;
+}
+
+void
+codel_gettime(struct timeval *tvp)
+{
+       /* 1ms precision is required to make a decision */
+       microuptime(tvp);
+}
+
+unsigned int
+codel_backlog(struct codel *cd)
+{
+       return (cd->backlog);
+}
+
+unsigned int
+codel_qlength(struct codel *cd)
+{
+       return (ml_len(&cd->q));
+}
+
+void
+codel_enqueue(struct codel *cd, struct timeval *now, struct mbuf *m)
+{
+       memcpy(&m->m_pkthdr.ph_timestamp, now, sizeof(*now));
+
+       ml_enqueue(&cd->q, m);
+       cd->backlog += m->m_pkthdr.len;
+}
+
+/*
+ * Select the next interval according to the number of drops
+ * in the current one relative to the provided timestamp.
+ */
+static inline void
+control_law(struct codel *cd, struct codel_params *cp, struct timeval *rts)
+{
+       struct timeval itv;
+       unsigned int idx;
+
+       idx = min(cd->drops, nitems(codel_intervals) - 1);
+       itv.tv_sec = 0;
+       itv.tv_usec = cp->intervals[idx];
+       timeradd(rts, &itv, &cd->next);
+}
+
+/*
+ * Pick the next enqueued packet and determine the queueing delay
+ * as well as whether or not it's a good candidate for dropping
+ * from the queue.
+ *
+ * The decision whether to drop the packet or not is made based
+ * on the queueing delay target of 5ms and on the current queue
+ * length in bytes, which shouldn't be less than the amount of data
+ * that arrives in a typical interarrival time (MTU-sized packets
+ * arriving spaced by the amount of time it takes to send such a
+ * packet on the bottleneck).
+ */
+static inline struct mbuf *
+codel_next_packet(struct codel *cd, struct codel_params *cp,
+    struct timeval *now, int *drop)
+{
+       struct timeval delay;
+       struct mbuf *m;
+
+       *drop = 0;
+
+       m = MBUF_LIST_FIRST(&cd->q);
+       if (m == NULL) {
+               KASSERT(cd->backlog == 0);
+               /* Empty queue, reset interval */
+               timerclear(&cd->start);
+               return (NULL);
+       }
+
+       timersub(now, &m->m_pkthdr.ph_timestamp, &delay);
+       if (timercmp(&delay, &cp->target, <) || cd->backlog <= cp->quantum) {
+               /*
+                * Went below target - stay below for at least one interval
+                */
+               timerclear(&cd->start);
+               return (m);
+       }
+
+       if (!timerisset(&cd->start)) {
+               /*
+                * Just went above from below.  If we stay above the
+                * target for at least RTT we'll say it's ok to drop.
+                */
+               timeradd(now, &cp->interval, &cd->start);
+       } else if (timercmp(now, &cd->start, >)) {
+               *drop = 1;
+       }
+       return (m);
+}
+
+enum { ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };
+
+static inline int
+codel_state_change(struct codel *cd, struct codel_params *cp,
+    struct timeval *now, struct mbuf *m, int drop, int state)
+{
+       if (state == FIRSTDROP)
+               return (ACCEPTING);
+
+       if (cd->dropping) {
+               if (!drop)
+                       return (RECOVERY);
+               else if (timercmp(now, &cd->next, >=))
+                       return (state == CONTROL ? DROPPING : CONTROL);
+       } else if (drop)
+               return (FIRSTDROP);
+
+       if (m == NULL)
+               return (RECOVERY);
+
+       return (ACCEPTING);
+}
+
+struct mbuf *
+codel_dequeue(struct codel *cd, struct codel_params *cp, struct timeval *now,
+    struct mbuf_list *ml, unsigned int *dpkts, unsigned int *dbytes)
+{
+       struct timeval diff;
+       struct mbuf *m;
+       unsigned short delta;
+       int drop, state, done = 0;
+
+       *dpkts = *dbytes = 0;
+
+       state = cd->dropping ? DROPPING : ACCEPTING;
+
+       while (!done) {
+               m = codel_next_packet(cd, cp, now, &drop);
+               state = codel_state_change(cd, cp, now, m, drop, state);
+
+               switch (state) {
+               case FIRSTDROP:
+                       m = codel_commit(cd, m);
+                       ml_enqueue(ml, m);
+
+                       (*dpkts)++;
+                       *dbytes += m->m_pkthdr.len;
+
+                       cd->dropping = 1;
+
+                       /*
+                        * If min went above target close to when we last went
+                        * below it, assume that the drop rate that controlled
+                        * the queue on the last cycle is a good starting point
+                        * to control it now.
+                        */
+                       delta = cd->drops - cd->ldrops;
+                       if (delta > 1) {
+                               /*
+                                * If we're still within the grace period and
+                                * not meeting our delay target we treat this
+                                * condition as a continuation of the previous
+                                * interval and shrink it further.
+                                */
+                               timersub(now, &cd->next, &diff);
+                               if (timercmp(now, &cd->next, <) ||
+                                   timercmp(&diff, &codel_grace, <))
+                                       cd->drops = delta;
+                               else
+                                       cd->drops = 1;
+                       } else
+                               cd->drops = 1;
+                       control_law(cd, cp, now);
+                       cd->ldrops = cd->drops;
+
+                       /* fetches the next packet and goes to ACCEPTING */
+                       break;
+               case DROPPING:
+                       /*
+                        * It's time for the next drop. Drop the current
+                        * packet and dequeue the next. The dequeue might
+                        * take us out of dropping state. If not, schedule
+                        * the next drop. A large backlog might result in
+                        * drop rates so high that the next drop should
+                        * happen now, hence the while loop.
+                        */
+                       m = codel_commit(cd, m);
+                       ml_enqueue(ml, m);
+                       cd->drops++;
+
+                       (*dpkts)++;
+                       *dbytes += m->m_pkthdr.len;
+
+                       /* fetches the next packet and goes to CONTROL */
+                       break;
+               case CONTROL:
+                       if (drop) {
+                               control_law(cd, cp, &cd->next);
+                               continue;
+                       }
+                       /* FALLTHROUGH */
+               case RECOVERY:
+                       cd->dropping = 0;
+                       /* FALLTHROUGH */
+               case ACCEPTING:
+                       done = 1;
+                       break;
+               }
+       }
+
+       return (m);
+}
+
+struct mbuf *
+codel_commit(struct codel *cd, struct mbuf *m)
+{
+       struct mbuf *n;
+
+       n = ml_dequeue(&cd->q);
+       if (m)
+               KASSERT(n == m);
+       KASSERT(n != NULL);
+       KASSERT(cd->backlog >= n->m_pkthdr.len);
+       cd->backlog -= n->m_pkthdr.len;
+       return (n);
+}
+
+void
+codel_purge(struct codel *cd, struct mbuf_list *ml)
+{
+       ml_enlist(ml, &cd->q);
+       cd->backlog = 0;
+}
+
+/*
+ * FQ-CoDel implementation
+ */
+
+static inline struct flow *
+classify_flow(struct fqcodel *fqc, struct mbuf *m)
+{
+       unsigned int index;
+
+       if (m->m_pkthdr.ph_flowid & M_FLOWID_VALID)
+               index = (m->m_pkthdr.ph_flowid & M_FLOWID_MASK) % fqc->nflows;
+       else
+               index = arc4random_uniform(fqc->nflows);
+
+       DPRINTF("%s: %u\n", __func__, index);
+
+       return (&fqc->flows[index]);
+}
+
+struct mbuf *
+fqcodel_enq(struct ifqueue *ifq, struct mbuf *m)
+{
+       struct fqcodel *fqc = ifq->ifq_q;
+       struct flow *flow;
+       struct timeval now;
+       unsigned int backlog = 0;
+       int i;
+
+       flow = classify_flow(fqc, m);
+       if (flow == NULL)
+               return (m);
+
+       codel_gettime(&now);
+       codel_enqueue(&flow->cd, &now, m);
+
+       if (!flow->active) {
+               SIMPLEQ_INSERT_TAIL(&fqc->newq, flow, flowentry);
+               flow->deficit = fqc->quantum;
+               flow->active = 1;
+               DPRINTF("%s: flow %u active deficit %d\n", __func__,
+                   flow->id, flow->deficit);
+       }
+
+       /*
+        * Check the limit for all queues and remove a packet
+        * from the largest one, start with the queue for the
+        * unclassified traffic.
+        */
+       if (ifq_len(ifq) >= fqcodel_qlimit) {
+               for (i = 0; i < fqc->nflows; i++) {
+                       if (codel_backlog(&fqc->flows[i].cd) > backlog) {
+                               flow = &fqc->flows[i];
+                               backlog = codel_backlog(&flow->cd);
+                       }
+               }
+               KASSERT(flow != NULL);
+               m = codel_commit(&flow->cd, NULL);
+               DPRINTF("%s: dropping from flow %u\n", __func__,
+                   flow->id);
+               return (m);
+       }
+
+       return (NULL);
+}
+
+static inline struct flowq *
+select_queue(struct fqcodel *fqc)
+{
+       struct flowq *fq = NULL;
+
+       if (!SIMPLEQ_EMPTY(&fqc->newq))
+               fq = &fqc->newq;
+       else if (!SIMPLEQ_EMPTY(&fqc->oldq))
+               fq = &fqc->oldq;
+       return (fq);
+}
+
+static inline struct flow *
+first_flow(struct fqcodel *fqc, struct flowq **fq)
+{
+       struct flow *flow;
+
+       while ((*fq = select_queue(fqc)) != NULL) {
+               while ((flow = SIMPLEQ_FIRST(*fq)) != NULL) {
+                       if (flow->deficit <= 0) {
+                               flow->deficit += fqc->quantum;
+                               SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
+                               SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow,
+                                   flowentry);
+                               DPRINTF("%s: flow %u deficit %d\n", __func__,
+                                   flow->id, flow->deficit);
+                       } else
+                               return (flow);
+               }
+       }
+
+       return (NULL);
+}
+
+static inline struct flow *
+next_flow(struct fqcodel *fqc, struct flow *flow, struct flowq **fq)
+{
+       SIMPLEQ_REMOVE_HEAD(*fq, flowentry);
+
+       if (*fq == &fqc->newq && !SIMPLEQ_EMPTY(&fqc->oldq)) {
+               /* A packet was dropped, starve the queue */
+               SIMPLEQ_INSERT_TAIL(&fqc->oldq, flow, flowentry);
+               DPRINTF("%s: flow %u ->oldq deficit %d\n", __func__,
+                   flow->id, flow->deficit);
+       } else {
+               /* A packet was dropped on a starved queue, disable it */
+               flow->active = 0;
+               DPRINTF("%s: flow %u inactive deficit %d\n", __func__,
+                   flow->id, flow->deficit);
+       }
+
+       return (first_flow(fqc, fq));
+}
+
+struct mbuf *
+fqcodel_deq_begin(struct ifqueue *ifq, void **cookiep)
+{
+       struct timeval now;
+       struct ifnet *ifp = ifq->ifq_if;
+       struct fqcodel *fqc = ifq->ifq_q;
+       struct flowq *fq;
+       struct flow *flow;
+       struct mbuf *m;
+       unsigned int dpkts, dbytes;
+
+       if ((fqc->flags & FQCF_FIXED_QUANTUM) == 0)
+               fqc->quantum = ifp->if_mtu + max_linkhdr;
+
+       codel_gettime(&now);
+
+       for (flow = first_flow(fqc, &fq); flow != NULL;
+            flow = next_flow(fqc, flow, &fq)) {
+               m = codel_dequeue(&flow->cd, &fqc->cparams, &now,
+                   &ifq->ifq_free, &dpkts, &dbytes);
+
+               if (dpkts > 0) {
+                       KASSERT(ifq->ifq_len >= dpkts);
+                       ifq->ifq_len -= dpkts;
+                       ifq->ifq_qdrops += dpkts;
+                       fqc->drop_cnt.packets += dpkts;
+                       fqc->drop_cnt.bytes += dbytes;
+               }
+
+               if (m != NULL) {
+                       flow->deficit -= m->m_pkthdr.len;
+                       DPRINTF("%s: flow %u deficit %d\n", __func__,
+                           flow->id, flow->deficit);
+                       *cookiep = flow;
+                       return (m);
+               }
+       }
+
+       return (NULL);
+}
+
+void
+fqcodel_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
+{
+       struct fqcodel *fqc = ifq->ifq_q;
+       struct flow *flow = cookie;
+
+       fqc->xmit_cnt.packets++;
+       fqc->xmit_cnt.bytes += m->m_pkthdr.len;
+
+       (void)codel_commit(&flow->cd, m);
+}
+
+void
+fqcodel_purge(struct ifqueue *ifq, struct mbuf_list *ml)
+{
+       struct fqcodel *fqc = ifq->ifq_q;
+       unsigned int i;
+
+       for (i = 0; i < fqc->nflows; i++)
+               codel_purge(&fqc->flows[i].cd, ml);
+}
+
+unsigned int
+fqcodel_idx(unsigned int nqueues, const struct mbuf *m)
+{
+       return (0);
+}
+
+void *
+fqcodel_alloc(unsigned int idx, void *arg)
+{
+       struct fqcodel *fqc = arg;
+
+       SIMPLEQ_INIT(&fqc->newq);
+       SIMPLEQ_INIT(&fqc->oldq);
+
+       return (fqc);
+}
+
+void
+fqcodel_free(unsigned int idx, void *arg)
+{
+       /* nothing to do here */
+}
diff --git sys/net/fq_codel.h sys/net/fq_codel.h
new file mode 100644
index 00000000000..7d79f5bcc7e
--- /dev/null
+++ sys/net/fq_codel.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017 Mike Belopuhov
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _NET_FQ_CODEL_H_
+#define _NET_FQ_CODEL_H_
+
+/* compatible with hfsc_pktcntr */
+struct fqcodel_pktcntr {
+       uint64_t                packets;
+       uint64_t                bytes;
+};
+
+struct fqcodel_stats {
+       struct fqcodel_pktcntr  xmit_cnt;
+       struct fqcodel_pktcntr  drop_cnt;
+
+       uint32_t                qlength;
+       uint32_t                qlimit;
+
+       uint32_t                flows;
+       uint32_t                _unused;   /* padding */
+
+       uint32_t                target;
+       uint32_t                interval;
+
+       uint32_t                minqlen;
+       uint32_t                maxqlen;
+
+       /* the values below are used to calculate standard deviation */
+       uint64_t                qlensum;   /* sum of queue lengths */
+       uint64_t                qlensumsq; /* sum of squared queue lengths */
+};
+
+#ifdef _KERNEL
+extern const struct ifq_ops * const ifq_fqcodel_ops;
+#endif /* _KERNEL */
+
+#endif /* _NET_FQ_CODEL_H_ */
