On Fri, Apr 28, 2017 at 19:58 +0200, Mike Belopuhov wrote: > This is the last bit required in order to actually be able to > use FQ-CoDel. It might require some polish, but I think there's > nothing exceptionally ugly barring the statistics interface. > > fqcodel_stats is constructed to have a few fields "compatible" > with the hfsc_class_stats so that byte and packet counters can > be extracted in the exactly the same way. > > The man page will follow. > > OK? >
Here's an updated version with some additional polish. I've already got FQ-CoDel bits in so they're no longer part of this diff and I've also removed "interval" and "target" keywords for now: they need to be properly documented and at the moment I can't come up with a comprehensible description so to be honest with myself I'm withholding those bits until later. OK? --- sbin/pfctl/parse.y | 64 ++++++++++++++++++++++++++++++++++++++++++++--- sbin/pfctl/pfctl.c | 6 ++--- sbin/pfctl/pfctl_parser.c | 26 ++++++++++++------- sbin/pfctl/pfctl_queue.c | 53 +++++++++++++++++++++++++++++---------- sys/conf/files | 1 + sys/net/pf_ioctl.c | 17 ++++++++++--- sys/net/pfvar.h | 11 ++++++++ 7 files changed, 146 insertions(+), 32 deletions(-) diff --git sbin/pfctl/parse.y sbin/pfctl/parse.y index 4f49b102609..736d418071f 100644 --- sbin/pfctl/parse.y +++ sbin/pfctl/parse.y @@ -34,11 +34,10 @@ #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <netinet/icmp6.h> #include <net/pfvar.h> -#include <net/hfsc.h> #include <arpa/inet.h> #include <stdio.h> #include <unistd.h> #include <stdlib.h> @@ -304,19 +303,29 @@ struct node_sc { struct node_queue_bw m1; u_int d; struct node_queue_bw m2; }; +struct node_fq { + u_int flows; + u_int quantum; + u_int target; + u_int interval; +}; + struct queue_opts { int marker; #define QOM_BWSPEC 0x01 #define QOM_PARENT 0x02 #define QOM_DEFAULT 0x04 #define QOM_QLIMIT 0x08 +#define QOM_FLOWS 0x10 +#define QOM_QUANTUM 0x20 struct node_sc realtime; struct node_sc linkshare; struct node_sc upperlimit; + struct node_fq flowqueue; char *parent; int flags; u_int qlimit; } queue_opts; @@ -457,11 +466,11 @@ int parseport(char *, struct range *r, int); %token REASSEMBLE ANCHOR %token SET OPTIMIZATION TIMEOUT LIMIT LOGINTERFACE BLOCKPOLICY RANDOMID %token SYNPROXY FINGERPRINTS NOSYNC DEBUG SKIP HOSTID %token ANTISPOOF FOR INCLUDE MATCHES %token BITMASK RANDOM SOURCEHASH ROUNDROBIN LEASTSTATES STATICPORT PROBABILITY -%token WEIGHT BANDWIDTH +%token WEIGHT BANDWIDTH FLOWS QUANTUM %token QUEUE PRIORITY QLIMIT RTABLE RDOMAIN MINIMUM BURST PARENT %token LOAD RULESET_OPTIMIZATION RTABLE RDOMAIN PRIO ONCE DEFAULT %token STICKYADDRESS MAXSRCSTATES MAXSRCNODES SOURCETRACK GLOBAL RULE %token MAXSRCCONN MAXSRCCONNRATE OVERLOAD FLUSH SLOPPY PFLOW %token TAGGED TAG IFBOUND FLOATING STATEPOLICY STATEDEFAULTS ROUTE @@ -1317,10 +1326,15 @@ queue_opts_l : queue_opts_l queue_opt queue_opt : BANDWIDTH scspec optscs { if (queue_opts.marker & QOM_BWSPEC) { yyerror("bandwidth cannot be respecified"); YYERROR; } + if (queue_opts.marker & QOM_FLOWS) { + yyerror("bandwidth cannot be specified for " + "a flow queue"); + YYERROR; + } queue_opts.marker |= QOM_BWSPEC; queue_opts.linkshare = $2; queue_opts.realtime= $3.realtime; queue_opts.upperlimit = $3.upperlimit; } @@ -1336,13 +1350,13 @@ queue_opt : BANDWIDTH scspec optscs { if (queue_opts.marker & QOM_DEFAULT) { yyerror("default cannot be respecified"); YYERROR; } queue_opts.marker |= QOM_DEFAULT; - queue_opts.flags |= HFSC_DEFAULTCLASS; + queue_opts.flags |= PFQS_DEFAULT; } - | QLIMIT NUMBER { + | QLIMIT NUMBER { if (queue_opts.marker & QOM_QLIMIT) { yyerror("qlimit cannot be respecified"); YYERROR; } if ($2 < 0 || $2 > 65535) { @@ -1350,10 +1364,41 @@ queue_opt : BANDWIDTH scspec optscs { YYERROR; } queue_opts.marker |= QOM_QLIMIT; queue_opts.qlimit = $2; } + | FLOWS NUMBER { + if (queue_opts.marker & QOM_FLOWS) { + yyerror("number of flows cannot be respecified"); + YYERROR; + } + if (queue_opts.marker & QOM_BWSPEC) { + yyerror("bandwidth cannot be specified for " + "a flow queue"); + YYERROR; + } + if ($2 < 0 || $2 > 32767) { + yyerror("number of flows out of range: " + "max 32767"); + YYERROR; + } + queue_opts.marker |= QOM_FLOWS; + queue_opts.flags |= PFQS_FLOWQUEUE; + queue_opts.flowqueue.flows = $2; + } + | QUANTUM NUMBER { + if (queue_opts.marker & QOM_QUANTUM) { + yyerror("quantum cannot be respecified"); + YYERROR; + } + if ($2 < 0 || $2 > 65535) { + yyerror("quantum out of range: max 65535"); + YYERROR; + } + queue_opts.marker |= QOM_QUANTUM; + queue_opts.flowqueue.quantum = $2; + } ; optscs : /* nada */ { } @@ -4296,10 +4341,14 @@ expand_queue(char *qname, struct node_if *interfaces, struct queue_opts *opts) { struct pf_queuespec qspec; LOOP_THROUGH(struct node_if, interface, interfaces, bzero(&qspec, sizeof(qspec)); + if ((opts->flags & PFQS_FLOWQUEUE) && opts->parent) { + yyerror("discipline doesn't support hierarchy"); + return (1); + } if (strlcpy(qspec.qname, qname, sizeof(qspec.qname)) >= sizeof(qspec.qname)) { yyerror("queuename too long"); return (1); } @@ -4329,10 +4378,15 @@ expand_queue(char *qname, struct node_if *interfaces, struct queue_opts *opts) qspec.upperlimit.m1.percent = opts->upperlimit.m1.bw_percent; qspec.upperlimit.m2.absolute = opts->upperlimit.m2.bw_absolute; qspec.upperlimit.m2.percent = opts->upperlimit.m2.bw_percent; qspec.upperlimit.d = opts->upperlimit.d; + qspec.flowqueue.flows = opts->flowqueue.flows; + qspec.flowqueue.quantum = opts->flowqueue.quantum; + qspec.flowqueue.interval = opts->flowqueue.interval; + qspec.flowqueue.target = opts->flowqueue.target; + qspec.flags = opts->flags; qspec.qlimit = opts->qlimit; if (pfctl_add_queue(pf, &qspec)) { yyerror("cannot add queue"); @@ -4983,10 +5037,11 @@ lookup(char *s) { "dup-to", DUPTO}, { "file", FILENAME}, { "fingerprints", FINGERPRINTS}, { "flags", FLAGS}, { "floating", FLOATING}, + { "flows", FLOWS}, { "flush", FLUSH}, { "for", FOR}, { "fragment", FRAGMENT}, { "from", FROM}, { "global", GLOBAL}, @@ -5034,10 +5089,11 @@ lookup(char *s) { "port", PORT}, { "prio", PRIO}, { "probability", PROBABILITY}, { "proto", PROTO}, { "qlimit", QLIMIT}, + { "quantum", QUANTUM}, { "queue", QUEUE}, { "quick", QUICK}, { "random", RANDOM}, { "random-id", RANDOMID}, { "rdomain", RDOMAIN}, diff --git sbin/pfctl/pfctl.c sbin/pfctl/pfctl.c index 9c39f37bb14..0ad7c1649eb 100644 --- sbin/pfctl/pfctl.c +++ sbin/pfctl/pfctl.c @@ -38,11 +38,10 @@ #include <net/if.h> #include <netinet/in.h> #include <net/pfvar.h> #include <arpa/inet.h> -#include <net/hfsc.h> #include <sys/sysctl.h> #include <err.h> #include <errno.h> #include <fcntl.h> @@ -1311,11 +1310,11 @@ pfctl_find_childqs(struct pfctl_qsitem *qi) } TAILQ_FOREACH(p, &qi->children, entries) flags |= pfctl_find_childqs(p); - if (qi->qs.flags & HFSC_DEFAULTCLASS && !TAILQ_EMPTY(&qi->children)) + if (qi->qs.flags & PFQS_DEFAULT && !TAILQ_EMPTY(&qi->children)) errx(1, "default queue %s is not a leaf queue", qi->qs.qname); return (flags); } @@ -1418,11 +1417,12 @@ pfctl_check_qassignments(struct pf_ruleset *rs) /* main ruleset: need find_childqs to populate qi->children */ if (rs->anchor->path[0] == 0) { TAILQ_FOREACH(qi, &rootqs, entries) { flags = pfctl_find_childqs(qi); - if (!(flags & HFSC_DEFAULTCLASS)) + if (!(qi->qs.flags & PFQS_FLOWQUEUE) && + !(flags & PFQS_DEFAULT)) errx(1, "no default queue specified"); } } TAILQ_FOREACH(r, rs->rules.active.ptr, entries) { diff --git sbin/pfctl/pfctl_parser.c sbin/pfctl/pfctl_parser.c index e241b11f6fc..2a843b03c64 100644 --- sbin/pfctl/pfctl_parser.c +++ sbin/pfctl/pfctl_parser.c @@ -39,11 +39,10 @@ #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip_icmp.h> #include <netinet/icmp6.h> #include <net/pfvar.h> -#include <net/hfsc.h> #include <arpa/inet.h> #include <ctype.h> #include <err.h> #include <errno.h> @@ -1195,22 +1194,31 @@ print_scspec(const char *prefix, struct pf_queue_scspec *sc) } void print_queuespec(struct pf_queuespec *q) { - /* hide the _root_ifname queues */ - if (q->qname[0] == '_') - return; printf("queue %s", q->qname); - if (q->parent[0] && q->parent[0] != '_') + if (q->parent[0]) printf(" parent %s", q->parent); else if (q->ifname[0]) printf(" on %s", q->ifname); - print_scspec(" bandwidth ", &q->linkshare); - print_scspec(", min ", &q->realtime); - print_scspec(", max ", &q->upperlimit); - if (q->flags & HFSC_DEFAULTCLASS) + if (q->flags & PFQS_FLOWQUEUE) { + printf(" flows %u", q->flowqueue.flows); + if (q->flowqueue.quantum > 0) + printf(" quantum %u", q->flowqueue.quantum); + if (q->flowqueue.interval > 0) + printf(" interval %ums", + q->flowqueue.interval / 1000000); + if (q->flowqueue.target > 0) + printf(" target %ums", + q->flowqueue.target / 1000000); + } else { + print_scspec(" bandwidth ", &q->linkshare); + print_scspec(", min ", &q->realtime); + print_scspec(", max ", &q->upperlimit); + } + if (q->flags & PFQS_DEFAULT) printf(" default"); if (q->qlimit) printf(" qlimit %u", q->qlimit); printf("\n"); } diff --git sbin/pfctl/pfctl_queue.c sbin/pfctl/pfctl_queue.c index 6e59fff7d22..5650061da4b 100644 --- sbin/pfctl/pfctl_queue.c +++ sbin/pfctl/pfctl_queue.c @@ -24,25 +24,30 @@ #include <netinet/in.h> #include <net/pfvar.h> #include <arpa/inet.h> #include <err.h> +#include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include <net/hfsc.h> +#include <net/fq_codel.h> #include "pfctl.h" #include "pfctl_parser.h" #define AVGN_MAX 8 #define STAT_INTERVAL 5 struct queue_stats { - struct hfsc_class_stats data; + union { + struct hfsc_class_stats hfsc; + struct fqcodel_stats fqc; + } data; int avgn; double avg_bytes; double avg_packets; u_int64_t prev_bytes; u_int64_t prev_packets; @@ -194,18 +199,37 @@ pfctl_print_queue_node(int dev, struct pfctl_queue_node *node, int opts) } void pfctl_print_queue_nodestat(int dev, const struct pfctl_queue_node *node) { + struct hfsc_class_stats *stats = + (struct hfsc_class_stats *)&node->qstats.data.hfsc; + struct fqcodel_stats *fqstats = + (struct fqcodel_stats *)&node->qstats.data.fqc; + printf(" [ pkts: %10llu bytes: %10llu " "dropped pkts: %6llu bytes: %6llu ]\n", - (unsigned long long)node->qstats.data.xmit_cnt.packets, - (unsigned long long)node->qstats.data.xmit_cnt.bytes, - (unsigned long long)node->qstats.data.drop_cnt.packets, - (unsigned long long)node->qstats.data.drop_cnt.bytes); - printf(" [ qlength: %3d/%3d ]\n", node->qstats.data.qlength, - node->qstats.data.qlimit); + (unsigned long long)stats->xmit_cnt.packets, + (unsigned long long)stats->xmit_cnt.bytes, + (unsigned long long)stats->drop_cnt.packets, + (unsigned long long)stats->drop_cnt.bytes); + if (node->qs.flags & PFQS_FLOWQUEUE) { + double avg = 0, dev = 0; + + if (fqstats->flows > 0) { + avg = (double)fqstats->delaysum / + (double)fqstats->flows; + dev = sqrt(fmax(0, (double)fqstats->delaysumsq / + (double)fqstats->flows) - avg * avg); + } + + printf(" [ qlength: %3d/%3d avg delay: %.3fms std-dev: %.3fms" + " flows: %3d ]\n", stats->qlength, stats->qlimit, + avg / 1000, dev / 1000, fqstats->flows); + } else + printf(" [ qlength: %3d/%3d ]\n", stats->qlength, + stats->qlimit); if (node->qstats.avgn < 2) return; printf(" [ measured: %7.1f packets/s, %s/s ]\n", @@ -214,23 +238,26 @@ pfctl_print_queue_nodestat(int dev, const struct pfctl_queue_node *node) } void update_avg(struct queue_stats *s) { + struct hfsc_class_stats *stats = + (struct hfsc_class_stats *)&s->data; + if (s->avgn > 0) { - if (s->data.xmit_cnt.bytes >= s->prev_bytes) + if (stats->xmit_cnt.bytes >= s->prev_bytes) s->avg_bytes = ((s->avg_bytes * (s->avgn - 1)) + - (s->data.xmit_cnt.bytes - s->prev_bytes)) / + (stats->xmit_cnt.bytes - s->prev_bytes)) / s->avgn; - if (s->data.xmit_cnt.packets >= s->prev_packets) + if (stats->xmit_cnt.packets >= s->prev_packets) s->avg_packets = ((s->avg_packets * (s->avgn - 1)) + - (s->data.xmit_cnt.packets - s->prev_packets)) / + (stats->xmit_cnt.packets - s->prev_packets)) / s->avgn; } - s->prev_bytes = s->data.xmit_cnt.bytes; - s->prev_packets = s->data.xmit_cnt.packets; + s->prev_bytes = stats->xmit_cnt.bytes; + s->prev_packets = stats->xmit_cnt.packets; if (s->avgn < AVGN_MAX) s->avgn++; } #define R2S_BUFS 8 diff --git sys/conf/files sys/conf/files index 66f5c23e285..dd37274ca6e 100644 --- sys/conf/files +++ sys/conf/files @@ -761,10 +761,11 @@ file tmpfs/tmpfs_vnops.c tmpfs file tmpfs/tmpfs_specops.c tmpfs file tmpfs/tmpfs_fifoops.c tmpfs & fifo file net/art.c art file net/bpf.c bpfilter needs-count file net/bpf_filter.c bpfilter +file net/fq_codel.c pf file net/if.c file net/ifq.c file net/if_ethersubr.c ether needs-flag file net/if_etherip.c etherip needs-flag file net/if_spppsubr.c sppp diff --git sys/net/pf_ioctl.c sys/net/pf_ioctl.c index a880590dffb..9f9ea6483b4 100644 --- sys/net/pf_ioctl.c +++ sys/net/pf_ioctl.c @@ -59,10 +59,11 @@ #include <net/if.h> #include <net/if_var.h> #include <net/route.h> #include <net/hfsc.h> +#include <net/fq_codel.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/in_pcb.h> #include <netinet/ip_var.h> @@ -596,12 +597,17 @@ pf_create_queues(void) continue; qif = malloc(sizeof(*qif), M_TEMP, M_WAITOK); qif->ifp = ifp; - qif->ifqops = ifq_hfsc_ops; - qif->pfqops = pfq_hfsc_ops; + if (q->flags & PFQS_FLOWQUEUE) { + qif->ifqops = ifq_fqcodel_ops; + qif->pfqops = pfq_fqcodel_ops; + } else { + qif->ifqops = ifq_hfsc_ops; + qif->pfqops = pfq_hfsc_ops; + } qif->disc = qif->pfqops->pfq_alloc(ifp); qif->next = list; list = qif; @@ -1086,11 +1092,16 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) if (qs == NULL) { error = EBUSY; break; } bcopy(qs, &pq->queue, sizeof(pq->queue)); - error = pfq_hfsc_ops->pfq_qstats(qs, pq->buf, &nbytes); + if (qs->flags & PFQS_FLOWQUEUE) + error = pfq_fqcodel_ops->pfq_qstats(qs, pq->buf, + &nbytes); + else + error = pfq_hfsc_ops->pfq_qstats(qs, pq->buf, + &nbytes); if (error == 0) pq->nbytes = nbytes; break; } diff --git sys/net/pfvar.h sys/net/pfvar.h index 981d778e37a..7f06261ced7 100644 --- sys/net/pfvar.h +++ sys/net/pfvar.h @@ -1308,25 +1308,36 @@ struct pf_queue_scspec { struct pf_queue_bwspec m1; struct pf_queue_bwspec m2; u_int d; }; +struct pf_queue_fqspec { + u_int flows; + u_int quantum; + u_int target; + u_int interval; +}; + struct pf_queuespec { TAILQ_ENTRY(pf_queuespec) entries; char qname[PF_QNAME_SIZE]; char parent[PF_QNAME_SIZE]; char ifname[IFNAMSIZ]; struct pf_queue_scspec realtime; struct pf_queue_scspec linkshare; struct pf_queue_scspec upperlimit; + struct pf_queue_fqspec flowqueue; struct pfi_kif *kif; u_int flags; u_int qlimit; u_int32_t qid; u_int32_t parent_qid; }; +#define PFQS_FLOWQUEUE 0x0001 +#define PFQS_DEFAULT 0x1000 /* maps to HFSC_DEFAULTCLASS */ + struct priq_opts { int flags; }; struct hfsc_opts { -- 2.12.2