i've been working on RFC 2983 support, with extended functionality. rfc 2983 is "Differentiated Services and Tunnels", and discusses where prio values should go to and come from on tunnel ingress and egress. we currently support setting the packet prio on tunnel ingress using the txprio functionality. this diff adds egress, or rxprio, handling.
the rfc talks about selecting the outer or inner dscp value on ip tunnels. this diff adds this support, and allows config to ignore both the inner and outer prio fields, or hardcode it to a specific value like we do on tx. it also extends on the rfc by allowing the config to apply to other encapsulations, eg, vlan, bpe, and the mpls tunnels can support this too. the diff below shows the vlan and gif diffs. i have changes for other interfaces in a tree somewhere, but i'm happy to commit those on my own if everyone's ok with the diff below. ok? Index: sys/sys/sockio.h =================================================================== RCS file: /cvs/src/sys/sys/sockio.h,v retrieving revision 1.81 diff -u -p -r1.81 sockio.h --- sys/sys/sockio.h 10 Apr 2019 09:49:50 -0000 1.81 +++ sys/sys/sockio.h 14 Apr 2019 07:14:01 -0000 @@ -207,6 +207,9 @@ #define SIOCSLIFPHYECN _IOW('i', 199, struct ifreq) /* set ecn copying */ #define SIOCGLIFPHYECN _IOWR('i', 200, struct ifreq) /* get ecn copying */ +#define SIOCSRXHPRIO _IOW('i', 219, struct ifreq) /* set rx hdr prio */ +#define SIOCGRXHPRIO _IOWR('i', 219, struct ifreq) /* get rx hdr prio */ + #define SIOCSPWE3CTRLWORD _IOW('i', 220, struct ifreq) #define SIOCGPWE3CTRLWORD _IOWR('i', 220, struct ifreq) #define SIOCSPWE3FAT _IOW('i', 221, struct ifreq) Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.575 diff -u -p -r1.575 if.c --- sys/net/if.c 14 Apr 2019 06:57:00 -0000 1.575 +++ sys/net/if.c 14 Apr 2019 07:14:01 -0000 @@ -2168,6 +2168,7 @@ ifioctl(struct socket *so, u_long cmd, c case SIOCSVNETID: case SIOCSVNETFLOWID: case SIOCSTXHPRIO: + case SIOCSRXHPRIO: case SIOCSIFPAIR: case SIOCSIFPARENT: case SIOCDIFPARENT: Index: sys/net/if.h =================================================================== RCS file: /cvs/src/sys/net/if.h,v retrieving revision 1.200 diff -u -p -r1.200 if.h --- sys/net/if.h 10 Apr 2019 09:49:22 -0000 1.200 
+++ sys/net/if.h 14 Apr 2019 07:14:02 -0000 @@ -427,6 +427,7 @@ struct ifreq { #define IF_HDRPRIO_MAX IFQ_MAXPRIO #define IF_HDRPRIO_PACKET -1 /* use mbuf prio */ #define IF_HDRPRIO_PAYLOAD -2 /* copy payload prio */ +#define IF_HDRPRIO_OUTER -3 /* use outer prio */ #define IF_PWE3_ETHERNET 1 /* ethernet or ethernet tagged */ #define IF_PWE3_IP 2 /* IP layer 2 */ Index: sys/net/if_vlan.c =================================================================== RCS file: /cvs/src/sys/net/if_vlan.c,v retrieving revision 1.183 diff -u -p -r1.183 if_vlan.c --- sys/net/if_vlan.c 15 Feb 2019 13:00:51 -0000 1.183 +++ sys/net/if_vlan.c 14 Apr 2019 07:14:02 -0000 @@ -174,6 +174,7 @@ vlan_clone_create(struct if_clone *ifc, refcnt_init(&ifv->ifv_refcnt); ifv->ifv_prio = IF_HDRPRIO_PACKET; + ifv->ifv_rxprio = IF_HDRPRIO_OUTER; ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; ifp->if_xflags = IFXF_CLONED|IFXF_MPSAFE; @@ -373,11 +374,6 @@ vlan_input(struct ifnet *ifp0, struct mb /* From now on ether_vtag is fine */ tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); - m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag); - - /* IEEE 802.1p has prio 0 and 1 swapped */ - if (m->m_pkthdr.pf.prio <= 1) - m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio; list = &tagh[TAG_HASH(tag)]; SRPL_FOREACH(ifv, &sr, list, ifv_list) { @@ -408,6 +404,20 @@ vlan_input(struct ifnet *ifp0, struct mb m_adj(m, EVL_ENCAPLEN); } + switch (ifv->ifv_rxprio) { + case IF_HDRPRIO_PACKET: + break; + case IF_HDRPRIO_OUTER: + m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag); + break; + default: + m->m_pkthdr.pf.prio = ifv->ifv_rxprio; + /* IEEE 802.1p has prio 0 and 1 swapped */ + if (m->m_pkthdr.pf.prio <= 1) + m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio; + break; + } + ml_enqueue(&ml, m); if_input(&ifv->ifv_if, &ml); SRPL_LEAVE(&sr); @@ -736,6 +746,22 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd break; case SIOCGTXHPRIO: ifr->ifr_hdrprio = ifv->ifv_prio; + break; + + case SIOCSRXHPRIO: + if (ifr->ifr_hdrprio == 
IF_HDRPRIO_PACKET || + ifr->ifr_hdrprio == IF_HDRPRIO_OUTER) + ; + else if (ifr->ifr_hdrprio > IF_HDRPRIO_MAX || + ifr->ifr_hdrprio < IF_HDRPRIO_MIN) { + error = EINVAL; + break; + } + + ifv->ifv_rxprio = ifr->ifr_hdrprio; + break; + case SIOCGRXHPRIO: + ifr->ifr_hdrprio = ifv->ifv_rxprio; break; default: Index: sys/net/if_vlan_var.h =================================================================== RCS file: /cvs/src/sys/net/if_vlan_var.h,v retrieving revision 1.39 diff -u -p -r1.39 if_vlan_var.h --- sys/net/if_vlan_var.h 15 Feb 2019 13:00:51 -0000 1.39 +++ sys/net/if_vlan_var.h 14 Apr 2019 07:14:02 -0000 @@ -62,6 +62,7 @@ struct vlan_mc_entry { struct ifvlan { struct arpcom ifv_ac; /* make this an interface */ unsigned int ifv_ifidx0; /* parent interface of this vlan */ + int ifv_rxprio; struct ifv_linkmib { int ifvm_prio; /* prio to apply on packet leaving if */ u_int16_t ifvm_proto; /* encapsulation ethertype */ Index: sys/net/if_gif.c =================================================================== RCS file: /cvs/src/sys/net/if_gif.c,v retrieving revision 1.125 diff -u -p -r1.125 if_gif.c --- sys/net/if_gif.c 29 Nov 2018 00:14:29 -0000 1.125 +++ sys/net/if_gif.c 14 Apr 2019 07:14:02 -0000 @@ -107,6 +107,7 @@ struct gif_softc { uint16_t sc_df; int sc_ttl; int sc_txhprio; + int sc_rxhprio; int sc_ecn; }; @@ -156,6 +157,7 @@ gif_clone_create(struct if_clone *ifc, i sc->sc_df = htons(0); sc->sc_ttl = ip_defttl; sc->sc_txhprio = IF_HDRPRIO_PAYLOAD; + sc->sc_rxhprio = IF_HDRPRIO_PAYLOAD; sc->sc_ecn = ECN_ALLOWED; snprintf(ifp->if_xname, sizeof(ifp->if_xname), @@ -568,6 +570,23 @@ gif_ioctl(struct ifnet *ifp, u_long cmd, ifr->ifr_hdrprio = sc->sc_txhprio; break; + case SIOCSRXHPRIO: + if (ifr->ifr_hdrprio == IF_HDRPRIO_PAYLOAD || + ifr->ifr_hdrprio == IF_HDRPRIO_PACKET || + ifr->ifr_hdrprio == IF_HDRPRIO_OUTER) + ; /* ok, fall through */ + else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN || + ifr->ifr_hdrprio > IF_HDRPRIO_MAX) { + error = EINVAL; + break; + } + + 
sc->sc_rxhprio = ifr->ifr_hdrprio; + break; + case SIOCGRXHPRIO: + ifr->ifr_hdrprio = sc->sc_rxhprio; + break; + default: error = ENOTTY; break; @@ -783,6 +802,7 @@ gif_input(struct gif_tunnel *key, struct struct ifnet *ifp; void (*input)(struct ifnet *, struct mbuf *); uint8_t itos; + int rxhprio; /* IP-in-IP header is caused by tunnel mode, so skip gif lookup */ if (m->m_flags & M_TUNNEL) { @@ -803,6 +823,7 @@ gif_input(struct gif_tunnel *key, struct m_adj(m, *offp); /* this is ours now */ ifp = &sc->sc_if; + rxhprio = sc->sc_rxhprio; switch (proto) { case IPPROTO_IPV4: { @@ -848,10 +869,19 @@ gif_input(struct gif_tunnel *key, struct } #endif /* INET6 */ #ifdef MPLS - case IPPROTO_MPLS: + case IPPROTO_MPLS: { + uint32_t shim; + m = *mp = m_pullup(m, sizeof(shim)); + if (m == NULL) + return (IPPROTO_DONE); + + shim = bemtoh32(mtod(m, uint32_t *)); + itos = ((shim & MPLS_EXP_MASK) >> MPLS_EXP_OFFSET) << 5; + m->m_pkthdr.ph_family = AF_MPLS; input = mpls_input; break; + } #endif /* MPLS */ default: return (-1); @@ -860,6 +890,21 @@ gif_input(struct gif_tunnel *key, struct m->m_flags &= ~(M_MCAST|M_BCAST); m->m_pkthdr.ph_ifidx = ifp->if_index; m->m_pkthdr.ph_rtableid = ifp->if_rdomain; + + switch (rxhprio) { + case IF_HDRPRIO_PACKET: + /* nop */ + break; + case IF_HDRPRIO_PAYLOAD: + m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(itos); + break; + case IF_HDRPRIO_OUTER: + m->m_pkthdr.pf.prio = IFQ_TOS2PRIO(otos); + break; + default: + m->m_pkthdr.pf.prio = rxhprio; + break; + } #if NPF > 0 pf_pkt_addr_changed(m); Index: sbin/ifconfig/ifconfig.c =================================================================== RCS file: /cvs/src/sbin/ifconfig/ifconfig.c,v retrieving revision 1.399 diff -u -p -r1.399 ifconfig.c --- sbin/ifconfig/ifconfig.c 11 Apr 2019 11:32:24 -0000 1.399 +++ sbin/ifconfig/ifconfig.c 14 Apr 2019 07:14:03 -0000 @@ -139,6 +139,8 @@ struct ifencap { #define IFE_TXHPRIO_SET 0x1000 int ife_txhprio; +#define IFE_RXHPRIO_SET 0x2000 + int ife_rxhprio; }; struct ifreq 
ifr, ridreq; @@ -295,6 +297,8 @@ void delvnetflowid(const char *, int); void getvnetflowid(struct ifencap *); void gettxprio(struct ifencap *); void settxprio(const char *, int); +void getrxprio(struct ifencap *); +void setrxprio(const char *, int); void settunneldf(const char *, int); void settunnelnodf(const char *, int); void settunnelecn(const char *, int); @@ -503,6 +507,7 @@ const struct cmd { { "vnetflowid", 0, 0, setvnetflowid }, { "-vnetflowid", 0, 0, delvnetflowid }, { "txprio", NEXTARG, 0, settxprio }, + { "rxprio", NEXTARG, 0, setrxprio }, { "pppoedev", NEXTARG, 0, setpppoe_dev }, { "pppoesvc", NEXTARG, 0, setpppoe_svc }, { "-pppoesvc", 1, 0, setpppoe_svc }, @@ -4214,6 +4219,46 @@ settxprio(const char *val, int d) if (ioctl(s, SIOCSTXHPRIO, (caddr_t)&ifr) < 0) warn("SIOCSTXHPRIO"); } + +void +getrxprio(struct ifencap *ife) +{ + if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >= + sizeof(ifr.ifr_name)) + errx(1, "hdr prio: name is too long"); + + if (ioctl(s, SIOCGRXHPRIO, (caddr_t)&ifr) == -1) + return; + + ife->ife_flags |= IFE_RXHPRIO_SET; + ife->ife_rxhprio = ifr.ifr_hdrprio; +} + +void +setrxprio(const char *val, int d) +{ + const char *errmsg = NULL; + + if (strlcpy(ifr.ifr_name, name, sizeof(ifr.ifr_name)) >= + sizeof(ifr.ifr_name)) + errx(1, "rx prio: name is too long"); + + if (strcmp(val, "packet") == 0) + ifr.ifr_hdrprio = IF_HDRPRIO_PACKET; + else if (strcmp(val, "payload") == 0) + ifr.ifr_hdrprio = IF_HDRPRIO_PAYLOAD; + else if (strcmp(val, "outer") == 0) + ifr.ifr_hdrprio = IF_HDRPRIO_OUTER; + else { + ifr.ifr_hdrprio = strtonum(val, + IF_HDRPRIO_MIN, IF_HDRPRIO_MAX, &errmsg); + if (errmsg) + errx(1, "rx prio %s: %s", val, errmsg); + } + + if (ioctl(s, SIOCSRXHPRIO, (caddr_t)&ifr) < 0) + warn("SIOCSRXHPRIO"); +} #endif void @@ -4226,6 +4271,7 @@ getencap(void) getifparent(&ife); #ifndef SMALL gettxprio(&ife); + getrxprio(&ife); #endif if (ife.ife_flags == 0) @@ -4258,15 +4304,34 @@ getencap(void) #ifndef SMALL if (ife.ife_flags & 
IFE_TXHPRIO_SET) { + printf(" txprio "); switch (ife.ife_txhprio) { case IF_HDRPRIO_PACKET: - printf(" txprio packet"); + printf("packet"); break; case IF_HDRPRIO_PAYLOAD: - printf(" txprio payload"); + printf("payload"); + break; + default: + printf("%d", ife.ife_txhprio); + break; + } + } + + if (ife.ife_flags & IFE_RXHPRIO_SET) { + printf(" rxprio "); + switch (ife.ife_rxhprio) { + case IF_HDRPRIO_PACKET: + printf("packet"); + break; + case IF_HDRPRIO_PAYLOAD: + printf("payload"); + break; + case IF_HDRPRIO_OUTER: + printf("outer"); break; default: - printf(" txprio %d", ife.ife_txhprio); + printf("%d", ife.ife_rxhprio); break; } }