On Thu, Feb 21, 2019 at 04:29:27PM -0300, Martin Pieuchot wrote:
> On 21/02/19(Thu) 14:19, David Gwynne wrote:
> > right now we add vlan_input as a possible input handler on the parent
> > interface, and if the packet is for a vlan we take it and pretend we
> > received it on the vlan interface by calling if_input against that mbuf.
> >
> > as mpi notes, the if input queue stuff looks like a lot of work,
> > especially for a single packet. my opinion is that we got away with
> > the if input stuff we've done to try and encourage an mpsafe network
> > stack because we amortised the cost of it over many packets off the
> > hardware ring. vlan does it a packet at a time.
> >
> > this moves the handling of vlan packets back into ether_input by
> > calling vlan_input directly on packets that are either marked as vlan
> > tagged or have a vlan ethertype. note that we have to do that anyway,
> > this just makes it explicit.
> >
> > vlan_input is then tweaked to implement all the important bits of if
> > input. part of what if input does is count the packets. because vlan
> > already has per cpu counters for bypassing queues on output, we can use
> > them again for input from any cpu. if i ever get round to making a
> > driver handle multiple rx rings this means we can rx vlan packets
> > concurrently, they don't get serialised to a single if input q.
> >
> > finally, hrvoje popovski has tested this diff and get's a significant
> > bump with it. on a machine that can forward 1100Kpps without vlan, it
> > goes from 790Kpps with vlan to 870Kpps. On a box that can do 730Kpps
> > without vlans, it goes from 550Kpps with vlan to 840Kpps. We're
> > still trying to figure that last one out, but it does appear to be
> > faster.
> >
> > thoughts? ok?
>
> Why do we need to move stuff to ether_input() if all we want is to
> bypass ifiq_input()? Isn't a 3 line diff enough^^ ?
Fair point. It turns out it's not quite three lines, but it's still
smaller.
> - ml_enqueue(&ml, m);
> - if_input(&ifv->ifv_if, &ml);
> + if_input_one(&ifv->ifv_if, m);
>
> I'm saying that because I'm afraid of the breakage that will happen if
> we remove the input handlers. So I'm not opposed to get rid of the
> handlers, but as you said we should consider all drivers to not break
> trunk on top of bridge on top of vlan or whatever crazy configuration
> people do.
I want to fold them up because there's semantic issues around which
handlers were added in which order, and I'd bet there's a small
performance win too. But it's mostly about the semantics.
> Another point to keep in mind if you're going to remove the handlers is:
> do we want to keep passing a single mbuf or was it a plan to pass the
> whole list?
My current thinking is you want to bundle mbufs on and off the
hardware as much as possible via lists of mbufs, but between layers
of pseudo interfaces it's better to dispatch each mbuf directly.
If the call stack becomes too deep (high?) then lists might be useful
for squashing that back down.
Index: if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.571
diff -u -p -r1.571 if.c
--- if.c 9 Jan 2019 01:14:21 -0000 1.571
+++ if.c 22 Feb 2019 02:13:03 -0000
@@ -895,11 +895,29 @@ if_ih_remove(struct ifnet *ifp, int (*in
}
void
-if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+if_input_one(struct ifnet *ifp, struct mbuf *m)
{
- struct mbuf *m;
struct ifih *ifih;
struct srp_ref sr;
+
+ /*
+ * Pass this mbuf to all input handlers of its
+ * interface until it is consumed.
+ */
+ SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
+ if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
+ break;
+ }
+ SRPL_LEAVE(&sr);
+
+ if (ifih == NULL)
+ m_freem(m);
+}
+
+void
+if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+{
+ struct mbuf *m;
int s;
if (ml_empty(ml))
@@ -922,20 +940,8 @@ if_input_process(struct ifnet *ifp, stru
*/
NET_RLOCK();
s = splnet();
- while ((m = ml_dequeue(ml)) != NULL) {
- /*
- * Pass this mbuf to all input handlers of its
- * interface until it is consumed.
- */
- SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
- if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
- break;
- }
- SRPL_LEAVE(&sr);
-
- if (ifih == NULL)
- m_freem(m);
- }
+ while ((m = ml_dequeue(ml)) != NULL)
+ if_input_one(ifp, m);
splx(s);
NET_RUNLOCK();
}
Index: if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.94
diff -u -p -r1.94 if_var.h
--- if_var.h 9 Jan 2019 01:14:21 -0000 1.94
+++ if_var.h 22 Feb 2019 02:13:03 -0000
@@ -334,6 +334,7 @@ void if_start(struct ifnet *);
int if_enqueue(struct ifnet *, struct mbuf *);
int if_enqueue_ifq(struct ifnet *, struct mbuf *);
void if_input(struct ifnet *, struct mbuf_list *);
+void if_input_one(struct ifnet *, struct mbuf *);
void if_input_process(struct ifnet *, struct mbuf_list *);
int if_input_local(struct ifnet *, struct mbuf *, sa_family_t);
int if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
Index: if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.183
diff -u -p -r1.183 if_vlan.c
--- if_vlan.c 15 Feb 2019 13:00:51 -0000 1.183
+++ if_vlan.c 22 Feb 2019 02:13:03 -0000
@@ -342,13 +342,16 @@ int
vlan_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
{
struct ifvlan *ifv;
+ struct ifnet *ifp;
struct ether_vlan_header *evl;
struct ether_header *eh;
SRPL_HEAD(, ifvlan) *tagh, *list;
struct srp_ref sr;
u_int tag;
- struct mbuf_list ml = MBUF_LIST_INITIALIZER();
u_int16_t etype;
+#if NBPFILTER > 0
+ caddr_t if_bpf;
+#endif
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
@@ -359,7 +362,6 @@ vlan_input(struct ifnet *ifp0, struct mb
} else if ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ)) {
if (m->m_len < sizeof(*evl) &&
(m = m_pullup(m, sizeof(*evl))) == NULL) {
- ifp0->if_ierrors++;
return (1);
}
@@ -373,12 +375,6 @@ vlan_input(struct ifnet *ifp0, struct mb
/* From now on ether_vtag is fine */
tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
- m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag);
-
- /* IEEE 802.1p has prio 0 and 1 swapped */
- if (m->m_pkthdr.pf.prio <= 1)
- m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio;
-
list = &tagh[TAG_HASH(tag)];
SRPL_FOREACH(ifv, &sr, list, ifv_list) {
if (ifp0->if_index == ifv->ifv_ifidx0 && tag == ifv->ifv_tag &&
@@ -395,6 +391,8 @@ vlan_input(struct ifnet *ifp0, struct mb
(IFF_UP|IFF_RUNNING))
goto drop;
+ /* The packet is ours now */
+
/*
* Having found a valid vlan interface corresponding to
* the given source interface and vlan tag, remove the
@@ -403,13 +401,34 @@ vlan_input(struct ifnet *ifp0, struct mb
if (m->m_flags & M_VLANTAG) {
m->m_flags &= ~M_VLANTAG;
} else {
- eh->ether_type = evl->evl_proto;
- memmove((char *)eh + EVL_ENCAPLEN, eh, sizeof(*eh));
+ memmove((char *)evl + EVL_ENCAPLEN, evl,
+ offsetof(struct ether_vlan_header, evl_encap_proto));
m_adj(m, EVL_ENCAPLEN);
}
- ml_enqueue(&ml, m);
- if_input(&ifv->ifv_if, &ml);
+ ifp = &ifv->ifv_if;
+ counters_pkt(ifp->if_counters,
+ ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
+
+ m->m_pkthdr.ph_ifidx = ifp->if_index;
+ m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+ m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag);
+ /* IEEE 802.1p has prio 0 and 1 swapped */
+ if (m->m_pkthdr.pf.prio <= 1)
+ m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio;
+
+#if NBPFILTER > 0
+ if_bpf = ifp->if_bpf;
+ if (if_bpf) {
+ if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) {
+ m_freem(m);
+ goto leave;
+ }
+ }
+#endif
+
+ if_input_one(ifp, m);
+leave:
SRPL_LEAVE(&sr);
return (1);