On Fri, Feb 22, 2019 at 09:56:58AM -0300, Martin Pieuchot wrote:
> On 22/02/19(Fri) 15:01, David Gwynne wrote:
> > On Thu, Feb 21, 2019 at 04:29:27PM -0300, Martin Pieuchot wrote:
> > > On 21/02/19(Thu) 14:19, David Gwynne wrote:
> > > > right now we add vlan_input as a possible input handler on the parent
> > > > interface, and if the packet is for a vlan we take it and pretend we
> > > > received it on the vlan interface by calling if_input against that mbuf.
> > > >
> > > > as mpi notes, the if input queue stuff looks like a lot of work,
> > > > especially for a single packet. my opinion is that we got away with
> > > > the if input stuff we've done to try and encourage an mpsafe network
> > > > stack because we amortised the cost of it over many packets off the
> > > > hardware ring. vlan does it a packet at a time.
> > > >
> > > > this moves the handling of vlan packets back into ether_input by
> > > > calling vlan_input directly on packets that are either marked as vlan
> > > > tagged or have a vlan ethertype. note that we have to do that anyway,
> > > > this just makes it explicit.
> > > >
> > > > vlan_input is then tweaked to implement all the important bits of if
> > > > input. part of what if input does is count the packets. because vlan
> > > > already has per cpu counters for bypassing queues on output, we can use
> > > > them again for input from any cpu. if i ever get round to making a
> > > > driver handle multiple rx rings this means we can rx vlan packets
> > > > concurrently, they don't get serialised to a single if input q.
> > > >
> > > > finally, hrvoje popovski has tested this diff and get's a significant
> > > > bump with it. on a machine that can forward 1100Kpps without vlan, it
> > > > goes from 790Kpps with vlan to 870Kpps. On a box that can do 730Kpps
> > > > without vlans, it goes from 550Kpps with vlan to 840Kpps. We're
> > > > still trying to figure that last one out, but it does appear to be
> > > > faster.
> > > >
> > > > thoughts? ok?
> > >
> > > Why do we need to move stuff to ether_input() if all we want is to
> > > bypass ifiq_input()? Isn't a 3 line diff enough^^ ?
> >
> > Fair point. It turns out it's not quite three lines, but it's still
> > smaller.
>
> I'm unhappy to see the bpf & packet magic reappear in pseudo-drivers.
>
> This is going to spread in every pseudo-driver, no? So why not keeping
> it in the new API? Should we document if_input() vs if_input_one()?
> Should we assert that if_input_one() is only called from a network
> thread? If yes, should we pick a better name?
Maybe. How's if_vinput? And as you suggest, it can do some more of
the magic? Let me try converting a few more drivers before we
burden it with constraints.
Index: if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.571
diff -u -p -r1.571 if.c
--- if.c 9 Jan 2019 01:14:21 -0000 1.571
+++ if.c 23 Feb 2019 09:06:27 -0000
@@ -895,11 +895,29 @@ if_ih_remove(struct ifnet *ifp, int (*in
}
void
-if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+if_input_ih(struct ifnet *ifp, struct mbuf *m)
{
- struct mbuf *m;
struct ifih *ifih;
struct srp_ref sr;
+
+ /*
+ * Pass this mbuf to all input handlers of its
+ * interface until it is consumed.
+ */
+ SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
+ if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
+ break;
+ }
+ SRPL_LEAVE(&sr);
+
+ if (ifih == NULL)
+ m_freem(m);
+}
+
+void
+if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
+{
+ struct mbuf *m;
int s;
if (ml_empty(ml))
@@ -922,22 +940,32 @@ if_input_process(struct ifnet *ifp, stru
*/
NET_RLOCK();
s = splnet();
- while ((m = ml_dequeue(ml)) != NULL) {
- /*
- * Pass this mbuf to all input handlers of its
- * interface until it is consumed.
- */
- SRPL_FOREACH(ifih, &sr, &ifp->if_inputs, ifih_next) {
- if ((*ifih->ifih_input)(ifp, m, ifih->ifih_cookie))
- break;
- }
- SRPL_LEAVE(&sr);
+ while ((m = ml_dequeue(ml)) != NULL)
+ if_input_ih(ifp, m);
+ splx(s);
+ NET_RUNLOCK();
+}
+
+void
+if_vinput(struct ifnet *ifp, struct mbuf *m)
+{
+#if NBPFILTER > 0
+ caddr_t if_bpf = ifp->if_bpf;
+#endif
+
+ m->m_pkthdr.ph_ifidx = ifp->if_index;
+ m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
- if (ifih == NULL)
+#if NBPFILTER > 0
+ if (if_bpf) {
+ if (bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT)) {
m_freem(m);
+ return;
+ }
}
- splx(s);
- NET_RUNLOCK();
+#endif
+
+ if_input_ih(ifp, m);
}
void
Index: if_var.h
===================================================================
RCS file: /cvs/src/sys/net/if_var.h,v
retrieving revision 1.94
diff -u -p -r1.94 if_var.h
--- if_var.h 9 Jan 2019 01:14:21 -0000 1.94
+++ if_var.h 23 Feb 2019 09:06:27 -0000
@@ -334,6 +334,7 @@ void if_start(struct ifnet *);
int if_enqueue(struct ifnet *, struct mbuf *);
int if_enqueue_ifq(struct ifnet *, struct mbuf *);
void if_input(struct ifnet *, struct mbuf_list *);
+void if_vinput(struct ifnet *, struct mbuf *);
void if_input_process(struct ifnet *, struct mbuf_list *);
int if_input_local(struct ifnet *, struct mbuf *, sa_family_t);
int if_output_local(struct ifnet *, struct mbuf *, sa_family_t);
Index: if_vlan.c
===================================================================
RCS file: /cvs/src/sys/net/if_vlan.c,v
retrieving revision 1.183
diff -u -p -r1.183 if_vlan.c
--- if_vlan.c 15 Feb 2019 13:00:51 -0000 1.183
+++ if_vlan.c 23 Feb 2019 09:06:27 -0000
@@ -342,12 +342,12 @@ int
vlan_input(struct ifnet *ifp0, struct mbuf *m, void *cookie)
{
struct ifvlan *ifv;
+ struct ifnet *ifp;
struct ether_vlan_header *evl;
struct ether_header *eh;
SRPL_HEAD(, ifvlan) *tagh, *list;
struct srp_ref sr;
u_int tag;
- struct mbuf_list ml = MBUF_LIST_INITIALIZER();
u_int16_t etype;
eh = mtod(m, struct ether_header *);
@@ -359,7 +359,6 @@ vlan_input(struct ifnet *ifp0, struct mb
} else if ((etype == ETHERTYPE_VLAN) || (etype == ETHERTYPE_QINQ)) {
if (m->m_len < sizeof(*evl) &&
(m = m_pullup(m, sizeof(*evl))) == NULL) {
- ifp0->if_ierrors++;
return (1);
}
@@ -373,12 +372,6 @@ vlan_input(struct ifnet *ifp0, struct mb
/* From now on ether_vtag is fine */
tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
- m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag);
-
- /* IEEE 802.1p has prio 0 and 1 swapped */
- if (m->m_pkthdr.pf.prio <= 1)
- m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio;
-
list = &tagh[TAG_HASH(tag)];
SRPL_FOREACH(ifv, &sr, list, ifv_list) {
if (ifp0->if_index == ifv->ifv_ifidx0 && tag == ifv->ifv_tag &&
@@ -386,14 +379,18 @@ vlan_input(struct ifnet *ifp0, struct mb
break;
}
- if (ifv == NULL) {
- ifp0->if_noproto++;
- goto drop;
+ if (ifv == NULL || !ISSET(ifv->ifv_if.if_flags, IFF_RUNNING)) {
+ SRPL_LEAVE(&sr);
+ m_freem(m);
+ return (1);
}
- if ((ifv->ifv_if.if_flags & (IFF_UP|IFF_RUNNING)) !=
- (IFF_UP|IFF_RUNNING))
- goto drop;
+ /* The packet is ours now */
+
+ m->m_pkthdr.pf.prio = EVL_PRIOFTAG(m->m_pkthdr.ether_vtag);
+ /* IEEE 802.1p has prio 0 and 1 swapped */
+ if (m->m_pkthdr.pf.prio <= 1)
+ m->m_pkthdr.pf.prio = !m->m_pkthdr.pf.prio;
/*
* Having found a valid vlan interface corresponding to
@@ -403,19 +400,17 @@ vlan_input(struct ifnet *ifp0, struct mb
if (m->m_flags & M_VLANTAG) {
m->m_flags &= ~M_VLANTAG;
} else {
- eh->ether_type = evl->evl_proto;
- memmove((char *)eh + EVL_ENCAPLEN, eh, sizeof(*eh));
+ memmove((char *)evl + EVL_ENCAPLEN, evl,
+ offsetof(struct ether_vlan_header, evl_encap_proto));
m_adj(m, EVL_ENCAPLEN);
}
- ml_enqueue(&ml, m);
- if_input(&ifv->ifv_if, &ml);
- SRPL_LEAVE(&sr);
- return (1);
+ ifp = &ifv->ifv_if;
+ counters_pkt(ifp->if_counters,
+ ifc_ipackets, ifc_ibytes, m->m_pkthdr.len);
-drop:
+ if_vinput(ifp, m);
SRPL_LEAVE(&sr);
- m_freem(m);
return (1);
}