Hi,

by default, the ether_input() checks the destination MAC address of 
incoming unicast packets only if the interface is in promiscous mode. If 
not, it is assumed that the NIC filters unicast packets reliably. 
Unfortunately, for virtio-net this is not the case. There, unicast 
filtering is only best effort, and (depending on configuration) if the 
bridge on the VM host does unicast flodding, unicast packets that are not 
for the VM guest may still be delivered to the VM guest. This is a rather 
annoying problem because it can cause pf to send RST packets to foreign 
connections. (Kudos to mpf@ for debugging this).

There are two possible approaches to fix this problem. Either make the 
vio(4) driver filter out unicast packets that are not for the local MAC, 
which would involve duplicating quite a bit of code from ether_input() in 
vio(4). Or, and I would prefer this, allow the driver to tell 
ether_input() that it needs to check the MAC always, and not only if the 
interface is in promiscous mode.

This could be done with a new flag. There seem to be three possible places 
where this flag could be put:

* ifnet.if_flags
  This is a short and there is no free bit. But the IFF_NOTRAILERS bit has
  become unused recently and could be recycled.

* ifnet.if_xflags
  An int, lots of free bits. But comment says 'extra softnet flags'

* if_data.ifi_capabilities
  An u_int32_t, lots of free bits. 


In the diff below, I went with the first choice because the new IFF_NOMACFILTER
is somewhat similar to IFF_SIMPLEX and because the the check can then be 
nicely folded into the existing check for IFF_PROMISC.

I would welcome any comments, suggestions for a better flag name, OKs, ...

Cheers,
Stefan



diff --git sys/dev/pci/if_vio.c sys/dev/pci/if_vio.c
index 4cd80d5..22fd7cf 100644
--- sys/dev/pci/if_vio.c
+++ sys/dev/pci/if_vio.c
@@ -582,21 +582,21 @@ vio_attach(struct device *parent, struct device *self, 
void *aux)
                        virtio_start_vq_intr(vsc, &sc->sc_vq[VQCTL]);
                        vsc->sc_nvqs = 3;
                }
        }
 
        if (vio_alloc_mem(sc) < 0)
                goto err;
 
        strlcpy(ifp->if_xname, self->dv_xname, IFNAMSIZ);
        ifp->if_softc = sc;
-       ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+       ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | 
IFF_NOMACFILTER;
        ifp->if_start = vio_start;
        ifp->if_ioctl = vio_ioctl;
        ifp->if_capabilities = IFCAP_VLAN_MTU;
        if (features & VIRTIO_NET_F_CSUM)
                ifp->if_capabilities |= IFCAP_CSUM_TCPv4|IFCAP_CSUM_UDPv4;
        IFQ_SET_MAXLEN(&ifp->if_snd, vsc->sc_vqs[1].vq_num - 1);
        IFQ_SET_READY(&ifp->if_snd);
        ifmedia_init(&sc->sc_media, 0, vio_media_change, vio_media_status);
        ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
diff --git sys/net/if.h sys/net/if.h
index 8d7e390..91c6d18 100644
--- sys/net/if.h
+++ sys/net/if.h
@@ -182,36 +182,37 @@ struct if_status_description {
 /*
  * Length of interface description, including terminating '\0'.
  */
 #define        IFDESCRSIZE     64
 
 #define        IFF_UP          0x1             /* interface is up */
 #define        IFF_BROADCAST   0x2             /* broadcast address valid */
 #define        IFF_DEBUG       0x4             /* turn on debugging */
 #define        IFF_LOOPBACK    0x8             /* is a loopback net */
 #define        IFF_POINTOPOINT 0x10            /* interface is point-to-point 
link */
-#define        IFF_NOTRAILERS  0x20            /* avoid use of trailers */
+#define        IFF_NOMACFILTER 0x20            /* Does not reliably filter 
unicast MACs */
 #define        IFF_RUNNING     0x40            /* resources allocated */
 #define        IFF_NOARP       0x80            /* no address resolution 
protocol */
 #define        IFF_PROMISC     0x100           /* receive all packets */
 #define        IFF_ALLMULTI    0x200           /* receive all multicast 
packets */
 #define        IFF_OACTIVE     0x400           /* transmission in progress */
 #define        IFF_SIMPLEX     0x800           /* can't hear own transmissions 
*/
 #define        IFF_LINK0       0x1000          /* per link layer defined bit */
 #define        IFF_LINK1       0x2000          /* per link layer defined bit */
 #define        IFF_LINK2       0x4000          /* per link layer defined bit */
 #define        IFF_MULTICAST   0x8000          /* supports multicast */
 
+
 /* flags set internally only: */
 #define        IFF_CANTCHANGE \
        (IFF_BROADCAST|IFF_POINTOPOINT|IFF_RUNNING|IFF_OACTIVE|\
-           IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI)
+           IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_NOMACFILTER)
 
 #define IFXF_MPSAFE            0x1             /* if_start is mpsafe */
 #define        IFXF_INET6_NOPRIVACY    0x4             /* don't autoconf 
privacy */
 #define        IFXF_MPLS               0x8             /* supports MPLS */
 #define        IFXF_WOL                0x10            /* wake on lan enabled 
*/
 #define        IFXF_AUTOCONF6          0x20            /* v6 autoconf enabled 
*/
 
 #define        IFXF_CANTCHANGE \
        (IFXF_MPSAFE)
 
diff --git sys/net/if_ethersubr.c sys/net/if_ethersubr.c
index 2a44161..296e6f6 100644
--- sys/net/if_ethersubr.c
+++ sys/net/if_ethersubr.c
@@ -347,21 +347,21 @@ ether_input(struct ifnet *ifp, struct mbuf *m, void 
*cookie)
        if (m->m_flags & (M_FILDROP | M_VLANTAG)) {
                m_freem(m);
                return (1);
        }
 
        /*
         * If packet is unicast and we're in promiscuous mode, make sure it
         * is for us.  Drop otherwise.
         */
        if ((m->m_flags & (M_BCAST|M_MCAST)) == 0 &&
-           (ifp->if_flags & IFF_PROMISC)) {
+           (ifp->if_flags & (IFF_PROMISC|IFF_NOMACFILTER))) {
                if (memcmp(ac->ac_enaddr, eh->ether_dhost, ETHER_ADDR_LEN)) {
                        m_freem(m);
                        return (1);
                }
        }
 
        etype = ntohs(eh->ether_type);
 
 decapsulate:
        switch (etype) {

Reply via email to