Author: luigi
Date: Mon Jun  9 15:24:45 2014
New Revision: 267282
URL: http://svnweb.freebsd.org/changeset/base/267282

Log:
  sync netmap code with the version in HEAD:
  - fix handling of tx mbufs in emulated netmap mode;
  - introduce mbq_lock() and mbq_unlock();
  - rate limit some error messages;
  - many whitespace and comment fixes.

Modified:
  stable/10/sys/dev/netmap/netmap.c
  stable/10/sys/dev/netmap/netmap_freebsd.c
  stable/10/sys/dev/netmap/netmap_generic.c
  stable/10/sys/dev/netmap/netmap_kern.h
  stable/10/sys/dev/netmap/netmap_mbq.c
  stable/10/sys/dev/netmap/netmap_mbq.h
  stable/10/sys/dev/netmap/netmap_mem2.c
  stable/10/sys/dev/netmap/netmap_pipe.c
  stable/10/sys/dev/netmap/netmap_vale.c

Modified: stable/10/sys/dev/netmap/netmap.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap.c   Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap.c   Mon Jun  9 15:24:45 2014        (r267282)
@@ -270,6 +270,7 @@ netmap_disable_ring(struct netmap_kring 
 }
 
 
+/* stop or enable all the rings of na */
 static void
 netmap_set_all_rings(struct ifnet *ifp, int stopped)
 {
@@ -303,6 +304,13 @@ netmap_set_all_rings(struct ifnet *ifp, 
 }
 
 
+/*
+ * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
+ * to finish and prevents any new one from starting.  Call this before turning
+ * netmap mode off, or before removing the hardware rings (e.g., on module
+ * unload).  As a rule of thumb for linux drivers, this should be placed near
+ * each napi_disable().
+ */
 void
 netmap_disable_all_rings(struct ifnet *ifp)
 {
@@ -310,6 +318,11 @@ netmap_disable_all_rings(struct ifnet *i
 }
 
 
+/*
+ * Convenience function used in drivers.  Re-enables rxsync and txsync on the
+ * adapter's rings.  In linux drivers, this should be placed near each
+ * napi_enable().
+ */
 void
 netmap_enable_all_rings(struct ifnet *ifp)
 {
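
For illustration, a driver's reset path would bracket its hardware
reinitialization with the two helpers above, roughly as follows (a minimal
sketch; the mydrv_* names are hypothetical and not part of this commit):

    static void
    mydrv_reinit(struct mydrv_softc *sc)
    {
            struct ifnet *ifp = sc->mydrv_ifp;

            netmap_disable_all_rings(ifp);  /* wait for pending *sync()s */
            mydrv_stop_hw(sc);              /* driver-specific stop */
            mydrv_init_hw(sc);              /* driver-specific reinit */
            netmap_enable_all_rings(ifp);   /* resume tx/rxsync */
    }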
@@ -393,6 +406,7 @@ nm_dump_buf(char *p, int len, int lim, c
  * Fetch configuration from the device, to cope with dynamic
  * reconfigurations after loading the module.
  */
+/* call with NMG_LOCK held */
 int
 netmap_update_config(struct netmap_adapter *na)
 {
@@ -447,18 +461,20 @@ netmap_rxsync_compat(struct netmap_kring
        return na->nm_rxsync(na, kring->ring_id, flags);
 }
 
+/* kring->nm_sync callback for the host tx ring */
 static int
 netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
 {
-       (void)flags;
+       (void)flags; /* unused */
        netmap_txsync_to_host(kring->na);
        return 0;
 }
 
+/* kring->nm_sync callback for the host rx ring */
 static int
 netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
 {
-       (void)flags;
+       (void)flags; /* unused */
        netmap_rxsync_from_host(kring->na, NULL, NULL);
        return 0;
 }
@@ -489,6 +505,7 @@ netmap_rxsync_from_host_compat(struct ne
  * Note: for compatibility, host krings are created even when not needed.
  * The tailroom space is currently used by vale ports for allocating leases.
  */
+/* call with NMG_LOCK held */
 int
 netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
 {
@@ -567,6 +584,7 @@ netmap_krings_create(struct netmap_adapt
 
 
 /* undo the actions performed by netmap_krings_create */
+/* call with NMG_LOCK held */
 void
 netmap_krings_delete(struct netmap_adapter *na)
 {
@@ -586,6 +604,7 @@ netmap_krings_delete(struct netmap_adapt
  * on the rings connected to the host so we need to purge
  * them first.
  */
+/* call with NMG_LOCK held */
 static void
 netmap_hw_krings_delete(struct netmap_adapter *na)
 {
@@ -598,6 +617,12 @@ netmap_hw_krings_delete(struct netmap_ad
 }
 
 
+/* create a new netmap_if for a newly registered fd.
+ * If this is the first registration of the adapter,
+ * also create the netmap rings and their in-kernel view,
+ * the netmap krings.
+ */
+/* call with NMG_LOCK held */
 static struct netmap_if*
 netmap_if_new(const char *ifname, struct netmap_adapter *na)
 {
@@ -608,17 +633,23 @@ netmap_if_new(const char *ifname, struct
                return NULL;
        }
 
-       if (na->active_fds)
+       if (na->active_fds)     /* already registered */
                goto final;
 
+       /* create and init the krings arrays.
+        * Depending on the adapter, this may also create
+        * the netmap rings themselves
+        */
        if (na->nm_krings_create(na))
                goto cleanup;
 
+       /* create all missing netmap rings */
        if (netmap_mem_rings_create(na))
                goto cleanup;
 
 final:
 
+       /* in all cases, create a new netmap if */
        nifp = netmap_mem_if_new(ifname, na);
        if (nifp == NULL)
                goto cleanup;
@@ -638,8 +669,8 @@ cleanup:
 
 /* grab a reference to the memory allocator, if we don't have one already.  The
  * reference is taken from the netmap_adapter registered with the priv.
- *
  */
+/* call with NMG_LOCK held */
 static int
 netmap_get_memory_locked(struct netmap_priv_d* p)
 {
@@ -672,6 +703,7 @@ netmap_get_memory_locked(struct netmap_p
 }
 
 
+/* call with NMG_LOCK *not* held */
 int
 netmap_get_memory(struct netmap_priv_d* p)
 {
@@ -683,6 +715,7 @@ netmap_get_memory(struct netmap_priv_d* 
 }
 
 
+/* call with NMG_LOCK held */
 static int
 netmap_have_memory_locked(struct netmap_priv_d* p)
 {
@@ -690,6 +723,7 @@ netmap_have_memory_locked(struct netmap_
 }
 
 
+/* call with NMG_LOCK held */
 static void
 netmap_drop_memory_locked(struct netmap_priv_d* p)
 {
@@ -755,6 +789,7 @@ netmap_do_unregif(struct netmap_priv_d *
        netmap_mem_if_delete(na, nifp);
 }
 
+/* call with NMG_LOCK held */
 static __inline int
 nm_tx_si_user(struct netmap_priv_d *priv)
 {
@@ -762,6 +797,7 @@ nm_tx_si_user(struct netmap_priv_d *priv
                (priv->np_txqlast - priv->np_txqfirst > 1));
 }
 
+/* call with NMG_LOCK held */
 static __inline int
 nm_rx_si_user(struct netmap_priv_d *priv)
 {
@@ -771,8 +807,12 @@ nm_rx_si_user(struct netmap_priv_d *priv
 
 
 /*
+ * Destructor of the netmap_priv_d, called when the fd has
+ * no active open() and mmap(). Also called in error paths.
+ *
  * returns 1 if this is the last instance and we can free priv
  */
+/* call with NMG_LOCK held */
 int
 netmap_dtor_locked(struct netmap_priv_d *priv)
 {
@@ -805,6 +845,7 @@ netmap_dtor_locked(struct netmap_priv_d 
 }
 
 
+/* call with NMG_LOCK *not* held */
 void
 netmap_dtor(void *data)
 {
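
The held/not-held annotations above follow the usual locked-wrapper
convention: the unlocked entry point takes NMG_LOCK and calls the _locked
variant. Schematically (illustrative of the convention, not the literal
function body):

    int
    netmap_get_memory(struct netmap_priv_d *p)
    {
            int error;

            NMG_LOCK();
            error = netmap_get_memory_locked(p);
            NMG_UNLOCK();
            return (error);
    }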
@@ -1009,7 +1050,7 @@ netmap_rxsync_from_host(struct netmap_ad
        (void)pwait;    /* disable unused warnings */
        (void)td;
 
-       mtx_lock(&q->lock);
+       mbq_lock(q);
 
        /* First part: import newly received packets */
        n = mbq_len(q);
@@ -1019,7 +1060,7 @@ netmap_rxsync_from_host(struct netmap_ad
 
                nm_i = kring->nr_hwtail;
                stop_i = nm_prev(nm_i, lim);
-               while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) { 
+               while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
                        int len = MBUF_LEN(m);
                        struct netmap_slot *slot = &ring->slot[nm_i];
 
@@ -1051,7 +1092,7 @@ netmap_rxsync_from_host(struct netmap_ad
        if (kring->rcur == kring->rtail && td) /* no bufs available */
                selrecord(td, &kring->si);
 
-       mtx_unlock(&q->lock);
+       mbq_unlock(q);
        return ret;
 }
 
@@ -1194,6 +1235,12 @@ netmap_get_na(struct nmreq *nmr, struct 
        if (*na != NULL) /* valid match in netmap_get_bdg_na() */
                goto pipes;
 
+       /*
+        * This must be a hardware na, lookup the name in the system.
+        * Note that by hardware we actually mean "it shows up in ifconfig".
+        * This may still be a tap, a veth/epair, or even a
+        * persistent VALE port.
+        */
        ifp = ifunit_ref(nmr->nr_name);
        if (ifp == NULL) {
                return ENXIO;
@@ -1212,6 +1259,11 @@ netmap_get_na(struct nmreq *nmr, struct 
        netmap_adapter_get(ret);
 
 pipes:
+       /*
+        * If we are opening a pipe whose parent was not in netmap mode,
+        * we have to allocate the pipe array now.
+        * XXX get rid of this clumsiness (2014-03-15)
+        */
        error = netmap_pipe_alloc(*na, nmr);
 
 out:
@@ -1219,7 +1271,7 @@ out:
                netmap_adapter_put(ret);
 
        if (ifp)
-               if_rele(ifp);
+               if_rele(ifp); /* allow live unloading of driver modules */
 
        return error;
 }
@@ -1515,7 +1567,7 @@ netmap_set_ringid(struct netmap_priv_d *
        if (nm_rx_si_user(priv))
                na->rx_si_users++;
        if (netmap_verbose) {
-               D("%s: tx [%d,%d) rx [%d,%d) id %d", 
+               D("%s: tx [%d,%d) rx [%d,%d) id %d",
                        NM_IFPNAME(na->ifp),
                        priv->np_txqfirst,
                        priv->np_txqlast,
@@ -1555,10 +1607,9 @@ netmap_do_regif(struct netmap_priv_d *pr
                        goto out;
        }
        nifp = netmap_if_new(NM_IFPNAME(ifp), na);
+
+       /* Allocate a netmap_if and, if necessary, all the netmap_ring's */
        if (nifp == NULL) { /* allocation failed */
-               /* we should drop the allocator, but only
-                * if we were the ones who grabbed it
-                */
                error = ENOMEM;
                goto out;
        }
@@ -1568,10 +1619,8 @@ netmap_do_regif(struct netmap_priv_d *pr
        } else {
                /* Otherwise set the card in netmap mode
                 * and make it use the shared buffers.
-                *
-                * do not core lock because the race is harmless here,
-                * there cannot be any traffic to netmap_transmit()
                 */
+               /* cache the allocator info in the na */
                na->na_lut = na->nm_mem->pools[NETMAP_BUF_POOL].lut;
                ND("%p->na_lut == %p", na, na->na_lut);
        na->na_lut_objtotal = na->nm_mem->pools[NETMAP_BUF_POOL].objtotal;
@@ -1585,6 +1634,9 @@ out:
        *err = error;
        if (error) {
                priv->np_na = NULL;
+               /* we should drop the allocator, but only
+                * if we were the ones who grabbed it
+                */
                if (need_mem)
                        netmap_drop_memory_locked(priv);
        }
@@ -2008,6 +2060,12 @@ flush_tx:
                                continue;
                        /* only one thread does txsync */
                        if (nm_kr_tryget(kring)) {
+                               /* either busy or stopped
+                                * XXX if the ring is stopped, sleeping would
+                                * be better. In current code, however, we only
+                                * stop the rings for brief intervals (2014-03-14)
+                                */
+
                                if (netmap_verbose)
                                        RD(2, "%p lost race on txring %d, ok",
                                            priv, i);
@@ -2049,7 +2107,7 @@ flush_tx:
         */
        if (want_rx) {
                int send_down = 0; /* transparent mode */
-               /* two rounds here to for race avoidance */
+               /* two rounds here for race avoidance */
 do_retry_rx:
                for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
                        int found = 0;
@@ -2120,7 +2178,7 @@ do_retry_rx:
         * Transparent mode: marked bufs on rx rings between
         * kring->nr_hwcur and ring->head
         * are passed to the other endpoint.
-        * 
+        *
         * In this mode we also scan the sw rxring, which in
         * turn passes packets up.
         *
@@ -2139,6 +2197,7 @@ do_retry_rx:
 
 static int netmap_hw_krings_create(struct netmap_adapter *);
 
+/* default notify callback */
 static int
 netmap_notify(struct netmap_adapter *na, u_int n_ring,
        enum txrx tx, int flags)
@@ -2148,11 +2207,16 @@ netmap_notify(struct netmap_adapter *na,
        if (tx == NR_TX) {
                kring = na->tx_rings + n_ring;
                OS_selwakeup(&kring->si, PI_NET);
+               /* optimization: avoid a wake up on the global
+                * queue if nobody has registered for more
+                * than one ring
+                */
                if (na->tx_si_users > 0)
                        OS_selwakeup(&na->tx_si, PI_NET);
        } else {
                kring = na->rx_rings + n_ring;
                OS_selwakeup(&kring->si, PI_NET);
+               /* optimization: same as above */
                if (na->rx_si_users > 0)
                        OS_selwakeup(&na->rx_si, PI_NET);
        }
@@ -2160,7 +2224,11 @@ netmap_notify(struct netmap_adapter *na,
 }
 
 
-// XXX check handling of failures
+/* called by all routines that create netmap_adapters.
+ * Attach na to the ifp (if any) and provide defaults
+ * for optional callbacks. Defaults assume that we
+ * are creating a hardware netmap_adapter.
+ */
 int
 netmap_attach_common(struct netmap_adapter *na)
 {
@@ -2182,6 +2250,10 @@ netmap_attach_common(struct netmap_adapt
 
        NETMAP_SET_CAPABLE(ifp);
        if (na->nm_krings_create == NULL) {
+               /* we assume that we have been called by a driver,
+                * since other port types all provide their own
+                * nm_krings_create
+                */
                na->nm_krings_create = netmap_hw_krings_create;
                na->nm_krings_delete = netmap_hw_krings_delete;
        }
@@ -2195,10 +2267,11 @@ netmap_attach_common(struct netmap_adapt
 }
 
 
+/* standard cleanup, called by all destructors */
 void
 netmap_detach_common(struct netmap_adapter *na)
 {
-       if (na->ifp)
+       if (na->ifp != NULL)
                WNA(na->ifp) = NULL; /* XXX do we need this? */
 
        if (na->tx_rings) { /* XXX should not happen */
@@ -2255,12 +2328,17 @@ netmap_attach(struct netmap_adapter *arg
        hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
 #endif /* linux */
 
-       D("success for %s", NM_IFPNAME(ifp));
+       D("success for %s tx %d/%d rx %d/%d queues/slots",
+               NM_IFPNAME(ifp),
+               hwna->up.num_tx_rings, hwna->up.num_tx_desc,
+               hwna->up.num_rx_rings, hwna->up.num_rx_desc
+               );
        return 0;
 
 fail:
        D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
-       netmap_detach(ifp);
+       if (ifp)
+               netmap_detach(ifp);
        return (hwna ? EINVAL : ENOMEM);
 }
 
@@ -2294,6 +2372,7 @@ NM_DBG(netmap_adapter_put)(struct netmap
        return 1;
 }
 
+/* nm_krings_create callback for all hardware native adapters */
 int
 netmap_hw_krings_create(struct netmap_adapter *na)
 {
@@ -2309,8 +2388,7 @@ netmap_hw_krings_create(struct netmap_ad
 
 
 /*
- * Free the allocated memory linked to the given ``netmap_adapter``
- * object.
+ * Called on module unload by the netmap-enabled drivers
  */
 void
 netmap_detach(struct ifnet *ifp)
@@ -2381,7 +2459,7 @@ netmap_transmit(struct ifnet *ifp, struc
         * not possible on Linux).
         * Also avoid overflowing the queue.
         */
-       mtx_lock(&q->lock);
+       mbq_lock(q);
 
         space = kring->nr_hwtail - kring->nr_hwcur;
         if (space < 0)
@@ -2398,13 +2476,17 @@ netmap_transmit(struct ifnet *ifp, struc
                m = NULL;
                error = 0;
        }
-       mtx_unlock(&q->lock);
+       mbq_unlock(q);
 
 done:
        if (m)
                m_freem(m);
        /* unconditionally wake up listeners */
        na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
+       /* this is normally netmap_notify(), but for nics
+        * connected to a bridge it is netmap_bwrap_intr_notify(),
+        * that possibly forwards the frames through the switch
+        */
 
        return (error);
 }

Modified: stable/10/sys/dev/netmap/netmap_freebsd.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_freebsd.c   Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_freebsd.c   Mon Jun  9 15:24:45 2014        (r267282)
@@ -61,7 +61,8 @@
 
 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
 
-rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+rawsum_t
+nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
 {
        /* TODO XXX please use the FreeBSD implementation for this. */
        uint16_t *words = (uint16_t *)data;
@@ -80,7 +81,8 @@ rawsum_t nm_csum_raw(uint8_t *data, size
 /* Fold a raw checksum: 'cur_sum' is in host byte order, while the
  * return value is in network byte order.
  */
-uint16_t nm_csum_fold(rawsum_t cur_sum)
+uint16_t
+nm_csum_fold(rawsum_t cur_sum)
 {
        /* TODO XXX please use the FreeBSD implementation for this. */
        while (cur_sum >> 16)
@@ -89,7 +91,8 @@ uint16_t nm_csum_fold(rawsum_t cur_sum)
        return htobe16((~cur_sum) & 0xFFFF);
 }
 
-uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+uint16_t
+nm_csum_ipv4(struct nm_iphdr *iph)
 {
 #if 0
        return in_cksum_hdr((void *)iph);
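
Taken together, a caller would combine nm_csum_raw() and nm_csum_fold()
along these lines (a sketch; buf and len are assumed to be supplied by the
caller):

    uint16_t
    example_payload_csum(uint8_t *buf, size_t len)
    {
            rawsum_t sum = nm_csum_raw(buf, len, 0);

            return nm_csum_fold(sum);       /* network byte order */
    }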
@@ -98,7 +101,8 @@ uint16_t nm_csum_ipv4(struct nm_iphdr *i
 #endif
 }
 
-void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+void
+nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
                                        size_t datalen, uint16_t *check)
 {
 #ifdef INET
@@ -120,7 +124,8 @@ void nm_csum_tcpudp_ipv4(struct nm_iphdr
 #endif
 }
 
-void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+void
+nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
                                        size_t datalen, uint16_t *check)
 {
 #ifdef INET6
@@ -143,7 +148,8 @@ void nm_csum_tcpudp_ipv6(struct nm_ipv6h
 int
 netmap_catch_rx(struct netmap_adapter *na, int intercept)
 {
-       struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+       struct netmap_generic_adapter *gna =
+               (struct netmap_generic_adapter *)na;
        struct ifnet *ifp = na->ifp;
 
        if (intercept) {
@@ -209,11 +215,29 @@ generic_xmit_frame(struct ifnet *ifp, st
 {
        int ret;
 
-       m->m_len = m->m_pkthdr.len = 0;
+       /*
+        * The mbuf should be a cluster from our special pool,
+        * so we do not need to do an m_copyback but just copy
+        * (and eventually, just reference the netmap buffer)
+        */
 
-       // copy data to the mbuf
-       m_copyback(m, 0, len, addr);
-       // inc refcount. We are alone, so we can skip the atomic
+       if (*m->m_ext.ref_cnt != 1) {
+               D("invalid refcnt %d for %p",
+                       *m->m_ext.ref_cnt, m);
+               panic("in generic_xmit_frame");
+       }
+       // XXX the ext_size check is unnecessary if we link the netmap buf
+       if (m->m_ext.ext_size < len) {
+               RD(5, "size %d < len %d", m->m_ext.ext_size, len);
+               len = m->m_ext.ext_size;
+       }
+       if (1) { /* XXX seems to have negligible benefits */
+               m->m_ext.ext_buf = m->m_data = addr;
+       } else {
+               bcopy(addr, m->m_data, len);
+       }
+       m->m_len = m->m_pkthdr.len = len;
+       // inc refcount. All ours, we could skip the atomic
        atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
        m->m_flags |= M_FLOWID;
        m->m_pkthdr.flowid = ring_nr;
@@ -223,6 +247,14 @@ generic_xmit_frame(struct ifnet *ifp, st
 }
 
 
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *
+netmap_getna(if_t ifp)
+{
+       return (NA((struct ifnet *)ifp));
+}
+#endif /* __FreeBSD_version >= 1100005 */
+
 /*
  * The following two functions are empty until we have a generic
  * way to extract the info from the ifp
@@ -230,7 +262,7 @@ generic_xmit_frame(struct ifnet *ifp, st
 int
 generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
 {
-       D("called");
+       D("called, in tx %d rx %d", *tx, *rx);
        return 0;
 }
 
@@ -238,13 +270,14 @@ generic_find_num_desc(struct ifnet *ifp,
 void
 generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
 {
-       D("called");
+       D("called, in txq %d rxq %d", *txq, *rxq);
        *txq = netmap_generic_rings;
        *rxq = netmap_generic_rings;
 }
 
 
-void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter 
*na)
+void
+netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
 {
        ND("called");
        mit->mit_pending = 0;
@@ -252,26 +285,30 @@ void netmap_mitigation_init(struct nm_ge
 }
 
 
-void netmap_mitigation_start(struct nm_generic_mit *mit)
+void
+netmap_mitigation_start(struct nm_generic_mit *mit)
 {
        ND("called");
 }
 
 
-void netmap_mitigation_restart(struct nm_generic_mit *mit)
+void
+netmap_mitigation_restart(struct nm_generic_mit *mit)
 {
        ND("called");
 }
 
 
-int netmap_mitigation_active(struct nm_generic_mit *mit)
+int
+netmap_mitigation_active(struct nm_generic_mit *mit)
 {
        ND("called");
        return 0;
 }
 
 
-void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+void
+netmap_mitigation_cleanup(struct nm_generic_mit *mit)
 {
        ND("called");
 }

Modified: stable/10/sys/dev/netmap/netmap_generic.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_generic.c   Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_generic.c   Mon Jun  9 15:24:45 2014        (r267282)
@@ -81,20 +81,26 @@ __FBSDID("$FreeBSD$");
 #include <dev/netmap/netmap_kern.h>
 #include <dev/netmap/netmap_mem2.h>
 
-#define rtnl_lock() D("rtnl_lock called");
-#define rtnl_unlock() D("rtnl_unlock called");
+#define rtnl_lock()    ND("rtnl_lock called")
+#define rtnl_unlock()  ND("rtnl_unlock called")
 #define MBUF_TXQ(m)    ((m)->m_pkthdr.flowid)
 #define MBUF_RXQ(m)    ((m)->m_pkthdr.flowid)
 #define smp_mb()
 
 /*
- * mbuf wrappers
+ * FreeBSD mbuf allocator/deallocator in emulation mode:
+ *
+ * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
+ * so that the destructor, if invoked, will not free the packet.
+ *    In principle we should set the destructor only on demand,
+ * but since there might be a race we better do it on allocation.
+ * As a consequence, we also need to set the destructor or we
+ * would leak buffers.
  */
 
 /*
- * we allocate an EXT_PACKET
+ * mbuf wrappers
  */
-#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
 
 /* mbuf destructor, also need to change the type to EXT_EXTREF,
  * add an M_NOFREE flag, and then clear the flag and
@@ -106,6 +112,32 @@ __FBSDID("$FreeBSD$");
        (m)->m_ext.ext_type = EXT_EXTREF;       \
 } while (0)
 
+static void 
+netmap_default_mbuf_destructor(struct mbuf *m) 
+{ 
+       /* restore original mbuf */
+       m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
+       m->m_ext.ext_arg1 = NULL;
+       m->m_ext.ext_type = EXT_PACKET;
+       m->m_ext.ext_free = NULL;
+       if (*(m->m_ext.ref_cnt) == 0)
+               *(m->m_ext.ref_cnt) = 1;
+       uma_zfree(zone_pack, m);
+} 
+
+static inline struct mbuf * 
+netmap_get_mbuf(int len) 
+{ 
+       struct mbuf *m;
+       m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE);
+       if (m) {
+               m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
+               m->m_ext.ext_free = (void *)netmap_default_mbuf_destructor;
+               m->m_ext.ext_type = EXT_EXTREF;
+               ND(5, "create m %p refcnt %d", m, *m->m_ext.ref_cnt);
+       }
+       return m;
+} 
 
 #define GET_MBUF_REFCNT(m)     ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
 
@@ -223,7 +255,7 @@ generic_netmap_register(struct netmap_ad
 #endif /* REG_RESET */
 
        if (enable) { /* Enable netmap mode. */
-               /* Init the mitigation support. */
+               /* Init the mitigation support on all the rx queues. */
                gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
                                        M_DEVBUF, M_NOWAIT | M_ZERO);
                if (!gna->mit) {
@@ -373,15 +405,11 @@ out:
 static void
 generic_mbuf_destructor(struct mbuf *m)
 {
-       if (netmap_verbose)
-               D("Tx irq (%p) queue %d", m, MBUF_TXQ(m));
        netmap_generic_irq(MBUF_IFP(m), MBUF_TXQ(m), NULL);
 #ifdef __FreeBSD__
-       m->m_ext.ext_type = EXT_PACKET;
-       m->m_ext.ext_free = NULL;
-       if (*(m->m_ext.ref_cnt) == 0)
-               *(m->m_ext.ref_cnt) = 1;
-       uma_zfree(zone_pack, m);
+       if (netmap_verbose)
+               RD(5, "Tx irq (%p) queue %d index %d" , m, MBUF_TXQ(m), 
(int)(uintptr_t)m->m_ext.ext_arg1);
+       netmap_default_mbuf_destructor(m);
 #endif /* __FreeBSD__ */
        IFRATE(rate_ctx.new.txirq++);
 }
@@ -471,12 +499,12 @@ generic_set_tx_event(struct netmap_kring
        e = generic_tx_event_middle(kring, hwcur);
 
        m = kring->tx_pool[e];
+       ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? 
GET_MBUF_REFCNT(m) : -2 );
        if (m == NULL) {
                /* This can happen if there is already an event on the netmap
                   slot 'e': There is nothing to do. */
                return;
        }
-       ND("Event at %d mbuf %p refcnt %d", e, m, GET_MBUF_REFCNT(m));
        kring->tx_pool[e] = NULL;
        SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
 
@@ -770,6 +798,10 @@ generic_netmap_attach(struct ifnet *ifp)
 
        generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc);
        ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
+       if (num_tx_desc == 0 || num_rx_desc == 0) {
+               D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, 
num_rx_desc);
+               return EINVAL;
+       }
 
        gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
        if (gna == NULL) {

Modified: stable/10/sys/dev/netmap/netmap_kern.h
==============================================================================
--- stable/10/sys/dev/netmap/netmap_kern.h      Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_kern.h      Mon Jun  9 15:24:45 2014        (r267282)
@@ -62,6 +62,9 @@
 #define NM_ATOMIC_TEST_AND_SET(p)       (!atomic_cmpset_acq_int((p), 0, 1))
 #define NM_ATOMIC_CLEAR(p)              atomic_store_rel_int((p), 0)
 
+#if __FreeBSD_version >= 1100005
+struct netmap_adapter *netmap_getna(if_t ifp);
+#endif
 
 MALLOC_DECLARE(M_NETMAP);
 
@@ -183,9 +186,6 @@ extern NMG_LOCK_T   netmap_global_lock;
  *     the next empty buffer as known by the hardware (next_to_check or so).
  * TX rings: hwcur + hwofs coincides with next_to_send
  *
- * Clients cannot issue concurrent syscall on a ring. The system
- * detects this and reports an error using two flags,
- * NKR_WBUSY and NKR_RBUSY
  * For received packets, slot->flags is set to nkr_slot_flags
  * so we can provide a proper initial value (e.g. set NS_FORWARD
  * when operating in 'transparent' mode).
@@ -208,7 +208,7 @@ extern NMG_LOCK_T   netmap_global_lock;
  * The kring is manipulated by txsync/rxsync and generic netmap function.
  *
  * Concurrent rxsync or txsync on the same ring are prevented through
- * by nm_kr_lock() which in turn uses nr_busy. This is all we need
+ * by nm_kr_(try)lock() which in turn uses nr_busy. This is all we need
  * for NIC rings, and for TX rings attached to the host stack.
  *
  * RX rings attached to the host stack use an mbq (rx_queue) on both
@@ -440,15 +440,18 @@ struct netmap_adapter {
        /*
         * nm_dtor() is the cleanup routine called when destroying
         *      the adapter.
+        *      Called with NMG_LOCK held.
         *
         * nm_register() is called on NIOCREGIF and close() to enter
         *      or exit netmap mode on the NIC
+        *      Called with NMG_LOCK held.
         *
         * nm_txsync() pushes packets to the underlying hw/switch
         *
         * nm_rxsync() collects packets from the underlying hw/switch
         *
         * nm_config() returns configuration information from the OS
+        *      Called with NMG_LOCK held.
         *
         * nm_krings_create() create and init the krings array
         *      (the array layout must conform to the description
@@ -456,13 +459,12 @@ struct netmap_adapter {
         *
         * nm_krings_delete() cleanup and delete the kring array
         *
-        * nm_notify() is used to act after data have become available.
+        * nm_notify() is used to act after data have become available
+        *      (or the stopped state of the ring has changed)
         *      For hw devices this is typically a selwakeup(),
         *      but for NIC/host ports attached to a switch (or vice-versa)
         *      we also need to invoke the 'txsync' code downstream.
         */
-
-       /* private cleanup */
        void (*nm_dtor)(struct netmap_adapter *);
 
        int (*nm_register)(struct netmap_adapter *, int onoff);
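
For context, a native driver typically fills in only the mandatory fields
and callbacks and relies on netmap_attach_common() for the defaults
described above. A sketch under that assumption (the mydrv_* names and
sizes are made up):

    static int
    mydrv_netmap_attach(struct mydrv_softc *sc)
    {
            struct netmap_adapter na;

            bzero(&na, sizeof(na));
            na.ifp = sc->mydrv_ifp;
            na.num_tx_desc = na.num_rx_desc = 1024;   /* hypothetical sizes */
            na.num_tx_rings = na.num_rx_rings = sc->mydrv_nqueues;
            na.nm_register = mydrv_netmap_reg;
            na.nm_txsync = mydrv_netmap_txsync;
            na.nm_rxsync = mydrv_netmap_rxsync;
            /* nm_krings_create() etc. default to the hw implementations */
            return (netmap_attach(&na));
    }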
@@ -678,7 +680,7 @@ static inline uint32_t
 nm_kr_rxspace(struct netmap_kring *k)
 {
        int space = k->nr_hwtail - k->nr_hwcur;
-       if (space < 0) 
+       if (space < 0)
                space += k->nkr_num_slots;
        ND("preserving %d rx slots %d -> %d", space, k->nr_hwcur, k->nr_hwtail);
 
@@ -827,7 +829,7 @@ nm_txsync_finalize(struct netmap_kring *
 {
        /* update ring tail to what the kernel knows */
        kring->ring->tail = kring->rtail = kring->nr_hwtail;
-       
+
        /* note, head/rhead/hwcur might be behind cur/rcur
         * if no carrier
         */
@@ -1376,5 +1378,4 @@ void bdg_mismatch_datapath(struct netmap
                           struct netmap_vp_adapter *dst_na,
                           struct nm_bdg_fwd *ft_p, struct netmap_ring *ring,
                           u_int *j, u_int lim, u_int *howmany);
-
 #endif /* _NET_NETMAP_KERN_H_ */

Modified: stable/10/sys/dev/netmap/netmap_mbq.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_mbq.c       Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_mbq.c       Mon Jun  9 15:24:45 2014        (r267282)
@@ -76,9 +76,9 @@ static inline void __mbq_enqueue(struct 
 
 void mbq_safe_enqueue(struct mbq *q, struct mbuf *m)
 {
-    mtx_lock(&q->lock);
+    mbq_lock(q);
     __mbq_enqueue(q, m);
-    mtx_unlock(&q->lock);
+    mbq_unlock(q);
 }
 
 
@@ -110,9 +110,9 @@ struct mbuf *mbq_safe_dequeue(struct mbq
 {
     struct mbuf *ret;
 
-    mtx_lock(&q->lock);
+    mbq_lock(q);
     ret =  __mbq_dequeue(q);
-    mtx_unlock(&q->lock);
+    mbq_unlock(q);
 
     return ret;
 }

Modified: stable/10/sys/dev/netmap/netmap_mbq.h
==============================================================================
--- stable/10/sys/dev/netmap/netmap_mbq.h       Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_mbq.h       Mon Jun  9 15:24:45 2014        (r267282)
@@ -62,7 +62,17 @@ void mbq_enqueue(struct mbq *q, struct m
 struct mbuf *mbq_dequeue(struct mbq *q);
 void mbq_purge(struct mbq *q);
 
-/* XXX missing mbq_lock() and mbq_unlock */
+static inline void
+mbq_lock(struct mbq *q)
+{
+       mtx_lock_spin(&q->lock);
+}
+
+static inline void
+mbq_unlock(struct mbq *q)
+{
+       mtx_unlock_spin(&q->lock);
+}
 
 void mbq_safe_init(struct mbq *q);
 void mbq_safe_destroy(struct mbq *q);
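
These wrappers allow callers to hold the queue lock across several
operations, as netmap_rxsync_from_host() and netmap_transmit() now do.
A consumer draining the queue might look like this (a sketch, not code
from this commit):

    static int
    example_drain(struct mbq *q, int budget)
    {
            struct mbuf *m;
            int n = 0;

            mbq_lock(q);
            while (n < budget && (m = mbq_dequeue(q)) != NULL) {
                    m_freem(m);     /* consume the packet */
                    n++;
            }
            mbq_unlock(q);
            return (n);
    }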

Modified: stable/10/sys/dev/netmap/netmap_mem2.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_mem2.c      Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_mem2.c      Mon Jun  9 15:24:45 2014        (r267282)
@@ -992,7 +992,7 @@ netmap_mem_private_new(const char *name,
        if (p[NETMAP_RING_POOL].num < v)
                p[NETMAP_RING_POOL].num = v;
        /* for each pipe we only need the buffers for the 4 "real" rings.
-         * On the other end, the pipe ring dimension may be different from 
+         * On the other end, the pipe ring dimension may be different from
          * the parent port ring dimension. As a compromise, we allocate twice the
          * space actually needed if the pipe rings were the same size as the parent rings
          */

Modified: stable/10/sys/dev/netmap/netmap_pipe.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_pipe.c      Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_pipe.c      Mon Jun  9 15:24:45 2014        (r267282)
@@ -391,7 +391,7 @@ err:
 /* netmap_pipe_reg.
  *
  * There are two cases on registration (onoff==1)
- * 
+ *
  * 1.a) state is
  *
  *        usr1 --> e1 --> e2
@@ -403,7 +403,7 @@ err:
  *        usr1 --> e1 --> e2 <-- usr2
  *
  *      and we are e2. Drop the ref e1 is holding.
- *  
+ *
  *  There are two additional cases on unregister (onoff==0)
  *
  *  2.a) state is
@@ -462,14 +462,14 @@ netmap_pipe_reg(struct netmap_adapter *n
  *
  * 1) state is
  *
- *                usr1 --> e1 --> e2      
+ *                usr1 --> e1 --> e2
  *
- *    and we are e1 (e2 is not registered, so krings_delete cannot be 
+ *    and we are e1 (e2 is not registered, so krings_delete cannot be
  *    called on it);
  *
  * 2) state is
  *
- *                usr1 --> e1     e2 <-- usr2 
+ *                usr1 --> e1     e2 <-- usr2
  *
  *    and we are either e1 or e2.
  *
@@ -519,7 +519,7 @@ netmap_pipe_dtor(struct netmap_adapter *
                pna->peer_ref = 0;
                netmap_adapter_put(&pna->peer->up);
        }
-       if (pna->role == NR_REG_PIPE_MASTER) 
+       if (pna->role == NR_REG_PIPE_MASTER)
                netmap_pipe_remove(pna->parent, pna);
        netmap_adapter_put(pna->parent);
        free(na->ifp, M_DEVBUF);
@@ -587,7 +587,7 @@ netmap_get_pipe_na(struct nmreq *nmr, st
                error = ENODEV;
                goto put_out;
        }
-       /* we create both master and slave. 
+       /* we create both master and slave.
          * The endpoint we were asked for holds a reference to
          * the other one.
          */

Modified: stable/10/sys/dev/netmap/netmap_vale.c
==============================================================================
--- stable/10/sys/dev/netmap/netmap_vale.c      Mon Jun  9 15:16:17 2014        (r267281)
+++ stable/10/sys/dev/netmap/netmap_vale.c      Mon Jun  9 15:24:45 2014        (r267282)
@@ -959,6 +959,14 @@ nm_bdg_preflush(struct netmap_vp_adapter
                ft[ft_i].ft_next = NM_FT_NULL;
                buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
                        (void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
+               if (unlikely(buf == NULL)) {
+                       RD(5, "NULL %s buffer pointer from %s slot %d len %d",
+                               (slot->flags & NS_INDIRECT) ? "INDIRECT" : 
"DIRECT",
+                               kring->name, j, ft[ft_i].ft_len);
+                       buf = ft[ft_i].ft_buf = NMB_VA(0); /* the 'null' buffer */
+                       ft[ft_i].ft_len = 0;
+                       ft[ft_i].ft_flags = 0;
+               }
                __builtin_prefetch(buf);
                ++ft_i;
                if (slot->flags & NS_MOREFRAG) {
@@ -1064,7 +1072,7 @@ netmap_bdg_learning(char *buf, u_int buf
        uint64_t smac, dmac;
 
        if (buf_len < 14) {
-               D("invalid buf length %d", buf_len);
+               RD(5, "invalid buf length %d", buf_len);
                return NM_BDG_NOPORT;
        }
        dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
@@ -1312,6 +1320,7 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_in
                needed = d->bq_len + brddst->bq_len;
 
                if (unlikely(dst_na->virt_hdr_len != na->virt_hdr_len)) {
+                       RD(3, "virt_hdr_mismatch, src %d len %d", 
na->virt_hdr_len, dst_na->virt_hdr_len);
                        /* There is a virtio-net header/offloadings mismatch between
                         * source and destination. The slower mismatch datapath will
                         * be used to cope with all the mismatches.
@@ -1412,6 +1421,11 @@ retry:
                                        /* round to a multiple of 64 */
                                        copy_len = (copy_len + 63) & ~63;
 
+                                       if (unlikely(copy_len > NETMAP_BUF_SIZE ||
+                                                       copy_len > NETMAP_BUF_SIZE)) {

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***