> Author: vmaffione
> Date: Wed Nov 14 15:39:48 2018
> New Revision: 340436
> URL: https://svnweb.freebsd.org/changeset/base/340436
> 
> Log:
>   vtnet: fix netmap support
>   
>   netmap(4) support for vtnet(4) was incomplete and had multiple bugs.
>   This commit fixes those bugs to bring netmap on vtnet in a functional state.
>   
>   Changelist:
>     - handle errors returned by virtqueue_enqueue() properly (they were
>       previously ignored)
>     - make sure netmap XOR rest of the kernel access each virtqueue.
>     - compute the number of netmap slots for TX and RX separately, according 
> to
>       whether indirect descriptors are used or not for a given virtqueue.
>     - make sure sglist are freed according to their type (mbufs or netmap
>       buffers)
>     - add support for mulitiqueue and netmap host (aka sw) rings.
>     - intercept VQ interrupts directly instead of intercepting them in txq_eof
>       and rxq_eof. This simplifies the code and makes it easier to make sure
>       taskqueues are not running for a VQ while it is in netmap mode.
>     - implement vntet_netmap_config() to cope with changes in the number of 
> queues.
>   
>   Reviewed by:        bryanv
>   Approved by:        gnn (mentor)
>   MFC after:  3 days
>   Sponsored by:       Sunny Valley Networks
>   Differential Revision:      https://reviews.freebsd.org/D17916

I would like to get some wider test of this in ^head/
specifically with use in a bhyve guest before we do
an early merge to stable/12 so that this can be in
the next build.

If you are capable of testing this within bhyve as a guest
please do so and provide feedback.  It does not need to be
a ^/head host, just the guest needs to be using vtnet
nic's.

Thanks,
Rod

> 
> Modified:
>   head/sys/dev/netmap/if_vtnet_netmap.h
>   head/sys/dev/virtio/network/if_vtnet.c
>   head/sys/dev/virtio/network/if_vtnetvar.h
> 
> Modified: head/sys/dev/netmap/if_vtnet_netmap.h
> ==============================================================================
> --- head/sys/dev/netmap/if_vtnet_netmap.h     Wed Nov 14 15:23:39 2018        
> (r340435)
> +++ head/sys/dev/netmap/if_vtnet_netmap.h     Wed Nov 14 15:39:48 2018        
> (r340436)
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (C) 2014 Vincenzo Maffione, Luigi Rizzo. All rights reserved.
> + * Copyright (C) 2014-2018 Vincenzo Maffione, Luigi Rizzo.
>   *
>   * Redistribution and use in source and binary forms, with or without
>   * modification, are permitted provided that the following conditions
> @@ -33,74 +33,148 @@
>  #include <vm/pmap.h>    /* vtophys ? */
>  #include <dev/netmap/netmap_kern.h>
>  
> +/*
> + * Return 1 if the queue identified by 't' and 'idx' is in netmap mode.
> + */
> +static int
> +vtnet_netmap_queue_on(struct vtnet_softc *sc, enum txrx t, int idx)
> +{
> +     struct netmap_adapter *na = NA(sc->vtnet_ifp);
>  
> -#define SOFTC_T      vtnet_softc
> +     if (!nm_native_on(na))
> +             return 0;
>  
> -/* Free all the unused buffer in all the RX virtqueues.
> - * This function is called when entering and exiting netmap mode.
> - * - buffers queued by the virtio driver return skbuf/mbuf pointer
> - *   and need to be freed;
> - * - buffers queued by netmap return the txq/rxq, and do not need work
> - */
> +     if (t == NR_RX)
> +             return !!(idx < na->num_rx_rings &&
> +                     na->rx_rings[idx]->nr_mode == NKR_NETMAP_ON);
> +
> +     return !!(idx < na->num_tx_rings &&
> +             na->tx_rings[idx]->nr_mode == NKR_NETMAP_ON);
> +}
> +
>  static void
> -vtnet_netmap_free_bufs(struct SOFTC_T* sc)
> +vtnet_free_used(struct virtqueue *vq, int netmap_bufs, enum txrx t, int idx)
>  {
> -     int i, nmb = 0, n = 0, last;
> +     void *cookie;
> +     int deq = 0;
>  
> -     for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
> -             struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
> -             struct virtqueue *vq;
> -             struct mbuf *m;
> -             struct vtnet_txq *txq = &sc->vtnet_txqs[i];
> -                struct vtnet_tx_header *txhdr;
> +     while ((cookie = virtqueue_dequeue(vq, NULL)) != NULL) {
> +             if (netmap_bufs) {
> +                     /* These are netmap buffers: there is nothing to do. */
> +             } else {
> +                     /* These are mbufs that we need to free. */
> +                     struct mbuf *m;
>  
> -             last = 0;
> -             vq = rxq->vtnrx_vq;
> -             while ((m = virtqueue_drain(vq, &last)) != NULL) {
> -                     n++;
> -                     if (m != (void *)rxq)
> +                     if (t == NR_TX) {
> +                             struct vtnet_tx_header *txhdr = cookie;
> +                             m = txhdr->vth_mbuf;
>                               m_freem(m);
> -                     else
> -                             nmb++;
> -             }
> -
> -             last = 0;
> -             vq = txq->vtntx_vq;
> -             while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
> -                     n++;
> -                     if (txhdr != (void *)txq) {
> -                             m_freem(txhdr->vth_mbuf);
>                               uma_zfree(vtnet_tx_header_zone, txhdr);
> -                     } else
> -                             nmb++;
> +                     } else {
> +                             m = cookie;
> +                             m_freem(m);
> +                     }
>               }
> +             deq++;
>       }
> -     D("freed %d mbufs, %d netmap bufs on %d queues",
> -             n - nmb, nmb, i);
> +
> +     if (deq)
> +             nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
> +                      deq, nm_txrx2str(t), idx, netmap_bufs);
>  }
>  
>  /* Register and unregister. */
>  static int
> -vtnet_netmap_reg(struct netmap_adapter *na, int onoff)
> +vtnet_netmap_reg(struct netmap_adapter *na, int state)
>  {
> -        struct ifnet *ifp = na->ifp;
> -     struct SOFTC_T *sc = ifp->if_softc;
> +     struct ifnet *ifp = na->ifp;
> +     struct vtnet_softc *sc = ifp->if_softc;
> +     int success;
> +     enum txrx t;
> +     int i;
>  
> +     /* Drain the taskqueues to make sure that there are no worker threads
> +      * accessing the virtqueues. */
> +     vtnet_drain_taskqueues(sc);
> +
>       VTNET_CORE_LOCK(sc);
> -     ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
> -     /* enable or disable flags and callbacks in na and ifp */
> -     if (onoff) {
> +
> +     /* We need nm_netmap_on() to return true when called by
> +      * vtnet_init_locked() below. */
> +     if (state)
>               nm_set_native_flags(na);
> +
> +     /* We need to trigger a device reset in order to unexpose guest buffers
> +      * published to the host. */
> +     ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
> +     /* Get pending used buffers. The way they are freed depends on whether
> +      * they are netmap buffer or they are mbufs. We can tell apart the two
> +      * cases by looking at kring->nr_mode, before this is possibly updated
> +      * in the loop below. */
> +     for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
> +             struct vtnet_txq *txq = &sc->vtnet_txqs[i];
> +             struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
> +             struct netmap_kring *kring;
> +
> +             VTNET_TXQ_LOCK(txq);
> +             kring = NMR(na, NR_TX)[i];
> +             vtnet_free_used(txq->vtntx_vq,
> +                             kring->nr_mode == NKR_NETMAP_ON, NR_TX, i);
> +             VTNET_TXQ_UNLOCK(txq);
> +
> +             VTNET_RXQ_LOCK(rxq);
> +             kring = NMR(na, NR_RX)[i];
> +             vtnet_free_used(rxq->vtnrx_vq,
> +                             kring->nr_mode == NKR_NETMAP_ON, NR_RX, i);
> +             VTNET_RXQ_UNLOCK(rxq);
> +     }
> +     vtnet_init_locked(sc);
> +     success = (ifp->if_drv_flags & IFF_DRV_RUNNING) ? 0 : ENXIO;
> +
> +     if (state) {
> +             for_rx_tx(t) {
> +                     /* Hardware rings. */
> +                     for (i = 0; i < nma_get_nrings(na, t); i++) {
> +                             struct netmap_kring *kring = NMR(na, t)[i];
> +
> +                             if (nm_kring_pending_on(kring))
> +                                     kring->nr_mode = NKR_NETMAP_ON;
> +                     }
> +
> +                     /* Host rings. */
> +                     for (i = 0; i < nma_get_host_nrings(na, t); i++) {
> +                             struct netmap_kring *kring =
> +                                     NMR(na, t)[nma_get_nrings(na, t) + i];
> +
> +                             if (nm_kring_pending_on(kring))
> +                                     kring->nr_mode = NKR_NETMAP_ON;
> +                     }
> +             }
>       } else {
>               nm_clear_native_flags(na);
> +             for_rx_tx(t) {
> +                     /* Hardware rings. */
> +                     for (i = 0; i < nma_get_nrings(na, t); i++) {
> +                             struct netmap_kring *kring = NMR(na, t)[i];
> +
> +                             if (nm_kring_pending_off(kring))
> +                                     kring->nr_mode = NKR_NETMAP_OFF;
> +                     }
> +
> +                     /* Host rings. */
> +                     for (i = 0; i < nma_get_host_nrings(na, t); i++) {
> +                             struct netmap_kring *kring =
> +                                     NMR(na, t)[nma_get_nrings(na, t) + i];
> +
> +                             if (nm_kring_pending_off(kring))
> +                                     kring->nr_mode = NKR_NETMAP_OFF;
> +                     }
> +             }
>       }
> -     /* drain queues so netmap and native drivers
> -      * do not interfere with each other
> -      */
> -     vtnet_netmap_free_bufs(sc);
> -        vtnet_init_locked(sc);       /* also enable intr */
> -        VTNET_CORE_UNLOCK(sc);
> -        return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
> +
> +     VTNET_CORE_UNLOCK(sc);
> +
> +     return success;
>  }
>  
>  
> @@ -109,20 +183,19 @@ static int
>  vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
>  {
>       struct netmap_adapter *na = kring->na;
> -        struct ifnet *ifp = na->ifp;
> +     struct ifnet *ifp = na->ifp;
>       struct netmap_ring *ring = kring->ring;
>       u_int ring_nr = kring->ring_id;
>       u_int nm_i;     /* index into the netmap ring */
> -     u_int nic_i;    /* index into the NIC ring */
> -     u_int n;
>       u_int const lim = kring->nkr_num_slots - 1;
>       u_int const head = kring->rhead;
>  
>       /* device-specific */
> -     struct SOFTC_T *sc = ifp->if_softc;
> +     struct vtnet_softc *sc = ifp->if_softc;
>       struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
>       struct virtqueue *vq = txq->vtntx_vq;
>       int interrupts = !(kring->nr_kflags & NKR_NOINTR);
> +     u_int n;
>  
>       /*
>        * First part: process new packets to send.
> @@ -133,15 +206,13 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int fl
>       if (nm_i != head) {     /* we have new packets to send */
>               struct sglist *sg = txq->vtntx_sg;
>  
> -             nic_i = netmap_idx_k2n(kring, nm_i);
> -             for (n = 0; nm_i != head; n++) {
> +             for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
>                       /* we use an empty header here */
> -                     static struct virtio_net_hdr_mrg_rxbuf hdr;
>                       struct netmap_slot *slot = &ring->slot[nm_i];
>                       u_int len = slot->len;
>                       uint64_t paddr;
>                       void *addr = PNMB(na, slot, &paddr);
> -                        int err;
> +                     int err;
>  
>                       NM_CHECK_ADDR_LEN(na, addr, len);
>  
> @@ -150,88 +221,63 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int fl
>                        * and kick the hypervisor (if necessary).
>                        */
>                       sglist_reset(sg); // cheap
> -                     // if vtnet_hdr_size > 0 ...
> -                     err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
> -                     // XXX later, support multi segment
> -                     err = sglist_append_phys(sg, paddr, len);
> -                     /* use na as the cookie */
> -                        err = virtqueue_enqueue(vq, txq, sg, sg->sg_nseg, 0);
> -                        if (unlikely(err < 0)) {
> -                                D("virtqueue_enqueue failed");
> -                                break;
> -                        }
> -
> -                     nm_i = nm_next(nm_i, lim);
> -                     nic_i = nm_next(nic_i, lim);
> +                     err = sglist_append(sg, &txq->vtntx_shrhdr, 
> sc->vtnet_hdr_size);
> +                     err |= sglist_append_phys(sg, paddr, len);
> +                     KASSERT(err == 0, ("%s: cannot append to sglist %d",
> +                                             __func__, err));
> +                     err = virtqueue_enqueue(vq, /*cookie=*/txq, sg,
> +                                             /*readable=*/sg->sg_nseg,
> +                                             /*writeable=*/0);
> +                     if (unlikely(err)) {
> +                             if (err != ENOSPC)
> +                                     nm_prerr("virtqueue_enqueue(%s) failed: 
> %d\n",
> +                                                     kring->name, err);
> +                             break;
> +                     }
>               }
> -             /* Update hwcur depending on where we stopped. */
> -             kring->nr_hwcur = nm_i; /* note we migth break early */
>  
> -             /* No more free TX slots? Ask the hypervisor for notifications,
> -              * possibly only when a considerable amount of work has been
> -              * done.
> -              */
> -             ND(3,"sent %d packets, hwcur %d", n, nm_i);
> -             virtqueue_disable_intr(vq);
>               virtqueue_notify(vq);
> -     } else {
> -             if (ring->head != ring->tail)
> -                 ND(5, "pure notify ? head %d tail %d nused %d %d",
> -                     ring->head, ring->tail, virtqueue_nused(vq),
> -                     (virtqueue_dump(vq), 1));
> -             virtqueue_notify(vq);
> -             if (interrupts) {
> -                     virtqueue_enable_intr(vq); // like postpone with 0
> -             }
> +
> +             /* Update hwcur depending on where we stopped. */
> +             kring->nr_hwcur = nm_i; /* note we migth break early */
>       }
>  
> -
> -        /* Free used slots. We only consider our own used buffers, recognized
> -      * by the token we passed to virtqueue_add_outbuf.
> +     /* Free used slots. We only consider our own used buffers, recognized
> +      * by the token we passed to virtqueue_enqueue.
>        */
> -        n = 0;
> -        for (;;) {
> -                struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
> -                if (txhdr == NULL)
> -                        break;
> -                if (likely(txhdr == (void *)txq)) {
> -                        n++;
> -                     if (virtqueue_nused(vq) < 32) { // XXX slow release
> -                             break;
> -                     }
> -             } else { /* leftover from previous transmission */
> -                     m_freem(txhdr->vth_mbuf);
> -                     uma_zfree(vtnet_tx_header_zone, txhdr);
> -             }
> -        }
> -     if (n) {
> +     n = 0;
> +     for (;;) {
> +             void *token = virtqueue_dequeue(vq, NULL);
> +             if (token == NULL)
> +                     break;
> +             if (unlikely(token != (void *)txq))
> +                     nm_prerr("BUG: TX token mismatch\n");
> +             else
> +                     n++;
> +     }
> +     if (n > 0) {
>               kring->nr_hwtail += n;
>               if (kring->nr_hwtail > lim)
>                       kring->nr_hwtail -= lim + 1;
>       }
> -     if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 
> 0*/) {
> -             ND(3, "disable intr, hwcur %d", nm_i);
> -             virtqueue_disable_intr(vq);
> -     } else if (interrupts) {
> -             ND(3, "enable intr, hwcur %d", nm_i);
> -             virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
> -     }
>  
> -        return 0;
> +     if (interrupts && virtqueue_nfree(vq) < 32)
> +             virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG);
> +
> +     return 0;
>  }
>  
>  static int
> -vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_i, u_int head)
> +vtnet_netmap_kring_refill(struct netmap_kring *kring, u_int nm_i, u_int head)
>  {
>       struct netmap_adapter *na = kring->na;
> -        struct ifnet *ifp = na->ifp;
> +     struct ifnet *ifp = na->ifp;
>       struct netmap_ring *ring = kring->ring;
>       u_int ring_nr = kring->ring_id;
>       u_int const lim = kring->nkr_num_slots - 1;
> -     u_int n;
>  
>       /* device-specific */
> -     struct SOFTC_T *sc = ifp->if_softc;
> +     struct vtnet_softc *sc = ifp->if_softc;
>       struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
>       struct virtqueue *vq = rxq->vtnrx_vq;
>  
> @@ -239,12 +285,11 @@ vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_
>       struct sglist_seg ss[2];
>       struct sglist sg = { ss, 0, 0, 2 };
>  
> -     for (n = 0; nm_i != head; n++) {
> -             static struct virtio_net_hdr_mrg_rxbuf hdr;
> +     for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
>               struct netmap_slot *slot = &ring->slot[nm_i];
>               uint64_t paddr;
>               void *addr = PNMB(na, slot, &paddr);
> -             int err = 0;
> +             int err;
>  
>               if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
>                       if (netmap_ring_reinit(kring))
> @@ -252,99 +297,134 @@ vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_
>               }
>  
>               slot->flags &= ~NS_BUF_CHANGED;
> -             sglist_reset(&sg); // cheap
> -             err = sglist_append(&sg, &hdr, sc->vtnet_hdr_size);
> -             err = sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
> +             sglist_reset(&sg);
> +             err = sglist_append(&sg, &rxq->vtnrx_shrhdr, 
> sc->vtnet_hdr_size);
> +             err |= sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
> +             KASSERT(err == 0, ("%s: cannot append to sglist %d",
> +                                     __func__, err));
>               /* writable for the host */
> -             err = virtqueue_enqueue(vq, rxq, &sg, 0, sg.sg_nseg);
> -             if (err < 0) {
> -                     D("virtqueue_enqueue failed");
> +             err = virtqueue_enqueue(vq, /*cookie=*/rxq, &sg,
> +                             /*readable=*/0, /*writeable=*/sg.sg_nseg);
> +             if (unlikely(err)) {
> +                     if (err != ENOSPC)
> +                             nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
> +                                     kring->name, err);
>                       break;
>               }
> -             nm_i = nm_next(nm_i, lim);
>       }
> +
>       return nm_i;
>  }
>  
> +/*
> + * Publish netmap buffers on a RX virtqueue.
> + * Returns -1 if this virtqueue is not being opened in netmap mode.
> + * If the virtqueue is being opened in netmap mode, return 0 on success and
> + * a positive error code on failure.
> + */
> +static int
> +vtnet_netmap_rxq_populate(struct vtnet_rxq *rxq)
> +{
> +     struct netmap_adapter *na = NA(rxq->vtnrx_sc->vtnet_ifp);
> +     struct netmap_kring *kring;
> +     int error;
> +
> +     if (!nm_native_on(na) || rxq->vtnrx_id >= na->num_rx_rings)
> +             return -1;
> +
> +     kring = na->rx_rings[rxq->vtnrx_id];
> +     if (!(nm_kring_pending_on(kring) ||
> +                     kring->nr_pending_mode == NKR_NETMAP_ON))
> +             return -1;
> +
> +     /* Expose all the RX netmap buffers. Note that the number of
> +      * netmap slots in the RX ring matches the maximum number of
> +      * 2-elements sglist that the RX virtqueue can accommodate. */
> +     error = vtnet_netmap_kring_refill(kring, 0, na->num_rx_desc);
> +     virtqueue_notify(rxq->vtnrx_vq);
> +
> +     return error < 0 ? ENXIO : 0;
> +}
> +
>  /* Reconcile kernel and user view of the receive ring. */
>  static int
>  vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
>  {
>       struct netmap_adapter *na = kring->na;
> -        struct ifnet *ifp = na->ifp;
> +     struct ifnet *ifp = na->ifp;
>       struct netmap_ring *ring = kring->ring;
>       u_int ring_nr = kring->ring_id;
>       u_int nm_i;     /* index into the netmap ring */
> -     // u_int nic_i; /* index into the NIC ring */
> -     u_int n;
>       u_int const lim = kring->nkr_num_slots - 1;
>       u_int const head = kring->rhead;
> -     int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & 
> NKR_PENDINTR;
> +     int force_update = (flags & NAF_FORCE_READ) ||
> +                             (kring->nr_kflags & NKR_PENDINTR);
>       int interrupts = !(kring->nr_kflags & NKR_NOINTR);
>  
>       /* device-specific */
> -     struct SOFTC_T *sc = ifp->if_softc;
> +     struct vtnet_softc *sc = ifp->if_softc;
>       struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
>       struct virtqueue *vq = rxq->vtnrx_vq;
>  
> -     /* XXX netif_carrier_ok ? */
> -
> -     if (head > lim)
> -             return netmap_ring_reinit(kring);
> -
>       rmb();
>       /*
>        * First part: import newly received packets.
> -      * Only accept our
> -      * own buffers (matching the token). We should only get
> -      * matching buffers, because of vtnet_netmap_free_rx_unused_bufs()
> -      * and vtnet_netmap_init_buffers().
> +      * Only accept our own buffers (matching the token). We should only get
> +      * matching buffers. We may need to stop early to avoid hwtail to 
> overrun
> +      * hwcur.
>        */
>       if (netmap_no_pendintr || force_update) {
> -                struct netmap_adapter *token;
> +             uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
> +             void *token;
>  
> -                nm_i = kring->nr_hwtail;
> -                n = 0;
> -             for (;;) {
> +             vtnet_rxq_disable_intr(rxq);
> +
> +             nm_i = kring->nr_hwtail;
> +             while (nm_i != hwtail_lim) {
>                       int len;
> -                        token = virtqueue_dequeue(vq, &len);
> -                        if (token == NULL)
> -                                break;
> -                        if (likely(token == (void *)rxq)) {
> -                            ring->slot[nm_i].len = len;
> -                            ring->slot[nm_i].flags = 0;
> -                            nm_i = nm_next(nm_i, lim);
> -                            n++;
> -                        } else {
> -                         D("This should not happen");
> -                        }
> +                     token = virtqueue_dequeue(vq, &len);
> +                     if (token == NULL) {
> +                             if (interrupts && vtnet_rxq_enable_intr(rxq)) {
> +                                     vtnet_rxq_disable_intr(rxq);
> +                                     continue;
> +                             }
> +                             break;
> +                     }
> +                     if (unlikely(token != (void *)rxq)) {
> +                             nm_prerr("BUG: RX token mismatch\n");
> +                     } else {
> +                             /* Skip the virtio-net header. */
> +                             len -= sc->vtnet_hdr_size;
> +                             if (unlikely(len < 0)) {
> +                                     RD(1, "Truncated virtio-net-header, "
> +                                             "missing %d bytes", -len);
> +                                     len = 0;
> +                             }
> +                             ring->slot[nm_i].len = len;
> +                             ring->slot[nm_i].flags = 0;
> +                             nm_i = nm_next(nm_i, lim);
> +                     }
>               }
>               kring->nr_hwtail = nm_i;
>               kring->nr_kflags &= ~NKR_PENDINTR;
>       }
> -        ND("[B] h %d c %d hwcur %d hwtail %d",
> -             ring->head, ring->cur, kring->nr_hwcur,
> -                           kring->nr_hwtail);
> +     ND("[B] h %d c %d hwcur %d hwtail %d", ring->head, ring->cur,
> +                             kring->nr_hwcur, kring->nr_hwtail);
>  
>       /*
>        * Second part: skip past packets that userspace has released.
>        */
>       nm_i = kring->nr_hwcur; /* netmap ring index */
>       if (nm_i != head) {
> -             int err = vtnet_refill_rxq(kring, nm_i, head);
> -             if (err < 0)
> -                     return 1;
> -             kring->nr_hwcur = err;
> +             int nm_j = vtnet_netmap_kring_refill(kring, nm_i, head);
> +             if (nm_j < 0)
> +                     return nm_j;
> +             kring->nr_hwcur = nm_j;
>               virtqueue_notify(vq);
> -             /* After draining the queue may need an intr from the 
> hypervisor */
> -             if (interrupts) {
> -                     vtnet_rxq_enable_intr(rxq);
> -             }
>       }
>  
> -        ND("[C] h %d c %d t %d hwcur %d hwtail %d",
> -             ring->head, ring->cur, ring->tail,
> -             kring->nr_hwcur, kring->nr_hwtail);
> +     ND("[C] h %d c %d t %d hwcur %d hwtail %d", ring->head, ring->cur,
> +             ring->tail, kring->nr_hwcur, kring->nr_hwtail);
>  
>       return 0;
>  }
> @@ -352,9 +432,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int fl
>  
>  /* Enable/disable interrupts on all virtqueues. */
>  static void
> -vtnet_netmap_intr(struct netmap_adapter *na, int onoff)
> +vtnet_netmap_intr(struct netmap_adapter *na, int state)
>  {
> -     struct SOFTC_T *sc = na->ifp->if_softc;
> +     struct vtnet_softc *sc = na->ifp->if_softc;
>       int i;
>  
>       for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
> @@ -362,7 +442,7 @@ vtnet_netmap_intr(struct netmap_adapter *na, int onoff
>               struct vtnet_txq *txq = &sc->vtnet_txqs[i];
>               struct virtqueue *txvq = txq->vtntx_vq;
>  
> -             if (onoff) {
> +             if (state) {
>                       vtnet_rxq_enable_intr(rxq);
>                       virtqueue_enable_intr(txvq);
>               } else {
> @@ -372,60 +452,88 @@ vtnet_netmap_intr(struct netmap_adapter *na, int onoff
>       }
>  }
>  
> -/* Make RX virtqueues buffers pointing to netmap buffers. */
>  static int
> -vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
> +vtnet_netmap_tx_slots(struct vtnet_softc *sc)
>  {
> -     struct ifnet *ifp = sc->vtnet_ifp;
> -     struct netmap_adapter* na = NA(ifp);
> -     unsigned int r;
> +     int div;
>  
> -     if (!nm_native_on(na))
> -             return 0;
> -     for (r = 0; r < na->num_rx_rings; r++) {
> -                struct netmap_kring *kring = na->rx_rings[r];
> -             struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
> -             struct virtqueue *vq = rxq->vtnrx_vq;
> -             struct netmap_slot* slot;
> -             int err = 0;
> +     /* We need to prepend a virtio-net header to each netmap buffer to be
> +      * transmitted, therefore calling virtqueue_enqueue() passing sglist
> +      * with 2 elements.
> +      * TX virtqueues use indirect descriptors if the feature was negotiated
> +      * with the host, and if sc->vtnet_tx_nsegs > 1. With indirect
> +      * descriptors, a single virtio descriptor is sufficient to reference
> +      * each TX sglist. Without them, we need two separate virtio descriptors
> +      * for each TX sglist. We therefore compute the number of netmap TX
> +      * slots according to these assumptions.
> +      */
> +     if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_tx_nsegs > 1)
> +             div = 1;
> +     else
> +             div = 2;
>  
> -             slot = netmap_reset(na, NR_RX, r, 0);
> -             if (!slot) {
> -                     D("strange, null netmap ring %d", r);
> -                     return 0;
> -             }
> -             /* Add up to na>-num_rx_desc-1 buffers to this RX virtqueue.
> -              * It's important to leave one virtqueue slot free, otherwise
> -              * we can run into ring->cur/ring->tail wraparounds.
> -              */
> -             err = vtnet_refill_rxq(kring, 0, na->num_rx_desc-1);
> -             if (err < 0)
> -                     return 0;
> -             virtqueue_notify(vq);
> -     }
> +     return virtqueue_size(sc->vtnet_txqs[0].vtntx_vq) / div;
> +}
>  
> -     return 1;
> +static int
> +vtnet_netmap_rx_slots(struct vtnet_softc *sc)
> +{
> +     int div;
> +
> +     /* We need to prepend a virtio-net header to each netmap buffer to be
> +      * received, therefore calling virtqueue_enqueue() passing sglist
> +      * with 2 elements.
> +      * RX virtqueues use indirect descriptors if the feature was negotiated
> +      * with the host, and if sc->vtnet_rx_nsegs > 1. With indirect
> +      * descriptors, a single virtio descriptor is sufficient to reference
> +      * each RX sglist. Without them, we need two separate virtio descriptors
> +      * for each RX sglist. We therefore compute the number of netmap RX
> +      * slots according to these assumptions.
> +      */
> +     if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_rx_nsegs > 1)
> +             div = 1;
> +     else
> +             div = 2;
> +
> +     return virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq) / div;
>  }
>  
> +static int
> +vtnet_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
> +{
> +     struct vtnet_softc *sc = na->ifp->if_softc;
> +
> +     info->num_tx_rings = sc->vtnet_act_vq_pairs;
> +     info->num_rx_rings = sc->vtnet_act_vq_pairs;
> +     info->num_tx_descs = vtnet_netmap_tx_slots(sc);
> +     info->num_rx_descs = vtnet_netmap_rx_slots(sc);
> +     info->rx_buf_maxsize = NETMAP_BUF_SIZE(na);
> +
> +     return 0;
> +}
> +
>  static void
> -vtnet_netmap_attach(struct SOFTC_T *sc)
> +vtnet_netmap_attach(struct vtnet_softc *sc)
>  {
>       struct netmap_adapter na;
>  
>       bzero(&na, sizeof(na));
>  
>       na.ifp = sc->vtnet_ifp;
> -     na.num_tx_desc =  1024;// sc->vtnet_rx_nmbufs;
> -     na.num_rx_desc =  1024; // sc->vtnet_rx_nmbufs;
> +     na.na_flags = 0;
> +     na.num_tx_desc = vtnet_netmap_tx_slots(sc);
> +     na.num_rx_desc = vtnet_netmap_rx_slots(sc);
> +     na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
> +     na.rx_buf_maxsize = 0;
>       na.nm_register = vtnet_netmap_reg;
>       na.nm_txsync = vtnet_netmap_txsync;
>       na.nm_rxsync = vtnet_netmap_rxsync;
>       na.nm_intr = vtnet_netmap_intr;
> -     na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
> -     D("max rings %d", sc->vtnet_max_vq_pairs);
> +     na.nm_config = vtnet_netmap_config;
> +
>       netmap_attach(&na);
>  
> -        D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
> +     nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
>                       na.num_tx_rings, na.num_tx_desc,
>                       na.num_tx_rings, na.num_rx_desc);
>  }
> 
> Modified: head/sys/dev/virtio/network/if_vtnet.c
> ==============================================================================
> --- head/sys/dev/virtio/network/if_vtnet.c    Wed Nov 14 15:23:39 2018        
> (r340435)
> +++ head/sys/dev/virtio/network/if_vtnet.c    Wed Nov 14 15:39:48 2018        
> (r340436)
> @@ -1192,6 +1192,12 @@ vtnet_rxq_populate(struct vtnet_rxq *rxq)
>       struct virtqueue *vq;
>       int nbufs, error;
>  
> +#ifdef DEV_NETMAP
> +     error = vtnet_netmap_rxq_populate(rxq);
> +     if (error >= 0)
> +             return (error);
> +#endif  /* DEV_NETMAP */
> +
>       vq = rxq->vtnrx_vq;
>       error = ENOSPC;
>  
> @@ -1221,12 +1227,20 @@ vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
>       struct virtqueue *vq;
>       struct mbuf *m;
>       int last;
> +#ifdef DEV_NETMAP
> +     int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX,
> +                                             rxq->vtnrx_id);
> +#else  /* !DEV_NETMAP */
> +     int netmap_bufs = 0;
> +#endif /* !DEV_NETMAP */
>  
>       vq = rxq->vtnrx_vq;
>       last = 0;
>  
> -     while ((m = virtqueue_drain(vq, &last)) != NULL)
> -             m_freem(m);
> +     while ((m = virtqueue_drain(vq, &last)) != NULL) {
> +             if (!netmap_bufs)
> +                     m_freem(m);
> +     }
>  
>       KASSERT(virtqueue_empty(vq),
>           ("%s: mbufs remaining in rx queue %p", __func__, rxq));
> @@ -1772,12 +1786,6 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
>  
>       VTNET_RXQ_LOCK_ASSERT(rxq);
>  
> -#ifdef DEV_NETMAP
> -     if (netmap_rx_irq(ifp, 0, &deq)) {
> -             return (FALSE);
> -     }
> -#endif /* DEV_NETMAP */
> -
>       while (count-- > 0) {
>               m = virtqueue_dequeue(vq, &len);
>               if (m == NULL)
> @@ -1871,6 +1879,11 @@ vtnet_rx_vq_intr(void *xrxq)
>               return;
>       }
>  
> +#ifdef DEV_NETMAP
> +     if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS)
> +             return;
> +#endif /* DEV_NETMAP */
> +
>       VTNET_RXQ_LOCK(rxq);
>  
>  again:
> @@ -1971,13 +1984,21 @@ vtnet_txq_free_mbufs(struct vtnet_txq *txq)
>       struct virtqueue *vq;
>       struct vtnet_tx_header *txhdr;
>       int last;
> +#ifdef DEV_NETMAP
> +     int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX,
> +                                             txq->vtntx_id);
> +#else  /* !DEV_NETMAP */
> +     int netmap_bufs = 0;
> +#endif /* !DEV_NETMAP */
>  
>       vq = txq->vtntx_vq;
>       last = 0;
>  
>       while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
> -             m_freem(txhdr->vth_mbuf);
> -             uma_zfree(vtnet_tx_header_zone, txhdr);
> +             if (!netmap_bufs) {
> +                     m_freem(txhdr->vth_mbuf);
> +                     uma_zfree(vtnet_tx_header_zone, txhdr);
> +             }
>       }
>  
>       KASSERT(virtqueue_empty(vq),
> @@ -2465,13 +2486,6 @@ vtnet_txq_eof(struct vtnet_txq *txq)
>       deq = 0;
>       VTNET_TXQ_LOCK_ASSERT(txq);
>  
> -#ifdef DEV_NETMAP
> -     if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
> -             virtqueue_disable_intr(vq); // XXX luigi
> -             return 0; // XXX or 1 ?
> -     }
> -#endif /* DEV_NETMAP */
> -
>       while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
>               m = txhdr->vth_mbuf;
>               deq++;
> @@ -2513,6 +2527,11 @@ vtnet_tx_vq_intr(void *xtxq)
>               return;
>       }
>  
> +#ifdef DEV_NETMAP
> +     if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
> +             return;
> +#endif /* DEV_NETMAP */
> +
>       VTNET_TXQ_LOCK(txq);
>  
>       if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
> @@ -2769,11 +2788,6 @@ vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
>       struct vtnet_txq *txq;
>       int i;
>  
> -#ifdef DEV_NETMAP
> -     if (nm_native_on(NA(sc->vtnet_ifp)))
> -             return;
> -#endif /* DEV_NETMAP */
> -
>       for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
>               rxq = &sc->vtnet_rxqs[i];
>               vtnet_rxq_free_mbufs(rxq);
> @@ -2938,11 +2952,6 @@ vtnet_init_rx_queues(struct vtnet_softc *sc)
>           ("%s: too many rx mbufs %d for %d segments", __func__,
>           sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
>  
> -#ifdef DEV_NETMAP
> -     if (vtnet_netmap_init_rx_buffers(sc))
> -             return 0;
> -#endif /* DEV_NETMAP */
> -
>       for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
>               rxq = &sc->vtnet_rxqs[i];
>  
> @@ -3092,13 +3101,6 @@ vtnet_init(void *xsc)
>       struct vtnet_softc *sc;
>  
>       sc = xsc;
> -
> -#ifdef DEV_NETMAP
> -     if (!NA(sc->vtnet_ifp)) {
> -             D("try to attach again");
> -             vtnet_netmap_attach(sc);
> -     }
> -#endif /* DEV_NETMAP */
>  
>       VTNET_CORE_LOCK(sc);
>       vtnet_init_locked(sc);
> 
> Modified: head/sys/dev/virtio/network/if_vtnetvar.h
> ==============================================================================
> --- head/sys/dev/virtio/network/if_vtnetvar.h Wed Nov 14 15:23:39 2018        
> (r340435)
> +++ head/sys/dev/virtio/network/if_vtnetvar.h Wed Nov 14 15:39:48 2018        
> (r340436)
> @@ -79,6 +79,9 @@ struct vtnet_rxq {
>       struct vtnet_rxq_stats   vtnrx_stats;
>       struct taskqueue        *vtnrx_tq;
>       struct task              vtnrx_intrtask;
> +#ifdef DEV_NETMAP
> +     struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr;
> +#endif  /* DEV_NETMAP */
>       char                     vtnrx_name[16];
>  } __aligned(CACHE_LINE_SIZE);
>  
> @@ -114,6 +117,9 @@ struct vtnet_txq {
>  #ifndef VTNET_LEGACY_TX
>       struct task              vtntx_defrtask;
>  #endif
> +#ifdef DEV_NETMAP
> +     struct virtio_net_hdr_mrg_rxbuf vtntx_shrhdr;
> +#endif  /* DEV_NETMAP */
>       char                     vtntx_name[16];
>  } __aligned(CACHE_LINE_SIZE);
>  
> 
> 

-- 
Rod Grimes                                                 rgri...@freebsd.org
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to