If the vhost-user device is in busy-polling mode, the cachelines of
avali ring are raced by the driver process and the vhost-user process.
Because that the idx will be updated everytime, when the new ring items
are updated. So one cache line will be read too times, the two processes
will race the cache line. So I change the way the idx is updated, we
only update the idx before notifying the device.

test command:
    This is the command, that testpmd runs with virtio-net AF_XDP.

    ./build/app/dpdk-testpmd -l 1-2 --no-pci --main-lcore=2 \
            --vdev net_af_xdp0,iface=ens5,queue_count=1,busy_budget=0 \
            --log-level=pmd.net.af_xdp:8 \
            -- -i -a --nb-cores=1 --rxq=1 --txq=1 --forward-mode=macswap

without this commit:
    testpmd> show port stats all

      ######################## NIC statistics for port 0  
########################
      RX-packets: 3615824336 RX-missed: 0          RX-bytes:  202486162816
      RX-errors: 0
      RX-nombuf:  0
      TX-packets: 3615795592 TX-errors: 20738      TX-bytes:  202484553152

      Throughput (since last show)
      Rx-pps:      3790446          Rx-bps:   1698120056
      Tx-pps:      3790446          Tx-bps:   1698120056
      
############################################################################

with this commit:
    testpmd> show port stats all

      ######################## NIC statistics for port 0  
########################
      RX-packets: 68152727   RX-missed: 0          RX-bytes:  3816552712
      RX-errors: 0
      RX-nombuf:  0
      TX-packets: 68114967   TX-errors: 33216      TX-bytes:  3814438152

      Throughput (since last show)
      Rx-pps:      6333196          Rx-bps:   2837272088
      Tx-pps:      6333227          Tx-bps:   2837285936
      
############################################################################

perf c2c:

    On the vhost-user side, the perf c2c show 34.25% Hitm of the first cache
    line of the avail structure without this commit. The hitm reduces to
    1.57% when this commit is included.

dpdk:

    I read the code of the dpdk, there is the similar code.

    virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t 
nb_pkts)
    {
        [...]

        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {

                [...]

                /* Enqueue Packet buffers */
                virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect,
                        can_push, 0);
        }

        [...]

        if (likely(nb_tx)) {
    -->         vq_update_avail_idx(vq);

                if (unlikely(virtqueue_kick_prepare(vq))) {
                        virtqueue_notify(vq);
                        PMD_TX_LOG(DEBUG, "Notified backend after xmit");
                }
        }
    }

End:

    Is all the _prepared() is called before _notify()?
    I checked, all the _notify() is called after the _prepare().

Signed-off-by: Xuan Zhuo <xuanz...@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 51d8f3299c10..215a38065d22 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -687,12 +687,7 @@ static inline int virtqueue_add_split(struct virtqueue 
*_vq,
        avail = vq->split.avail_idx_shadow & (vq->split.vring.num - 1);
        vq->split.vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
 
-       /* Descriptors and available array need to be set before we expose the
-        * new available array entries. */
-       virtio_wmb(vq->weak_barriers);
        vq->split.avail_idx_shadow++;
-       vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
-                                               vq->split.avail_idx_shadow);
        vq->num_added++;
 
        pr_debug("Added buffer head %i to %p\n", head, vq);
@@ -738,6 +733,14 @@ static bool virtqueue_kick_prepare_split(struct virtqueue 
*_vq)
        bool needs_kick;
 
        START_USE(vq);
+
+       /* Descriptors and available array need to be set before we expose the
+        * new available array entries.
+        */
+       virtio_wmb(vq->weak_barriers);
+       vq->split.vring.avail->idx = cpu_to_virtio16(_vq->vdev,
+                                               vq->split.avail_idx_shadow);
+
        /* We need to expose available array entries before checking avail
         * event. */
        virtio_mb(vq->weak_barriers);
@@ -2355,6 +2358,8 @@ EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
  * virtqueue_notify - second half of split virtqueue_kick call.
  * @_vq: the struct virtqueue
  *
+ * The caller MUST call virtqueue_kick_prepare() firstly.
+ *
  * This does not need to be serialized.
  *
  * Returns false if host notify failed or queue is broken, otherwise true.
-- 
2.32.0.3.g01195cf9f

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to