Hi Ilya,

> Issue can be reproduced by stopping DPDK application (testpmd) inside guest
> while heavy traffic flows to this VM.
>

I tried both quitting testpmd without stopping the forwarding task and simply
killing testpmd, but neither crashed the vswitch in the host.

What versions of DPDK are you using in the guest and host?

Are you using dpdkvhostuser or dpdkvhostuserclient type ports?

Thanks,
Billy. 

> -----Original Message-----
> From: [email protected] [mailto:ovs-dev-
> [email protected]] On Behalf Of Ilya Maximets
> Sent: Friday, October 6, 2017 11:50 AM
> To: [email protected]
> Cc: Ilya Maximets <[email protected]>; Maxime Coquelin
> <[email protected]>; Heetae Ahn <[email protected]>
> Subject: [ovs-dev] [PATCH] netdev-dpdk: Fix calling vhost API with negative 
> vid.
> 
> Currently, rx and tx functions for vhost interfaces always obtain 'vid' 
> twice. First
> time inside 'is_vhost_running' for checking the value and the second time in
> enqueue/dequeue function calls to send/receive packets. But second time we're
> not checking the returned value. If vhost device will be destroyed between
> checking and enqueue/dequeue, DPDK API will be called with '-1' instead of 
> valid
> 'vid'. DPDK API does not validate the 'vid'.
> This leads to getting random memory value as a pointer to internal device
> structure inside DPDK. Access by this pointer leads to segmentation fault. For
> example:
> 
>   |00503|dpdk|INFO|VHOST_CONFIG: read message
> VHOST_USER_GET_VRING_BASE
>   [New Thread 0x7fb6754910 (LWP 21246)]
> 
>   Program received signal SIGSEGV, Segmentation fault.
>   rte_vhost_enqueue_burst at lib/librte_vhost/virtio_net.c:630
>   630             if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF))
>   (gdb) bt full
>   #0  rte_vhost_enqueue_burst at lib/librte_vhost/virtio_net.c:630
>           dev = 0xffffffff
>   #1  __netdev_dpdk_vhost_send at lib/netdev-dpdk.c:1803
>           tx_pkts = <optimized out>
>           cur_pkts = 0x7f340084f0
>           total_pkts = 32
>           dropped = 0
>           i = <optimized out>
>           retries = 0
>   ...
>   (gdb) p *((struct netdev_dpdk *) netdev)
>   $8 = { ... ,
>         flags = (NETDEV_UP | NETDEV_PROMISC), ... ,
>         vid = {v = -1},
>         vhost_reconfigured = false, ... }
> 
> Issue can be reproduced by stopping DPDK application (testpmd) inside guest
> while heavy traffic flows to this VM.
> 
> Fix that by obtaining and checking the 'vid' only once.
> 
> CC: Ciara Loftus <[email protected]>
> Fixes: 0a0f39df1d5a ("netdev-dpdk: Add support for DPDK 16.07")
> Signed-off-by: Ilya Maximets <[email protected]>
> ---
>  lib/netdev-dpdk.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index c60f46f..bf30bb0 
> 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1637,18 +1637,18 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq
> *rxq,
>                             struct dp_packet_batch *batch)  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> -    int qid = rxq->queue_id;
>      struct ingress_policer *policer = netdev_dpdk_get_ingress_policer(dev);
>      uint16_t nb_rx = 0;
>      uint16_t dropped = 0;
> +    int qid = rxq->queue_id;
> +    int vid = netdev_dpdk_get_vid(dev);
> 
> -    if (OVS_UNLIKELY(!is_vhost_running(dev)
> +    if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured
>                       || !(dev->flags & NETDEV_UP))) {
>          return EAGAIN;
>      }
> 
> -    nb_rx = rte_vhost_dequeue_burst(netdev_dpdk_get_vid(dev),
> -                                    qid * VIRTIO_QNUM + VIRTIO_TXQ,
> +    nb_rx = rte_vhost_dequeue_burst(vid, qid * VIRTIO_QNUM +
> + VIRTIO_TXQ,
>                                      dev->dpdk_mp->mp,
>                                      (struct rte_mbuf **) batch->packets,
>                                      NETDEV_MAX_BURST); @@ -1783,10 +1783,11 
> @@
> __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
>      unsigned int total_pkts = cnt;
>      unsigned int dropped = 0;
>      int i, retries = 0;
> +    int vid = netdev_dpdk_get_vid(dev);
> 
>      qid = dev->tx_q[qid % netdev->n_txq].map;
> 
> -    if (OVS_UNLIKELY(!is_vhost_running(dev) || qid < 0
> +    if (OVS_UNLIKELY(vid < 0 || !dev->vhost_reconfigured || qid < 0
>                       || !(dev->flags & NETDEV_UP))) {
>          rte_spinlock_lock(&dev->stats_lock);
>          dev->stats.tx_dropped+= cnt;
> @@ -1805,8 +1806,7 @@ __netdev_dpdk_vhost_send(struct netdev *netdev,
> int qid,
>          int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
>          unsigned int tx_pkts;
> 
> -        tx_pkts = rte_vhost_enqueue_burst(netdev_dpdk_get_vid(dev),
> -                                          vhost_qid, cur_pkts, cnt);
> +        tx_pkts = rte_vhost_enqueue_burst(vid, vhost_qid, cur_pkts,
> + cnt);
>          if (OVS_LIKELY(tx_pkts)) {
>              /* Packets have been sent.*/
>              cnt -= tx_pkts;
> --
> 2.7.4
> 
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to