On Thu, Oct 12, 2017 at 05:43:29AM +0000, Wu, Jingjing wrote:
> Hi, Shjith
> 
> Could you help to review and verify if this fix can meet your requirement?
> 
> Thanks
> Jingjing
> 
> > -----Original Message-----
> > From: Wu, Jingjing
> > Sent: Tuesday, October 10, 2017 6:09 AM
> > To: Yigit, Ferruh <ferruh.yi...@intel.com>; Tan, Jianfeng
> > <jianfeng....@intel.com>; shijith.thot...@caviumnetworks.com;
> > greg...@weka.io; Xing, Beilei <beilei.x...@intel.com>
> > Cc: dev@dpdk.org; Wu, Jingjing <jingjing...@intel.com>; sta...@dpdk.org
> > Subject: [PATCH v2 2/2] igb_uio: fix interrupt enablement after FLR in VM
> > 
> > If pass-through a VF by vfio-pci to a Qemu VM, after FLR in VM, the 
> > interrupt
> > setting is not recoverd correctly to host as below:
> >  in VM guest:
> >         Capabilities: [70] MSI-X: Enable+ Count=5 Masked-  in Host:
> >         Capabilities: [70] MSI-X: Enable+ Count=5 Masked-
> > 
> > That was because in pci_reset_function, it first reads the PCI configure 
> > and set
> > FLR reset, and then writes PCI configure as restoration. But not all the 
> > writing
> > are successful to Host.
> > Because vfio-pci driver doesn't allow directly write PCI MSI-X Cap.
> > 
> > To fix this issue, we need to move the interrupt enablement from igb_uio 
> > probe
> > to open device file. While it is also the similar as the behaviour in 
> > vfio_pci
> > kernel module code.
> > 
> > Fixes: b58eedfc7dd5 ("igb_uio: issue FLR during open and release of device 
> > file")
> > 
> > Cc: sta...@dpdk.org
> > 
> > Signed-off-by: Jingjing Wu <jingjing...@intel.com>
> > Signed-off-by: Jianfeng Tan <jianfeng....@intel.com>

Tested-by: Shijith Thotton <shijith.thot...@caviumnetworks.com>

> > ---
> > v2 change:
> >  - fix typo
> >  - remove duplicated debug info
> > 
> >  lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 221 
> > ++++++++++++++++--------------
> >  1 file changed, 119 insertions(+), 102 deletions(-)
> > 
> > diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > index c8dd5f4..d943795 100644
> > --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > @@ -49,7 +49,6 @@ struct rte_uio_pci_dev {
> > 
> >  static char *intr_mode;
> >  static enum rte_intr_mode igbuio_intr_mode_preferred =
> > RTE_INTR_MODE_MSIX;
> > -
> >  /* sriov sysfs */
> >  static ssize_t
> >  show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -144,8
> > +143,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state)
> >   * If yes, disable it here and will be enable later.
> >   */
> >  static irqreturn_t
> > -igbuio_pci_irqhandler(int irq, struct uio_info *info)
> > +igbuio_pci_irqhandler(int irq, void *dev_id)
> >  {
> > +   struct uio_device *idev = (struct uio_device *)dev_id;
> > +   struct uio_info *info = idev->info;
> >     struct rte_uio_pci_dev *udev = info->priv;
> > 
> >     /* Legacy mode need to mask in hardware */ @@ -153,10 +154,115
> > @@ igbuio_pci_irqhandler(int irq, struct uio_info *info)
> >         !pci_check_and_mask_intx(udev->pdev))
> >             return IRQ_NONE;
> > 
> > +   uio_event_notify(info);
> > +
> >     /* Message signal mode, no share IRQ and automasked */
> >     return IRQ_HANDLED;
> >  }
> > 
> > +static int
> > +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) {
> > +   int err = 0;
> > +#ifndef HAVE_ALLOC_IRQ_VECTORS
> > +   struct msix_entry msix_entry;
> > +#endif
> > +
> > +   switch (igbuio_intr_mode_preferred) {
> > +   case RTE_INTR_MODE_MSIX:
> > +           /* Only 1 msi-x vector needed */
> > +#ifndef HAVE_ALLOC_IRQ_VECTORS
> > +           msix_entry.entry = 0;
> > +           if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
> > +                   dev_dbg(&udev->pdev->dev, "using MSI-X");
> > +                   udev->info.irq_flags = IRQF_NO_THREAD;
> > +                   udev->info.irq = msix_entry.vector;
> > +                   udev->mode = RTE_INTR_MODE_MSIX;
> > +                   break;
> > +           }
> > +#else
> > +           if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1)
> > {
> > +                   dev_dbg(&udev->pdev->dev, "using MSI-X");
> > +                   udev->info.irq_flags = IRQF_NO_THREAD;
> > +                   udev->info.irq = pci_irq_vector(udev->pdev, 0);
> > +                   udev->mode = RTE_INTR_MODE_MSIX;
> > +                   break;
> > +           }
> > +#endif
> > +
> > +   /* fall back to MSI */
> > +   case RTE_INTR_MODE_MSI:
> > +#ifndef HAVE_ALLOC_IRQ_VECTORS
> > +           if (pci_enable_msi(udev->pdev) == 0) {
> > +                   dev_dbg(&udev->pdev->dev, "using MSI");
> > +                   udev->info.irq_flags = IRQF_NO_THREAD;
> > +                   udev->info.irq = udev->pdev->irq;
> > +                   udev->mode = RTE_INTR_MODE_MSI;
> > +                   break;
> > +           }
> > +#else
> > +           if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1)
> > {
> > +                   dev_dbg(&udev->pdev->dev, "using MSI");
> > +                   udev->info.irq_flags = IRQF_NO_THREAD;
> > +                   udev->info.irq = pci_irq_vector(udev->pdev, 0);
> > +                   udev->mode = RTE_INTR_MODE_MSI;
> > +                   break;
> > +           }
> > +#endif
> > +   /* fall back to INTX */
> > +   case RTE_INTR_MODE_LEGACY:
> > +           if (pci_intx_mask_supported(udev->pdev)) {
> > +                   dev_dbg(&udev->pdev->dev, "using INTX");
> > +                   udev->info.irq_flags = IRQF_SHARED |
> > IRQF_NO_THREAD;
> > +                   udev->info.irq = udev->pdev->irq;
> > +                   udev->mode = RTE_INTR_MODE_LEGACY;
> > +                   break;
> > +           }
> > +           dev_notice(&udev->pdev->dev, "PCI INTX mask not
> > supported\n");
> > +   /* fall back to no IRQ */
> > +   case RTE_INTR_MODE_NONE:
> > +           udev->mode = RTE_INTR_MODE_NONE;
> > +           udev->info.irq = UIO_IRQ_NONE;
> > +           break;
> > +
> > +   default:
> > +           dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
> > +                   igbuio_intr_mode_preferred);
> > +           udev->info.irq = UIO_IRQ_NONE;
> > +           err = -EINVAL;
> > +   }
> > +
> > +   if (udev->info.irq != UIO_IRQ_NONE)
> > +           err = request_irq(udev->info.irq, igbuio_pci_irqhandler,
> > +                             udev->info.irq_flags, udev->info.name,
> > +                             udev->info.uio_dev);
> > +   dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n",
> > +            udev->info.irq);
> > +
> > +   return err;
> > +}
> > +
> > +static void
> > +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) {
> > +   if (udev->info.irq) {
> > +           free_irq(udev->info.irq, udev->info.uio_dev);
> > +           udev->info.irq = 0;
> > +   }
> > +
> > +#ifndef HAVE_ALLOC_IRQ_VECTORS
> > +   if (udev->mode == RTE_INTR_MODE_MSIX)
> > +           pci_disable_msix(udev->pdev);
> > +   if (udev->mode == RTE_INTR_MODE_MSI)
> > +           pci_disable_msi(udev->pdev);
> > +#else
> > +   if (udev->mode == RTE_INTR_MODE_MSIX ||
> > +       udev->mode == RTE_INTR_MODE_MSI)
> > +           pci_free_irq_vectors(udev->pdev);
> > +#endif
> > +}
> > +
> > +
> >  /**
> >   * This gets called while opening uio device file.
> >   */
> > @@ -165,12 +271,19 @@ igbuio_pci_open(struct uio_info *info, struct inode
> > *inode)  {
> >     struct rte_uio_pci_dev *udev = info->priv;
> >     struct pci_dev *dev = udev->pdev;
> > +   int err;
> > 
> >     pci_reset_function(dev);
> > 
> >     /* set bus master, which was cleared by the reset function */
> >     pci_set_master(dev);
> > 
> > +   /* enable interrupts */
> > +   err = igbuio_pci_enable_interrupts(udev);
> > +   if (err) {
> > +           dev_err(&dev->dev, "Enable interrupt fails\n");
> > +           return err;
> > +   }
> >     return 0;
> >  }
> > 
> > @@ -180,6 +293,9 @@ igbuio_pci_release(struct uio_info *info, struct inode
> > *inode)
> >     struct rte_uio_pci_dev *udev = info->priv;
> >     struct pci_dev *dev = udev->pdev;
> > 
> > +   /* disable interrupts */
> > +   igbuio_pci_disable_interrupts(udev);
> > +
> >     /* stop the device from further DMA */
> >     pci_clear_master(dev);
> > 
> > @@ -250,94 +366,6 @@ igbuio_pci_release_iomem(struct uio_info *info)  }
> > 
> >  static int
> > -igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) -{
> > -   int err = 0;
> > -#ifndef HAVE_ALLOC_IRQ_VECTORS
> > -   struct msix_entry msix_entry;
> > -#endif
> > -
> > -   switch (igbuio_intr_mode_preferred) {
> > -   case RTE_INTR_MODE_MSIX:
> > -           /* Only 1 msi-x vector needed */
> > -#ifndef HAVE_ALLOC_IRQ_VECTORS
> > -           msix_entry.entry = 0;
> > -           if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) {
> > -                   dev_dbg(&udev->pdev->dev, "using MSI-X");
> > -                   udev->info.irq_flags = IRQF_NO_THREAD;
> > -                   udev->info.irq = msix_entry.vector;
> > -                   udev->mode = RTE_INTR_MODE_MSIX;
> > -                   break;
> > -           }
> > -#else
> > -           if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1)
> > {
> > -                   dev_dbg(&udev->pdev->dev, "using MSI-X");
> > -                   udev->info.irq_flags = IRQF_NO_THREAD;
> > -                   udev->info.irq = pci_irq_vector(udev->pdev, 0);
> > -                   udev->mode = RTE_INTR_MODE_MSIX;
> > -                   break;
> > -           }
> > -#endif
> > -   /* fall back to MSI */
> > -   case RTE_INTR_MODE_MSI:
> > -#ifndef HAVE_ALLOC_IRQ_VECTORS
> > -           if (pci_enable_msi(udev->pdev) == 0) {
> > -                   dev_dbg(&udev->pdev->dev, "using MSI");
> > -                   udev->info.irq_flags = IRQF_NO_THREAD;
> > -                   udev->info.irq = udev->pdev->irq;
> > -                   udev->mode = RTE_INTR_MODE_MSI;
> > -                   break;
> > -           }
> > -#else
> > -           if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1)
> > {
> > -                   dev_dbg(&udev->pdev->dev, "using MSI");
> > -                   udev->info.irq_flags = IRQF_NO_THREAD;
> > -                   udev->info.irq = pci_irq_vector(udev->pdev, 0);
> > -                   udev->mode = RTE_INTR_MODE_MSI;
> > -                   break;
> > -           }
> > -#endif
> > -   /* fall back to INTX */
> > -   case RTE_INTR_MODE_LEGACY:
> > -           if (pci_intx_mask_supported(udev->pdev)) {
> > -                   dev_dbg(&udev->pdev->dev, "using INTX");
> > -                   udev->info.irq_flags = IRQF_SHARED |
> > IRQF_NO_THREAD;
> > -                   udev->info.irq = udev->pdev->irq;
> > -                   udev->mode = RTE_INTR_MODE_LEGACY;
> > -                   break;
> > -           }
> > -           dev_notice(&udev->pdev->dev, "PCI INTX mask not
> > supported\n");
> > -   /* fall back to no IRQ */
> > -   case RTE_INTR_MODE_NONE:
> > -           udev->mode = RTE_INTR_MODE_NONE;
> > -           udev->info.irq = UIO_IRQ_NONE;
> > -           break;
> > -
> > -   default:
> > -           dev_err(&udev->pdev->dev, "invalid IRQ mode %u",
> > -                   igbuio_intr_mode_preferred);
> > -           err = -EINVAL;
> > -   }
> > -
> > -   return err;
> > -}
> > -
> > -static void
> > -igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) -{ -#ifndef
> > HAVE_ALLOC_IRQ_VECTORS
> > -   if (udev->mode == RTE_INTR_MODE_MSIX)
> > -           pci_disable_msix(udev->pdev);
> > -   if (udev->mode == RTE_INTR_MODE_MSI)
> > -           pci_disable_msi(udev->pdev);
> > -#else
> > -   if (udev->mode == RTE_INTR_MODE_MSIX ||
> > -       udev->mode == RTE_INTR_MODE_MSI)
> > -           pci_free_irq_vectors(udev->pdev);
> > -#endif
> > -}
> > -
> > -static int
> >  igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)  {
> >     int i, iom, iop, ret;
> > @@ -427,20 +455,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct
> > pci_device_id *id)
> >     /* fill uio infos */
> >     udev->info.name = "igb_uio";
> >     udev->info.version = "0.1";
> > -   udev->info.handler = igbuio_pci_irqhandler;
> >     udev->info.irqcontrol = igbuio_pci_irqcontrol;
> >     udev->info.open = igbuio_pci_open;
> >     udev->info.release = igbuio_pci_release;
> >     udev->info.priv = udev;
> >     udev->pdev = dev;
> > 
> > -   err = igbuio_pci_enable_interrupts(udev);
> > -   if (err != 0)
> > -           goto fail_release_iomem;
> > -
> >     err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
> >     if (err != 0)
> > -           goto fail_disable_interrupts;
> > +           goto fail_release_iomem;
> > 
> >     /* register uio driver */
> >     err = uio_register_device(&dev->dev, &udev->info); @@ -449,9 +472,6
> > @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
> > 
> >     pci_set_drvdata(dev, udev);
> > 
> > -   dev_info(&dev->dev, "uio device registered with irq %lx\n",
> > -            udev->info.irq);
> > -
> >     /*
> >      * Doing a harmless dma mapping for attaching the device to
> >      * the iommu identity mapping if kernel boots with iommu=pt.
> > @@ -477,8 +497,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct
> > pci_device_id *id)
> > 
> >  fail_remove_group:
> >     sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
> > -fail_disable_interrupts:
> > -   igbuio_pci_disable_interrupts(udev);
> >  fail_release_iomem:
> >     igbuio_pci_release_iomem(&udev->info);
> >     pci_disable_device(dev);
> > @@ -495,7 +513,6 @@ igbuio_pci_remove(struct pci_dev *dev)
> > 
> >     sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
> >     uio_unregister_device(&udev->info);
> > -   igbuio_pci_disable_interrupts(udev);
> >     igbuio_pci_release_iomem(&udev->info);
> >     pci_disable_device(dev);
> >     pci_set_drvdata(dev, NULL);
> > --
> > 2.7.4

Hi Jingjing,

This patch perfectly meets requirements as both resets are retained
(open and release). Tested it with LiquidIO NIC and it works fine.
I can see MSI-X re-enabled on each run with new patch.

Gregory, Harish,
Please verify the patch on your setup if possible.

Thanks,
Shijith

Reply via email to