On Thu, Oct 12, 2017 at 05:43:29AM +0000, Wu, Jingjing wrote: > Hi, Shjith > > Could you help to review and verify if this fix can meet your requirement? > > Thanks > Jingjing > > > -----Original Message----- > > From: Wu, Jingjing > > Sent: Tuesday, October 10, 2017 6:09 AM > > To: Yigit, Ferruh <ferruh.yi...@intel.com>; Tan, Jianfeng > > <jianfeng....@intel.com>; shijith.thot...@caviumnetworks.com; > > greg...@weka.io; Xing, Beilei <beilei.x...@intel.com> > > Cc: dev@dpdk.org; Wu, Jingjing <jingjing...@intel.com>; sta...@dpdk.org > > Subject: [PATCH v2 2/2] igb_uio: fix interrupt enablement after FLR in VM > > > > If pass-through a VF by vfio-pci to a Qemu VM, after FLR in VM, the > > interrupt > > setting is not recoverd correctly to host as below: > > in VM guest: > > Capabilities: [70] MSI-X: Enable+ Count=5 Masked- in Host: > > Capabilities: [70] MSI-X: Enable+ Count=5 Masked- > > > > That was because in pci_reset_function, it first reads the PCI configure > > and set > > FLR reset, and then writes PCI configure as restoration. But not all the > > writing > > are successful to Host. > > Because vfio-pci driver doesn't allow directly write PCI MSI-X Cap. > > > > To fix this issue, we need to move the interrupt enablement from igb_uio > > probe > > to open device file. While it is also the similar as the behaviour in > > vfio_pci > > kernel module code. > > > > Fixes: b58eedfc7dd5 ("igb_uio: issue FLR during open and release of device > > file") > > > > Cc: sta...@dpdk.org > > > > Signed-off-by: Jingjing Wu <jingjing...@intel.com> > > Signed-off-by: Jianfeng Tan <jianfeng....@intel.com>
Tested-by: Shijith Thotton <shijith.thot...@caviumnetworks.com> > > --- > > v2 change: > > - fix typo > > - remove duplicated debug info > > > > lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 221 > > ++++++++++++++++-------------- > > 1 file changed, 119 insertions(+), 102 deletions(-) > > > > diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > > b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > > index c8dd5f4..d943795 100644 > > --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > > +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c > > @@ -49,7 +49,6 @@ struct rte_uio_pci_dev { > > > > static char *intr_mode; > > static enum rte_intr_mode igbuio_intr_mode_preferred = > > RTE_INTR_MODE_MSIX; > > - > > /* sriov sysfs */ > > static ssize_t > > show_max_vfs(struct device *dev, struct device_attribute *attr, @@ -144,8 > > +143,10 @@ igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) > > * If yes, disable it here and will be enable later. > > */ > > static irqreturn_t > > -igbuio_pci_irqhandler(int irq, struct uio_info *info) > > +igbuio_pci_irqhandler(int irq, void *dev_id) > > { > > + struct uio_device *idev = (struct uio_device *)dev_id; > > + struct uio_info *info = idev->info; > > struct rte_uio_pci_dev *udev = info->priv; > > > > /* Legacy mode need to mask in hardware */ @@ -153,10 +154,115 > > @@ igbuio_pci_irqhandler(int irq, struct uio_info *info) > > !pci_check_and_mask_intx(udev->pdev)) > > return IRQ_NONE; > > > > + uio_event_notify(info); > > + > > /* Message signal mode, no share IRQ and automasked */ > > return IRQ_HANDLED; > > } > > > > +static int > > +igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) { > > + int err = 0; > > +#ifndef HAVE_ALLOC_IRQ_VECTORS > > + struct msix_entry msix_entry; > > +#endif > > + > > + switch (igbuio_intr_mode_preferred) { > > + case RTE_INTR_MODE_MSIX: > > + /* Only 1 msi-x vector needed */ > > +#ifndef HAVE_ALLOC_IRQ_VECTORS > > + msix_entry.entry = 0; > > + if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > > + dev_dbg(&udev->pdev->dev, "using MSI-X"); > > + udev->info.irq_flags = IRQF_NO_THREAD; > > + udev->info.irq = msix_entry.vector; > > + udev->mode = RTE_INTR_MODE_MSIX; > > + break; > > + } > > +#else > > + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) > > { > > + dev_dbg(&udev->pdev->dev, "using MSI-X"); > > + udev->info.irq_flags = IRQF_NO_THREAD; > > + udev->info.irq = pci_irq_vector(udev->pdev, 0); > > + udev->mode = RTE_INTR_MODE_MSIX; > > + break; > > + } > > +#endif > > + > > + /* fall back to MSI */ > > + case RTE_INTR_MODE_MSI: > > +#ifndef HAVE_ALLOC_IRQ_VECTORS > > + if (pci_enable_msi(udev->pdev) == 0) { > > + dev_dbg(&udev->pdev->dev, "using MSI"); > > + udev->info.irq_flags = IRQF_NO_THREAD; > > + udev->info.irq = udev->pdev->irq; > > + udev->mode = RTE_INTR_MODE_MSI; > > + break; > > + } > > +#else > > + if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) > > { > > + dev_dbg(&udev->pdev->dev, "using MSI"); > > + udev->info.irq_flags = IRQF_NO_THREAD; > > + udev->info.irq = pci_irq_vector(udev->pdev, 0); > > + udev->mode = RTE_INTR_MODE_MSI; > > + break; > > + } > > +#endif > > + /* fall back to INTX */ > > + case RTE_INTR_MODE_LEGACY: > > + if (pci_intx_mask_supported(udev->pdev)) { > > + dev_dbg(&udev->pdev->dev, "using INTX"); > > + udev->info.irq_flags = IRQF_SHARED | > > IRQF_NO_THREAD; > > + udev->info.irq = udev->pdev->irq; > > + udev->mode = RTE_INTR_MODE_LEGACY; > > + break; > > + } > > + dev_notice(&udev->pdev->dev, "PCI INTX mask not > > supported\n"); > > + /* fall back to no IRQ */ > > + case RTE_INTR_MODE_NONE: > > + udev->mode = RTE_INTR_MODE_NONE; > > + udev->info.irq = UIO_IRQ_NONE; > > + break; > > + > > + default: > > + dev_err(&udev->pdev->dev, "invalid IRQ mode %u", > > + igbuio_intr_mode_preferred); > > + udev->info.irq = UIO_IRQ_NONE; > > + err = -EINVAL; > > + } > > + > > + if (udev->info.irq != UIO_IRQ_NONE) > > + err = request_irq(udev->info.irq, igbuio_pci_irqhandler, > > + udev->info.irq_flags, udev->info.name, > > + udev->info.uio_dev); > > + dev_info(&udev->pdev->dev, "uio device registered with irq %lx\n", > > + udev->info.irq); > > + > > + return err; > > +} > > + > > +static void > > +igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) { > > + if (udev->info.irq) { > > + free_irq(udev->info.irq, udev->info.uio_dev); > > + udev->info.irq = 0; > > + } > > + > > +#ifndef HAVE_ALLOC_IRQ_VECTORS > > + if (udev->mode == RTE_INTR_MODE_MSIX) > > + pci_disable_msix(udev->pdev); > > + if (udev->mode == RTE_INTR_MODE_MSI) > > + pci_disable_msi(udev->pdev); > > +#else > > + if (udev->mode == RTE_INTR_MODE_MSIX || > > + udev->mode == RTE_INTR_MODE_MSI) > > + pci_free_irq_vectors(udev->pdev); > > +#endif > > +} > > + > > + > > /** > > * This gets called while opening uio device file. > > */ > > @@ -165,12 +271,19 @@ igbuio_pci_open(struct uio_info *info, struct inode > > *inode) { > > struct rte_uio_pci_dev *udev = info->priv; > > struct pci_dev *dev = udev->pdev; > > + int err; > > > > pci_reset_function(dev); > > > > /* set bus master, which was cleared by the reset function */ > > pci_set_master(dev); > > > > + /* enable interrupts */ > > + err = igbuio_pci_enable_interrupts(udev); > > + if (err) { > > + dev_err(&dev->dev, "Enable interrupt fails\n"); > > + return err; > > + } > > return 0; > > } > > > > @@ -180,6 +293,9 @@ igbuio_pci_release(struct uio_info *info, struct inode > > *inode) > > struct rte_uio_pci_dev *udev = info->priv; > > struct pci_dev *dev = udev->pdev; > > > > + /* disable interrupts */ > > + igbuio_pci_disable_interrupts(udev); > > + > > /* stop the device from further DMA */ > > pci_clear_master(dev); > > > > @@ -250,94 +366,6 @@ igbuio_pci_release_iomem(struct uio_info *info) } > > > > static int > > -igbuio_pci_enable_interrupts(struct rte_uio_pci_dev *udev) -{ > > - int err = 0; > > -#ifndef HAVE_ALLOC_IRQ_VECTORS > > - struct msix_entry msix_entry; > > -#endif > > - > > - switch (igbuio_intr_mode_preferred) { > > - case RTE_INTR_MODE_MSIX: > > - /* Only 1 msi-x vector needed */ > > -#ifndef HAVE_ALLOC_IRQ_VECTORS > > - msix_entry.entry = 0; > > - if (pci_enable_msix(udev->pdev, &msix_entry, 1) == 0) { > > - dev_dbg(&udev->pdev->dev, "using MSI-X"); > > - udev->info.irq_flags = IRQF_NO_THREAD; > > - udev->info.irq = msix_entry.vector; > > - udev->mode = RTE_INTR_MODE_MSIX; > > - break; > > - } > > -#else > > - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSIX) == 1) > > { > > - dev_dbg(&udev->pdev->dev, "using MSI-X"); > > - udev->info.irq_flags = IRQF_NO_THREAD; > > - udev->info.irq = pci_irq_vector(udev->pdev, 0); > > - udev->mode = RTE_INTR_MODE_MSIX; > > - break; > > - } > > -#endif > > - /* fall back to MSI */ > > - case RTE_INTR_MODE_MSI: > > -#ifndef HAVE_ALLOC_IRQ_VECTORS > > - if (pci_enable_msi(udev->pdev) == 0) { > > - dev_dbg(&udev->pdev->dev, "using MSI"); > > - udev->info.irq_flags = IRQF_NO_THREAD; > > - udev->info.irq = udev->pdev->irq; > > - udev->mode = RTE_INTR_MODE_MSI; > > - break; > > - } > > -#else > > - if (pci_alloc_irq_vectors(udev->pdev, 1, 1, PCI_IRQ_MSI) == 1) > > { > > - dev_dbg(&udev->pdev->dev, "using MSI"); > > - udev->info.irq_flags = IRQF_NO_THREAD; > > - udev->info.irq = pci_irq_vector(udev->pdev, 0); > > - udev->mode = RTE_INTR_MODE_MSI; > > - break; > > - } > > -#endif > > - /* fall back to INTX */ > > - case RTE_INTR_MODE_LEGACY: > > - if (pci_intx_mask_supported(udev->pdev)) { > > - dev_dbg(&udev->pdev->dev, "using INTX"); > > - udev->info.irq_flags = IRQF_SHARED | > > IRQF_NO_THREAD; > > - udev->info.irq = udev->pdev->irq; > > - udev->mode = RTE_INTR_MODE_LEGACY; > > - break; > > - } > > - dev_notice(&udev->pdev->dev, "PCI INTX mask not > > supported\n"); > > - /* fall back to no IRQ */ > > - case RTE_INTR_MODE_NONE: > > - udev->mode = RTE_INTR_MODE_NONE; > > - udev->info.irq = UIO_IRQ_NONE; > > - break; > > - > > - default: > > - dev_err(&udev->pdev->dev, "invalid IRQ mode %u", > > - igbuio_intr_mode_preferred); > > - err = -EINVAL; > > - } > > - > > - return err; > > -} > > - > > -static void > > -igbuio_pci_disable_interrupts(struct rte_uio_pci_dev *udev) -{ -#ifndef > > HAVE_ALLOC_IRQ_VECTORS > > - if (udev->mode == RTE_INTR_MODE_MSIX) > > - pci_disable_msix(udev->pdev); > > - if (udev->mode == RTE_INTR_MODE_MSI) > > - pci_disable_msi(udev->pdev); > > -#else > > - if (udev->mode == RTE_INTR_MODE_MSIX || > > - udev->mode == RTE_INTR_MODE_MSI) > > - pci_free_irq_vectors(udev->pdev); > > -#endif > > -} > > - > > -static int > > igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) { > > int i, iom, iop, ret; > > @@ -427,20 +455,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct > > pci_device_id *id) > > /* fill uio infos */ > > udev->info.name = "igb_uio"; > > udev->info.version = "0.1"; > > - udev->info.handler = igbuio_pci_irqhandler; > > udev->info.irqcontrol = igbuio_pci_irqcontrol; > > udev->info.open = igbuio_pci_open; > > udev->info.release = igbuio_pci_release; > > udev->info.priv = udev; > > udev->pdev = dev; > > > > - err = igbuio_pci_enable_interrupts(udev); > > - if (err != 0) > > - goto fail_release_iomem; > > - > > err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); > > if (err != 0) > > - goto fail_disable_interrupts; > > + goto fail_release_iomem; > > > > /* register uio driver */ > > err = uio_register_device(&dev->dev, &udev->info); @@ -449,9 +472,6 > > @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) > > > > pci_set_drvdata(dev, udev); > > > > - dev_info(&dev->dev, "uio device registered with irq %lx\n", > > - udev->info.irq); > > - > > /* > > * Doing a harmless dma mapping for attaching the device to > > * the iommu identity mapping if kernel boots with iommu=pt. > > @@ -477,8 +497,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct > > pci_device_id *id) > > > > fail_remove_group: > > sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); > > -fail_disable_interrupts: > > - igbuio_pci_disable_interrupts(udev); > > fail_release_iomem: > > igbuio_pci_release_iomem(&udev->info); > > pci_disable_device(dev); > > @@ -495,7 +513,6 @@ igbuio_pci_remove(struct pci_dev *dev) > > > > sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); > > uio_unregister_device(&udev->info); > > - igbuio_pci_disable_interrupts(udev); > > igbuio_pci_release_iomem(&udev->info); > > pci_disable_device(dev); > > pci_set_drvdata(dev, NULL); > > -- > > 2.7.4 Hi Jingjing, This patch perfectly meets requirements as both resets are retained (open and release). Tested it with LiquidIO NIC and it works fine. I can see MSI-X re-enabled on each run with new patch. Gregory, Harish, Please verify the patch on your setup if possible. Thanks, Shijith