On Thu, 2016-09-22 at 10:07 -0600, Alex Williamson wrote:
> We use a BAR restore trick to try to detect when a user has performed
> a device reset, possibly through FLR or other backdoors, to put things
> back into a working state.  This is important for backdoor resets, but
> we can actually just virtualize the "front door" resets provided via
> PCIe and AF FLR.  Set these bits as virtualized + writable, allowing
> the default write to set them in vconfig, then we can simply check the
> bit, perform an FLR of our own, and clear the bit.  We don't actually
> have the granularity in PCI to specify the type of reset we want to
> do, but generally devices don't implement both PCIe and AF FLR and
> we'll favor these over other types of reset, so we should generally
> lineup.  We do test whether the device provides the requested FLR type
> to stay consistent with hardware capabilities though.
> 
> This seems to fix several instance of devices getting into bad states
> with userspace drivers, like dpdk, running inside a VM.
> 
> Signed-off-by: Alex Williamson <alex.william...@redhat.com>
> ---
>  drivers/vfio/pci/vfio_pci_config.c |   82 
> ++++++++++++++++++++++++++++++++++--
>  1 file changed, 77 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vfio/pci/vfio_pci_config.c 
> b/drivers/vfio/pci/vfio_pci_config.c
> index 688691d..3884888 100644
> --- a/drivers/vfio/pci/vfio_pci_config.c
> +++ b/drivers/vfio/pci/vfio_pci_config.c
> @@ -804,6 +804,40 @@ static int __init init_pci_cap_pcix_perm(struct 
> perm_bits *perm)
>       return 0;
>  }
>  
> +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos,
> +                              int count, struct perm_bits *perm,
> +                              int offset, __le32 val)
> +{
> +     __le16 *ctrl = (__le16 *)(vdev->vconfig + pos -
> +                               offset + PCI_EXP_DEVCTL);
> +
> +     count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +     if (count < 0)
> +             return count;
> +
> +     /*
> +      * The FLR bit is virtualized, if set and the device supports PCIe
> +      * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +      * requires it to be always read as zero.  NB, reset_function might
> +      * not use a PCIe FLR, we don't have that level of granularity.
> +      */
> +     if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) {
> +             u32 cap;
> +             int ret;
> +
> +             *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR);
> +
> +             ret = pci_user_read_config_dword(vdev->pdev,
> +                                              pos - offset + PCI_EXP_DEVCAP,
> +                                              &cap);
> +
> +             if (!ret && (cap & PCI_EXP_DEVCAP_FLR))
> +                     pci_try_reset_function(vdev->pdev);
> +     }
> +
> +     return count;
> +}
> +
>  /* Permissions for PCI Express capability */
>  static int __init init_pci_cap_exp_perm(struct perm_bits *perm)
>  {
> @@ -811,26 +845,64 @@ static int __init init_pci_cap_exp_perm(struct 
> perm_bits *perm)
>       if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2))
>               return -ENOMEM;
>  
> +     perm->writefn = vfio_exp_config_write;
> +
>       p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
>  
>       /*
> -      * Allow writes to device control fields (includes FLR!)
> -      * but not to devctl_phantom which could confuse IOMMU
> -      * or to the ARI bit in devctl2 which is set at probe time
> +      * Allow writes to device control fields, except devctl_phantom,
> +      * which could confuse IOMMU, and the ARI bit in devctl2, which
> +      * is set at probe time.  FLR gets virtualized via our writefn.
>        */
> -     p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM);
> +     p_setw(perm, PCI_EXP_DEVCTL,
> +            PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM);
>       p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI);
>       return 0;
>  }
>  
> +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos,
> +                             int count, struct perm_bits *perm,
> +                             int offset, __le32 val)
> +{
> +     u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL;
> +
> +     count = vfio_default_config_write(vdev, pos, count, perm, offset, val);
> +     if (count < 0)
> +             return count;
> +
> +     /*
> +      * The FLR bit is virtualized, if set and the device supports AF
> +      * FLR, issue a reset_function.  Regardless, clear the bit, the spec
> +      * requires it to be always read as zero.  NB, reset_function might
> +      * not use an AF FLR, we don't have that level of granularity.
> +      */
> +     if (*ctrl & PCI_AF_CTRL_FLR) {
> +             u8 cap;
> +             int ret;
> +
> +             *ctrl &= ~PCI_AF_CTRL_FLR;
> +
> +             ret = pci_user_read_config_byte(vdev->pdev,
> +                                             pos - offset + PCI_AF_CAP,
> +                                             &cap);
> +
> +             if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP))
> +                     pci_try_reset_function(vdev->pdev);
> +     }
> +
> +     return count;
> +}
> +
>  /* Permissions for Advanced Function capability */
>  static int __init init_pci_cap_af_perm(struct perm_bits *perm)
>  {
>       if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF]))
>               return -ENOMEM;
>  
> +     perm->writefn = vfio_af_config_write;
> +
>       p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE);
> -     p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR);
> +     p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR);
>       return 0;
>  }
>  
> 

Looks good.

Reviewed-by: Greg Rose <gr...@lightfleet.com>


Reply via email to