On Tue, Jun 14, 2016 at 09:59:00PM +0200, Christoph Hellwig wrote:
> Add a helper to allocate a range of interrupt vectors, which will
> transparently use MSI-X and MSI if available or fallback to legacy
> vectors.  The interrupts are available in a core managed array
> in the pci_dev structure, and can also be released using a similar
> helper.
> 
> The next patch will also add automatic spreading of MSI / MSI-X
> vectors to this function.
> 
> Signed-off-by: Christoph Hellwig <h...@lst.de>
> ---
>  drivers/pci/msi.c   | 110 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pci.h |  18 +++++++++

New APIs should be documented in Documentation/PCI/MSI-HOWTO.txt, I guess.

>  2 files changed, 128 insertions(+)
> 
> diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
> index a080f44..a33adec 100644
> --- a/drivers/pci/msi.c
> +++ b/drivers/pci/msi.c
> @@ -4,6 +4,7 @@
>   *
>   * Copyright (C) 2003-2004 Intel
>   * Copyright (C) Tom Long Nguyen (tom.l.ngu...@intel.com)
> + * Copyright (c) 2016 Christoph Hellwig.
>   */
>  
>  #include <linux/err.h>
> @@ -1120,6 +1121,115 @@ int pci_enable_msix_range(struct pci_dev *dev, struct 
> msix_entry *entries,
>  }
>  EXPORT_SYMBOL(pci_enable_msix_range);
>  
> +static unsigned int pci_nr_irq_vectors(struct pci_dev *pdev)
> +{
> +     int nr_entries;
> +
> +     nr_entries = pci_msix_vec_count(pdev);
> +     if (nr_entries <= 0 && pci_msi_supported(pdev, 1))
> +             nr_entries = pci_msi_vec_count(pdev);
> +     if (nr_entries <= 0)
> +             nr_entries = 1;
> +     return nr_entries;
> +}

This function is strange, because it:
  (a) does not consider the PCI_IRQ_NOMSIX flag;
  (b) only calls pci_msi_supported() for the MSI case;
  (c) calls pci_msi_supported() with just one vector;
  (d) might return a suboptimal number of vectors (the MSI-X count
      used later for MSI, or vice versa).

Overall, I would suggest simply returning the maximum of the MSI-X and
MSI vector counts and letting the rest of the code (i.e. the two range
functions) handle (a)-(d).

> +static int pci_enable_msix_range_wrapper(struct pci_dev *pdev, u32 *irqs,
> +             unsigned int min_vecs, unsigned int max_vecs)
> +{
> +     struct msix_entry *msix_entries;
> +     int vecs, i;
> +
> +     msix_entries = kcalloc(max_vecs, sizeof(struct msix_entry), GFP_KERNEL);
> +     if (!msix_entries)
> +             return -ENOMEM;
> +
> +     for (i = 0; i < max_vecs; i++)
> +             msix_entries[i].entry = i;
> +
> +     vecs = pci_enable_msix_range(pdev, msix_entries, min_vecs, max_vecs);
> +     if (vecs > 0) {

This condition check is unneeded: the loop below does not execute when
vecs <= 0.

> +             for (i = 0; i < vecs; i++)
> +                     irqs[i] = msix_entries[i].vector;
> +     }
> +
> +     kfree(msix_entries);
> +     return vecs;
> +}
> +
> +/**
> + * pci_alloc_irq_vectors - allocate multiple IRQs for a device
> + * @dev:             PCI device to operate on
> + * @min_vecs:                minimum number of vectors required (must be >= 
> 1)
> + * @max_vecs:                maximum (desired) number of vectors
> + * @flags:           flags or quirks for the allocation
> + *
> + * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI
> + * vectors if available, and fall back to a single legacy vector
> + * if neither is available.  Return the number of vectors allocated,
> + * (which might be smaller than @max_vecs) if successful, or a negative
> + * error code on error.  The Linux irq numbers for the allocated
> + * vectors are stored in pdev->irqs.  If less than @min_vecs interrupt
> + * vectors are available for @dev the function will fail with -ENOSPC.
> + */
> +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
> +             unsigned int max_vecs, unsigned int flags)
> +{
> +     unsigned int vecs, i;
> +     u32 *irqs;
> +
> +     max_vecs = min(max_vecs, pci_nr_irq_vectors(dev));

Optionally, you could move this assignment into pci_nr_irq_vectors() and
simply let it handle the number of vectors to request.

> +     irqs = kcalloc(max_vecs, sizeof(u32), GFP_KERNEL);
> +     if (!irqs)
> +             return -ENOMEM;
> +
> +     if (!(flags & PCI_IRQ_NOMSIX)) {
> +             vecs = pci_enable_msix_range_wrapper(dev, irqs, min_vecs,
> +                             max_vecs);
> +             if (vecs > 0)
> +                     goto done;
> +     }
> +
> +     vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
> +     if (vecs > 0) {
> +             for (i = 0; i < vecs; i++)
> +                     irqs[i] = dev->irq + i;
> +             goto done;
> +     }
> +
> +     if (min_vecs > 1)
> +             return -ENOSPC;

irqs is leaked if (min_vecs > 1): this return path skips kfree(irqs).

You can get rid of this check altogether if you reorganize your code,
e.g. like this:

        ...

        vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
        if (vecs < 0)
                goto legacy;

        for (i = 0; i < vecs; i++)
                irqs[i] = dev->irq + i;

done:
        ...


legacy:
        ...

> +
> +     /* use legacy irq */
> +     kfree(irqs);
> +     dev->irqs = &dev->irq;
> +     return 1;
> +
> +done:
> +     dev->irqs = irqs;
> +     return vecs;
> +}
> +EXPORT_SYMBOL(pci_alloc_irq_vectors);
> +
> +/**
> + * pci_free_irq_vectors - free previously allocated IRQs for a device
> + * @dev:             PCI device to operate on
> + *
> + * Undoes the allocations and enabling in pci_alloc_irq_vectors().
> + */
> +void pci_free_irq_vectors(struct pci_dev *dev)
> +{
> +     if (dev->msix_enabled)
> +             pci_disable_msix(dev);
> +     else if (dev->msi_enabled)
> +             pci_disable_msi(dev);

The checks are probably either redundant or incomplete. Redundant - because
pci_disable_msi()/pci_disable_msix() perform them anyway:

        if (!pci_msi_enable || !dev || !dev->msi_enabled)
                return;

Incomplete - because the other two conditions (!pci_msi_enable and !dev)
are not checked.

> +     if (dev->irqs != &dev->irq)
> +             kfree(dev->irqs);

Unset dev->irqs?

BTW, since (dev->irqs == &dev->irq) effectively tells whether MSI/MSI-X
was enabled, this function could bail out early if it was not.

> +}
> +EXPORT_SYMBOL(pci_free_irq_vectors);
> +
> +
>  struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc)
>  {
>       return to_pci_dev(desc->dev);
> diff --git a/include/linux/pci.h b/include/linux/pci.h
> index b67e4df..84a20fc 100644
> --- a/include/linux/pci.h
> +++ b/include/linux/pci.h
> @@ -320,6 +320,7 @@ struct pci_dev {
>        * directly, use the values stored here. They might be different!
>        */
>       unsigned int    irq;
> +     unsigned int    *irqs;
>       struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory 
> regions + expansion ROMs */
>  
>       bool match_driver;              /* Skip attaching driver */
> @@ -1237,6 +1238,8 @@ resource_size_t pcibios_iov_resource_alignment(struct 
> pci_dev *dev, int resno);
>  int pci_set_vga_state(struct pci_dev *pdev, bool decode,
>                     unsigned int command_bits, u32 flags);
>  
> +#define PCI_IRQ_NOMSIX               (1 << 0) /* don't try to use MSI-X 
> interrupts */

BTW, why PCI_IRQ_NOMSIX only and no PCI_IRQ_NOMSI?

>  /* kmem_cache style wrapper around pci_alloc_consistent() */
>  
>  #include <linux/pci-dma.h>
> @@ -1284,6 +1287,9 @@ static inline int pci_enable_msix_exact(struct pci_dev 
> *dev,
>               return rc;
>       return 0;
>  }
> +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
> +             unsigned int max_vecs, unsigned int flags);
> +void pci_free_irq_vectors(struct pci_dev *dev);
>  #else
>  static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; }
>  static inline void pci_msi_shutdown(struct pci_dev *dev) { }
> @@ -1307,6 +1313,18 @@ static inline int pci_enable_msix_range(struct pci_dev 
> *dev,
>  static inline int pci_enable_msix_exact(struct pci_dev *dev,
>                     struct msix_entry *entries, int nvec)
>  { return -ENOSYS; }
> +static inline int pci_alloc_irq_vectors(struct pci_dev *dev,
> +             unsigned int min_vecs, unsigned int max_vecs,
> +             unsigned int flags)
> +{
> +     if (min_vecs > 1)
> +             return -ENOSPC;
> +     dev->irqs = &dev->irq;
> +     return 1;
> +}
> +static inline void pci_free_irq_vectors(struct pci_dev *dev)
> +{

Unset dev->irqs?

> +}
>  #endif
>  
>  #ifdef CONFIG_PCIEPORTBUS
> -- 
> 2.1.4
> 

Reply via email to