Add a flexible mechanism to specify virtio configuration layout, using
pci vendor-specific capability.  A separate capability is used for each
of common, device specific and data-path accesses.

Warning: compiled only.
This patch also needs to be split up, pci_iomap changes
also need arch updates for non-x86.
There might also be more spec changes.

Posting here for early feedback, and to allow Sasha to
proceed with his "kvm tool" work.

Changes from v1:
Updated to match v3 of the spec, see:
        Subject: [PATCHv3 RFC] virtio-spec: flexible configuration layout
        Message-ID: <20111110122436.ga13...@redhat.com>
        In-Reply-To: <20111109195901.ga28...@redhat.com>

In particular:
- split ISR out
- reorg capability
- add offset alignment

Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
---
 drivers/virtio/virtio_pci.c |  184 +++++++++++++++++++++++++++++++++++++++---
 include/asm-generic/io.h    |    4 +
 include/asm-generic/iomap.h |   11 +++
 include/linux/pci_regs.h    |    4 +
 include/linux/virtio_pci.h  |   41 ++++++++++
 lib/iomap.c                 |   40 ++++++++-
 6 files changed, 265 insertions(+), 19 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index ecb9254..bdd3876 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -37,8 +37,12 @@ struct virtio_pci_device
        struct virtio_device vdev;
        struct pci_dev *pci_dev;
 
-       /* the IO mapping for the PCI config space */
+       /* the IO address for the common PCI config space */
        void __iomem *ioaddr;
+       /* the IO address for device specific config */
+       void __iomem *ioaddr_device;
+       /* the IO address to use for notifications operations */
+       void __iomem *ioaddr_notify;
 
        /* a list of queues so we can dispatch IRQs */
        spinlock_t lock;
@@ -57,8 +61,158 @@ struct virtio_pci_device
        unsigned msix_used_vectors;
        /* Whether we have vector per vq */
        bool per_vq_vectors;
+
+       /* Various IO mappings: used for resource tracking only. */
+
+       /* Legacy BAR0: typically PIO. */
+       void __iomem *legacy_map;
+
+       /* Mappings specified by device capabilities: typically in MMIO */
+       void __iomem *isr_map;
+       void __iomem *notify_map;
+       void __iomem *common_map;
+       void __iomem *device_map;
 };
 
+/*
+ * With PIO, device-specific config moves as MSI-X is enabled/disabled.
+ * Use this accessor to keep pointer to that config in sync.
+ */
+static void virtio_pci_set_msix_enabled(struct virtio_pci_device *vp_dev, int 
enabled)
+{
+       vp_dev->msix_enabled = enabled;
+       if (vp_dev->device_map)
+               vp_dev->ioaddr_device = vp_dev->device_map;
+       else
+               vp_dev->ioaddr_device = vp_dev->legacy_map +
+                       VIRTIO_PCI_CONFIG(vp_dev);
+}
+
+static void __iomem *virtio_pci_map_cfg(struct virtio_pci_device *vp_dev, u8 
cap_id,
+                                       u32 align)
+{
+        u32 size;
+        u32 offset;
+        u8 bir;
+        u8 cap_len;
+       int pos;
+       struct pci_dev *dev = vp_dev->pci_dev;
+       void __iomem *p;
+
+       for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+            pos > 0;
+            pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+               u8 id;
+               pci_read_config_byte(dev, pos + PCI_VNDR_CAP_LEN, &cap_len);
+               if (cap_len < VIRTIO_PCI_CAP_ID + 1)
+                       continue;
+               pci_read_config_byte(dev, pos + VIRTIO_PCI_CAP_ID, &id);
+               id >>= VIRTIO_PCI_CAP_ID_SHIFT;
+               id &= VIRTIO_PCI_CAP_ID_MASK;
+               if (id == cap_id)
+                       break;
+       }
+
+       if (pos <= 0)
+               return NULL;
+
+       if (cap_len < VIRTIO_PCI_CAP_CFG_BIR + 1)
+               goto err;
+        pci_read_config_byte(dev, pos + VIRTIO_PCI_CAP_CFG_BIR, &bir);
+       if (cap_len < VIRTIO_PCI_CAP_CFG_OFF + 4)
+               goto err;
+        pci_read_config_dword(dev, pos + VIRTIO_PCI_CAP_CFG_OFF, &offset);
+       if (cap_len < VIRTIO_PCI_CAP_CFG_SIZE + 4)
+               goto err;
+        pci_read_config_dword(dev, pos + VIRTIO_PCI_CAP_CFG_SIZE, &size);
+        bir >>= VIRTIO_PCI_CAP_CFG_BIR_SHIFT;
+        bir &= VIRTIO_PCI_CAP_CFG_BIR_MASK;
+        size >>= VIRTIO_PCI_CAP_CFG_SIZE_SHIFT;
+        size &= VIRTIO_PCI_CAP_CFG_SIZE_MASK;
+        off >>= VIRTIO_PCI_CAP_CFG_OFF_SHIFT;
+        off &= VIRTIO_PCI_CAP_CFG_OFF_MASK;
+       /* Align offset appropriately */
+       off &= ~(align - 1);
+
+       /* It's possible for a device to supply a huge config space,
+        * but we'll never need to map more than a page ATM. */
+       p = pci_iomap_range(dev, bir, offset, size, PAGE_SIZE);
+       if (!p)
+               dev_err(&vp_dev->vdev.dev, "Unable to map virtio pci memory");
+       return p;
+err:
+       dev_err(&vp_dev->vdev.dev, "Unable to parse virtio pci capability");
+       return NULL;
+}
+
+static void virtio_pci_iounmap(struct virtio_pci_device *vp_dev)
+{
+       if (vp_dev->legacy_map)
+               pci_iounmap(vp_dev->pci_dev, vp_dev->legacy_map);
+       if (vp_dev->isr_map)
+               pci_iounmap(vp_dev->pci_dev, vp_dev->isr_map);
+       if (vp_dev->notify_map)
+               pci_iounmap(vp_dev->pci_dev, vp_dev->notify_map);
+       if (vp_dev->common_map)
+               pci_iounmap(vp_dev->pci_dev, vp_dev->common_map);
+       if (vp_dev->device_map)
+               pci_iounmap(vp_dev->pci_dev, vp_dev->device_map);
+}
+
+static int virtio_pci_iomap(struct virtio_pci_device *vp_dev)
+{
+       vp_dev->isr_map = virtio_pci_map_cfg(vp_dev,
+                                            VIRTIO_PCI_CAP_ISR_CFG,
+                                            sizeof(u8));
+       vp_dev->notify_map = virtio_pci_map_cfg(vp_dev,
+                                               VIRTIO_PCI_CAP_NOTIFY_CFG,
+                                               sizeof(u16));
+       vp_dev->common_map = virtio_pci_map_cfg(vp_dev,
+                                               VIRTIO_PCI_CAP_COMMON_CFG,
+                                               sizeof(u32));
+       vp_dev->device_map = virtio_pci_map_cfg(vp_dev,
+                                               VIRTIO_PCI_CAP_DEVICE_CFG,
+                                               sizeof(u8));
+
+       if (!vp_dev->notify_map || !vp_dev->common_map ||
+           !vp_dev->device_map) {
+               /*
+                * If not all capabilities present, map legacy PIO.
+                * Legacy access is at BAR 0. We never need to map
+                * more than 256 bytes there, since legacy config space
+                * used PIO which has this size limit.
+                * */
+               vp_dev->legacy_map = pci_iomap(vp_dev->pci_dev, 0, 256);
+               if (!vp_dev->legacy_map) {
+                       dev_err(&vp_dev->vdev.dev, "Unable to map legacy PIO");
+                       goto err;
+               }
+       }
+
+       /* Prefer MMIO if available. If not, fallback to legacy PIO. */
+       if (vp_dev->common_map)
+               vp_dev->ioaddr = vp_dev->common_map;
+       else
+               vp_dev->ioaddr = vp_dev->legacy_map;
+
+       if (vp_dev->device_map)
+               vp_dev->ioaddr_device = vp_dev->device_map;
+       else
+               vp_dev->ioaddr_device = vp_dev->legacy_map +
+                       VIRTIO_PCI_CONFIG(vp_dev);
+
+       if (vp_dev->notify_map)
+               vp_dev->ioaddr_notify = vp_dev->notify_map;
+       else
+               vp_dev->ioaddr_notify = vp_dev->legacy_map +
+                       VIRTIO_PCI_QUEUE_NOTIFY;
+
+       return 0;
+err:
+       virtio_pci_iounmap(vp_dev);
+       return -EINVAL;
+}
+
 /* Constants for MSI-X */
 /* Use first vector for configuration changes, second and the rest for
  * virtqueues Thus, we need at least 2 vectors for MSI. */
@@ -130,8 +284,7 @@ static void vp_get(struct virtio_device *vdev, unsigned 
offset,
                   void *buf, unsigned len)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       void __iomem *ioaddr = vp_dev->ioaddr +
-                               VIRTIO_PCI_CONFIG(vp_dev) + offset;
+       void __iomem *ioaddr = vp_dev->ioaddr_device + offset;
        u8 *ptr = buf;
        int i;
 
@@ -145,8 +298,7 @@ static void vp_set(struct virtio_device *vdev, unsigned 
offset,
                   const void *buf, unsigned len)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       void __iomem *ioaddr = vp_dev->ioaddr +
-                               VIRTIO_PCI_CONFIG(vp_dev) + offset;
+       void __iomem *ioaddr = vp_dev->ioaddr_device + offset;
        const u8 *ptr = buf;
        int i;
 
@@ -184,7 +336,7 @@ static void vp_notify(struct virtqueue *vq)
 
        /* we write the queue's selector into the notification register to
         * signal the other end */
-       iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+       iowrite16(info->queue_index, vp_dev->ioaddr_notify);
 }
 
 /* Handle a configuration change: Tell driver if it wants to know. */
@@ -231,7 +383,8 @@ static irqreturn_t vp_interrupt(int irq, void *opaque)
 
        /* reading the ISR has the effect of also clearing it so it's very
         * important to save off the value. */
-       isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+       isr = ioread8(vp_dev->ioaddr_notify +
+                     VIRTIO_PCI_ISR - VIRTIO_PCI_QUEUE_NOTIFY);
 
        /* It's definitely not us if the ISR was not high */
        if (!isr)
@@ -265,7 +418,7 @@ static void vp_free_vectors(struct virtio_device *vdev)
                ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 
                pci_disable_msix(vp_dev->pci_dev);
-               vp_dev->msix_enabled = 0;
+                virtio_pci_set_msix_enabled(vp_dev, 0);
                vp_dev->msix_vectors = 0;
        }
 
@@ -303,7 +456,7 @@ static int vp_request_msix_vectors(struct virtio_device 
*vdev, int nvectors,
        if (err)
                goto error;
        vp_dev->msix_vectors = nvectors;
-       vp_dev->msix_enabled = 1;
+        virtio_pci_set_msix_enabled(vp_dev, 1);
 
        /* Set the vector used for configuration */
        v = vp_dev->msix_used_vectors;
@@ -447,7 +600,10 @@ static void vp_del_vq(struct virtqueue *vq)
                iowrite16(VIRTIO_MSI_NO_VECTOR,
                          vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
                /* Flush the write out to device */
-               ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
+               ioread8(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
+               /* And clear ISR: TODO: really needed? */
+               ioread8(vp_dev->ioaddr_notify +
+                     VIRTIO_PCI_ISR - VIRTIO_PCI_QUEUE_NOTIFY);
        }
 
        vring_del_virtqueue(vq);
@@ -638,8 +794,8 @@ static int __devinit virtio_pci_probe(struct pci_dev 
*pci_dev,
        if (err)
                goto out_enable_device;
 
-       vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
-       if (vp_dev->ioaddr == NULL)
+       err = virtio_pci_iomap(vp_dev);
+       if (err)
                goto out_req_regions;
 
        pci_set_drvdata(pci_dev, vp_dev);
@@ -661,7 +817,7 @@ static int __devinit virtio_pci_probe(struct pci_dev 
*pci_dev,
 
 out_set_drvdata:
        pci_set_drvdata(pci_dev, NULL);
-       pci_iounmap(pci_dev, vp_dev->ioaddr);
+       virtio_pci_iounmap(vp_dev);
 out_req_regions:
        pci_release_regions(pci_dev);
 out_enable_device:
@@ -679,7 +835,7 @@ static void __devexit virtio_pci_remove(struct pci_dev 
*pci_dev)
 
        vp_del_vqs(&vp_dev->vdev);
        pci_set_drvdata(pci_dev, NULL);
-       pci_iounmap(pci_dev, vp_dev->ioaddr);
+       virtio_pci_iounmap(vp_dev);
        pci_release_regions(pci_dev);
        pci_disable_device(pci_dev);
        kfree(vp_dev);
diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index 9120887..3cf1787 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -286,6 +286,10 @@ static inline void writesb(const void __iomem *addr, const 
void *buf, int len)
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
 extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long 
max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+                                     unsigned offset,
+                                     unsigned long minlen,
+                                     unsigned long maxlen);
 static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
 {
 }
diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h
index 98dcd76..6f192d4 100644
--- a/include/asm-generic/iomap.h
+++ b/include/asm-generic/iomap.h
@@ -70,8 +70,19 @@ extern void ioport_unmap(void __iomem *);
 /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
 struct pci_dev;
 extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long 
max);
+extern void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+                                     unsigned offset,
+                                     unsigned long minlen,
+                                     unsigned long maxlen);
 extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
 #else
+static inline void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+                                            unsigned offset,
+                                            unsigned long minlen,
+                                            unsigned long maxlen)
+{
+       return NULL;
+}
 struct pci_dev;
 static inline void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned 
long max)
 {
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index e884096..2ec9f81 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -375,6 +375,10 @@
 #define  PCI_X_STATUS_266MHZ   0x40000000      /* 266 MHz capable */
 #define  PCI_X_STATUS_533MHZ   0x80000000      /* 533 MHz capable */
 
+/* Vendor specific capability */
+#define PCI_VNDR_CAP_LEN       2       /* Capability length (8 bits), including
+                                          bytes: ID, NEXT and LEN itself. */
+
 /* PCI Bridge Subsystem ID registers */
 
 #define PCI_SSVID_VENDOR_ID     4      /* PCI-Bridge subsystem vendor id 
register */
diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h
index ea66f3f..d6568e7 100644
--- a/include/linux/virtio_pci.h
+++ b/include/linux/virtio_pci.h
@@ -92,4 +92,45 @@
 /* The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. */
 #define VIRTIO_PCI_VRING_ALIGN         4096
+
+/*
+ * Layout for Virtio PCI vendor specific capability (little-endian):
+ * 5 bit virtio capability id.
+ * 3 bit BAR index register, specifying which BAR to use.
+ * 4 byte cfg offset within the BAR.
+ * 4 byte cfg size.
+ */
+
+/* A single virtio device has multiple vendor specific capabilities, we use the
+ * 5 bit ID field to distinguish between these. */
+#define VIRTIO_PCI_CAP_ID              3
+#define VIRTIO_PCI_CAP_ID_MASK         0xff
+#define VIRTIO_PCI_CAP_ID_SHIFT                0
+
+/* IDs for different capabilities. If a specific configuration
+ * is missing, legacy PIO path is used. */
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG      1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG      2
+/* ISR access */
+#define VIRTIO_PCI_CAP_ISR_CFG         3
+/* Device specific confiuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG      4
+
+/* Index of the BAR including this configuration */
+#define VIRTIO_PCI_CAP_CFG_BIR         4
+#define VIRTIO_PCI_CAP_CFG_BIR_MASK    (0x7)
+#define VIRTIO_PCI_CAP_CFG_BIR_SHIFT   0
+
+/* Size of the configuration space */
+#define VIRTIO_PCI_CAP_CFG_SIZE                4
+#define VIRTIO_PCI_CAP_CFG_SIZE_MASK   0xffffff
+#define VIRTIO_PCI_CAP_CFG_SIZE_SHIFT  8
+
+/* Offset within the BAR */
+#define VIRTIO_PCI_CAP_CFG_OFF         8
+#define VIRTIO_PCI_CAP_CFG_OFF_MASK    0xffffffff
+#define VIRTIO_PCI_CAP_CFG_OFF_SHIFT   0
+
 #endif
diff --git a/lib/iomap.c b/lib/iomap.c
index 5dbcb4b..f28720e 100644
--- a/lib/iomap.c
+++ b/lib/iomap.c
@@ -243,26 +243,36 @@ EXPORT_SYMBOL(ioport_unmap);
 
 #ifdef CONFIG_PCI
 /**
- * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * pci_iomap_range - create a virtual mapping cookie for a PCI BAR
  * @dev: PCI device that owns the BAR
  * @bar: BAR number
- * @maxlen: length of the memory to map
+ * @offset: map memory at the given offset in BAR
+ * @minlen: min length of the memory to map
+ * @maxlen: max length of the memory to map
  *
  * Using this function you will get a __iomem address to your device BAR.
  * You can access it using ioread*() and iowrite*(). These functions hide
  * the details if this is a MMIO or PIO address space and will just do what
  * you expect from them in the correct way.
  *
+ * @minlen specifies the minimum length to map. We check that BAR is
+ * large enough.
  * @maxlen specifies the maximum length to map. If you want to get access to
- * the complete BAR without checking for its length first, pass %0 here.
+ * the complete BAR from offset to the end, pass %0 here.
  * */
-void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+void __iomem *pci_iomap_range(struct pci_dev *dev, int bar,
+                              unsigned offset,
+                              unsigned long minlen,
+                              unsigned long maxlen)
 {
        resource_size_t start = pci_resource_start(dev, bar);
        resource_size_t len = pci_resource_len(dev, bar);
        unsigned long flags = pci_resource_flags(dev, bar);
 
-       if (!len || !start)
+       if (len <= offset || !start)
+               return NULL;
+        len -= offset;
+        if (len < minlen)
                return NULL;
        if (maxlen && len > maxlen)
                len = maxlen;
@@ -277,10 +287,30 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, 
unsigned long maxlen)
        return NULL;
 }
 
+/**
+ * pci_iomap - create a virtual mapping cookie for a PCI BAR
+ * @dev: PCI device that owns the BAR
+ * @bar: BAR number
+ * @maxlen: length of the memory to map
+ *
+ * Using this function you will get a __iomem address to your device BAR.
+ * You can access it using ioread*() and iowrite*(). These functions hide
+ * the details if this is a MMIO or PIO address space and will just do what
+ * you expect from them in the correct way.
+ *
+ * @maxlen specifies the maximum length to map. If you want to get access to
+ * the complete BAR without checking for its length first, pass %0 here.
+ * */
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+    return pci_iomap_range(dev, bar, 0, 0, maxlen);
+}
+
 void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
 {
        IO_COND(addr, /* nothing */, iounmap(addr));
 }
 EXPORT_SYMBOL(pci_iomap);
+EXPORT_SYMBOL(pci_iomap_range);
 EXPORT_SYMBOL(pci_iounmap);
 #endif /* CONFIG_PCI */
-- 
1.7.5.53.gc233e
_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to