Map the PBA in order to access it from the MSI-X read and write
handlers. Note that previously the handlers would pass the host
physical address to the {read,write}{l,q} accessors, which is wrong,
as those expect a linear address.
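
For context, Xen's {read,write}{l,q} accessors essentially dereference
the pointer they are given, so they only work on mapped (linear)
addresses. A minimal sketch of the before/after access pattern
(simplified, not the actual handler code):

    /* Before: addr is a host physical address; dereferencing it is wrong. */
    *data = readl(addr);

    /* After: obtain a linear address via ioremap and use that instead. */
    void *pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));

    if ( pba )
        *data = readl(pba + (addr - base));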

Map the PBA using ioremap when the first access is performed. Note
that 32bit arches might want to abstract the call to ioremap into a
vPCI arch handler, so they can use a fixmap range to map the PBA.
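
Such an arch handler could look roughly like the following
(hypothetical vpci_arch_map_pba() helper, not introduced by this
patch; on x86 it would simply wrap ioremap):

    /* Hypothetical arch hook returning a linear mapping of the PBA. */
    void *vpci_arch_map_pba(paddr_t base, size_t size)
    {
        /* A 32bit arch could hand out a dedicated fixmap slot here. */
        return ioremap(base, size);
    }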

Reported-by: Jan Beulich <jbeul...@suse.com>
Signed-off-by: Roger Pau Monné <roger....@citrix.com>
---
Cc: Alex Olson <this.is.a0l...@gmail.com>
---
Changes since v1:
 - Also handle writes.

I don't seem to have a box with a driver that will try to access the
PBA, so I would consider this specific code path build-tested only. At
least it doesn't seem to regress the current state of vPCI.
---
 xen/drivers/vpci/msix.c | 55 +++++++++++++++++++++++++++++++++++++----
 xen/drivers/vpci/vpci.c |  2 ++
 xen/include/xen/vpci.h  |  2 ++
 3 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/xen/drivers/vpci/msix.c b/xen/drivers/vpci/msix.c
index a1fa7a5f13..9fbc111ecc 100644
--- a/xen/drivers/vpci/msix.c
+++ b/xen/drivers/vpci/msix.c
@@ -198,8 +198,13 @@ static int cf_check msix_read(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
+    spin_lock(&msix->pdev->vpci->lock);
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        unsigned int idx = addr - base;
+
         /*
          * Access to PBA.
          *
@@ -207,25 +212,43 @@ static int cf_check msix_read(
          * guest address space. If this changes the address will need to be
          * translated.
          */
+
+        if ( !msix->pba )
+        {
+            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
+            if ( !msix->pba )
+            {
+                /*
+                 * If unable to map the PBA, return all 1s (all pending): it's
+                 * likely better to trigger spurious events than drop them.
+                 */
+                spin_unlock(&vpci->lock);
+                gprintk(XENLOG_WARNING,
+                        "%pp: unable to map MSI-X PBA, report all pending\n",
+                        msix->pdev);
+                return X86EMUL_OKAY;
+            }
+        }
+
         switch ( len )
         {
         case 4:
-            *data = readl(addr);
+            *data = readl(msix->pba + idx);
             break;
 
         case 8:
-            *data = readq(addr);
+            *data = readq(msix->pba + idx);
             break;
 
         default:
             ASSERT_UNREACHABLE();
             break;
         }
+        spin_unlock(&vpci->lock);
 
         return X86EMUL_OKAY;
     }
 
-    spin_lock(&msix->pdev->vpci->lock);
     entry = get_entry(msix, addr);
     offset = addr & (PCI_MSIX_ENTRY_SIZE - 1);
 
@@ -273,27 +296,49 @@ static int cf_check msix_write(
     if ( !access_allowed(msix->pdev, addr, len) )
         return X86EMUL_OKAY;
 
+    spin_lock(&msix->pdev->vpci->lock);
     if ( VMSIX_ADDR_IN_RANGE(addr, msix->pdev->vpci, VPCI_MSIX_PBA) )
     {
+        struct vpci *vpci = msix->pdev->vpci;
+        paddr_t base = vmsix_table_addr(vpci, VPCI_MSIX_PBA);
+        unsigned int idx = addr - base;
 
         if ( !is_hardware_domain(d) )
+        {
             /* Ignore writes to PBA for DomUs, its behavior is undefined. */
+            spin_unlock(&vpci->lock);
             return X86EMUL_OKAY;
+        }
+
+        if ( !msix->pba )
+        {
+            msix->pba = ioremap(base, vmsix_table_size(vpci, VPCI_MSIX_PBA));
+            if ( !msix->pba )
+            {
+                /* Unable to map the PBA, ignore write. */
+                spin_unlock(&vpci->lock);
+                gprintk(XENLOG_WARNING,
+                        "%pp: unable to map MSI-X PBA, write ignored\n",
+                        msix->pdev);
+                return X86EMUL_OKAY;
+            }
+        }
 
         switch ( len )
         {
         case 4:
-            writel(data, addr);
+            writel(data, msix->pba + idx);
             break;
 
         case 8:
-            writeq(data, addr);
+            writeq(data, msix->pba + idx);
             break;
 
         default:
             ASSERT_UNREACHABLE();
             break;
         }
+        spin_unlock(&vpci->lock);
 
         return X86EMUL_OKAY;
     }
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index f3b32d66cb..9fb3c05b2b 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -51,6 +51,8 @@ void vpci_remove_device(struct pci_dev *pdev)
         xfree(r);
     }
     spin_unlock(&pdev->vpci->lock);
+    if ( pdev->vpci->msix && pdev->vpci->msix->pba )
+        iounmap(pdev->vpci->msix->pba);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index bcad1516ae..c399b101ee 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -127,6 +127,8 @@ struct vpci {
         bool enabled         : 1;
         /* Masked? */
         bool masked          : 1;
+        /* PBA map. */
+        void *pba;
         /* Entries. */
         struct vpci_msix_entry {
             uint64_t addr;
-- 
2.34.1

