Accelerated SMMUv3 is only useful when the device can take advantage of
the host's SMMUv3 in nested mode. To keep things simple and correct, we
only allow this feature for vfio-pci endpoint devices that use the iommufd
backend. We also allow non-endpoint emulated devices like PCI bridges and
root ports, so that users can plug in these vfio-pci devices. We can only
enforce this if devices are cold plugged. For hotplug cases, give appropriate
warnings.

Another reason for this limit is to avoid problems with IOTLB
invalidations. Some commands (e.g., CMD_TLBI_NH_ASID) lack an associated
SID, making it difficult to trace the originating device. If we allowed
emulated endpoint devices, QEMU would have to invalidate both its own
software IOTLB and the host's hardware IOTLB, which could slow things
down.

Since vfio-pci devices in nested mode rely on the host SMMUv3's nested
translation (S1+S2), their get_address_space() callback must return the
system address space so that VFIO core can setup correct S2 mappings
for guest RAM.

So in short:
 - vfio-pci devices(with iommufd as backend) return the system address
   space.
 - bridges and root ports return the IOMMU address space.

Signed-off-by: Shameer Kolothum <[email protected]>
---
 hw/arm/smmuv3-accel.c               | 68 ++++++++++++++++++++++++++++-
 hw/pci-bridge/pci_expander_bridge.c |  1 -
 include/hw/pci/pci_bridge.h         |  1 +
 3 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index 79f1713be6..44410cfb2a 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -7,8 +7,13 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/error-report.h"
 
 #include "hw/arm/smmuv3.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci-host/gpex.h"
+#include "hw/vfio/pci.h"
+
 #include "smmuv3-accel.h"
 
 static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
@@ -29,15 +34,76 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState 
*bs, SMMUPciBus *sbus,
     return accel_dev;
 }
 
+static bool smmuv3_accel_pdev_allowed(PCIDevice *pdev, bool *vfio_pci)
+{
+
+    if (object_dynamic_cast(OBJECT(pdev), TYPE_PCI_BRIDGE) ||
+        object_dynamic_cast(OBJECT(pdev), TYPE_PXB_PCIE_DEV) ||
+        object_dynamic_cast(OBJECT(pdev), TYPE_GPEX_ROOT_DEVICE)) {
+        return true;
+    } else if ((object_dynamic_cast(OBJECT(pdev), TYPE_VFIO_PCI))) {
+        *vfio_pci = true;
+        if (object_property_get_link(OBJECT(pdev), "iommufd", NULL)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
                                               int devfn)
 {
+    PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
     SMMUState *bs = opaque;
     SMMUPciBus *sbus = smmu_get_sbus(bs, bus);
     SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn);
     SMMUDevice *sdev = &accel_dev->sdev;
+    bool vfio_pci = false;
+
+    if (pdev && !smmuv3_accel_pdev_allowed(pdev, &vfio_pci)) {
+        if (DEVICE(pdev)->hotplugged) {
+            if (vfio_pci) {
+                warn_report("Hot plugging a vfio-pci device (%s) without "
+                            "iommufd as backend is not supported", pdev->name);
+            } else {
+                warn_report("Hot plugging an emulated device %s with "
+                            "accelerated SMMUv3. This will bring down "
+                            "performace", pdev->name);
+            }
+            /*
+             * Both cases, we will return IOMMU address space. For hotplugged
+             * vfio-pci dev without iommufd as backend, it will fail later in
+             * smmuv3_notify_flag_changed() with "requires iommu MAP notifier"
+             * error message.
+             */
+             return &sdev->as;
+        } else {
+            error_report("Device(%s) not allowed. Only PCIe root complex "
+                         "devices or PCI bridge devices or vfio-pci endpoint "
+                         "devices with iommufd as backend is allowed with "
+                         "arm-smmuv3,accel=on", pdev->name);
+            exit(1);
+        }
+    }
 
-    return &sdev->as;
+    /*
+     * We return the system address for vfio-pci devices(with iommufd as
+     * backend) so that the VFIO core can set up Stage-2 (S2) mappings for
+     * guest RAM. This is needed because, in the accelerated SMMUv3 case,
+     * the host SMMUv3 runs in nested (S1 + S2)  mode where the guest
+     * manages its own S1 page tables while the host manages S2.
+     *
+     * We are using the global &address_space_memory here, as this will ensure
+     * same system address space pointer for all devices behind the accelerated
+     * SMMUv3s in a VM. That way VFIO/iommufd can reuse a single IOAS ID in
+     * iommufd_cdev_attach(), allowing the Stage-2 page tables to be shared
+     * within the VM instead of duplicating them for every SMMUv3 instance.
+     */
+    if (vfio_pci) {
+        return &address_space_memory;
+    } else {
+        return &sdev->as;
+    }
 }
 
 static const PCIIOMMUOps smmuv3_accel_ops = {
diff --git a/hw/pci-bridge/pci_expander_bridge.c 
b/hw/pci-bridge/pci_expander_bridge.c
index 1bcceddbc4..a8eb2d2426 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -48,7 +48,6 @@ struct PXBBus {
     char bus_path[8];
 };
 
-#define TYPE_PXB_PCIE_DEV "pxb-pcie"
 OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV)
 
 static GList *pxb_dev_list;
diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
index a055fd8d32..b61360b900 100644
--- a/include/hw/pci/pci_bridge.h
+++ b/include/hw/pci/pci_bridge.h
@@ -106,6 +106,7 @@ typedef struct PXBPCIEDev {
 
 #define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
 #define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
+#define TYPE_PXB_PCIE_DEV "pxb-pcie"
 #define TYPE_PXB_DEV "pxb"
 OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
 
-- 
2.43.0


Reply via email to