On 7/14/25 5:59 PM, Shameer Kolothum wrote:
> Accelerated SMMUv3 is only useful when the device can take advantage of
> the host's SMMUv3 in nested mode. To keep things simple and correct, we
> only allow this feature for vfio-pci endpoint devices that use the iommufd
> backend. We also allow non-endpoint emulated devices like PCI bridges and
> root ports, so that users can plug in these vfio-pci devices.
>
> Another reason for this limit is to avoid problems with IOTLB
> invalidations. Some commands (e.g., CMD_TLBI_NH_ASID) lack an associated
> SID, making it difficult to trace the originating device. If we allowed
> emulated endpoint devices, QEMU would have to invalidate both its own
> software IOTLB and the host's hardware IOTLB, which could slow things
> down.
>
> Since vfio-pci devices in nested mode rely on the host SMMUv3's nested
> translation (S1+S2), their get_address_space() callback must return the
> system address space to enable correct S2 mappings of guest RAM.
>
> So in short:
> - vfio-pci devices return the system address space
> - bridges and root ports return the IOMMU address space
>
> Note: On ARM, MSI doorbell addresses are also translated via SMMUv3.
> Hence, if a vfio-pci device is behind the SMMUv3 with translation enabled,
> it must return the IOMMU address space for MSI. Support for this will be
> added in a follow-up patch.
This seems to contradict what is said above:
"vfio-pci devices return the system address space"
Eric
>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.th...@huawei.com>
> ---
> hw/arm/smmuv3-accel.c | 50 ++++++++++++++++++++++++++++-
> hw/arm/smmuv3-accel.h | 15 +++++++++
> hw/arm/smmuv3.c | 4 +++
> hw/pci-bridge/pci_expander_bridge.c | 1 -
> include/hw/arm/smmuv3.h | 1 +
> include/hw/pci/pci_bridge.h | 1 +
> 6 files changed, 70 insertions(+), 2 deletions(-)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index 2eac9c6ff4..0b0ddb03e2 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -7,13 +7,19 @@
> */
>
> #include "qemu/osdep.h"
> +#include "qemu/error-report.h"
>
> #include "hw/arm/smmuv3.h"
> +#include "hw/pci/pci_bridge.h"
> +#include "hw/pci-host/gpex.h"
> +#include "hw/vfio/pci.h"
> +
> #include "smmuv3-accel.h"
>
> static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus
> *sbus,
> PCIBus *bus, int devfn)
> {
> + SMMUv3State *s = ARM_SMMUV3(bs);
> SMMUDevice *sdev = sbus->pbdev[devfn];
> SMMUv3AccelDevice *accel_dev;
>
> @@ -25,30 +31,72 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState
> *bs, SMMUPciBus *sbus,
>
> sbus->pbdev[devfn] = sdev;
> smmu_init_sdev(bs, sdev, bus, devfn);
> + address_space_init(&accel_dev->as_sysmem, &s->s_accel->root,
> + "smmuv3-accel-sysmem");
> }
>
> return accel_dev;
> }
>
> +static bool smmuv3_accel_pdev_allowed(PCIDevice *pdev, bool *vfio_pci)
> +{
> +
> + if (object_dynamic_cast(OBJECT(pdev), TYPE_PCI_BRIDGE) ||
> + object_dynamic_cast(OBJECT(pdev), "pxb-pcie") ||
> + object_dynamic_cast(OBJECT(pdev), "gpex-root")) {
> + return true;
> + } else if ((object_dynamic_cast(OBJECT(pdev), TYPE_VFIO_PCI) &&
> + object_property_find(OBJECT(pdev), "iommufd"))) {
> + *vfio_pci = true;
> + return true;
> + }
> + return false;
> +}
> +
> static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque,
> int devfn)
> {
> + PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
> SMMUState *bs = opaque;
> + bool vfio_pci = false;
> SMMUPciBus *sbus;
> SMMUv3AccelDevice *accel_dev;
> SMMUDevice *sdev;
>
> + if (pdev && !smmuv3_accel_pdev_allowed(pdev, &vfio_pci)) {
> + error_report("Device(%s) not allowed. Only PCIe root complex devices
> "
> + "or PCI bridge devices or vfio-pci endpoint devices
> with "
> + "iommufd as backend is allowed with
> arm-smmuv3,accel=on",
> + pdev->name);
> + exit(1);
> + }
> sbus = smmu_get_sbus(bs, bus);
> accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn);
> sdev = &accel_dev->sdev;
>
> - return &sdev->as;
> + if (vfio_pci) {
> + return &accel_dev->as_sysmem;
> + } else {
> + return &sdev->as;
> + }
> }
>
> static const PCIIOMMUOps smmuv3_accel_ops = {
> .get_address_space = smmuv3_accel_find_add_as,
> };
>
> +void smmuv3_accel_init(SMMUv3State *s)
> +{
> + SMMUv3AccelState *s_accel;
> +
> + s->s_accel = s_accel = g_new0(SMMUv3AccelState, 1);
> + memory_region_init(&s_accel->root, OBJECT(s), "root", UINT64_MAX);
> + memory_region_init_alias(&s_accel->sysmem, OBJECT(s),
> + "smmuv3-accel-sysmem", get_system_memory(), 0,
> + memory_region_size(get_system_memory()));
> + memory_region_add_subregion(&s_accel->root, 0, &s_accel->sysmem);
> +}
> +
> static void smmuv3_accel_class_init(ObjectClass *oc, const void *data)
> {
> SMMUBaseClass *sbc = ARM_SMMU_CLASS(oc);
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 4cf30b1291..2cd343103f 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -9,11 +9,26 @@
> #ifndef HW_ARM_SMMUV3_ACCEL_H
> #define HW_ARM_SMMUV3_ACCEL_H
>
> +#include "hw/arm/smmuv3.h"
> #include "hw/arm/smmu-common.h"
> #include CONFIG_DEVICES
>
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
> + AddressSpace as_sysmem;
> } SMMUv3AccelDevice;
>
> +typedef struct SMMUv3AccelState {
> + MemoryRegion root;
> + MemoryRegion sysmem;
> +} SMMUv3AccelState;
> +
> +#if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD)
> +void smmuv3_accel_init(SMMUv3State *s);
> +#else
> +static inline void smmuv3_accel_init(SMMUv3State *d)
> +{
> +}
> +#endif
> +
> #endif /* HW_ARM_SMMUV3_ACCEL_H */
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index bcf8af8dc7..2f5a8157dd 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -32,6 +32,7 @@
> #include "qapi/error.h"
>
> #include "hw/arm/smmuv3.h"
> +#include "smmuv3-accel.h"
> #include "smmuv3-internal.h"
> #include "smmu-internal.h"
>
> @@ -1898,6 +1899,9 @@ static void smmu_realize(DeviceState *d, Error **errp)
> sysbus_init_mmio(dev, &sys->iomem);
>
> smmu_init_irq(s, dev);
> + if (sys->accel) {
> + smmuv3_accel_init(s);
> + }
> }
>
> static const VMStateDescription vmstate_smmuv3_queue = {
> diff --git a/hw/pci-bridge/pci_expander_bridge.c
> b/hw/pci-bridge/pci_expander_bridge.c
> index 1bcceddbc4..a8eb2d2426 100644
> --- a/hw/pci-bridge/pci_expander_bridge.c
> +++ b/hw/pci-bridge/pci_expander_bridge.c
> @@ -48,7 +48,6 @@ struct PXBBus {
> char bus_path[8];
> };
>
> -#define TYPE_PXB_PCIE_DEV "pxb-pcie"
> OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV)
>
> static GList *pxb_dev_list;
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index d183a62766..3bdb92391a 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -63,6 +63,7 @@ struct SMMUv3State {
> qemu_irq irq[4];
> QemuMutex mutex;
> char *stage;
> + struct SMMUv3AccelState *s_accel;
> };
>
> typedef enum {
> diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
> index a055fd8d32..b61360b900 100644
> --- a/include/hw/pci/pci_bridge.h
> +++ b/include/hw/pci/pci_bridge.h
> @@ -106,6 +106,7 @@ typedef struct PXBPCIEDev {
>
> #define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
> #define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
> +#define TYPE_PXB_PCIE_DEV "pxb-pcie"
> #define TYPE_PXB_DEV "pxb"
> OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
>