Hi Shameer,

On 11/20/25 2:21 PM, Shameer Kolothum wrote:
> From: Nicolin Chen <[email protected]>
>
> Implement the VFIO/PCI callbacks to attach and detach a HostIOMMUDevice
> to a vSMMUv3 when accel=on:
>
>  - set_iommu_device(): attach a HostIOMMUDevice to a vIOMMU
>  - unset_iommu_device(): detach and release associated resources
>
> In SMMUv3 accel=on mode, the guest SMMUv3 is backed by the host SMMUv3 via
> IOMMUFD. A vIOMMU object (created via IOMMU_VIOMMU_ALLOC) provides a per-VM,
> security-isolated handle to the physical SMMUv3. Without a vIOMMU, the
> vSMMUv3 cannot relay guest operations to the host hardware nor maintain
> isolation across VMs or devices. Therefore, set_iommu_device() allocates
> a vIOMMU object if one does not already exist.
>
> There are two main points to consider in this implementation:
>
> 1) VFIO core allocates and attaches an S2 HWPT that acts as the nesting
>    parent for nested HWPTs (IOMMU_DOMAIN_NESTED). This parent HWPT will
>    be shared across multiple vSMMU instances within a VM.
>
> 2) A device cannot attach directly to a vIOMMU. Instead, it attaches
>    through a proxy nested HWPT (IOMMU_DOMAIN_NESTED). Based on the STE
>    configuration, there are three types of nested HWPTs: bypass, abort,
>    and translate.
>     - The bypass and abort proxy HWPTs are pre-allocated. When SMMUv3
>       operates in global abort or bypass modes, as controlled by the GBPA
>       register, or issues a vSTE for bypass or abort, we attach these
>       pre-allocated nested HWPTs.
>     - The translate HWPT requires a vDEVICE to be allocated first, since
>       invalidations and events depend on a valid vSID.
>     - The vDEVICE allocation and attach operations for vSTE based HWPTs
>       are implemented in subsequent patches.
>
> In summary, a device placed behind a vSMMU instance must have a vSID for
> a translate vSTE. The bypass and abort vSTEs are pre-allocated as proxy
> nested HWPTs and are attached based on the GBPA register. The core-managed
> nesting parent S2 HWPT is used as the parent S2 HWPT for all the nested
> HWPTs and is intended to be shared across vSMMU instances within the
> same VM.
>
> set_iommu_device():
>   - Reuse an existing vIOMMU for the same physical SMMU if available.
>     If not, allocate a new one using the nesting parent S2 HWPT.
>   - Pre-allocate two proxy nested HWPTs (bypass and abort) under the
>     vIOMMU and install one based on GBPA.ABORT value.
>   - Add the device to the vIOMMU’s device list.
>
> unset_iommu_device():
>   - Re-attach device to the nesting parent S2 HWPT.
>   - Remove the device from the vIOMMU’s device list.
>   - If the list is empty, free the proxy HWPTs (bypass and abort)
>     and release the vIOMMU object.
>
> Introduce struct SMMUv3AccelState, representing an accelerated SMMUv3
> instance backed by an iommufd vIOMMU object, and storing the bypass and
> abort proxy HWPT IDs.
>
> Signed-off-by: Nicolin Chen <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> Reviewed-by: Jonathan Cameron <[email protected]>
> Tested-by: Zhangfei Gao <[email protected]>
> Signed-off-by: Shameer Kolothum <[email protected]>
> ---
>  hw/arm/smmuv3-accel.c    | 154 +++++++++++++++++++++++++++++++++++++++
>  hw/arm/smmuv3-accel.h    |  16 ++++
>  hw/arm/smmuv3-internal.h |   3 +
>  hw/arm/trace-events      |   4 +
>  include/hw/arm/smmuv3.h  |   1 +
>  5 files changed, 178 insertions(+)
>
> diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
> index bd4a7dbde1..4dd56a8e65 100644
> --- a/hw/arm/smmuv3-accel.c
> +++ b/hw/arm/smmuv3-accel.c
> @@ -8,6 +8,7 @@
>  
>  #include "qemu/osdep.h"
>  #include "qemu/error-report.h"
> +#include "trace.h"
>  
>  #include "hw/arm/smmuv3.h"
>  #include "hw/iommu.h"
> @@ -15,6 +16,7 @@
>  #include "hw/pci-host/gpex.h"
>  #include "hw/vfio/pci.h"
>  
> +#include "smmuv3-internal.h"
>  #include "smmuv3-accel.h"
>  
>  /*
> @@ -43,6 +45,156 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState 
> *bs, SMMUPciBus *sbus,
>      return accel_dev;
>  }
>  
> +static uint32_t smmuv3_accel_gbpa_hwpt(SMMUv3State *s, SMMUv3AccelState 
> *accel)
> +{
> +    return FIELD_EX32(s->gbpa, GBPA, ABORT) ?
> +           accel->abort_hwpt_id : accel->bypass_hwpt_id;
> +}
> +
> +static bool
> +smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
> +                          Error **errp)
> +{
> +    struct iommu_hwpt_arm_smmuv3 bypass_data = {
> +        .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL },
> +    };
> +    struct iommu_hwpt_arm_smmuv3 abort_data = {
> +        .ste = { SMMU_STE_VALID, 0x0ULL },
> +    };
> +    uint32_t s2_hwpt_id = idev->hwpt_id;
> +    uint32_t viommu_id, hwpt_id;
> +    SMMUv3AccelState *accel;
> +
> +    if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
> +                                      IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
> +                                      s2_hwpt_id, &viommu_id, errp)) {
> +        return false;
> +    }
> +
> +    accel = g_new0(SMMUv3AccelState, 1);
> +    accel->viommu.viommu_id = viommu_id;
> +    accel->viommu.s2_hwpt_id = s2_hwpt_id;
> +    accel->viommu.iommufd = idev->iommufd;
> +
> +    /*
> +     * Pre-allocate HWPTs for S1 bypass and abort cases. These will be 
> attached
> +     * later for guest STEs or GBPAs that require bypass or abort 
> configuration.
> +     */
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id,
> +                                    0, IOMMU_HWPT_DATA_ARM_SMMUV3,
> +                                    sizeof(abort_data), &abort_data,
> +                                    &accel->abort_hwpt_id, errp)) {
> +        goto free_viommu;
> +    }
> +
> +    if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id,
> +                                    0, IOMMU_HWPT_DATA_ARM_SMMUV3,
> +                                    sizeof(bypass_data), &bypass_data,
> +                                    &accel->bypass_hwpt_id, errp)) {
> +        goto free_abort_hwpt;
> +    }
> +
> +    /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
> +    hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
> +        goto free_bypass_hwpt;
> +    }
> +    s->s_accel = accel;
> +    return true;
> +
> +free_bypass_hwpt:
> +    iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
> +free_abort_hwpt:
> +    iommufd_backend_free_id(idev->iommufd, accel->abort_hwpt_id);
> +free_viommu:
> +    iommufd_backend_free_id(idev->iommufd, accel->viommu.viommu_id);
> +    g_free(accel);
> +    return false;
> +}
> +
> +static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int 
> devfn,
> +                                          HostIOMMUDevice *hiod, Error 
> **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
> +    SMMUState *bs = opaque;
> +    SMMUv3State *s = ARM_SMMUV3(bs);
> +    SMMUPciBus *sbus = smmu_get_sbus(bs, bus);
> +    SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, 
> devfn);
> +
> +    if (!idev) {
> +        return true;
> +    }
> +
> +    if (accel_dev->idev) {
> +        if (accel_dev->idev != idev) {
> +            error_setg(errp, "Device already has an associated idev 0x%x",
> +                       idev->devid);
> +            return false;
> +        }
> +        return true;
> +    }
> +
> +    if (s->s_accel) {
> +        goto done;
> +    }
> +
> +    if (!smmuv3_accel_alloc_viommu(s, idev, errp)) {
> +        error_append_hint(errp, "Unable to alloc vIOMMU: idev devid 0x%x: ",
> +                          idev->devid);
> +        return false;
> +    }
> +
> +done:
> +    accel_dev->idev = idev;
> +    accel_dev->s_accel = s->s_accel;
> +    QLIST_INSERT_HEAD(&s->s_accel->device_list, accel_dev, next);
> +    trace_smmuv3_accel_set_iommu_device(devfn, idev->devid);
> +    return true;
> +}
> +
> +static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
> +                                            int devfn)
> +{
> +    SMMUState *bs = opaque;
> +    SMMUv3State *s = ARM_SMMUV3(bs);
> +    SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus);
> +    HostIOMMUDeviceIOMMUFD *idev;
> +    SMMUv3AccelDevice *accel_dev;
> +    SMMUv3AccelState *accel;
> +    SMMUDevice *sdev;
> +
> +    if (!sbus) {
> +        return;
> +    }
> +
> +    sdev = sbus->pbdev[devfn];
> +    if (!sdev) {
> +        return;
> +    }
> +
> +    accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
> +    idev = accel_dev->idev;
> +    accel = accel_dev->s_accel;
> +    /* Re-attach the default s2 hwpt id */
> +    if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) {
> +        error_report("Unable to attach the default HW pagetable: idev devid "
> +                     "0x%x", idev->devid);
> +    }
> +
> +    accel_dev->idev = NULL;
> +    accel_dev->s_accel = NULL;
> +    QLIST_REMOVE(accel_dev, next);
> +    trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid);
> +
> +    if (QLIST_EMPTY(&accel->device_list)) {
> +        iommufd_backend_free_id(accel->viommu.iommufd, 
> accel->bypass_hwpt_id);
> +        iommufd_backend_free_id(accel->viommu.iommufd, accel->abort_hwpt_id);
> +        iommufd_backend_free_id(accel->viommu.iommufd, 
> accel->viommu.viommu_id);
> +        g_free(accel);
> +        s->s_accel = NULL;
> +    }
> +}
> +
>  /*
>   * Only allow PCIe bridges, pxb-pcie roots, and GPEX roots so vfio-pci
>   * endpoints can sit downstream. Accelerated SMMUv3 requires a vfio-pci
> @@ -145,6 +297,8 @@ static const PCIIOMMUOps smmuv3_accel_ops = {
>      .supports_address_space = smmuv3_accel_supports_as,
>      .get_address_space = smmuv3_accel_find_add_as,
>      .get_viommu_flags = smmuv3_accel_get_viommu_flags,
> +    .set_iommu_device = smmuv3_accel_set_iommu_device,
> +    .unset_iommu_device = smmuv3_accel_unset_iommu_device,
>  };
>  
>  static void smmuv3_accel_as_init(SMMUv3State *s)
> diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
> index 0dc6b00d35..c72605caab 100644
> --- a/hw/arm/smmuv3-accel.h
> +++ b/hw/arm/smmuv3-accel.h
> @@ -10,10 +10,26 @@
>  #define HW_ARM_SMMUV3_ACCEL_H
>  
>  #include "hw/arm/smmu-common.h"
> +#include "system/iommufd.h"
> +#include <linux/iommufd.h>
>  #include CONFIG_DEVICES
>  
> +/*
> + * Represents an accelerated SMMU instance backed by an iommufd vIOMMU 
> object.
> + * Holds bypass and abort proxy HWPT IDs used for device attachment.
> + */
> +typedef struct SMMUv3AccelState {
> +    IOMMUFDViommu viommu;
> +    uint32_t bypass_hwpt_id;
> +    uint32_t abort_hwpt_id;
> +    QLIST_HEAD(, SMMUv3AccelDevice) device_list;
> +} SMMUv3AccelState;
> +
>  typedef struct SMMUv3AccelDevice {
>      SMMUDevice sdev;
> +    HostIOMMUDeviceIOMMUFD *idev;
> +    QLIST_ENTRY(SMMUv3AccelDevice) next;
> +    SMMUv3AccelState *s_accel;
>  } SMMUv3AccelDevice;
>  
>  #ifdef CONFIG_ARM_SMMUV3_ACCEL
> diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
> index b6b7399347..81212a58f1 100644
> --- a/hw/arm/smmuv3-internal.h
> +++ b/hw/arm/smmuv3-internal.h
> @@ -583,6 +583,9 @@ typedef struct CD {
>      ((extract64((x)->word[7], 0, 16) << 32) |           \
>       ((x)->word[6] & 0xfffffff0))
>  
> +#define SMMU_STE_VALID      (1ULL << 0)
> +#define SMMU_STE_CFG_BYPASS (1ULL << 3)
> +
>  static inline int oas2bits(int oas_field)
>  {
>      switch (oas_field) {
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index f3386bd7ae..2aaa0c40c7 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -66,6 +66,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL 
> SMMUNotifier node for iommu mr=%s
>  smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t 
> iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d 
> iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
>  smmu_reset_exit(void) ""
>  
> +#smmuv3-accel.c
> +smmuv3_accel_set_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev 
> devid=0x%x)"
> +smmuv3_accel_unset_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev 
> devid=0x%x)"
> +
>  # strongarm.c
>  strongarm_uart_update_parameters(const char *label, int speed, char parity, 
> int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
>  strongarm_ssp_read_underrun(void) "SSP rx underrun"
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index bb7076286b..e54ece2d38 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -66,6 +66,7 @@ struct SMMUv3State {
>  
>      /* SMMU has HW accelerator support for nested S1 + s2 */
>      bool accel;
> +    struct SMMUv3AccelState *s_accel;
>  };
>  
>  typedef enum {
Looks good to me

Reviewed-by: Eric Auger <[email protected]>

Eric


Reply via email to