From: Nicolin Chen <[email protected]>
Implement the VFIO/PCI callbacks to attach and detach a HostIOMMUDevice
to a vSMMUv3 when accel=on:
- set_iommu_device(): attach a HostIOMMUDevice to a vIOMMU
- unset_iommu_device(): detach and release associated resources
In SMMUv3 accel=on mode, the guest SMMUv3 is backed by the host SMMUv3 via
IOMMUFD. A vIOMMU object (created via IOMMU_VIOMMU_ALLOC) provides a per-VM,
security-isolated handle to the physical SMMUv3. Without a vIOMMU, the
vSMMUv3 cannot relay guest operations to the host hardware nor maintain
isolation across VMs or devices. Therefore, set_iommu_device() allocates
a vIOMMU object if one does not already exist.
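For reference, a minimal sketch of that allocation step, using the
iommufd backend helper this patch calls further below (error handling
and bookkeeping trimmed):

    /*
     * Allocate a per-VM vIOMMU handle for the physical SMMUv3, using
     * the VFIO-core-managed S2 HWPT (idev->hwpt_id) as the nesting
     * parent.
     */
    uint32_t viommu_id;

    if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
                                      IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
                                      idev->hwpt_id, &viommu_id, errp)) {
        return false;
    }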
There are two main points to consider in this implementation:
1) VFIO core allocates and attaches an S2 HWPT that acts as the nesting
   parent for nested HWPTs (IOMMU_DOMAIN_NESTED). This parent HWPT will
   be shared across multiple vSMMU instances within a VM.
2) A device cannot attach directly to a vIOMMU. Instead, it attaches
   through a proxy nested HWPT (IOMMU_DOMAIN_NESTED). Based on the STE
   configuration, there are three types of nested HWPTs: bypass, abort,
   and translate.
- The bypass and abort proxy HWPTs are pre-allocated. When the SMMUv3
  operates in global abort or bypass mode (as controlled by the GBPA
  register), or the guest installs a vSTE for bypass or abort, we attach
  these pre-allocated nested HWPTs.
- The translate HWPT requires a vDEVICE to be allocated first, since
  invalidations and events depend on a valid vSID.
- The vDEVICE allocation and attach operations for vSTE-based HWPTs
  are implemented in subsequent patches.
In summary, a device placed behind a vSMMU instance must have a vSID for
a translate vSTE. The bypass and abort vSTEs are pre-allocated as proxy
nested HWPTs and are attached based on the GBPA register. The core-managed
nesting parent S2 HWPT serves as the parent for all the nested HWPTs and
is intended to be shared across vSMMU instances within the same VM.
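For clarity, the two proxy HWPTs only carry a minimal vSTE payload in
STE word 0: V (bit 0) set, and the Config field either left at its abort
encoding or set to bypass, exactly as allocated in the code below:

    struct iommu_hwpt_arm_smmuv3 abort_data = {
        .ste = { SMMU_STE_VALID, 0x0ULL },                       /* V=1, Config=abort */
    };
    struct iommu_hwpt_arm_smmuv3 bypass_data = {
        .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL }, /* V=1, Config=bypass */
    };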
set_iommu_device() (key steps sketched after this list):
- Reuse an existing vIOMMU for the same physical SMMU if available.
If not, allocate a new one using the nesting parent S2 HWPT.
- Pre-allocate two proxy nested HWPTs (bypass and abort) under the
vIOMMU and install one based on the GBPA.ABORT value.
- Add the device to the vIOMMU’s device list.
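A minimal sketch of the GBPA-based selection and attach from the list
above, matching the helpers used in the code below:

    /* GBPA.ABORT selects which pre-allocated proxy HWPT the device gets */
    uint32_t hwpt_id = FIELD_EX32(s->gbpa, GBPA, ABORT) ?
                       accel->abort_hwpt_id : accel->bypass_hwpt_id;

    if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
        /* error path below frees the proxy HWPTs and the vIOMMU */
        return false;
    }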
unset_iommu_device() (teardown sketched after this list):
- Re-attach the device to the nesting parent S2 HWPT.
- Remove the device from the vIOMMU’s device list.
- If the list is empty, free the proxy HWPTs (bypass and abort)
and release the vIOMMU object.
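Roughly, this teardown looks like the sketch below; the full version in
the diff also clears the per-device pointers and reports attach errors:

    /* Fall back to the core-managed nesting parent S2 HWPT */
    host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL);
    QLIST_REMOVE(accel_dev, next);

    if (QLIST_EMPTY(&accel->device_list)) {
        /* Last device gone: release the proxy HWPTs and the vIOMMU */
        iommufd_backend_free_id(accel->viommu.iommufd, accel->bypass_hwpt_id);
        iommufd_backend_free_id(accel->viommu.iommufd, accel->abort_hwpt_id);
        iommufd_backend_free_id(accel->viommu.iommufd, accel->viommu.viommu_id);
        g_free(accel);
        s->s_accel = NULL;
    }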
Introduce struct SMMUv3AccelState, representing an accelerated SMMUv3
instance backed by an iommufd vIOMMU object, and storing the bypass and
abort proxy HWPT IDs.
Signed-off-by: Nicolin Chen <[email protected]>
Reviewed-by: Jonathan Cameron <[email protected]>
Tested-by: Zhangfei Gao <[email protected]>
Signed-off-by: Shameer Kolothum <[email protected]>
---
hw/arm/smmuv3-accel.c | 154 +++++++++++++++++++++++++++++++++++++++
hw/arm/smmuv3-accel.h | 16 ++++
hw/arm/smmuv3-internal.h | 3 +
hw/arm/trace-events | 4 +
include/hw/arm/smmuv3.h | 1 +
5 files changed, 178 insertions(+)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index bd4a7dbde1..4dd56a8e65 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -8,6 +8,7 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "trace.h"
#include "hw/arm/smmuv3.h"
#include "hw/iommu.h"
@@ -15,6 +16,7 @@
#include "hw/pci-host/gpex.h"
#include "hw/vfio/pci.h"
+#include "smmuv3-internal.h"
#include "smmuv3-accel.h"
/*
@@ -43,6 +45,156 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
return accel_dev;
}
+static uint32_t smmuv3_accel_gbpa_hwpt(SMMUv3State *s, SMMUv3AccelState *accel)
+{
+ return FIELD_EX32(s->gbpa, GBPA, ABORT) ?
+ accel->abort_hwpt_id : accel->bypass_hwpt_id;
+}
+
+static bool
+smmuv3_accel_alloc_viommu(SMMUv3State *s, HostIOMMUDeviceIOMMUFD *idev,
+ Error **errp)
+{
+ struct iommu_hwpt_arm_smmuv3 bypass_data = {
+ .ste = { SMMU_STE_CFG_BYPASS | SMMU_STE_VALID, 0x0ULL },
+ };
+ struct iommu_hwpt_arm_smmuv3 abort_data = {
+ .ste = { SMMU_STE_VALID, 0x0ULL },
+ };
+ uint32_t s2_hwpt_id = idev->hwpt_id;
+ uint32_t viommu_id, hwpt_id;
+ SMMUv3AccelState *accel;
+
+ if (!iommufd_backend_alloc_viommu(idev->iommufd, idev->devid,
+ IOMMU_VIOMMU_TYPE_ARM_SMMUV3,
+ s2_hwpt_id, &viommu_id, errp)) {
+ return false;
+ }
+
+ accel = g_new0(SMMUv3AccelState, 1);
+ accel->viommu.viommu_id = viommu_id;
+ accel->viommu.s2_hwpt_id = s2_hwpt_id;
+ accel->viommu.iommufd = idev->iommufd;
+
+ /*
+ * Pre-allocate HWPTs for S1 bypass and abort cases. These will be attached
+ * later for guest STEs or GBPAs that require bypass or abort configuration.
+ */
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id,
+ 0, IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(abort_data), &abort_data,
+ &accel->abort_hwpt_id, errp)) {
+ goto free_viommu;
+ }
+
+ if (!iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid, viommu_id,
+ 0, IOMMU_HWPT_DATA_ARM_SMMUV3,
+ sizeof(bypass_data), &bypass_data,
+ &accel->bypass_hwpt_id, errp)) {
+ goto free_abort_hwpt;
+ }
+
+ /* Attach a HWPT based on SMMUv3 GBPA.ABORT value */
+ hwpt_id = smmuv3_accel_gbpa_hwpt(s, accel);
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp)) {
+ goto free_bypass_hwpt;
+ }
+ s->s_accel = accel;
+ return true;
+
+free_bypass_hwpt:
+ iommufd_backend_free_id(idev->iommufd, accel->bypass_hwpt_id);
+free_abort_hwpt:
+ iommufd_backend_free_id(idev->iommufd, accel->abort_hwpt_id);
+free_viommu:
+ iommufd_backend_free_id(idev->iommufd, accel->viommu.viommu_id);
+ g_free(accel);
+ return false;
+}
+
+static bool smmuv3_accel_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
+ HostIOMMUDevice *hiod, Error **errp)
+{
+ HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(hiod);
+ SMMUState *bs = opaque;
+ SMMUv3State *s = ARM_SMMUV3(bs);
+ SMMUPciBus *sbus = smmu_get_sbus(bs, bus);
+ SMMUv3AccelDevice *accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn);
+
+ if (!idev) {
+ return true;
+ }
+
+ if (accel_dev->idev) {
+ if (accel_dev->idev != idev) {
+ error_setg(errp, "Device already has an associated idev 0x%x",
+ idev->devid);
+ return false;
+ }
+ return true;
+ }
+
+ if (s->s_accel) {
+ goto done;
+ }
+
+ if (!smmuv3_accel_alloc_viommu(s, idev, errp)) {
+ error_append_hint(errp, "Unable to alloc vIOMMU: idev devid 0x%x: ",
+ idev->devid);
+ return false;
+ }
+
+done:
+ accel_dev->idev = idev;
+ accel_dev->s_accel = s->s_accel;
+ QLIST_INSERT_HEAD(&s->s_accel->device_list, accel_dev, next);
+ trace_smmuv3_accel_set_iommu_device(devfn, idev->devid);
+ return true;
+}
+
+static void smmuv3_accel_unset_iommu_device(PCIBus *bus, void *opaque,
+ int devfn)
+{
+ SMMUState *bs = opaque;
+ SMMUv3State *s = ARM_SMMUV3(bs);
+ SMMUPciBus *sbus = g_hash_table_lookup(bs->smmu_pcibus_by_busptr, bus);
+ HostIOMMUDeviceIOMMUFD *idev;
+ SMMUv3AccelDevice *accel_dev;
+ SMMUv3AccelState *accel;
+ SMMUDevice *sdev;
+
+ if (!sbus) {
+ return;
+ }
+
+ sdev = sbus->pbdev[devfn];
+ if (!sdev) {
+ return;
+ }
+
+ accel_dev = container_of(sdev, SMMUv3AccelDevice, sdev);
+ idev = accel_dev->idev;
+ accel = accel_dev->s_accel;
+ /* Re-attach the default s2 hwpt id */
+ if (!host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, NULL)) {
+ error_report("Unable to attach the default HW pagetable: idev devid "
+ "0x%x", idev->devid);
+ }
+
+ accel_dev->idev = NULL;
+ accel_dev->s_accel = NULL;
+ QLIST_REMOVE(accel_dev, next);
+ trace_smmuv3_accel_unset_iommu_device(devfn, idev->devid);
+
+ if (QLIST_EMPTY(&accel->device_list)) {
+ iommufd_backend_free_id(accel->viommu.iommufd, accel->bypass_hwpt_id);
+ iommufd_backend_free_id(accel->viommu.iommufd, accel->abort_hwpt_id);
+ iommufd_backend_free_id(accel->viommu.iommufd, accel->viommu.viommu_id);
+ g_free(accel);
+ s->s_accel = NULL;
+ }
+}
+
/*
* Only allow PCIe bridges, pxb-pcie roots, and GPEX roots so vfio-pci
* endpoints can sit downstream. Accelerated SMMUv3 requires a vfio-pci
@@ -145,6 +297,8 @@ static const PCIIOMMUOps smmuv3_accel_ops = {
.supports_address_space = smmuv3_accel_supports_as,
.get_address_space = smmuv3_accel_find_add_as,
.get_viommu_flags = smmuv3_accel_get_viommu_flags,
+ .set_iommu_device = smmuv3_accel_set_iommu_device,
+ .unset_iommu_device = smmuv3_accel_unset_iommu_device,
};
static void smmuv3_accel_as_init(SMMUv3State *s)
diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
index 0dc6b00d35..c72605caab 100644
--- a/hw/arm/smmuv3-accel.h
+++ b/hw/arm/smmuv3-accel.h
@@ -10,10 +10,26 @@
#define HW_ARM_SMMUV3_ACCEL_H
#include "hw/arm/smmu-common.h"
+#include "system/iommufd.h"
+#include <linux/iommufd.h>
#include CONFIG_DEVICES
+/*
+ * Represents an accelerated SMMU instance backed by an iommufd vIOMMU object.
+ * Holds bypass and abort proxy HWPT IDs used for device attachment.
+ */
+typedef struct SMMUv3AccelState {
+ IOMMUFDViommu viommu;
+ uint32_t bypass_hwpt_id;
+ uint32_t abort_hwpt_id;
+ QLIST_HEAD(, SMMUv3AccelDevice) device_list;
+} SMMUv3AccelState;
+
typedef struct SMMUv3AccelDevice {
SMMUDevice sdev;
+ HostIOMMUDeviceIOMMUFD *idev;
+ QLIST_ENTRY(SMMUv3AccelDevice) next;
+ SMMUv3AccelState *s_accel;
} SMMUv3AccelDevice;
#ifdef CONFIG_ARM_SMMUV3_ACCEL
diff --git a/hw/arm/smmuv3-internal.h b/hw/arm/smmuv3-internal.h
index b6b7399347..81212a58f1 100644
--- a/hw/arm/smmuv3-internal.h
+++ b/hw/arm/smmuv3-internal.h
@@ -583,6 +583,9 @@ typedef struct CD {
((extract64((x)->word[7], 0, 16) << 32) | \
((x)->word[6] & 0xfffffff0))
+#define SMMU_STE_VALID (1ULL << 0)
+#define SMMU_STE_CFG_BYPASS (1ULL << 3)
+
static inline int oas2bits(int oas_field)
{
switch (oas_field) {
diff --git a/hw/arm/trace-events b/hw/arm/trace-events
index f3386bd7ae..2aaa0c40c7 100644
--- a/hw/arm/trace-events
+++ b/hw/arm/trace-events
@@ -66,6 +66,10 @@ smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s
smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d"
smmu_reset_exit(void) ""
+# smmuv3-accel.c
+smmuv3_accel_set_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev devid=0x%x)"
+smmuv3_accel_unset_iommu_device(int devfn, uint32_t devid) "devfn=0x%x (idev devid=0x%x)"
+
# strongarm.c
strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d"
strongarm_ssp_read_underrun(void) "SSP rx underrun"
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index bb7076286b..e54ece2d38 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -66,6 +66,7 @@ struct SMMUv3State {
/* SMMU has HW accelerator support for nested S1 + s2 */
bool accel;
+ struct SMMUv3AccelState *s_accel;
};
typedef enum {
--
2.43.0