On Fri, Oct 31, 2025 at 10:49:46AM +0000, Shameer Kolothum wrote:
> +static bool
> +smmuv3_accel_alloc_vdev(SMMUv3AccelDevice *accel_dev, int sid, Error **errp)
> +{
> +    SMMUViommu *vsmmu = accel_dev->vsmmu;
> +    IOMMUFDVdev *vdev;
> +    uint32_t vdevice_id;
> +
> +    if (!accel_dev->idev || accel_dev->vdev) {
> +        return true;
> +    }
We probably don't need to check !accel_dev->idev here. It should
have been blocked by its caller, which does block !accel_dev->vsmmu.
Once we fix the missing "accel_dev->vsmmu == NULL" check, it should
work.
> +
> +    if (!iommufd_backend_alloc_vdev(vsmmu->iommufd, accel_dev->idev->devid,
> +                                    vsmmu->viommu.viommu_id, sid,
> +                                    &vdevice_id, errp)) {
> +        return false;
> +    }
> +    if (!host_iommu_device_iommufd_attach_hwpt(accel_dev->idev,
> +                                               vsmmu->bypass_hwpt_id,
> +                                               errp)) {
> +        iommufd_backend_free_id(vsmmu->iommufd, vdevice_id);
> +        return false;
> +    }
This should check the SMMUEN bit?
The Linux driver (as an example) seems to set CMDQEN and install all
the default bypass STEs before setting SMMUEN=1. In that case, the
target hwpt here should follow the guest's GBPA.
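Something like this maybe -- just a sketch, where the helper name is
illustrative and the "enabled"/"gbpa_abort" flags are the ones
suggested further down (they don't exist yet):

    /* GBPA only takes effect while SMMUEN=0 */
    static uint32_t smmuv3_accel_default_hwpt_id(SMMUv3State *s,
                                                 SMMUViommu *vsmmu)
    {
        if (!s->enabled && s->gbpa_abort) {
            return vsmmu->abort_hwpt_id;
        }
        return vsmmu->bypass_hwpt_id;
    }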
> +static bool
> +smmuv3_accel_dev_uninstall_nested_ste(SMMUv3AccelDevice *accel_dev,
> +                                      bool abort, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    uint32_t hwpt_id;
> +
> +    if (!s1_hwpt || !accel_dev->vsmmu) {
> +        return true;
> +    }
> +
> +    if (abort) {
> +        hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
> +    } else {
> +        hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
> +    }
This should probably check SMMUEN/GBPA as well.
Likely we need "enabled" and "gbpa_abort" flags in SMMUState.
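Something along these lines perhaps (sketch only; the names are just
suggestions):

    /* In SMMUState, cached from guest register writes */
    bool enabled;     /* CR0.SMMUEN */
    bool gbpa_abort;  /* GBPA.ABORT, honored only while SMMUEN=0 */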
> +static bool
> +smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
> +                                    uint32_t data_type, uint32_t data_len,
> +                                    void *data, Error **errp)
> +{
> +    SMMUViommu *vsmmu = accel_dev->vsmmu;
> +    SMMUS1Hwpt *s1_hwpt = accel_dev->s1_hwpt;
> +    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
> +    uint32_t flags = 0;
> +
> +    if (!idev || !vsmmu) {
> +        error_setg(errp, "Device 0x%x has no associated IOMMU dev or vIOMMU",
> +                   smmu_get_sid(&accel_dev->sdev));
> +        return false;
> +    }
> +
> +    if (s1_hwpt) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev, true, errp)) {
> +            return false;
> +        }
> +    }
I think we could have some improvements here.
The current flow is:
    (attached to s1_hwpt1)
    attach to bypass/abort_hwpt    // no issue though
    free s1_hwpt1
    alloc s1_hwpt2
    attach to s1_hwpt2
It could have been a flow like replace() in the kernel:
    (attached to s1_hwpt1)
    alloc s1_hwpt2
    attach to s1_hwpt2             /* skipping bypass/abort */
    free s1_hwpt1
> +smmuv3_accel_install_nested_ste(SMMUv3State *s, SMMUDevice *sdev, int sid,
[...]
> +    config = STE_CONFIG(&ste);
> +    if (!STE_VALID(&ste) || !STE_CFG_S1_ENABLED(config)) {
> +        if (!smmuv3_accel_dev_uninstall_nested_ste(accel_dev,
> +                                                   STE_CFG_ABORT(config),
This smmuv3_accel_dev_uninstall_nested_ste() feels a bit redundant now.
Perhaps we could try something like this:
#define accel_dev_to_smmuv3(dev) ARM_SMMUV3((dev)->sdev.smmu)

static bool smmuv3_accel_dev_install_nested_ste(SMMUv3AccelDevice *accel_dev,
                                                int sid, STE *ste)
{
    SMMUv3State *s = accel_dev_to_smmuv3(accel_dev);
    HostIOMMUDeviceIOMMUFD *idev = accel_dev->idev;
    uint32_t config = STE_CONFIG(ste);
    SMMUS1Hwpt *s1_hwpt = NULL;
    uint32_t hwpt_id = 0;

    if (!s->enabled) {
        if (s->gbpa_abort) {
            hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
        } else {
            hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
        }
    } else {
        if (!STE_VALID(ste) || STE_CFG_ABORT(config)) {
            hwpt_id = accel_dev->vsmmu->abort_hwpt_id;
        } else if (STE_CFG_BYPASS(config)) {
            hwpt_id = accel_dev->vsmmu->bypass_hwpt_id;
        } else {
            /* FIXME handle STE_CFG_S2_ENABLED() */
        }
    }

    if (!hwpt_id) {
        uint64_t ste_0 = (uint64_t)ste->word[0] | (uint64_t)ste->word[1] << 32;
        uint64_t ste_1 = (uint64_t)ste->word[2] | (uint64_t)ste->word[3] << 32;
        struct iommu_hwpt_arm_smmuv3 nested_data = {
            .ste = {
                cpu_to_le64(ste_0 & STE0_MASK),
                cpu_to_le64(ste_1 & STE1_MASK),
            },
        };

        trace_smmuv3_accel_install_nested_ste(sid, nested_data.ste[1],
                                              nested_data.ste[0]);
        s1_hwpt = g_new0(SMMUS1Hwpt, 1);
        [...]
        iommufd_backend_alloc_hwpt(..., &s1_hwpt->hwpt_id);
        hwpt_id = s1_hwpt->hwpt_id;
    }

    host_iommu_device_iommufd_attach_hwpt(.., hwpt_id);
    if (accel_dev->s1_hwpt) {
        iommufd_backend_free_id(idev->iommufd, accel_dev->s1_hwpt->hwpt_id);
        g_free(accel_dev->s1_hwpt);
    }
    accel_dev->s1_hwpt = s1_hwpt;
    return true;
}
> +bool smmuv3_accel_install_nested_ste_range(SMMUv3State *s,
> +                                           SMMUSIDRange *range,
> +                                           Error **errp)
> +{
> +    SMMUv3AccelState *s_accel = s->s_accel;
> +    SMMUv3AccelDevice *accel_dev;
> +
> +    if (!s_accel || !s_accel->vsmmu) {
> +        return true;
> +    }
> +
> +    QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
> +        uint32_t sid = smmu_get_sid(&accel_dev->sdev);
> +
> +        if (sid >= range->start && sid <= range->end) {
> +            if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev,
> +                                                 sid, errp)) {
> +                return false;
> +            }
> +        }
This is a bit tricky..
I think CFGI_STE_RANGE shouldn't stop in the middle if one of the
STEs fails.
That being said, HW doesn't seem to propagate C_BAD_STE during a
CFGI_STE or CFGI_STE_RANGE, IIUIC. It reports a C_BAD_STE event when
a transaction starts. If we want to perfectly mimic the hardware,
we'd have to set the bad STE down to the HW, which would trigger a
C_BAD_STE vEVENT to be forwarded via the vEVENTQ.
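To illustrate the "don't stop in the middle" point, the loop could do
something like this (untested sketch, reusing the names from the patch
above):

    Error *local_err = NULL;

    QLIST_FOREACH(accel_dev, &s_accel->vsmmu->device_list, next) {
        uint32_t sid = smmu_get_sid(&accel_dev->sdev);

        if (sid < range->start || sid > range->end) {
            continue;
        }
        if (!smmuv3_accel_install_nested_ste(s, &accel_dev->sdev, sid,
                                             &local_err)) {
            /* Report and carry on, so one bad STE doesn't block the rest */
            error_report_err(local_err);
            local_err = NULL;
        }
    }
    return true;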
Nicolin