> -----Original Message-----
> From: Eric Auger <[email protected]>
> Sent: 06 November 2025 07:43
> To: Jason Gunthorpe <[email protected]>; Nicolin Chen <[email protected]>
> Cc: Shameer Kolothum <[email protected]>; qemu-
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Nathan Chen
> <[email protected]>; Matt Ochs <[email protected]>;
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> Krishnakant Jaju <[email protected]>
> Subject: Re: [PATCH v5 15/32] hw/pci/pci: Introduce optional
> get_msi_address_space() callback
>
> External email: Use caution opening links or attachments
>
>
> On 11/5/25 7:58 PM, Jason Gunthorpe wrote:
> > On Wed, Nov 05, 2025 at 10:33:08AM -0800, Nicolin Chen wrote:
> >> On Wed, Nov 05, 2025 at 02:10:49PM -0400, Jason Gunthorpe wrote:
> >>> On Wed, Nov 05, 2025 at 06:25:05PM +0100, Eric Auger wrote:
> >>>> if the guest doorbell address is wrong because not properly translated,
> >>>> vgic_msi_to_its() will fail to identify the ITS to inject the MSI in.
> >>>> See kernel kvm/vgic/vgic-its.c vgic_msi_to_its and
> >>>> vgic_its_inject_msi
> >>> Which has been exactly my point to Nicolin. There is no way to
> >>> "properly translate" the vMSI address in a HW accelerated SMMU
> >>> emulation.
> >> Hmm, I still can't connect the dots here. QEMU knows where the
> >> guest CD table is to get the stage-1 translation table to walk
> >> through. We could choose to not let it walk through. Yet, why?
> > You cannot walk any tables in guest memory without fully trapping all
> > invalidation on all command queues. Like real HW qemu needs to fence
> > its walks with any concurrent invalidate & sync to ensure it doesn't
> > walk into a UAF situation.
> But at the moment we do trap IOTLB invalidates so logically we can still
> do the translate in that config. The problem you describe will show up
> with vCMDQ which is not part of this series.
> >
> > Since we can't trap or mediate vCMDQ the walking simply cannot be
> > done.
> >
> > Thus, the general principle of the HW accelerated vSMMU is that it
> > NEVER walks any of these guest tables for any reason.
> >
> > Thus, we cannot do anything with vMSI address beyond program it
> > directly into a real PCI device so it undergoes real HW translation.
> But anyway you need to provide KVM a valid info about the guest doorbell
> for this latter to setup irqfd gsi routing and also program ITS
> translation tables. At the moment we have a single vITS in qemu so maybe
> we can cheat.
I have tried to address the “translate” issue below. This introduces a new
get_msi_address() callback to retrieve the MSI doorbell address directly
from the vIOMMU, so we can drop the existing get_msi_address_space() logic.
Please take a look and let me know your thoughts.
Thanks,
Shameer
---
hw/arm/smmuv3-accel.c | 10 ++++++++++
hw/arm/smmuv3.c | 1 +
hw/arm/virt.c | 4 ++++
hw/pci/pci.c | 17 +++++++++++++++++
include/hw/arm/smmuv3.h | 1 +
include/hw/pci/pci.h | 15 +++++++++++++++
target/arm/kvm.c | 14 ++++++++++++--
7 files changed, 60 insertions(+), 2 deletions(-)
diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index e6c81c4786..8b2a45a915 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -667,6 +667,15 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus,
void *opaque,
}
}
+static uint64_t smmuv3_accel_get_msi_address(PCIBus *bus, void *opaque,
+ int devfn)
+{
+ SMMUState *bs = opaque;
+ SMMUv3State *s = ARM_SMMUV3(bs);
+
+ g_assert(s->msi_doorbell);
+ return s->msi_doorbell;
+}
static AddressSpace *smmuv3_accel_get_msi_as(PCIBus *bus, void *opaque,
int devfn)
{
@@ -788,6 +797,7 @@ static const PCIIOMMUOps smmuv3_accel_ops = {
.set_iommu_device = smmuv3_accel_set_iommu_device,
.unset_iommu_device = smmuv3_accel_unset_iommu_device,
.get_msi_address_space = smmuv3_accel_get_msi_as,
+ .get_msi_address = smmuv3_accel_get_msi_address,
};
void smmuv3_accel_idr_override(SMMUv3State *s)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 43d297698b..3f2ee8bcce 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2120,6 +2120,7 @@ static const Property smmuv3_properties[] = {
DEFINE_PROP_BOOL("ats", SMMUv3State, ats, false),
DEFINE_PROP_UINT8("oas", SMMUv3State, oas, 44),
DEFINE_PROP_BOOL("pasid", SMMUv3State, pasid, false),
+ DEFINE_PROP_UINT64("msi-doorbell", SMMUv3State, msi_doorbell, 0),
};
static void smmuv3_instance_init(Object *obj)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2498e3beff..d2dcb89235 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3097,6 +3097,8 @@ static void virt_machine_device_plug_cb(HotplugHandler
*hotplug_dev,
create_smmuv3_dev_dtb(vms, dev, bus, errp);
if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
+ hwaddr db_start = base_memmap[VIRT_GIC_ITS].base +
+ ITS_TRANS_SIZE + GITS_TRANSLATER;
char *stage;
stage = object_property_get_str(OBJECT(dev), "stage",
&error_fatal);
@@ -3107,6 +3109,8 @@ static void virt_machine_device_plug_cb(HotplugHandler
*hotplug_dev,
return;
}
vms->pci_preserve_config = true;
+ object_property_set_uint(OBJECT(dev), "msi-doorbell", db_start,
+ &error_abort);
}
}
}
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1edd711247..45e79a3c23 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2982,6 +2982,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice
*dev)
return &address_space_memory;
}
+bool pci_device_iommu_msi_direct_address(PCIDevice *dev, hwaddr *out_doorbell)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus) {
+ if (iommu_bus->iommu_ops->get_msi_address) {
+ *out_doorbell = iommu_bus->iommu_ops->get_msi_address(bus,
+ iommu_bus->iommu_opaque, devfn);
+ return true;
+ }
+ }
+ return false;
+}
+
AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev)
{
PCIBus *bus;
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index ee0b5ed74f..f50d8c72bd 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -72,6 +72,7 @@ struct SMMUv3State {
bool ats;
uint8_t oas;
bool pasid;
+ uint64_t msi_doorbell;
};
typedef enum {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b731443c67..e1709b0bfe 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -679,6 +679,20 @@ typedef struct PCIIOMMUOps {
*/
AddressSpace * (*get_msi_address_space)(PCIBus *bus, void *opaque,
int devfn);
+ /**
+ * @get_msi_address: get the address of MSI doorbell for the device
+ * on a PCI bus.
+ *
+ * Optional callback, if implemented must return a valid MSI doorbell
+ * address.
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ */
+ uint64_t (*get_msi_address)(PCIBus *bus, void *opaque, int devfn);
} PCIIOMMUOps;
bool pci_device_get_iommu_bus_devfn(PCIDevice *dev, PCIBus **piommu_bus,
@@ -688,6 +702,7 @@ bool pci_device_set_iommu_device(PCIDevice *dev,
HostIOMMUDevice *hiod,
Error **errp);
void pci_device_unset_iommu_device(PCIDevice *dev);
AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev);
+bool pci_device_iommu_msi_direct_address(PCIDevice *dev, hwaddr *out_doorbell);
/**
* pci_device_get_viommu_flags: get vIOMMU flags.
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 0df41128d0..8d4d2be0bc 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1611,35 +1611,45 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int
level)
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
uint64_t address, uint32_t data, PCIDevice *dev)
{
- AddressSpace *as = pci_device_iommu_msi_address_space(dev);
+ AddressSpace *as;
hwaddr xlat, len, doorbell_gpa;
MemoryRegionSection mrs;
MemoryRegion *mr;
+ /* Check if there is a direct msi address available */
+ if (pci_device_iommu_msi_direct_address(dev, &doorbell_gpa)) {
+ goto set_doorbell;
+ }
+
+ as = pci_device_iommu_msi_address_space(dev);
if (as == &address_space_memory) {
return 0;
}
/* MSI doorbell address is translated by an IOMMU */
- RCU_READ_LOCK_GUARD();
+ rcu_read_lock();
mr = address_space_translate(as, address, &xlat, &len, true,
MEMTXATTRS_UNSPECIFIED);
if (!mr) {
+ rcu_read_unlock();
return 1;
}
mrs = memory_region_find(mr, xlat, 1);
if (!mrs.mr) {
+ rcu_read_unlock();
return 1;
}
doorbell_gpa = mrs.offset_within_address_space;
memory_region_unref(mrs.mr);
+ rcu_read_unlock();
+set_doorbell:
route->u.msi.address_lo = doorbell_gpa;
route->u.msi.address_hi = doorbell_gpa >> 32;
--