On 10/24/25 10:43 AM, Zhenzhong Duan wrote:
> From: Yi Liu <[email protected]>
>
> This replays guest pasid bindings after context cache invalidation.
> Actually, programmer should issue pasid cache invalidation with proper
> granularity after issuing context cache invalidation.
>
> We see old linux such as 6.7.0-rc2 not following the spec, it sends
> pasid cache invalidation before context cache invalidation, then QEMU
> depends on context cache invalidation to get pasid entry and setup
> binding.
>
> Signed-off-by: Yi Liu <[email protected]>
> Signed-off-by: Zhenzhong Duan <[email protected]>
> ---
> hw/i386/intel_iommu.c | 47 +++++++++++++++++++++++++++++++++++++++++++
> hw/i386/trace-events | 1 +
> 2 files changed, 48 insertions(+)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 1f78274204..edd1416382 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -93,6 +93,8 @@ static void vtd_address_space_refresh_all(IntelIOMMUState
> *s);
> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
> static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> + gpointer user_data);
>
> static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
> {
> @@ -2388,6 +2390,13 @@ static void
> vtd_context_global_invalidate(IntelIOMMUState *s)
> * VT-d emulation codes.
> */
> vtd_iommu_replay_all(s);
> + /*
> + * Same for pasid cache invalidation, per VT-d spec 6.5.2.1, a global
> + * context cache invalidation should be followed by global PASID cache
> + * invalidation. In order to work with guest not following spec,
> + * handle global PASID cache invalidation here.
> + */
> + vtd_replay_pasid_bindings_all(s);
> }
>
> #ifdef CONFIG_IOMMUFD
> @@ -2589,6 +2598,35 @@ vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
> }
> #endif
>
> +static void vtd_pasid_cache_devsi(VTDAddressSpace *vtd_as)
> +{
> + IntelIOMMUState *s = vtd_as->iommu_state;
> + PCIBus *bus = vtd_as->bus;
> + uint8_t devfn = vtd_as->devfn;
> + struct vtd_as_key key = {
> + .bus = bus,
> + .devfn = devfn,
> + .pasid = vtd_as->pasid,
> + };
> + VTDPASIDCacheInfo pc_info;
> +
> + if (!s->fsts || !s->root_scalable || !s->dmar_enabled) {
> + return;
> + }
> +
> + trace_vtd_pasid_cache_devsi(pci_bus_num(bus),
> + VTD_PCI_SLOT(devfn), VTD_PCI_FUNC(devfn));
> +
> + /* We fake to be global invalidation just to bypass all checks */
Can you clarify which checks are being bypassed, and why you want to bypass them?
> + pc_info.type = VTD_INV_DESC_PASIDC_G_GLOBAL;
> +
> + /*
> + * We already get vtd_as of the device whose PASID cache is invalidated,
s/get/got/. Also, I am not sure this comment is worth keeping.
> + * so just call vtd_pasid_cache_sync_locked() once.
> + */
> + vtd_pasid_cache_sync_locked(&key, vtd_as, &pc_info);
> +}
> +
> /* Do a context-cache device-selective invalidation.
> * @func_mask: FM field after shifting
> */
> @@ -2647,6 +2685,15 @@ static void
> vtd_context_device_invalidate(IntelIOMMUState *s,
> * happened.
> */
> vtd_address_space_sync(vtd_as);
> + /*
> + * Per spec 6.5.2.1, context flush should be followed by PASID
> + * cache and iotlb flush. In order to work with a guest which
> does
> + * not follow spec and missed PASID cache flush, e.g., linux
> + * 6.7.0-rc2, we have vtd_pasid_cache_devsi() to invalidate PASID
> + * cache of passthrough device. Host iommu driver would flush
> + * piotlb when a pasid unbind is pass down to it.
s/is pass down/is passed down/
> + */
> + vtd_pasid_cache_devsi(vtd_as);
> }
> }
> }
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 5a3ee1cf64..5fa5e93b68 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
> vtd_inv_desc_pasid_cache_gsi(void) ""
> vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC
> invalidation domain 0x%"PRIx16
> vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID
> selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint8_t bus, uint8_t dev, uint8_t fn) "Dev selective
> PC invalidation dev: %02"PRIx8":%02"PRIx8".%02"PRIx8
> vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
> vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8"
> devfn %"PRIu8" not present"
> vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t
> domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64"
> domain 0x%"PRIx16
Besides the minor comments above:
Reviewed-by: Eric Auger <[email protected]>
Eric