On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l....@intel.com>
>
> This replays guest pasid bindings after context cache invalidation.
> This is a behavior to ensure safety. Actually, programmer should issue
> pasid cache invalidation with proper granularity after issuing a context
> cache invalidation.
So is this mandated by the spec? If the spec mandates specific invalidations
and the guest does not comply with the expected invalidation sequence, should
we really do that behind the curtain?
>
> Signed-off-by: Yi Liu <yi.l....@intel.com>
> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 ++
> hw/i386/intel_iommu.c | 42 ++++++++++++++++++++++++++++++++++
> hw/i386/trace-events | 1 +
> 3 files changed, 45 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 61e35dbdc0..8af1004888 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -584,6 +584,8 @@ typedef enum VTDPCInvType {
>
> /* Reset all PASID cache entries, used in system level reset */
> VTD_PASID_CACHE_FORCE_RESET = 0x10,
> + /* Invalidate all PASID entries in a device */
> + VTD_PASID_CACHE_DEVSI,
This invalidation type is not defined in the spec. I would avoid it and
find another solution if you really need to do this kind of invalidation.
> } VTDPCInvType;
>
> typedef struct VTDPASIDCacheInfo {
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a10ee8eb4f..6c0e502d1c 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -91,6 +91,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState
> *s);
> static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>
> static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info);
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> + PCIBus *bus, uint16_t devfn);
>
> static void vtd_panic_require_caching_mode(void)
> {
> @@ -2442,6 +2446,8 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
>
> static void vtd_context_global_invalidate(IntelIOMMUState *s)
> {
> + VTDPASIDCacheInfo pc_info;
> +
> trace_vtd_inv_desc_cc_global();
> /* Protects context cache */
> vtd_iommu_lock(s);
> @@ -2459,6 +2465,9 @@ static void
> vtd_context_global_invalidate(IntelIOMMUState *s)
> * VT-d emulation codes.
> */
> vtd_iommu_replay_all(s);
> +
> + pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
> + vtd_pasid_cache_sync(s, &pc_info);
I would put this addition in a separate patch because it does not need
the new VTD_PASID_CACHE_DEVSI stuff.
> }
>
> #ifdef CONFIG_IOMMUFD
> @@ -2691,6 +2700,15 @@ static void
> vtd_context_device_invalidate(IntelIOMMUState *s,
> * happened.
> */
> vtd_address_space_sync(vtd_as);
> + /*
> + * Per spec, context flush should also be followed with PASID
> + * cache and iotlb flush. In order to work with a guest which
> + * doesn't follow spec and missed PASID cache flush, we have
> + * vtd_pasid_cache_devsi() to invalidate PASID caches of the
> + * passthrough device. Host iommu driver would flush piotlb
> + * when a pasid unbind is pass down to it.
> + */
> + vtd_pasid_cache_devsi(s, vtd_as->bus, devfn);
> }
> }
> }
> @@ -3422,6 +3440,11 @@ static gboolean vtd_flush_pasid_locked(gpointer key,
> gpointer value,
> break;
> case VTD_PASID_CACHE_FORCE_RESET:
> goto remove;
> + case VTD_PASID_CACHE_DEVSI:
> + if (pc_info->bus != vtd_as->bus || pc_info->devfn != vtd_as->devfn) {
> + return false;
> + }
> + break;
> default:
> error_setg(&error_fatal, "invalid pc_info->type for flush");
> }
> @@ -3635,6 +3658,11 @@ static void
> vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> case VTD_PASID_CACHE_FORCE_RESET:
> /* For force reset, no need to go further replay */
> return;
> + case VTD_PASID_CACHE_DEVSI:
> + walk_info.bus = pc_info->bus;
> + walk_info.devfn = pc_info->devfn;
> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> + return;
> default:
> error_setg(&error_fatal, "invalid pc_info->type for replay");
> }
> @@ -3683,6 +3711,20 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> VTDPASIDCacheInfo *pc_info)
> vtd_replay_guest_pasid_bindings(s, pc_info);
> }
>
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> + PCIBus *bus, uint16_t devfn)
> +{
> + VTDPASIDCacheInfo pc_info;
> +
> + trace_vtd_pasid_cache_devsi(devfn);
> +
> + pc_info.type = VTD_PASID_CACHE_DEVSI;
> + pc_info.bus = bus;
> + pc_info.devfn = devfn;
> +
> + vtd_pasid_cache_sync(s, &pc_info);
> +}
> +
> static bool vtd_process_pasid_desc(IntelIOMMUState *s,
> VTDInvDesc *inv_desc)
> {
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 1c31b9a873..830b11f68b 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
> vtd_pasid_cache_gsi(void) ""
> vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation
> domain 0x%"PRIx16
> vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC
> invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev:
> 0x%"PRIx16
> vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
> vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8"
> devfn %"PRIu8" not present"
> vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t
> domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64"
> domain 0x%"PRIx16
Eric