On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l....@intel.com>
>
> This replays guest pasid bindings after context cache invalidation.
> This is a behavior to ensure safety. Actually, programmer should issue
> pasid cache invalidation with proper granularity after issuing a context
> cache invalidation.
So is this mandated? If the spec mandates specific invalidations and the
guest does not comply with the expected invalidation sequence shall we
do that behind the curtain?
>
> Signed-off-by: Yi Liu <yi.l....@intel.com>
> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
> ---
>  hw/i386/intel_iommu_internal.h |  2 ++
>  hw/i386/intel_iommu.c          | 42 ++++++++++++++++++++++++++++++++++
>  hw/i386/trace-events           |  1 +
>  3 files changed, 45 insertions(+)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 61e35dbdc0..8af1004888 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -584,6 +584,8 @@ typedef enum VTDPCInvType {
>  
>      /* Reset all PASID cache entries, used in system level reset */
>      VTD_PASID_CACHE_FORCE_RESET = 0x10,
> +    /* Invalidate all PASID entries in a device */
> +    VTD_PASID_CACHE_DEVSI,
invalidation type that is not defined in the spec. I would avoid and
find another solution if you really need to do such kind of invalidation.
>  } VTDPCInvType;
>  
>  typedef struct VTDPASIDCacheInfo {
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a10ee8eb4f..6c0e502d1c 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -91,6 +91,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState 
> *s);
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>  
>  static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> +                                 VTDPASIDCacheInfo *pc_info);
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> +                                  PCIBus *bus, uint16_t devfn);
>  
>  static void vtd_panic_require_caching_mode(void)
>  {
> @@ -2442,6 +2446,8 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
>  
>  static void vtd_context_global_invalidate(IntelIOMMUState *s)
>  {
> +    VTDPASIDCacheInfo pc_info;
> +
>      trace_vtd_inv_desc_cc_global();
>      /* Protects context cache */
>      vtd_iommu_lock(s);
> @@ -2459,6 +2465,9 @@ static void 
> vtd_context_global_invalidate(IntelIOMMUState *s)
>       * VT-d emulation codes.
>       */
>      vtd_iommu_replay_all(s);
> +
> +    pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
> +    vtd_pasid_cache_sync(s, &pc_info);
I would put this addition in a separate patch because it does not need
the new

VTD_PASID_CACHE_DEVSI stuff

>  }
>  
>  #ifdef CONFIG_IOMMUFD
> @@ -2691,6 +2700,15 @@ static void 
> vtd_context_device_invalidate(IntelIOMMUState *s,
>               * happened.
>               */
>              vtd_address_space_sync(vtd_as);
> +            /*
> +             * Per spec, context flush should also be followed with PASID
> +             * cache and iotlb flush. In order to work with a guest which
> +             * doesn't follow spec and missed PASID cache flush, we have
> +             * vtd_pasid_cache_devsi() to invalidate PASID caches of the
> +             * passthrough device. Host iommu driver would flush piotlb
> +             * when a pasid unbind is pass down to it.
> +             */
> +             vtd_pasid_cache_devsi(s, vtd_as->bus, devfn);
>          }
>      }
>  }
> @@ -3422,6 +3440,11 @@ static gboolean vtd_flush_pasid_locked(gpointer key, 
> gpointer value,
>          break;
>      case VTD_PASID_CACHE_FORCE_RESET:
>          goto remove;
> +    case VTD_PASID_CACHE_DEVSI:
> +        if (pc_info->bus != vtd_as->bus || pc_info->devfn != vtd_as->devfn) {
> +            return false;
> +        }
> +        break;
>      default:
>          error_setg(&error_fatal, "invalid pc_info->type for flush");
>      }
> @@ -3635,6 +3658,11 @@ static void 
> vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>      case VTD_PASID_CACHE_FORCE_RESET:
>          /* For force reset, no need to go further replay */
>          return;
> +    case VTD_PASID_CACHE_DEVSI:
> +        walk_info.bus = pc_info->bus;
> +        walk_info.devfn = pc_info->devfn;
> +        vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> +        return;
>      default:
>          error_setg(&error_fatal, "invalid pc_info->type for replay");
>      }
> @@ -3683,6 +3711,20 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, 
> VTDPASIDCacheInfo *pc_info)
>      vtd_replay_guest_pasid_bindings(s, pc_info);
>  }
>  
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> +                                  PCIBus *bus, uint16_t devfn)
> +{
> +    VTDPASIDCacheInfo pc_info;
> +
> +    trace_vtd_pasid_cache_devsi(devfn);
> +
> +    pc_info.type = VTD_PASID_CACHE_DEVSI;
> +    pc_info.bus = bus;
> +    pc_info.devfn = devfn;
> +
> +    vtd_pasid_cache_sync(s, &pc_info);
> +}
> +
>  static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>                                     VTDInvDesc *inv_desc)
>  {
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 1c31b9a873..830b11f68b 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
>  vtd_pasid_cache_gsi(void) ""
>  vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation 
> domain 0x%"PRIx16
>  vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC 
> invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev: 
> 0x%"PRIx16
>  vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
>  vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" 
> devfn %"PRIu8" not present"
>  vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t 
> domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" 
> domain 0x%"PRIx16
Eric


Reply via email to