>-----Original Message----- >From: Liu, Yi L <[email protected]> >Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation > >On 2025/10/13 15:37, Duan, Zhenzhong wrote: >> >> >>> -----Original Message----- >>> From: Liu, Yi L <[email protected]> >>> Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache >invalidation >>> >>> On 2025/9/18 16:57, Zhenzhong Duan wrote: >>>> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't >supported. >>>> >>>> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache >the >>> pasid >>>> entry and track PASID usage and future PASID tagged DMA address >>> translation >>>> support in vIOMMU. >>>> >>>> When guest triggers pasid cache invalidation, QEMU will capture it and >>>> update or invalidate pasid cache. >>>> >>>> vIOMMU emulator could figure out the reason by fetching latest guest >pasid >>>> entry in memory and compare it with cached PASID entry if it's valid. >>>> >>>> Signed-off-by: Yi Liu <[email protected]> >>>> Signed-off-by: Zhenzhong Duan <[email protected]> >>>> --- >>>> hw/i386/intel_iommu_internal.h | 19 +++- >>>> include/hw/i386/intel_iommu.h | 6 ++ >>>> hw/i386/intel_iommu.c | 157 >>> ++++++++++++++++++++++++++++++--- >>>> hw/i386/trace-events | 3 + >>>> 4 files changed, 173 insertions(+), 12 deletions(-) >>>> >>>> diff --git a/hw/i386/intel_iommu_internal.h >>> b/hw/i386/intel_iommu_internal.h >>>> index 9cdc8d5dbb..d400bcee21 100644 >>>> --- a/hw/i386/intel_iommu_internal.h >>>> +++ b/hw/i386/intel_iommu_internal.h >>>> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason { >>>> * request while disabled */ >>>> VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ >>>> >>>> + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ >>>> /* PASID directory entry access failure */ >>>> VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, >>>> /* The Present(P) field of pasid directory entry is 0 */ >>>> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc; >>>> #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 >>> 0xfff000000000f1c0ULL >>>> #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL >>>> >>>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */ >>>> +#define VTD_INV_DESC_PASIDC_G(x) extract64((x)->val[0], 4, >2) >>>> +#define VTD_INV_DESC_PASIDC_G_DSI 0 >>>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI 1 >>>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL 3 >>>> +#define VTD_INV_DESC_PASIDC_DID(x) extract64((x)->val[0], 16, >>> 16) >>>> +#define VTD_INV_DESC_PASIDC_PASID(x) extract64((x)->val[0], 32, >>> 20) >>>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 >0xfff000000000f1c0ULL >>>> + >>>> /* Information about page-selective IOTLB invalidate */ >>>> struct VTDIOTLBPageInvInfo { >>>> uint16_t domain_id; >>>> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry; >>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | >>> ~VTD_HAW_MASK(aw)) >>>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 >>> 0xffffffffffe00000ULL >>>> >>>> +typedef struct VTDPASIDCacheInfo { >>>> + uint8_t type; >>>> + uint16_t did; >>>> + uint32_t pasid; >>>> + bool reset; >>>> +} VTDPASIDCacheInfo; >>>> + >>>> /* PASID Table Related Definitions */ >>>> #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) >>>> #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) >>>> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry; >>>> #define VTD_SM_PASID_ENTRY_PT (4ULL << 6) >>>> >>>> #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted >>> guest-address-width */ >>>> -#define VTD_SM_PASID_ENTRY_DID(val) ((val) & >>> VTD_DOMAIN_ID_MASK) >>>> +#define VTD_SM_PASID_ENTRY_DID(x) extract64((x)->val[1], 0, >16) >>>> >>>> #define VTD_SM_PASID_ENTRY_FSPM 3ULL >>>> #define VTD_SM_PASID_ENTRY_FSPTPTR (~0xfffULL) >>>> diff --git a/include/hw/i386/intel_iommu.h >>> b/include/hw/i386/intel_iommu.h >>>> index 3351892da0..ff01e5c82d 100644 >>>> --- a/include/hw/i386/intel_iommu.h >>>> +++ b/include/hw/i386/intel_iommu.h >>>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry { >>>> uint64_t val[8]; >>>> }; >>>> >>>> +typedef struct VTDPASIDCacheEntry { >>>> + struct VTDPASIDEntry pasid_entry; >>>> + bool valid; >>>> +} VTDPASIDCacheEntry; >>>> + >>>> struct VTDAddressSpace { >>>> PCIBus *bus; >>>> uint8_t devfn; >>>> @@ -107,6 +112,7 @@ struct VTDAddressSpace { >>>> MemoryRegion iommu_ir_fault; /* Interrupt region for catching >>> fault */ >>>> IntelIOMMUState *iommu_state; >>>> VTDContextCacheEntry context_cache_entry; >>>> + VTDPASIDCacheEntry pasid_cache_entry; >>>> QLIST_ENTRY(VTDAddressSpace) next; >>>> /* Superset of notifier flags that this address space has */ >>>> IOMMUNotifierFlag notifier_flags; >>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >>>> index d37d47115a..24061f6dc6 100644 >>>> --- a/hw/i386/intel_iommu.c >>>> +++ b/hw/i386/intel_iommu.c >>>> @@ -1614,7 +1614,7 @@ static uint16_t >>> vtd_get_domain_id(IntelIOMMUState *s, >>>> >>>> if (s->root_scalable) { >>>> vtd_ce_get_pasid_entry(s, ce, &pe, pasid); >>>> - return VTD_SM_PASID_ENTRY_DID(pe.val[1]); >>>> + return VTD_SM_PASID_ENTRY_DID(&pe); >>>> } >>>> >>>> return VTD_CONTEXT_ENTRY_DID(ce->hi); >>>> @@ -3074,6 +3074,144 @@ static bool >>> vtd_process_piotlb_desc(IntelIOMMUState *s, >>>> return true; >>>> } >>>> >>>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, >>>> + VTDPASIDEntry >*pe) >>>> +{ >>>> + IntelIOMMUState *s = vtd_as->iommu_state; >>>> + VTDContextEntry ce; >>>> + int ret; >>>> + >>>> + if (!s->root_scalable) { >>>> + return -VTD_FR_RTADDR_INV_TTM; >>>> + } >>>> + >>>> + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), >>> vtd_as->devfn, >>>> + &ce); >>>> + if (ret) { >>>> + return ret; >>>> + } >>>> + >>>> + return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid); >>>> +} >>>> + >>>> +/* >>>> + * For each IOMMUFD backed device, update or invalidate pasid cache >>> based on >>>> + * the value in memory. >>>> + */ >>>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value, >>>> + gpointer user_data) >>>> +{ >>>> + VTDPASIDCacheInfo *pc_info = user_data; >>>> + VTDAddressSpace *vtd_as = value; >>>> + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; >>>> + VTDPASIDEntry pe; >>>> + uint16_t did; >>>> + >>>> + /* Ignore emulated device or legacy VFIO backed device */ >>>> + if (!vtd_find_hiod_iommufd(vtd_as)) { >>>> + return; >>>> + } >>>> + >>>> + /* non-RID_PASID isn't supported yet */ >>>> + assert(vtd_as->pasid == PCI_NO_PASID); >>>> + >>>> + if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) { >>>> + /* >>>> + * No valid pasid entry in guest memory. e.g. pasid entry was >>> modified >>>> + * to be either all-zero or non-present. Either case means >>> existing >>>> + * pasid cache should be invalidated. >>>> + */ >>>> + pc_entry->valid = false; >>>> + return; >>>> + } >>>> + >>>> + /* >>>> + * VTD_INV_DESC_PASIDC_G_DSI and >>> VTD_INV_DESC_PASIDC_G_PASID_SI require >>>> + * DID check. If DID doesn't match the value in cache or memory, >>> then >>>> + * it's not a pasid entry we want to invalidate. >>> >>> I think comparing DID applies to the case in which pc_entry->valid is >>> true. If pc_entry->valid is false, this means no cached pc_entry yet. If >>> pe in guest memory is valid, the pc_entry should be updated/set hence >>> the bind_pasid operation (added in later patch) would be conducted. >> >> We get here only when pe in guest memory is valid, or else we have >returned in "if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {" check. >> >> If no cached pe but valid pe in guest memory, that means a new pe. >> For new entry, guest constructs pasid cache invalidation request with DID >> field filled with DID from pe in memory. We don't unconditionally cache >new pe >> for all devices for one pasid cache invalidation except it's global >invalidation. > >I see. yes, intel iommu driver has already used the did configed in the >pasid entry to flush pasid cache per caching mode. But there seems no >words stated this in spec. Anyway, I don't see any reason why a guest >iommu driver wants to use a did unequal to the one in pasid entry when >this is a newly set pasid entry. So it's fine to me now. > >btw. it would be nice to note how you support the global invalidation >since it's no more part of pc_info->type.
It is still part of pc_info-> type, I have used VTD_INV_DESC_PASIDC_G_GLOBAL in "intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed" and "intel_iommu: Replay pasid bindings after context cache invalidation" For "intel_iommu: Replay pasid bindings after context cache invalidation", I have to bring it back in v7 as I found old linux kernel doesn't follow vtd spec, so need that workaround to support old guest. Thanks Zhenzhong
