Hi Zhenzhong, On 8/22/25 8:40 AM, Zhenzhong Duan wrote: > This adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the > pasid entry and track PASID usage and future PASID tagged DMA address > translation support in vIOMMU. > > VTDAddressSpace of PCI_NO_PASID is allocated when device is plugged and > never freed. For other pasid, VTDAddressSpace instance is created/destroyed > per the guest pasid entry set up/destroy. > > When guest removes or updates a PASID entry, QEMU will capture the guest pasid > selective pasid cache invalidation, removes VTDAddressSpace or update cached > PASID entry. > > vIOMMU emulator could figure out the reason by fetching latest guest pasid > entry > and compare it with cached PASID entry. > > Signed-off-by: Yi Liu <yi.l....@intel.com> > Signed-off-by: Yi Sun <yi.y....@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> > --- > hw/i386/intel_iommu_internal.h | 27 ++++- > include/hw/i386/intel_iommu.h | 6 + > hw/i386/intel_iommu.c | 196 +++++++++++++++++++++++++++++++-- > hw/i386/trace-events | 3 + > 4 files changed, 220 insertions(+), 12 deletions(-) > > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index f7510861d1..b9b76dd996 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -316,6 +316,7 @@ typedef enum VTDFaultReason { > * request while disabled */ > VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ > > + VTD_FR_RTADDR_INV_TTM = 0x31, /* Invalid TTM in RTADDR */ > /* PASID directory entry access failure */ > VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, > /* The Present(P) field of pasid directory entry is 0 */ > @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc; > #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL > #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL > > +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */ > +#define VTD_INV_DESC_PASIDC_G(x) extract64((x)->val[0], 4, 2) > +#define 
VTD_INV_DESC_PASIDC_G_DSI 0 > +#define VTD_INV_DESC_PASIDC_G_PASID_SI 1 > +#define VTD_INV_DESC_PASIDC_G_GLOBAL 3 > +#define VTD_INV_DESC_PASIDC_DID(x) extract64((x)->val[0], 16, 16) > +#define VTD_INV_DESC_PASIDC_PASID(x) extract64((x)->val[0], 32, 20) > +#define VTD_INV_DESC_PASIDC_RSVD_VAL0 0xfff000000000f1c0ULL > + > /* Information about page-selective IOTLB invalidate */ > struct VTDIOTLBPageInvInfo { > uint16_t domain_id; > @@ -553,6 +563,21 @@ typedef struct VTDRootEntry VTDRootEntry; > #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw)) > #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL > > +typedef enum VTDPCInvType { > + /* VTD spec defined PASID cache invalidation type */ > + VTD_PASID_CACHE_DOMSI = VTD_INV_DESC_PASIDC_G_DSI, > + VTD_PASID_CACHE_PASIDSI = VTD_INV_DESC_PASIDC_G_PASID_SI, > + VTD_PASID_CACHE_GLOBAL_INV = VTD_INV_DESC_PASIDC_G_GLOBAL, > +} VTDPCInvType; > + > +typedef struct VTDPASIDCacheInfo { > + VTDPCInvType type; > + uint16_t did; > + uint32_t pasid; > + PCIBus *bus; > + uint16_t devfn; > +} VTDPASIDCacheInfo; > + > /* PASID Table Related Definitions */ > #define VTD_PASID_DIR_BASE_ADDR_MASK (~0xfffULL) > #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL) > @@ -574,7 +599,7 @@ typedef struct VTDRootEntry VTDRootEntry; > #define VTD_SM_PASID_ENTRY_PT (4ULL << 6) > > #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width > */ > -#define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) > +#define VTD_SM_PASID_ENTRY_DID(x) extract64((x)->val[1], 0, 16) > > #define VTD_SM_PASID_ENTRY_FLPM 3ULL > #define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h > index 50f9b27a45..0e3826f6f0 100644 > --- a/include/hw/i386/intel_iommu.h > +++ b/include/hw/i386/intel_iommu.h > @@ -95,6 +95,11 @@ struct VTDPASIDEntry { > uint64_t val[8]; > }; > > +typedef struct VTDPASIDCacheEntry { > + struct VTDPASIDEntry pasid_entry; > + bool valid; 
> +} VTDPASIDCacheEntry; > + > struct VTDAddressSpace { > PCIBus *bus; > uint8_t devfn; > @@ -107,6 +112,7 @@ struct VTDAddressSpace { > MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */ > IntelIOMMUState *iommu_state; > VTDContextCacheEntry context_cache_entry; > + VTDPASIDCacheEntry pasid_cache_entry; > QLIST_ENTRY(VTDAddressSpace) next; > /* Superset of notifier flags that this address space has */ > IOMMUNotifierFlag notifier_flags; > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 1801f1cdf6..a2ee6d684e 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -1675,7 +1675,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s, > > if (s->root_scalable) { > vtd_ce_get_pasid_entry(s, ce, &pe, pasid); > - return VTD_SM_PASID_ENTRY_DID(pe.val[1]); > + return VTD_SM_PASID_ENTRY_DID(&pe); > } > > return VTD_CONTEXT_ENTRY_DID(ce->hi); > @@ -3112,6 +3112,183 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState > *s, > return true; > } > > +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as, > + uint32_t pasid, VTDPASIDEntry > *pe) > +{ > + IntelIOMMUState *s = vtd_as->iommu_state; > + VTDContextEntry ce; > + int ret; > + > + if (!s->root_scalable) { > + return -VTD_FR_RTADDR_INV_TTM; > + } > + > + ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), > vtd_as->devfn, > + &ce); > + if (ret) { > + return ret; > + } > + > + return vtd_ce_get_pasid_entry(s, &ce, pe, pasid); > +} > + > +static bool vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2) > +{ > + return !memcmp(p1, p2, sizeof(*p1)); > +} > + > +/* > + * This function is a loop function which return value determines if Suggested rewording: "This function is a loop function whose returned value determines whether the current vtd_as iterator matches the pasid cache entry info passed in user_data and needs to be removed from the pasid cache." > + * vtd_as including cached pasid entry is removed.
> + * > + * For PCI_NO_PASID, when corresponding cached pasid entry is cleared, > + * it returns false so that vtd_as is reserved as it's owned by PCI > + * sub-system. For other pasid, it returns true so vtd_as is removed. > + */ > +static gboolean vtd_flush_pasid_locked(gpointer key, gpointer value, > + gpointer user_data) > +{ > + VTDPASIDCacheInfo *pc_info = user_data; > + VTDAddressSpace *vtd_as = value; > + VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry; > + VTDPASIDEntry pe; > + uint16_t did; > + uint32_t pasid; > + int ret; > + > + if (!pc_entry->valid) { > + return false; > + } > + did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry); > + > + if (vtd_as_to_iommu_pasid_locked(vtd_as, &pasid)) { > + goto remove; > + } > + > + switch (pc_info->type) { > + case VTD_PASID_CACHE_PASIDSI: > + if (pc_info->pasid != pasid) { > + return false; > + } > + /* fall through */ > + case VTD_PASID_CACHE_DOMSI: > + if (pc_info->did != did) { > + return false; > + } > + /* fall through */ > + case VTD_PASID_CACHE_GLOBAL_INV: > + break; > + default: > + error_setg(&error_fatal, "invalid pc_info->type for flush"); > + } > + > + /* > + * pasid cache invalidation may indicate a present pasid entry to present > + * pasid entry modification. To cover such case, vIOMMU emulator needs to > + * fetch latest guest pasid entry and compares with cached pasid entry, > + * then update pasid cache. > + */ > + ret = vtd_dev_get_pe_from_pasid(vtd_as, pasid, &pe); > + if (ret) { > + /* > + * No valid pasid entry in guest memory. e.g. pasid entry was > modified > + * to be either all-zero or non-present. Either case means existing > + * pasid cache should be removed. > + */ > + goto remove; > + } > + > + /* > + * Update cached pasid entry if it's stale compared to what's in guest > + * memory. 
> + */ > + if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) { > + pc_entry->pasid_entry = pe; > + } > + return false; > + > +remove: > + pc_entry->valid = false; > + > + /* > + * Don't remove address space of PCI_NO_PASID which is created for PCI > + * sub-system. > + */ > + if (vtd_as->pasid == PCI_NO_PASID) { > + return false; > + } > + return true; > +} > + > +/* > + * For a PASID cache invalidation, this function handles below scenarios: > + * a) a present cached pasid entry needs to be removed > + * b) a present cached pasid entry needs to be updated > + */ > +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo > *pc_info) > +{ > + if (!s->flts || !s->root_scalable || !s->dmar_enabled) { > + return; > + } > + > + vtd_iommu_lock(s); > + /* > + * a,b): loop all the existing vtd_as instances for pasid cache removal > + or update. > + */ > + g_hash_table_foreach_remove(s->vtd_address_spaces, > vtd_flush_pasid_locked, > + pc_info); > + vtd_iommu_unlock(s); > +} > + > +static bool vtd_process_pasid_desc(IntelIOMMUState *s, > + VTDInvDesc *inv_desc) > +{ > + uint16_t did; > + uint32_t pasid; > + VTDPASIDCacheInfo pc_info; > + uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE, > + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; > + > + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, > + __func__, "pasid cache inv")) { > + return false; > + } > + > + did = VTD_INV_DESC_PASIDC_DID(inv_desc); > + pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc); > + > + switch (VTD_INV_DESC_PASIDC_G(inv_desc)) { > + case VTD_INV_DESC_PASIDC_G_DSI: > + trace_vtd_pasid_cache_dsi(did); > + pc_info.type = VTD_PASID_CACHE_DOMSI; > + pc_info.did = did; > + break; > + > + case VTD_INV_DESC_PASIDC_G_PASID_SI: > + /* PASID selective implies a DID selective */ > + trace_vtd_pasid_cache_psi(did, pasid); > + pc_info.type = VTD_PASID_CACHE_PASIDSI; > + pc_info.did = did; > + pc_info.pasid = pasid; > + break; > + > + case 
VTD_INV_DESC_PASIDC_G_GLOBAL: > + trace_vtd_pasid_cache_gsi(); > + pc_info.type = VTD_PASID_CACHE_GLOBAL_INV; > + break; > + > + default: > + error_report_once("invalid granularity field in PASID-cache > invalidate " > + "descriptor, hi: 0x%"PRIx64" lo: 0x%" PRIx64, > + inv_desc->val[1], inv_desc->val[0]); What's the point of printing the second 64 bits (val[1])? Looking at Figure 6-2 in the spec (section 6.5.2.2, PASID-cache Invalidate Descriptor), it does not seem to contain anything.
Besides, I read in the spec: "Domain-ID (DID): The DID field indicates the target domain-id. Hardware ignores bits 31:(16+N), where N is the domain-id width reported in the Capability Register." How do you make sure N is the same on both the pIOMMU and the vIOMMU? > + return false; > + } > + > + vtd_pasid_cache_sync(s, &pc_info); > + return true; > +} > + > static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, > VTDInvDesc *inv_desc) > { > @@ -3274,6 +3451,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) > } > break; > > + case VTD_INV_DESC_PC: > + trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]); Same here: what is the point of printing the second 64 bits (val[1])? > + if (!vtd_process_pasid_desc(s, &inv_desc)) { > + return false; > + } > + break; > + > case VTD_INV_DESC_PIOTLB: > trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]); > if (!vtd_process_piotlb_desc(s, &inv_desc)) { > @@ -3309,16 +3493,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) > } > break; > > - /* > - * TODO: the entity of below two cases will be implemented in future > series. > - * To make guest (which integrates scalable mode support patch set in > - * iommu driver) work, just return true is enough so far.
> - */ > - case VTD_INV_DESC_PC: > - if (s->scalable_mode) { > - break; > - } > - /* fallthrough */ > default: > error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64 > " (unknown type)", __func__, inv_desc.hi, > diff --git a/hw/i386/trace-events b/hw/i386/trace-events > index ac9e1a10aa..ae5bbfcdc0 100644 > --- a/hw/i386/trace-events > +++ b/hw/i386/trace-events > @@ -24,6 +24,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d" > vtd_inv_qi_tail(uint16_t head) "write tail %d" > vtd_inv_qi_fetch(void) "" > vtd_context_cache_reset(void) "" > +vtd_pasid_cache_gsi(void) "" > +vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation > domain 0x%"PRIx16 > +vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC > invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32 > vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present" > vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" > devfn %"PRIu8" not present" > vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t > domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" > domain 0x%"PRIx16 Besides that, the code looks good to me. Eric