>-----Original Message-----
>From: Liu, Yi L <[email protected]>
>Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
>
>On 2025/10/13 15:37, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <[email protected]>
>>> Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache
>invalidation
>>>
>>> On 2025/9/18 16:57, Zhenzhong Duan wrote:
>>>> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't
>supported.
>>>>
>>>> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache
>the
>>> pasid
>>>> entry and track PASID usage and future PASID tagged DMA address
>>> translation
>>>> support in vIOMMU.
>>>>
>>>> When guest triggers pasid cache invalidation, QEMU will capture it and
>>>> update or invalidate pasid cache.
>>>>
>>>> vIOMMU emulator could figure out the reason by fetching latest guest
>pasid
>>>> entry in memory and compare it with cached PASID entry if it's valid.
>>>>
>>>> Signed-off-by: Yi Liu <[email protected]>
>>>> Signed-off-by: Zhenzhong Duan <[email protected]>
>>>> ---
>>>>    hw/i386/intel_iommu_internal.h |  19 +++-
>>>>    include/hw/i386/intel_iommu.h  |   6 ++
>>>>    hw/i386/intel_iommu.c          | 157
>>> ++++++++++++++++++++++++++++++---
>>>>    hw/i386/trace-events           |   3 +
>>>>    4 files changed, 173 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>> index 9cdc8d5dbb..d400bcee21 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
>>>>                                      * request while disabled */
>>>>        VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>>>
>>>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>>>        /* PASID directory entry access failure */
>>>>        VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>>>        /* The Present(P) field of pasid directory entry is 0 */
>>>> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL0
>>> 0xfff000000000f1c0ULL
>>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>>>
>>>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
>>>> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4,
>2)
>>>> +#define VTD_INV_DESC_PASIDC_G_DSI       0
>>>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
>>>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
>>>> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16,
>>> 16)
>>>> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32,
>>> 20)
>>>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0
>0xfff000000000f1c0ULL
>>>> +
>>>>    /* Information about page-selective IOTLB invalidate */
>>>>    struct VTDIOTLBPageInvInfo {
>>>>        uint16_t domain_id;
>>>> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>>> ~VTD_HAW_MASK(aw))
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>>> 0xffffffffffe00000ULL
>>>>
>>>> +typedef struct VTDPASIDCacheInfo {
>>>> +    uint8_t type;
>>>> +    uint16_t did;
>>>> +    uint32_t pasid;
>>>> +    bool reset;
>>>> +} VTDPASIDCacheInfo;
>>>> +
>>>>    /* PASID Table Related Definitions */
>>>>    #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>>>    #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>>>> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>>>>
>>>>    #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted
>>> guest-address-width */
>>>> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) &
>>> VTD_DOMAIN_ID_MASK)
>>>> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0,
>16)
>>>>
>>>>    #define VTD_SM_PASID_ENTRY_FSPM          3ULL
>>>>    #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
>>>> diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
>>>> index 3351892da0..ff01e5c82d 100644
>>>> --- a/include/hw/i386/intel_iommu.h
>>>> +++ b/include/hw/i386/intel_iommu.h
>>>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>>>        uint64_t val[8];
>>>>    };
>>>>
>>>> +typedef struct VTDPASIDCacheEntry {
>>>> +    struct VTDPASIDEntry pasid_entry;
>>>> +    bool valid;
>>>> +} VTDPASIDCacheEntry;
>>>> +
>>>>    struct VTDAddressSpace {
>>>>        PCIBus *bus;
>>>>        uint8_t devfn;
>>>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>>>        MemoryRegion iommu_ir_fault; /* Interrupt region for catching
>>> fault */
>>>>        IntelIOMMUState *iommu_state;
>>>>        VTDContextCacheEntry context_cache_entry;
>>>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>>>        QLIST_ENTRY(VTDAddressSpace) next;
>>>>        /* Superset of notifier flags that this address space has */
>>>>        IOMMUNotifierFlag notifier_flags;
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index d37d47115a..24061f6dc6 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -1614,7 +1614,7 @@ static uint16_t
>>> vtd_get_domain_id(IntelIOMMUState *s,
>>>>
>>>>        if (s->root_scalable) {
>>>>            vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
>>>> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
>>>> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>>>>        }
>>>>
>>>>        return VTD_CONTEXT_ENTRY_DID(ce->hi);
>>>> @@ -3074,6 +3074,144 @@ static bool
>>> vtd_process_piotlb_desc(IntelIOMMUState *s,
>>>>        return true;
>>>>    }
>>>>
>>>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>>>> +                                            VTDPASIDEntry
>*pe)
>>>> +{
>>>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>>>> +    VTDContextEntry ce;
>>>> +    int ret;
>>>> +
>>>> +    if (!s->root_scalable) {
>>>> +        return -VTD_FR_RTADDR_INV_TTM;
>>>> +    }
>>>> +
>>>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>>> vtd_as->devfn,
>>>> +                                   &ce);
>>>> +    if (ret) {
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
>>>> +}
>>>> +
>>>> +/*
>>>> + * For each IOMMUFD backed device, update or invalidate pasid cache
>>> based on
>>>> + * the value in memory.
>>>> + */
>>>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>>> +                                        gpointer user_data)
>>>> +{
>>>> +    VTDPASIDCacheInfo *pc_info = user_data;
>>>> +    VTDAddressSpace *vtd_as = value;
>>>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>>>> +    VTDPASIDEntry pe;
>>>> +    uint16_t did;
>>>> +
>>>> +    /* Ignore emulated device or legacy VFIO backed device */
>>>> +    if (!vtd_find_hiod_iommufd(vtd_as)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* non-RID_PASID isn't supported yet */
>>>> +    assert(vtd_as->pasid == PCI_NO_PASID);
>>>> +
>>>> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>>>> +        /*
>>>> +         * No valid pasid entry in guest memory. e.g. pasid entry was
>>> modified
>>>> +         * to be either all-zero or non-present. Either case means
>>> existing
>>>> +         * pasid cache should be invalidated.
>>>> +         */
>>>> +        pc_entry->valid = false;
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * VTD_INV_DESC_PASIDC_G_DSI and
>>> VTD_INV_DESC_PASIDC_G_PASID_SI require
>>>> +     * DID check. If DID doesn't match the value in cache or memory,
>>> then
>>>> +     * it's not a pasid entry we want to invalidate.
>>>
>>> I think comparing DID applies to the case in which pc_entry->valid is
>>> true. If pc_entry->valid is false, this means no cached pc_entry yet. If
>>> pe in guest memory is valid, the pc_entry should be updated/set hence
>>> the bind_pasid operation (added in later patch) would be conducted.
>>
>> We get here only when pe in guest memory is valid, or else we have
>returned in "if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {" check.
>>
>> If no cached pe but valid pe in guest memory, that means a new pe.
>> For new entry, guest constructs pasid cache invalidation request with DID
>> field filled with DID from pe in memory. We don't unconditionally cache
>new pe
>> for all devices for one pasid cache invalidation except it's global
>invalidation.
>
>I see. yes, intel iommu driver has already used the did configed in the
>pasid entry to flush pasid cache per caching mode. But there seems no
>words stated this in spec. Anyway, I don't see any reason why a guest
>iommu driver wants to use a did unequal to the one in pasid entry when
>this is a newly set pasid entry. So it's fine to me now.
>
>btw. it would be nice to note how you support the global invalidation
>since it's no more part of pc_info->type.

It is still part of pc_info-> type, I have used VTD_INV_DESC_PASIDC_G_GLOBAL
in "intel_iommu: Replay all pasid bindings when either SRTP or TE bit is 
changed" and
"intel_iommu: Replay pasid bindings after context cache invalidation"

For "intel_iommu: Replay pasid bindings after context cache invalidation",
I have to bring it back in v7 as I found old linux kernel doesn't follow vtd 
spec,
so need that workaround to support old guest.

Thanks
Zhenzhong

Reply via email to