On 1/18/24 09:43, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.au...@redhat.com>
>> Subject: Re: [PATCH rfcv1 3/6] intel_iommu: add set/unset_iommu_device
>> callback
>>
>> Hi Zhenzhong,
>>
>> On 1/15/24 11:13, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l....@intel.com>
>>>
>>> This adds set/unset_iommu_device() implementation in Intel vIOMMU.
>>> In set call, IOMMUFDDevice is recorded in hash table indexed by
>>> PCI BDF.
>>>
>>> Signed-off-by: Yi Liu <yi.l....@intel.com>
>>> Signed-off-by: Yi Sun <yi.y....@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com>
>>> ---
>>> include/hw/i386/intel_iommu.h | 10 +++++
>>> hw/i386/intel_iommu.c | 79
>> +++++++++++++++++++++++++++++++++++
>>> 2 files changed, 89 insertions(+)
>>>
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 7fa0a695c8..c65fdde56f 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -62,6 +62,7 @@ typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>>> typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>>> typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
>>> typedef struct VTDPASIDEntry VTDPASIDEntry;
>>> +typedef struct VTDIOMMUFDDevice VTDIOMMUFDDevice;
>>>
>>> /* Context-Entry */
>>> struct VTDContextEntry {
>>> @@ -148,6 +149,13 @@ struct VTDAddressSpace {
>>> IOVATree *iova_tree;
>>> };
>>>
>>> +struct VTDIOMMUFDDevice {
>>> + PCIBus *bus;
>>> + uint8_t devfn;
>>> + IOMMUFDDevice *idev;
>>> + IntelIOMMUState *iommu_state;
>>> +};
>>> +
>> Just wondering whether we shouldn't reuse the VTDAddressSpace to store
>> the idev, if any. How have you made your choice. What will it become
>> when PASID gets added?
> VTDAddressSpace is indexed by aliased BDF, but VTDIOMMUFDDevice is indexed
> by device's BDF. So we can't just store VTDIOMMUFDDevice as a pointer in
> VTDAddressSpace, may need a list in case more than one device in same address
> space. Then a global VTDIOMMUFDDevice list is better for lookup.
OK but if several devices are hidden under an aliased BDF, can't they
share the host properties (DMAR ecap/cap)?
>
> For PASID in modern mode which support stage-1 page table, we have
> VTDPASIDAddressSpace indexed by device's BDF+PASID, We didn't use
> VTDAddressSpace which is for stage-2 page table.
OK
Thanks
Eric
>
> Thanks
> Zhenzhong
>
>>> struct VTDIOTLBEntry {
>>> uint64_t gfn;
>>> uint16_t domain_id;
>>> @@ -292,6 +300,8 @@ struct IntelIOMMUState {
>>> /* list of registered notifiers */
>>> QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>>>
>>> + GHashTable *vtd_iommufd_dev; /* VTDIOMMUFDDevice */
>>> +
>>> /* interrupt remapping */
>>> bool intr_enabled; /* Whether guest enabled IR */
>>> dma_addr_t intr_root; /* Interrupt remapping table pointer */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index ed5677c0ae..95faf697eb 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -237,6 +237,13 @@ static gboolean vtd_as_equal(gconstpointer v1,
>> gconstpointer v2)
>>> (key1->pasid == key2->pasid);
>>> }
>>>
>>> +static gboolean vtd_as_idev_equal(gconstpointer v1, gconstpointer v2)
>>> +{
>>> + const struct vtd_as_key *key1 = v1;
>>> + const struct vtd_as_key *key2 = v2;
>>> +
>>> + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
>>> +}
>>> /*
>>> * Note that we use pointer to PCIBus as the key, so hashing/shifting
>>> * based on the pointer value is intended. Note that we deal with
>>> @@ -3812,6 +3819,74 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>> return vtd_dev_as;
>>> }
>>>
>>> +static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque,
>> int32_t devfn,
>>> + IOMMUFDDevice *idev, Error **errp)
>>> +{
>>> + IntelIOMMUState *s = opaque;
>>> + VTDIOMMUFDDevice *vtd_idev;
>>> + struct vtd_as_key key = {
>>> + .bus = bus,
>>> + .devfn = devfn,
>>> + };
>>> + struct vtd_as_key *new_key;
>>> +
>>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>>> +
>>> + /* None IOMMUFD case */
>>> + if (!idev) {
>>> + return 0;
>>> + }
>>> +
>>> + vtd_iommu_lock(s);
>>> +
>>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>>> +
>>> + if (vtd_idev) {
>>> + error_setg(errp, "IOMMUFD device already exist");
>>> + return -1;
>>> + }
>>> +
>>> + new_key = g_malloc(sizeof(*new_key));
>>> + new_key->bus = bus;
>>> + new_key->devfn = devfn;
>>> +
>>> + vtd_idev = g_malloc0(sizeof(VTDIOMMUFDDevice));
>>> + vtd_idev->bus = bus;
>>> + vtd_idev->devfn = (uint8_t)devfn;
>>> + vtd_idev->iommu_state = s;
>>> + vtd_idev->idev = idev;
>>> +
>>> + g_hash_table_insert(s->vtd_iommufd_dev, new_key, vtd_idev);
>>> +
>>> + vtd_iommu_unlock(s);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> +static void vtd_dev_unset_iommu_device(PCIBus *bus, void *opaque,
>> int32_t devfn)
>>> +{
>>> + IntelIOMMUState *s = opaque;
>>> + VTDIOMMUFDDevice *vtd_idev;
>>> + struct vtd_as_key key = {
>>> + .bus = bus,
>>> + .devfn = devfn,
>>> + };
>>> +
>>> + assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
>>> +
>>> + vtd_iommu_lock(s);
>>> +
>>> + vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key);
>>> + if (!vtd_idev) {
>>> + vtd_iommu_unlock(s);
>>> + return;
>>> + }
>>> +
>>> + g_hash_table_remove(s->vtd_iommufd_dev, &key);
>>> +
>>> + vtd_iommu_unlock(s);
>>> +}
>>> +
>>> /* Unmap the whole range in the notifier's scope. */
>>> static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n)
>>> {
>>> @@ -4107,6 +4182,8 @@ static AddressSpace
>> *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
>>> static PCIIOMMUOps vtd_iommu_ops = {
>>> .get_address_space = vtd_host_dma_iommu,
>>> + .set_iommu_device = vtd_dev_set_iommu_device,
>>> + .unset_iommu_device = vtd_dev_unset_iommu_device,
>>> };
>>>
>>> static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>>> @@ -4230,6 +4307,8 @@ static void vtd_realize(DeviceState *dev, Error
>> **errp)
>>> g_free, g_free);
>>> s->vtd_address_spaces = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_equal,
>>> g_free, g_free);
>>> + s->vtd_iommufd_dev = g_hash_table_new_full(vtd_as_hash,
>> vtd_as_idev_equal,
>>> + g_free, g_free);
>>> vtd_init(s);
>>> pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>>> /* Pseudo address space under root PCI bus. */
>> Thanks
>>
>> Eric