>-----Original Message----- >From: Cédric Le Goater <c...@redhat.com> >Subject: Re: [PATCH rfcv1 6/6] intel_iommu: add a framework to check and >sync host IOMMU cap/ecap > >On 1/15/24 11:13, Zhenzhong Duan wrote: >> From: Yi Liu <yi.l....@intel.com> >> >> Add a framework to check and synchronize host IOMMU cap/ecap with >> vIOMMU cap/ecap. >> >> Currently only stage-2 translation is supported which is backed by >> shadow page table on host side. So we don't need exact matching of >> each bit of cap/ecap between vIOMMU and host. However, we can still >> utilize this framework to ensure compatibility of host and vIOMMU's >> address width at least, i.e., vIOMMU's aw_bits <= host aw_bits, >> which is missed before. >> >> When stage-1 translation is supported in future, a.k.a. scalable >> modern mode, we need to ensure compatibility of each bits. Some >> bits are user controllable, they should be checked with host side >> to ensure compatibility. Other bits are not, they should be synced >> into vIOMMU cap/ecap for compatibility. >> >> The sequence will be: >> >> vtd_cap_init() initializes iommu->cap/ecap. ---- vtd_cap_init() >> iommu->host_cap/ecap is initialized as iommu->cap/ecap. ---- vtd_init() >> iommu->host_cap/ecap is checked and updated some bits with host >cap/ecap. ---- vtd_sync_hw_info() >> iommu->cap/ecap is finalized as iommu->host_cap/ecap. ---- >vtd_machine_done_hook() >> >> iommu->host_cap/ecap is a temporary storage to hold intermediate value >> when synthesize host cap/ecap and vIOMMU's initial configured cap/ecap. > > >The above "sequence" paragraph is not very clear. The patch may need to >be split further.
OK, will do. > > > >> Signed-off-by: Yi Liu <yi.l....@intel.com> >> Signed-off-by: Yi Sun <yi.y....@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.d...@intel.com> >> --- >> include/hw/i386/intel_iommu.h | 4 ++ >> hw/i386/intel_iommu.c | 78 >+++++++++++++++++++++++++++++++---- >> 2 files changed, 75 insertions(+), 7 deletions(-) >> >> diff --git a/include/hw/i386/intel_iommu.h >b/include/hw/i386/intel_iommu.h >> index c65fdde56f..b8abbcce12 100644 >> --- a/include/hw/i386/intel_iommu.h >> +++ b/include/hw/i386/intel_iommu.h >> @@ -292,6 +292,9 @@ struct IntelIOMMUState { >> uint64_t cap; /* The value of capability reg */ >> uint64_t ecap; /* The value of extended capability >> reg */ >> >> + uint64_t host_cap; /* The value of host capability reg */ >> + uint64_t host_ecap; /* The value of host ext-capability reg >> */ >> + >> uint32_t context_cache_gen; /* Should be in [1,MAX] */ >> GHashTable *iotlb; /* IOTLB */ >> >> @@ -314,6 +317,7 @@ struct IntelIOMMUState { >> bool dma_translation; /* Whether DMA translation supported */ >> bool pasid; /* Whether to support PASID */ >> >> + bool cap_finalized; /* Whether VTD capability finalized */ >> /* >> * Protects IOMMU states in general. Currently it protects the >> * per-IOMMU IOTLB cache, and context entry cache in >VTDAddressSpace. >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 4c1d058ebd..be03fcbf52 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -3819,6 +3819,47 @@ VTDAddressSpace >*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, >> return vtd_dev_as; >> } >> >> +static bool vtd_sync_hw_info(IntelIOMMUState *s, struct >iommu_hw_info_vtd *vtd, >> + Error **errp) >> +{ >> + uint64_t addr_width; >> + >> + addr_width = (vtd->cap_reg >> 16) & 0x3fULL; > >Vivek uses the same kind of macro in : > > https://lore.kernel.org/qemu-devel/20240118192049.1796763-1- >vivek.kasire...@intel.com/ > >What about the + 1 ? 
Looks like it's missing here, according to 11.4.2 >Capability Register. > >Could we introduce a common macro in intel_iommu_internal.h ? Sure. > > >> + if (s->aw_bits > addr_width) { >> + error_setg(errp, "User aw-bits: %u > host address width: %lu", > >I think %lu should be PRId64. This is a general comment. You should avoid >%llx, %lx, etc. in the code. Got it. > >> + s->aw_bits, addr_width); >> + return false; >> + } >> + >> + /* TODO: check and sync host cap/ecap into vIOMMU cap/ecap */ >> + >> + return true; >> +} >> + >> +/* >> + * virtual VT-d which wants nested needs to check the host IOMMU >> + * nesting cap info behind the assigned devices. Thus that vIOMMU >> + * could bind guest page table to host. >> + */ >> +static bool vtd_check_idev(IntelIOMMUState *s, IOMMUFDDevice *idev, >> + Error **errp) >> +{ >> + struct iommu_hw_info_vtd vtd; >> + enum iommu_hw_info_type type = >IOMMU_HW_INFO_TYPE_INTEL_VTD; >> + >> + if (iommufd_device_get_info(idev, &type, sizeof(vtd), &vtd)) { >> + error_setg(errp, "Failed to get IOMMU capability!!!"); >> + return false; >> + } >> + >> + if (type != IOMMU_HW_INFO_TYPE_INTEL_VTD) { >> + error_setg(errp, "IOMMU hardware is not compatible!!!"); >> + return false; >> + } >> + >> + return vtd_sync_hw_info(s, &vtd, errp); >> +} >> + >> static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, >int32_t devfn, >> IOMMUFDDevice *idev, Error **errp) >> { >> @@ -3837,6 +3878,10 @@ static int vtd_dev_set_iommu_device(PCIBus >*bus, void *opaque, int32_t devfn, >> return 0; >> } >> >> + if (!vtd_check_idev(s, idev, errp)) { >> + return -1; >> + } >> + >> vtd_iommu_lock(s); >> >> vtd_idev = g_hash_table_lookup(s->vtd_iommufd_dev, &key); >> @@ -4071,10 +4116,11 @@ static void vtd_init(IntelIOMMUState *s) >> { >> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); >> >> - memset(s->csr, 0, DMAR_REG_SIZE); >> - memset(s->wmask, 0, DMAR_REG_SIZE); >> - memset(s->w1cmask, 0, DMAR_REG_SIZE); >> - memset(s->womask, 0, DMAR_REG_SIZE); >> + /* 
CAP/ECAP are initialized in machine create done stage */ >> + memset(s->csr + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->wmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->w1cmask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> + memset(s->womask + DMAR_GCMD_REG, 0, DMAR_REG_SIZE - >DMAR_GCMD_REG); >> >> s->root = 0; >> s->root_scalable = false; >> @@ -4110,13 +4156,16 @@ static void vtd_init(IntelIOMMUState *s) > >vtd_init() is called from reset and from realize. This is redundant. >reset should be enough. Looks so. I'll try to see if it breaks anything. > > >> vtd_spte_rsvd_large[3] &= ~VTD_SPTE_SNP; >> } >> >> - vtd_cap_init(s); >> + if (!s->cap_finalized) { > >ok. so this can only be done in reset. I don't quite follow; vtd_init can be called multiple times harmlessly before machine create done. But once is enough, i.e., in reset. The call in realize looks redundant. > >> + vtd_cap_init(s); >> + s->host_cap = s->cap; >> + s->host_ecap = s->ecap; >> + } >> + >> vtd_reset_caches(s); >> >> /* Define registers with default values and bit semantics */ >> vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0); >> - vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); >> - vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); >> vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0); >> vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL); >> vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0); >> @@ -4241,6 +4290,12 @@ static bool >vtd_decide_config(IntelIOMMUState *s, Error **errp) >> return true; >> } >> >> +static void vtd_setup_capability_reg(IntelIOMMUState *s) >> +{ >> + vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0); >> + vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0); >> +} >> + >> static int vtd_machine_done_notify_one(Object *child, void *unused) >> { >> IntelIOMMUState *iommu = >INTEL_IOMMU_DEVICE(x86_iommu_get_default()); >> @@ -4259,6 +4314,14 @@ static int >vtd_machine_done_notify_one(Object *child, void *unused) >> >> static void 
vtd_machine_done_hook(Notifier *notifier, void *unused) >> { >> + IntelIOMMUState *iommu = >INTEL_IOMMU_DEVICE(x86_iommu_get_default()); >> + >> + iommu->cap = iommu->host_cap; >> + iommu->ecap = iommu->host_ecap; >> + iommu->cap_finalized = true; >> + >> + vtd_setup_capability_reg(iommu); >> + > >This is confusing. Please split the patch better to reflect the ordering of >the e/cap register settings. Will do. Thanks Zhenzhong