[PATCH 1/1] iommu/vt-d: Support asynchronous IOMMU nested capabilities
Current VT-d implementation supports nested translation only if all underlying IOMMUs support the nested capability. This is unnecessary as the upper layer is allowed to create different containers and set them with different type of iommu backend. The IOMMU driver needs to guarantee that devices attached to a nested mode iommu_domain should support nested capability. Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f1742da42478..1cd4840e6f9f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4755,6 +4755,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && + !ecap_nest(iommu->ecap)) { + dev_err(dev, "%s: iommu not support nested translation\n", + iommu->name); + return -EINVAL; + } + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -5451,11 +5458,21 @@ static int intel_iommu_enable_nesting(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool has_nesting = false; unsigned long flags; - int ret = -ENODEV; + int ret = -EINVAL; + + for_each_active_iommu(iommu, drhd) + if (ecap_nest(iommu->ecap)) + has_nesting = true; + + if (!has_nesting) + return -ENODEV; spin_lock_irqsave(_domain_lock, flags); - if (nested_mode_support() && list_empty(_domain->devices)) { + if (list_empty(_domain->devices)) { dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; ret = 0; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 1/1] iommu/vt-d: Support asynchronous IOMMU nested capabilities
> From: Lu Baolu > Sent: Wednesday, May 12, 2021 3:04 PM > > Current VT-d implementation supports nested translation only if all > underlying IOMMUs support the nested capability. This is unnecessary > as the upper layer is allowed to create different containers and set > them with different type of iommu backend. The IOMMU driver needs to > guarantee that devices attached to a nested mode iommu_domain should > support nested capabilility. so the consistency check is now applied only to the IOMMUs that are spanned by a given iommu_domain? > > Signed-off-by: Lu Baolu > --- > drivers/iommu/intel/iommu.c | 21 +++-- > 1 file changed, 19 insertions(+), 2 deletions(-) > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index f1742da42478..1cd4840e6f9f 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -4755,6 +4755,13 @@ static int prepare_domain_attach_device(struct > iommu_domain *domain, > if (!iommu) > return -ENODEV; > > + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && > + !ecap_nest(iommu->ecap)) { > + dev_err(dev, "%s: iommu not support nested translation\n", > + iommu->name); > + return -EINVAL; > + } > + > /* check if this iommu agaw is sufficient for max mapped address */ > addr_width = agaw_to_width(iommu->agaw); > if (addr_width > cap_mgaw(iommu->cap)) > @@ -5451,11 +5458,21 @@ static int > intel_iommu_enable_nesting(struct iommu_domain *domain) > { > struct dmar_domain *dmar_domain = to_dmar_domain(domain); > + struct dmar_drhd_unit *drhd; > + struct intel_iommu *iommu; > + bool has_nesting = false; > unsigned long flags; > - int ret = -ENODEV; > + int ret = -EINVAL; > + > + for_each_active_iommu(iommu, drhd) > + if (ecap_nest(iommu->ecap)) > + has_nesting = true; > + > + if (!has_nesting) > + return -ENODEV; Isn't above still doing global consistency check? 
> > spin_lock_irqsave(_domain_lock, flags); > - if (nested_mode_support() && list_empty(_domain->devices)) > { > + if (list_empty(_domain->devices)) { > dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; > dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; > ret = 0; > -- > 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[RESEND PATCH 1/1] iommu/vt-d: Use user privilege for RID2PASID translation
When first-level page tables are used for IOVA translation, we use user privilege by setting U/S bit in the page table entry. This is to make it consistent with the second level translation, where the U/S enforcement is not available. Clear the SRE (Supervisor Request Enable) field in the pasid table entry of RID2PASID so that requests requesting the supervisor privilege are blocked and treated as DMA remapping faults. Suggested-by: Jacob Pan Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 7 +-- drivers/iommu/intel/pasid.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 708f430af1c4..f1742da42478 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2525,9 +2525,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, u32 pasid) { - int flags = PASID_FLAG_SUPERVISOR_MODE; struct dma_pte *pgd = domain->pgd; int agaw, level; + int flags = 0; /* * Skip top levels of page tables for iommu which has @@ -2543,7 +2543,10 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + if (pasid != PASID_RID2PASID) + flags |= PASID_FLAG_SUPERVISOR_MODE; + if (level == 5) + flags |= PASID_FLAG_FL5LP; if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) flags |= PASID_FLAG_PAGE_SNOOP; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 72646bafc52f..72dc84821dad 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -699,7 +699,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). 
*/ - pasid_set_sre(pte); + if (pasid != PASID_RID2PASID) + pasid_set_sre(pte); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH v8 7/9] vfio/mdev: Add iommu related member in mdev_device
> From: Jason Gunthorpe > Sent: Wednesday, May 12, 2021 1:37 AM > > On Tue, May 11, 2021 at 02:56:05PM +0800, Lu Baolu wrote: > > > > After my next series the mdev drivers will have direct access to > > > the vfio_device. So an alternative to using the struct device, or > > > adding 'if mdev' is to add an API to the vfio_device world to > > > inject what iommu configuration is needed from that direction > > > instead of trying to discover it from a struct device. > > > > Just want to make sure that I understand you correctly. > > > > We should use the existing IOMMU in-kernel APIs to connect mdev with the > > iommu subsystem, so that the upper lays don't need to use something > > like (if dev_is_mdev) to handle mdev differently. Do I get you > > correctly? > > After going through all the /dev/ioasid stuff I'm pretty convinced > that none of the PASID use cases for mdev should need any iommu > connection from the mdev_device - this is an artifact of trying to > cram the vfio container and group model into the mdev world and is not > good design. > > The PASID interfaces for /dev/ioasid should use the 'struct > pci_device' for everything and never pass in a mdev_device to the > iommu layer. 'struct pci_device' -> 'struct device' since /dev/ioasid also needs to support non-pci devices? > > /dev/ioasid should be designed to support this operation and is why I > strongly want to see the actual vfio_device implementation handle the > connection to the iommu layer and not keep trying to hack through > building what is actually a vfio_device specific connection through > the type1 container code. > I assume the so-called connection here implies using iommu_attach_device to attach a device to an iommu domain. Did you suggest this connection must be done by the mdev driver which implements vfio_device and then passing iommu domain to /dev/ioasid when attaching the device to an IOASID? 
sort of like: ioctl(device_fd, VFIO_ATTACH_IOASID, ioasid, domain); If yes, this conflicts with one design in /dev/ioasid proposal that we're working on. In earlier discussion we agreed that each ioasid is associated to a singleton iommu domain and all devices that are attached to this ioasid with compatible iommu capabilities just share this domain. It implies that iommu domain is allocated at ATTACH_IOASID phase (e.g. when the 1st device is attached to an ioasid). Pre-allocating domain by vfio_device driver means that every device (SR-IOV or mdev) has its own domain thus cannot share ioasid then. Did I misunderstand your intention? Baolu and I discussed below draft proposal to avoid passing mdev_device to the iommu layer. Please check whether it makes sense: // for every device attached to an ioasid // mdev is represented by pasid (allocated by mdev driver) // pf/vf has INVALID_IOASID in pasid struct dev_info { struct list_headnext; struct device *device; u32 pasid; } // for every allocated ioasid struct ioasid_info { // the handle to convey iommu operations struct iommu_domain *domain; // metadata for map/unmap struct rb_node dma_list; // the list of attached device struct dev_info *dev_list; ... 
} // called by VFIO/VDPA int ioasid_attach_device(struct *device, u32 ioasid, u32 pasid) { // allocate a new dev_info, filled with device/pasid // allocate iommu domain if it's the 1st attached device // check iommu compatibility if an domain already exists // attach the device to the iommu domain if (pasid == INVALID_IOASID) iommu_attach_device(domain, device); else iommu_aux_attach_device(domain, device, pasid); // add dev_info to the dev_list of ioasid_info } // when attaching PF/VF to an ioasid ioctl(device_fd, VFIO_ATTACH_IOASID, ioasid); -> get vfio_device of device_fd -> ioasid_attach_device(vfio_device->dev, ioasid, INVALID_IOASID); // when attaching a mdev to an ioasid ioctl(device_fd, VFIO_ATTACH_IOASID, ioasid); -> get vfio_device of device_fd -> find mdev_parent of vfio_device -> find pasid allocated to this mdev -> ioasid_attach_device(parent->dev, ioasid, pasid); starting from this point the vfio device has been connected to the iommu layer. /dev/ioasid can accept pgtable cmd on this ioasid and associated domain. Thanks Kevin ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 1/2] iommu/sva: Tighten SVA bind API with explicit flags
On Tue, May 11, 2021 at 04:47:26PM -0300, Jason Gunthorpe wrote: > > Let me try to break down your concerns: > > 1. portability - driver uses DMA APIs can function w/ and w/o IOMMU. is > > that your concern? But PASID is intrinsically tied with IOMMU and if > > the drivers are using a generic sva-lib API, why they are not portable? > > SVA by its definition is to avoid map/unmap every time. > > Kernel explicitly does not support this programming model. All DMA is > explicit and the DMA API hides platform details like IOMMU and CPU > cache coherences. Just because x86 doesn't care about this doesn't > make any of it optional. Exactly. > If you want to do SVA PASID then it also must come with DMA APIs to > manage the CPU cache coherence that are all NOP's on x86. Yes. And we have plenty of precende where an IOMMU is in "bypass" mode to allow access to all memory and then uses the simple dma-direct case. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/1] iommu/vt-d: Select PCI_ATS explicitly
The Intel VT-d implementation supports device TLB management. Select PCI_ATS explicitly so that the pci_ats helpers are always available. Signed-off-by: Lu Baolu --- drivers/iommu/intel/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 28a3d1596c76..7e5b240b801d 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -14,6 +14,7 @@ config INTEL_IOMMU select SWIOTLB select IOASID select IOMMU_DMA + select PCI_ATS help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 1/1] iommu/vt-d: Tweak the description of a DMA fault
The Intel IOMMU driver reports the DMA fault reason in a decimal number while the VT-d specification uses a hexadecimal one. It's inconvenient that users need to convert them every time before consulting the spec. Let's use hexadecimal number for a DMA fault reason. The fault message uses 0xffffffff as PASID for DMA requests w/o PASID. This is confusing. Tweak this by adding "w/o PASID" explicitly. Signed-off-by: Lu Baolu --- drivers/iommu/intel/dmar.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 1757ac1e1623..11e37d2c2af2 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1911,15 +1911,21 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, _type); if (fault_type == INTR_REMAP) - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", - source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr >> 48, - fault_reason, reason); - else - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n", + pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02xh] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else if (pasid == INVALID_IOASID) + pr_err("[%s w/o PASID] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02xh] %s\n", type ? "DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), pasid, addr, + PCI_FUNC(source_id & 0xFF), addr, + fault_reason, reason); + else + pr_err("[%s w/ PASID %x] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02xh] %s\n", + type ? 
"DMA Read" : "DMA Write", pasid, + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -1987,7 +1993,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : -1, + pasid_present ? pasid : INVALID_IOASID, source_id, guest_addr); fault_index++; -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RFC PATCH v4 01/13] iommu: Introduce dirty log tracking framework
On 2021/5/12 11:20, Lu Baolu wrote: > On 5/11/21 3:40 PM, Keqian Zhu wrote: >>> For upper layers, before starting page tracking, they check the >>> dirty_page_trackable attribution of the domain and start it only it's >>> capable. Once the page tracking is switched on the vendor iommu driver >>> (or iommu core) should block further device attach/detach operations >>> until page tracking is stopped. >> But when a domain becomes capable after detaching a device, the upper layer >> still needs to query it and enable dirty log for it... >> >> To make things coordinated, maybe the upper layer can register a notifier, >> when the domain's capability change, the upper layer do not need to query, >> instead >> they just need to realize a callback, and do their specific policy in the >> callback. >> What do you think? >> > > That might be an option. But why not checking domain's attribution every > time a new tracking period is about to start? Hi Baolu, I'll add an attribution in iommu_domain, and the vendor iommu driver will update the attribution when attach/detach devices. The attribute should be protected by a lock, so the upper layer shouldn't access the attribute directly. Then the iommu_domain_support_dirty_log() still should be retained. Does this design looks good to you? Thanks, Keqian ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 13/16] media: mtk-vcodec: Get rid of mtk_smi_larb_get/put
On Sat, Apr 10, 2021 at 5:14 PM Yong Wu wrote: > > MediaTek IOMMU has already added the device_link between the consumer > and smi-larb device. If the vcodec device call the pm_runtime_get_sync, > the smi-larb's pm_runtime_get_sync also be called automatically. > > CC: Tiffany Lin > CC: Irui Wang > Signed-off-by: Yong Wu > Reviewed-by: Evan Green > Acked-by: Tiffany Lin > --- > .../platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 37 ++- > .../platform/mtk-vcodec/mtk_vcodec_drv.h | 3 -- > .../platform/mtk-vcodec/mtk_vcodec_enc.c | 1 - > .../platform/mtk-vcodec/mtk_vcodec_enc_pm.c | 46 ++- > 4 files changed, 10 insertions(+), 77 deletions(-) > > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > index 32e1858e9f1d..2b3562e47f4f 100644 > --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c > @@ -8,14 +8,12 @@ > #include > #include > #include > -#include > > #include "mtk_vcodec_dec_pm.h" > #include "mtk_vcodec_util.h" > > int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) > { > - struct device_node *node; > struct platform_device *pdev; > struct mtk_vcodec_pm *pm; > struct mtk_vcodec_clk *dec_clk; > @@ -26,18 +24,7 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) > pm = >pm; > pm->mtkdev = mtkdev; > dec_clk = >vdec_clk; > - node = of_parse_phandle(pdev->dev.of_node, "mediatek,larb", 0); > - if (!node) { > - mtk_v4l2_err("of_parse_phandle mediatek,larb fail!"); > - return -1; > - } > > - pdev = of_find_device_by_node(node); > - of_node_put(node); > - if (WARN_ON(!pdev)) { > - return -1; > - } > - pm->larbvdec = >dev; > pdev = mtkdev->plat_dev; > pm->dev = >dev; > > @@ -47,14 +34,11 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) > dec_clk->clk_info = devm_kcalloc(>dev, > dec_clk->clk_num, sizeof(*clk_info), > GFP_KERNEL); > - if (!dec_clk->clk_info) { > - ret = -ENOMEM; > - goto put_device; > - } > + 
if (!dec_clk->clk_info) > + return -ENOMEM; > } else { > mtk_v4l2_err("Failed to get vdec clock count"); > - ret = -EINVAL; > - goto put_device; > + return -EINVAL; > } > > for (i = 0; i < dec_clk->clk_num; i++) { > @@ -63,29 +47,24 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *mtkdev) > "clock-names", i, _info->clk_name); > if (ret) { > mtk_v4l2_err("Failed to get clock name id = %d", i); > - goto put_device; > + return ret; > } > clk_info->vcodec_clk = devm_clk_get(>dev, > clk_info->clk_name); > if (IS_ERR(clk_info->vcodec_clk)) { > mtk_v4l2_err("devm_clk_get (%d)%s fail", i, > clk_info->clk_name); > - ret = PTR_ERR(clk_info->vcodec_clk); > - goto put_device; > + return PTR_ERR(clk_info->vcodec_clk); > } > } > > pm_runtime_enable(>dev); > return 0; > -put_device: > - put_device(pm->larbvdec); > - return ret; > } > > void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev) > { > pm_runtime_disable(dev->pm.dev); > - put_device(dev->pm.larbvdec); > } > > void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) > @@ -121,11 +100,6 @@ void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm) > } > } > > - ret = mtk_smi_larb_get(pm->larbvdec); > - if (ret) { > - mtk_v4l2_err("mtk_smi_larb_get larbvdec fail %d", ret); > - goto error; > - } > return; > > error: > @@ -138,7 +112,6 @@ void mtk_vcodec_dec_clock_off(struct mtk_vcodec_pm *pm) > struct mtk_vcodec_clk *dec_clk = >vdec_clk; > int i = 0; > > - mtk_smi_larb_put(pm->larbvdec); > for (i = dec_clk->clk_num - 1; i >= 0; i--) > clk_disable_unprepare(dec_clk->clk_info[i].vcodec_clk); > } > diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > index 869d958d2b99..659790398809 100644 > --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h > @@ -189,10 +189,7 @@ struct mtk_vcodec_clk { > */ > struct mtk_vcodec_pm { > struct mtk_vcodec_clk vdec_clk; > - struct device *larbvdec; > - >
Re: [RFC PATCH v4 01/13] iommu: Introduce dirty log tracking framework
Hi keqian, On 5/12/21 4:44 PM, Keqian Zhu wrote: On 2021/5/12 11:20, Lu Baolu wrote: On 5/11/21 3:40 PM, Keqian Zhu wrote: For upper layers, before starting page tracking, they check the dirty_page_trackable attribution of the domain and start it only it's capable. Once the page tracking is switched on the vendor iommu driver (or iommu core) should block further device attach/detach operations until page tracking is stopped. But when a domain becomes capable after detaching a device, the upper layer still needs to query it and enable dirty log for it... To make things coordinated, maybe the upper layer can register a notifier, when the domain's capability change, the upper layer do not need to query, instead they just need to realize a callback, and do their specific policy in the callback. What do you think? That might be an option. But why not checking domain's attribution every time a new tracking period is about to start? Hi Baolu, I'll add an attribution in iommu_domain, and the vendor iommu driver will update the attribution when attach/detach devices. The attribute should be protected by a lock, so the upper layer shouldn't access the attribute directly. Then the iommu_domain_support_dirty_log() still should be retained. Does this design looks good to you? Yes, that's what I was thinking of. But I am not sure whether it worth of a lock here. It seems not to be a valid behavior for upper layer to attach or detach any device while doing the dirty page tracking. Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 2/2] iommu/sva: Remove mm parameter from SVA bind API
On Mon, May 10, 2021 at 06:25:08AM -0700, Jacob Pan wrote: > The mm parameter in iommu_sva_bind_device() is intended for privileged > process perform bind() on behalf of other processes. This use case has > yet to be materialized, let alone potential security implications of > adding kernel hooks without explicit user consent. > In addition, with the agreement that IOASID allocation shall be subject > cgroup limit. It will be inline with misc cgroup proposal if IOASID > allocation as part of the SVA bind is limited to the current task. > > Link: > https://lore.kernel.org/linux-iommu/20210303160205.151d114e@jacob-builder/ > Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/ > Signed-off-by: Jacob Pan I'm not particularly enthusiastic about this change, because restoring the mm parameter will be difficult after IOMMU drivers start assuming everything is on current. Regardless, it looks correct and makes my life easier (and lightens my test suite quite a bit). Reviewed-by: Jean-Philippe Brucker ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Support asynchronous IOMMU nested capabilities
Hi Kevin, On 5/12/21 4:30 PM, Tian, Kevin wrote: From: Lu Baolu Sent: Wednesday, May 12, 2021 3:04 PM Current VT-d implementation supports nested translation only if all underlying IOMMUs support the nested capability. This is unnecessary as the upper layer is allowed to create different containers and set them with different type of iommu backend. The IOMMU driver needs to guarantee that devices attached to a nested mode iommu_domain should support nested capabilility. so the consistency check is now applied only to the IOMMUs that are spanned by a given iommu_domain? Yes. Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f1742da42478..1cd4840e6f9f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4755,6 +4755,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && + !ecap_nest(iommu->ecap)) { + dev_err(dev, "%s: iommu not support nested translation\n", + iommu->name); + return -EINVAL; + } + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -5451,11 +5458,21 @@ static int intel_iommu_enable_nesting(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool has_nesting = false; unsigned long flags; - int ret = -ENODEV; + int ret = -EINVAL; + + for_each_active_iommu(iommu, drhd) + if (ecap_nest(iommu->ecap)) + has_nesting = true; + + if (!has_nesting) + return -ENODEV; Isn't above still doing global consistency check? The logic is if nested mode is globally unsupported, return false. 
spin_lock_irqsave(_domain_lock, flags); - if (nested_mode_support() && list_empty(_domain->devices)) { + if (list_empty(_domain->devices)) { dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; ret = 0; -- 2.25.1 Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v5 13/16] media: mtk-vcodec: Get rid of mtk_smi_larb_get/put
On Wed, 2021-05-12 at 17:20 +0800, Hsin-Yi Wang wrote: > On Sat, Apr 10, 2021 at 5:14 PM Yong Wu wrote: > > > > MediaTek IOMMU has already added the device_link between the consumer > > and smi-larb device. If the vcodec device call the pm_runtime_get_sync, > > the smi-larb's pm_runtime_get_sync also be called automatically. > > > > CC: Tiffany Lin > > CC: Irui Wang > > Signed-off-by: Yong Wu > > Reviewed-by: Evan Green > > Acked-by: Tiffany Lin > > --- > > .../platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 37 ++- > > .../platform/mtk-vcodec/mtk_vcodec_drv.h | 3 -- > > .../platform/mtk-vcodec/mtk_vcodec_enc.c | 1 - > > .../platform/mtk-vcodec/mtk_vcodec_enc_pm.c | 46 ++- > > 4 files changed, 10 insertions(+), 77 deletions(-) [...] > > @@ -108,13 +80,6 @@ void mtk_vcodec_enc_clock_on(struct mtk_vcodec_pm *pm) > > } > > } > > > > - ret = mtk_smi_larb_get(pm->larbvenc); > > - if (ret) { > > - mtk_v4l2_err("mtk_smi_larb_get larb3 fail %d", ret); > > - goto clkerr; > > - } > > - return; > > You can't delete the return; here, otherwise vcodec_clk will be turned > off immediately after they are turned on. Thanks very much for your review. Sorry for this. You are quite right. I checked this, it was introduced in v4 when I rebase the code. I will fix it in next time. > > > - > > clkerr: > > for (i -= 1; i >= 0; i--) > > clk_disable_unprepare(enc_clk->clk_info[i].vcodec_clk); > > @@ -125,7 +90,6 @@ void mtk_vcodec_enc_clock_off(struct mtk_vcodec_pm *pm) > > struct mtk_vcodec_clk *enc_clk = >venc_clk; > > int i = 0; > > > > - mtk_smi_larb_put(pm->larbvenc); > > for (i = enc_clk->clk_num - 1; i >= 0; i--) > > clk_disable_unprepare(enc_clk->clk_info[i].vcodec_clk); > > } > > -- > > 2.18.0 > > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH] iommu/vt-d: check for allocation failure in aux_detach_device()
In current kernels small allocations never fail, but checking for allocation failure is the correct thing to do. Fixes: 18abda7a2d55 ("iommu/vt-d: Fix general protection fault in aux_detach_device()") Signed-off-by: Dan Carpenter --- drivers/iommu/intel/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 708f430af1c4..9a7b79b5af18 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4606,6 +4606,8 @@ static int auxiliary_link_device(struct dmar_domain *domain, if (!sinfo) { sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + if (!sinfo) + return -ENOMEM; sinfo->domain = domain; sinfo->pdev = dev; list_add(>link_phys, >subdevices); -- 2.30.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Resend RFC PATCH V2 10/12] HV/IOMMU: Add Hyper-V dma ops support
On 2021-05-12 17:01, Tianyu Lan wrote: Hi Christoph and Konrad: Current Swiotlb bounce buffer uses a pool for all devices. There is a high overhead to get or free bounce buffer during performance test. Swiotlb code now use a global spin lock to protect bounce buffer data. Several device queues try to acquire the spin lock and this introduce additional overhead. For performance and security perspective, each devices should have a separate swiotlb bounce buffer pool and so this part needs to rework. I want to check this is right way to resolve performance issues with swiotlb bounce buffer. If you have some other suggestions,welcome. We're already well on the way to factoring out SWIOTLB to allow for just this sort of more flexible usage like per-device bounce pools - see here: https://lore.kernel.org/linux-iommu/20210510095026.3477496-1-tien...@chromium.org/T/#t FWIW this looks to have an awful lot in common with what's going to be needed for Android's protected KVM and Arm's Confidential Compute Architecture, so we'll all be better off by doing it right. I'm getting the feeling that set_memory_decrypted() wants to grow into a more general abstraction that can return an alias at a different GPA if necessary. Robin. Thanks. On 4/14/2021 11:47 PM, Christoph Hellwig wrote: +static dma_addr_t hyperv_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t map, phys = (page_to_pfn(page) << PAGE_SHIFT) + offset; + + if (!hv_is_isolation_supported()) + return phys; + + map = swiotlb_tbl_map_single(dev, phys, size, HV_HYP_PAGE_SIZE, dir, + attrs); + if (map == (phys_addr_t)DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + return map; +} This largerly duplicates what dma-direct + swiotlb does. Please use force_dma_unencrypted to force bounce buffering and just use the generic code. 
+ if (hv_isolation_type_snp()) { + ret = hv_set_mem_host_visibility( + phys_to_virt(hyperv_io_tlb_start), + hyperv_io_tlb_size, + VMBUS_PAGE_VISIBLE_READ_WRITE); + if (ret) + panic("%s: Fail to mark Hyper-v swiotlb buffer visible to host. err=%d\n", + __func__, ret); + + hyperv_io_tlb_remap = ioremap_cache(hyperv_io_tlb_start + + ms_hyperv.shared_gpa_boundary, + hyperv_io_tlb_size); + if (!hyperv_io_tlb_remap) + panic("%s: Fail to remap io tlb.\n", __func__); + + memset(hyperv_io_tlb_remap, 0x00, hyperv_io_tlb_size); + swiotlb_set_bounce_remap(hyperv_io_tlb_remap); And this really needs to go into a common hook where we currently just call set_memory_decrypted so that all the different schemes for these trusted VMs (we have about half a dozen now) can share code rather than reinventing it. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH 1/1] iommu/vt-d: Use user privilege for RID2PASID translation
On Wed, May 12, 2021 at 02:44:26PM +0800, Lu Baolu wrote: > When first-level page tables are used for IOVA translation, we use user > privilege by setting U/S bit in the page table entry. This is to make it > consistent with the second level translation, where the U/S enforcement > is not available. Clear the SRE (Supervisor Request Enable) field in the > pasid table entry of RID2PASID so that requests requesting the supervisor > privilege are blocked and treated as DMA remapping faults. > > Suggested-by: Jacob Pan > Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") > Signed-off-by: Lu Baolu > --- > drivers/iommu/intel/iommu.c | 7 +-- > drivers/iommu/intel/pasid.c | 3 ++- > 2 files changed, 7 insertions(+), 3 deletions(-) > > diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > index 708f430af1c4..f1742da42478 100644 > --- a/drivers/iommu/intel/iommu.c > +++ b/drivers/iommu/intel/iommu.c > @@ -2525,9 +2525,9 @@ static int domain_setup_first_level(struct intel_iommu > *iommu, > struct device *dev, > u32 pasid) > { > - int flags = PASID_FLAG_SUPERVISOR_MODE; > struct dma_pte *pgd = domain->pgd; > int agaw, level; > + int flags = 0; > > /* >* Skip top levels of page tables for iommu which has > @@ -2543,7 +2543,10 @@ static int domain_setup_first_level(struct intel_iommu > *iommu, > if (level != 4 && level != 5) > return -EINVAL; > > - flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; > + if (pasid != PASID_RID2PASID) > + flags |= PASID_FLAG_SUPERVISOR_MODE; > + if (level == 5) > + flags |= PASID_FLAG_FL5LP; Given that we are still not bought into the supervisor PASID, should we make this an explicit request before turning on SUPERVISOR mode always? Since pasid_set_sre() has no return, we have no way to fail it. 
> > if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) > flags |= PASID_FLAG_PAGE_SNOOP; > diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c > index 72646bafc52f..72dc84821dad 100644 > --- a/drivers/iommu/intel/pasid.c > +++ b/drivers/iommu/intel/pasid.c > @@ -699,7 +699,8 @@ int intel_pasid_setup_second_level(struct intel_iommu > *iommu, >* Since it is a second level only translation setup, we should >* set SRE bit as well (addresses are expected to be GPAs). >*/ > - pasid_set_sre(pte); > + if (pasid != PASID_RID2PASID) > + pasid_set_sre(pte); > pasid_set_present(pte); > pasid_flush_caches(iommu, pte, pasid, did); > > -- > 2.25.1 > -- Cheers, Ashok [Forgiveness is the attribute of the STRONG - Gandhi] ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH -next] iommu/virtio: Add missing MODULE_DEVICE_TABLE
On Sat, May 08, 2021 at 11:14:51AM +0800, Bixuan Cui wrote: > This patch adds missing MODULE_DEVICE_TABLE definition which generates > correct modalias for automatic loading of this driver when it is built > as an external module. > > Reported-by: Hulk Robot > Signed-off-by: Bixuan Cui Fixes: fa4afd78ea12 ("iommu/virtio: Build virtio-iommu as module") Reviewed-by: Jean-Philippe Brucker > --- > drivers/iommu/virtio-iommu.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c > index 7c02481a81b4..c6e5ee4d9cef 100644 > --- a/drivers/iommu/virtio-iommu.c > +++ b/drivers/iommu/virtio-iommu.c > @@ -1136,6 +1136,7 @@ static struct virtio_device_id id_table[] = { > { VIRTIO_ID_IOMMU, VIRTIO_DEV_ANY_ID }, > { 0 }, > }; > +MODULE_DEVICE_TABLE(virtio, id_table); > > static struct virtio_driver virtio_iommu_drv = { > .driver.name= KBUILD_MODNAME, > ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 5.12 581/677] iommu/amd: Put newline after closing bracket in warning
From: Paul Menzel [ Upstream commit 304c73ba69459d4c18c2a4b843be6f5777b4b85c ] Currently, on the Dell OptiPlex 5055 the EFR mismatch warning looks like below. [1.479774] smpboot: CPU0: AMD Ryzen 5 PRO 1500 Quad-Core Processor (family: 0x17, model: 0x1, stepping: 0x1) […] [2.507370] AMD-Vi: [Firmware Warn]: EFR mismatch. Use IVHD EFR (0xf77ef22294ada : 0x400f77ef22294ada ). Add the newline after the `).`, so it’s on one line. Fixes: a44092e326d4 ("iommu/amd: Use IVHD EFR for early initialization of IOMMU features") Cc: iommu@lists.linux-foundation.org Cc: Suravee Suthikulpanit Cc: Brijesh Singh Cc: Robert Richter Signed-off-by: Paul Menzel Link: https://lore.kernel.org/r/20210412180141.29605-1-pmen...@molgen.mpg.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 321f5906e6ed..f7e31018cd0b 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1837,7 +1837,7 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * IVHD and MMIO conflict. */ if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", features, iommu->features); } -- 2.30.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 5.11 514/601] iommu/amd: Put newline after closing bracket in warning
From: Paul Menzel [ Upstream commit 304c73ba69459d4c18c2a4b843be6f5777b4b85c ] Currently, on the Dell OptiPlex 5055 the EFR mismatch warning looks like below. [1.479774] smpboot: CPU0: AMD Ryzen 5 PRO 1500 Quad-Core Processor (family: 0x17, model: 0x1, stepping: 0x1) […] [2.507370] AMD-Vi: [Firmware Warn]: EFR mismatch. Use IVHD EFR (0xf77ef22294ada : 0x400f77ef22294ada ). Add the newline after the `).`, so it’s on one line. Fixes: a44092e326d4 ("iommu/amd: Use IVHD EFR for early initialization of IOMMU features") Cc: iommu@lists.linux-foundation.org Cc: Suravee Suthikulpanit Cc: Brijesh Singh Cc: Robert Richter Signed-off-by: Paul Menzel Link: https://lore.kernel.org/r/20210412180141.29605-1-pmen...@molgen.mpg.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 78339b0bb8e5..9846b01a5214 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1835,7 +1835,7 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * IVHD and MMIO conflict. */ if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", features, iommu->features); } -- 2.30.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Tweak the description of a DMA fault
On Wed, May 12, 2021 at 02:50:12PM +0800, Lu Baolu wrote: > The Intel IOMMU driver reports the DMA fault reason in a decimal number > while the VT-d specification uses a hexadecimal one. It's inconvenient > that users need to covert them everytime before consulting the spec. > Let's use hexadecimal number for a DMA fault reason. > > The fault message uses 0x as PASID for DMA requests w/o PASID. > This is confusing. Tweak this by adding "w/o PASID" explicitly. > > Signed-off-by: Lu Baolu Maybe simpler to call it NO_PASID, and just PASID 0x instead? with the minor suggestions below Reviewed-by: Ashok Raj > --- > drivers/iommu/intel/dmar.c | 22 ++ > 1 file changed, 14 insertions(+), 8 deletions(-) > > diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c > index 1757ac1e1623..11e37d2c2af2 100644 > --- a/drivers/iommu/intel/dmar.c > +++ b/drivers/iommu/intel/dmar.c > @@ -1911,15 +1911,21 @@ static int dmar_fault_do_one(struct intel_iommu > *iommu, int type, > reason = dmar_get_fault_reason(fault_reason, _type); > > if (fault_type == INTR_REMAP) > - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index > %llx [fault reason %02d] %s\n", > - source_id >> 8, PCI_SLOT(source_id & 0xFF), > - PCI_FUNC(source_id & 0xFF), addr >> 48, > - fault_reason, reason); > - else > - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr > %llx [fault reason %02d] %s\n", > + pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index > %llx [fault reason %02xh] %s\n", > +source_id >> 8, PCI_SLOT(source_id & 0xFF), > +PCI_FUNC(source_id & 0xFF), addr >> 48, > +fault_reason, reason); > + else if (pasid == INVALID_IOASID) > + pr_err("[%s w/o PASID] Request device [%02x:%02x.%d] fault addr > %llx [fault reason %02xh] %s\n", > type ? 
"DMA Read" : "DMA Write", > source_id >> 8, PCI_SLOT(source_id & 0xFF), > -PCI_FUNC(source_id & 0xFF), pasid, addr, > +PCI_FUNC(source_id & 0xFF), addr, > +fault_reason, reason); > + else > + pr_err("[%s w/ PASID %x] Request device [%02x:%02x.%d] fault > addr %llx [fault reason %02xh] %s\n", Can you always lead hex values with 0x? > +type ? "DMA Read" : "DMA Write", pasid, > +source_id >> 8, PCI_SLOT(source_id & 0xFF), > +PCI_FUNC(source_id & 0xFF), addr, > fault_reason, reason); > return 0; > } > @@ -1987,7 +1993,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) > if (!ratelimited) > /* Using pasid -1 if pasid is not present */ > dmar_fault_do_one(iommu, type, fault_reason, > - pasid_present ? pasid : -1, > + pasid_present ? pasid : > INVALID_IOASID, > source_id, guest_addr); > > fault_index++; > -- > 2.25.1 > -- Cheers, Ashok [Forgiveness is the attribute of the STRONG - Gandhi] ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
[PATCH 5.10 451/530] iommu/amd: Put newline after closing bracket in warning
From: Paul Menzel [ Upstream commit 304c73ba69459d4c18c2a4b843be6f5777b4b85c ] Currently, on the Dell OptiPlex 5055 the EFR mismatch warning looks like below. [1.479774] smpboot: CPU0: AMD Ryzen 5 PRO 1500 Quad-Core Processor (family: 0x17, model: 0x1, stepping: 0x1) […] [2.507370] AMD-Vi: [Firmware Warn]: EFR mismatch. Use IVHD EFR (0xf77ef22294ada : 0x400f77ef22294ada ). Add the newline after the `).`, so it’s on one line. Fixes: a44092e326d4 ("iommu/amd: Use IVHD EFR for early initialization of IOMMU features") Cc: iommu@lists.linux-foundation.org Cc: Suravee Suthikulpanit Cc: Brijesh Singh Cc: Robert Richter Signed-off-by: Paul Menzel Link: https://lore.kernel.org/r/20210412180141.29605-1-pmen...@molgen.mpg.de Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/amd/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 3c215f0a6052..fa502c0e2e31 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1840,7 +1840,7 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * IVHD and MMIO conflict. */ if (features != iommu->features) - pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx\n).", + pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n", features, iommu->features); } -- 2.30.2 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Resend RFC PATCH V2 10/12] HV/IOMMU: Add Hyper-V dma ops support
Hi Christoph and Konrad: Current Swiotlb bounce buffer uses a pool for all devices. There is a high overhead to get or free bounce buffer during performance test. Swiotlb code now use a global spin lock to protect bounce buffer data. Several device queues try to acquire the spin lock and this introduce additional overhead. For performance and security perspective, each devices should have a separate swiotlb bounce buffer pool and so this part needs to rework. I want to check this is right way to resolve performance issues with swiotlb bounce buffer. If you have some other suggestions,welcome. Thanks. On 4/14/2021 11:47 PM, Christoph Hellwig wrote: +static dma_addr_t hyperv_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + phys_addr_t map, phys = (page_to_pfn(page) << PAGE_SHIFT) + offset; + + if (!hv_is_isolation_supported()) + return phys; + + map = swiotlb_tbl_map_single(dev, phys, size, HV_HYP_PAGE_SIZE, dir, +attrs); + if (map == (phys_addr_t)DMA_MAPPING_ERROR) + return DMA_MAPPING_ERROR; + + return map; +} This largerly duplicates what dma-direct + swiotlb does. Please use force_dma_unencrypted to force bounce buffering and just use the generic code. + if (hv_isolation_type_snp()) { + ret = hv_set_mem_host_visibility( + phys_to_virt(hyperv_io_tlb_start), + hyperv_io_tlb_size, + VMBUS_PAGE_VISIBLE_READ_WRITE); + if (ret) + panic("%s: Fail to mark Hyper-v swiotlb buffer visible to host. 
err=%d\n", + __func__, ret); + + hyperv_io_tlb_remap = ioremap_cache(hyperv_io_tlb_start + + ms_hyperv.shared_gpa_boundary, + hyperv_io_tlb_size); + if (!hyperv_io_tlb_remap) + panic("%s: Fail to remap io tlb.\n", __func__); + + memset(hyperv_io_tlb_remap, 0x00, hyperv_io_tlb_size); + swiotlb_set_bounce_remap(hyperv_io_tlb_remap); And this really needs to go into a common hook where we currently just call set_memory_decrypted so that all the different schemes for these trusted VMs (we have about half a dozen now) can share code rather than reinventing it. ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH v4 1/2] iommu/sva: Tighten SVA bind API with explicit flags
On Mon, May 10, 2021 at 06:25:07AM -0700, Jacob Pan wrote: > The void* drvdata parameter isn't really used in iommu_sva_bind_device() > API, the current IDXD code "borrows" the drvdata for a VT-d private flag > for supervisor SVA usage. > > Supervisor/Privileged mode request is a generic feature. It should be > promoted from the VT-d vendor driver to the generic code. > > This patch replaces void* drvdata with a unsigned int flags parameter > and adjusts callers accordingly. > > Link: https://lore.kernel.org/linux-iommu/YFhiMLR35WWMW%2FHu@myrica/ > Suggested-by: Jean-Philippe Brucker > Signed-off-by: Jacob Pan Thanks for cleaning this up. Whether Vt-d's supervisor mode will need rework or not, this is still good to have. One nit below if you resend Reviewed-by: Jean-Philippe Brucker [...] > diff --git a/include/linux/iommu.h b/include/linux/iommu.h > index 32d448050bf7..fcc9d36b4b01 100644 > --- a/include/linux/iommu.h > +++ b/include/linux/iommu.h > @@ -152,6 +152,19 @@ enum iommu_dev_features { > > #define IOMMU_PASID_INVALID (-1U) > > +/* > + * The IOMMU_SVA_BIND_SUPERVISOR flag requests a PASID which can be used only > + * for access to kernel addresses. No IOTLB flushes are automatically done > + * for kernel mappings; it is valid only for access to the kernel's static > + * 1:1 mapping of physical memory — not to vmalloc or even module mappings. > + * A future API addition may permit the use of such ranges, by means of an > + * explicit IOTLB flush call (akin to the DMA API's unmap method). > + * > + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to > + * do such IOTLB flushes automatically. I would add that this is platform specific and not all IOMMU drivers support the feature. Thanks, Jean > + */ > +#define IOMMU_SVA_BIND_SUPERVISOR BIT(0) > + > #ifdef CONFIG_IOMMU_API ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH] iommu/vt-d: check for allocation failure in aux_detach_device()
On 5/12/21 6:05 PM, Dan Carpenter wrote: In current kernels small allocations never fail, but checking for allocation failure is the correct thing to do. Fixes: 18abda7a2d55 ("iommu/vt-d: Fix general protection fault in aux_detach_device()") Signed-off-by: Dan Carpenter --- drivers/iommu/intel/iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 708f430af1c4..9a7b79b5af18 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4606,6 +4606,8 @@ static int auxiliary_link_device(struct dmar_domain *domain, if (!sinfo) { sinfo = kzalloc(sizeof(*sinfo), GFP_ATOMIC); + if (!sinfo) + return -ENOMEM; sinfo->domain = domain; sinfo->pdev = dev; list_add(>link_phys, >subdevices); Thank you! Acked-by: Lu Baolu Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Support asynchronous IOMMU nested capabilities
On 5/13/21 10:26 AM, Tian, Kevin wrote: From: Lu Baolu Sent: Wednesday, May 12, 2021 7:31 PM Hi Kevin, On 5/12/21 4:30 PM, Tian, Kevin wrote: From: Lu Baolu Sent: Wednesday, May 12, 2021 3:04 PM Current VT-d implementation supports nested translation only if all underlying IOMMUs support the nested capability. This is unnecessary as the upper layer is allowed to create different containers and set them with different type of iommu backend. The IOMMU driver needs to guarantee that devices attached to a nested mode iommu_domain should support nested capabilility. so the consistency check is now applied only to the IOMMUs that are spanned by a given iommu_domain? Yes. Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 21 +++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f1742da42478..1cd4840e6f9f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4755,6 +4755,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, if (!iommu) return -ENODEV; + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && + !ecap_nest(iommu->ecap)) { + dev_err(dev, "%s: iommu not support nested translation\n", + iommu->name); + return -EINVAL; + } + /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) @@ -5451,11 +5458,21 @@ static int intel_iommu_enable_nesting(struct iommu_domain *domain) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + bool has_nesting = false; unsigned long flags; - int ret = -ENODEV; + int ret = -EINVAL; + + for_each_active_iommu(iommu, drhd) + if (ecap_nest(iommu->ecap)) + has_nesting = true; + + if (!has_nesting) + return -ENODEV; Isn't above still doing global consistency check? The logic is if nested mode is globally unsupported, return false. 
why is this check required? anyway you already have the check when prepare device attachment, and only at that point you have accurate info for which iommu to be checked. This check looks meaningless as even if some IOMMUs support nesting it doesn't mean the IOMMU relevant to this domain support it. Get you. It's really unnecessary. I will drop this check in the new version. Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 1/1] iommu/vt-d: Support asynchronous IOMMU nested capabilities
> From: Lu Baolu > Sent: Wednesday, May 12, 2021 7:31 PM > > Hi Kevin, > > On 5/12/21 4:30 PM, Tian, Kevin wrote: > >> From: Lu Baolu > >> Sent: Wednesday, May 12, 2021 3:04 PM > >> > >> Current VT-d implementation supports nested translation only if all > >> underlying IOMMUs support the nested capability. This is unnecessary > >> as the upper layer is allowed to create different containers and set > >> them with different type of iommu backend. The IOMMU driver needs to > >> guarantee that devices attached to a nested mode iommu_domain should > >> support nested capabilility. > > > > so the consistency check is now applied only to the IOMMUs that are > > spanned by a given iommu_domain? > > Yes. > > > > >> > >> Signed-off-by: Lu Baolu > >> --- > >> drivers/iommu/intel/iommu.c | 21 +++-- > >> 1 file changed, 19 insertions(+), 2 deletions(-) > >> > >> diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c > >> index f1742da42478..1cd4840e6f9f 100644 > >> --- a/drivers/iommu/intel/iommu.c > >> +++ b/drivers/iommu/intel/iommu.c > >> @@ -4755,6 +4755,13 @@ static int > prepare_domain_attach_device(struct > >> iommu_domain *domain, > >>if (!iommu) > >>return -ENODEV; > >> > >> + if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) && > >> + !ecap_nest(iommu->ecap)) { > >> + dev_err(dev, "%s: iommu not support nested translation\n", > >> + iommu->name); > >> + return -EINVAL; > >> + } > >> + > >>/* check if this iommu agaw is sufficient for max mapped address */ > >>addr_width = agaw_to_width(iommu->agaw); > >>if (addr_width > cap_mgaw(iommu->cap)) > >> @@ -5451,11 +5458,21 @@ static int > >> intel_iommu_enable_nesting(struct iommu_domain *domain) > >> { > >>struct dmar_domain *dmar_domain = to_dmar_domain(domain); > >> + struct dmar_drhd_unit *drhd; > >> + struct intel_iommu *iommu; > >> + bool has_nesting = false; > >>unsigned long flags; > >> - int ret = -ENODEV; > >> + int ret = -EINVAL; > >> + > >> + for_each_active_iommu(iommu, drhd) > >> + if 
(ecap_nest(iommu->ecap)) > >> + has_nesting = true; > >> + > >> + if (!has_nesting) > >> + return -ENODEV; > > > > Isn't above still doing global consistency check? > > The logic is if nested mode is globally unsupported, return false. why is this check required? anyway you already have the check when prepare device attachment, and only at that point you have accurate info for which iommu to be checked. This check looks meaningless as even if some IOMMUs support nesting it doesn't mean the IOMMU relevant to this domain support it. > > > > >> > >>spin_lock_irqsave(_domain_lock, flags); > >> - if (nested_mode_support() && list_empty(_domain->devices)) > >> { > >> + if (list_empty(_domain->devices)) { > >>dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE; > >>dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL; > >>ret = 0; > >> -- > >> 2.25.1 > > > > Best regards, > baolu > ___ > iommu mailing list > iommu@lists.linux-foundation.org > https://lists.linuxfoundation.org/mailman/listinfo/iommu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
RE: [PATCH 3/6] vfio: remove the unused mdev iommu hook
> From: Jason Gunthorpe > Sent: Monday, May 10, 2021 11:55 PM > > On Mon, May 10, 2021 at 08:54:02AM +0200, Christoph Hellwig wrote: > > The iommu_device field in struct mdev_device has never been used > > since it was added more than 2 years ago. > > > > Signed-off-by: Christoph Hellwig > > --- > > drivers/vfio/vfio_iommu_type1.c | 132 ++-- > > include/linux/mdev.h| 20 - > > 2 files changed, 25 insertions(+), 127 deletions(-) > > I asked Intel folks to deal with this a month ago: > > https://lore.kernel.org/kvm/20210406200030.ga425...@nvidia.com/ > > So lets just remove it, it is clearly a bad idea > > Reviewed-by: Jason Gunthorpe > Want to get a clearer picture on what needs to be redesigned after this removal. Are you specially concerned about this iommu_device hack which directly connects mdev_device to iommu layer or the entire removed logic including the aux domain concept? For the former we are now following up the referred thread to find a clean way. But for the latter we feel it's still necessary regardless of how iommu interface is redesigned to support device connection from the upper level driver. The reason is that with mdev or subdevice one physical device could be attached to multiple domains now. there could be a primary domain with DOMAIN_ DMA type for DMA_API use by parent driver itself, and multiple auxiliary domains with DOMAIN_UNMANAGED types for subdevices assigned to different VMs. In this case It's a different model from existing policy which allows only one domain per device. In removed code we followed Joerg's suggestion to keep existing iommu_attach_device for connecting device to the primary domain and then add new iommu_aux_attach_ device for connecting device to auxiliary domains. Lots of removed iommu code deal with the management of auxiliary domains in the iommu core layer and intel-iommu drivers, which imho is largely generic and could be leveraged w/o intrusive refactoring to support redesigned iommu interface. 
Does it sound a reasonable position? Thanks Kevin ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH 1/1] iommu/vt-d: Use user privilege for RID2PASID translation
Hi Ashok, On 5/13/21 1:03 AM, Raj, Ashok wrote: On Wed, May 12, 2021 at 02:44:26PM +0800, Lu Baolu wrote: When first-level page tables are used for IOVA translation, we use user privilege by setting U/S bit in the page table entry. This is to make it consistent with the second level translation, where the U/S enforcement is not available. Clear the SRE (Supervisor Request Enable) field in the pasid table entry of RID2PASID so that requests requesting the supervisor privilege are blocked and treated as DMA remapping faults. Suggested-by: Jacob Pan Fixes: b802d070a52a1 ("iommu/vt-d: Use iova over first level") Signed-off-by: Lu Baolu --- drivers/iommu/intel/iommu.c | 7 +-- drivers/iommu/intel/pasid.c | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 708f430af1c4..f1742da42478 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2525,9 +2525,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu, struct device *dev, u32 pasid) { - int flags = PASID_FLAG_SUPERVISOR_MODE; struct dma_pte *pgd = domain->pgd; int agaw, level; + int flags = 0; /* * Skip top levels of page tables for iommu which has @@ -2543,7 +2543,10 @@ static int domain_setup_first_level(struct intel_iommu *iommu, if (level != 4 && level != 5) return -EINVAL; - flags |= (level == 5) ? PASID_FLAG_FL5LP : 0; + if (pasid != PASID_RID2PASID) + flags |= PASID_FLAG_SUPERVISOR_MODE; + if (level == 5) + flags |= PASID_FLAG_FL5LP; Given that we are still not bought into the supervisor PASID, should we make this an explicit request before turning on SUPERVISOR mode always? Since pasid_set_sre() has no return, we have no way to fail it. The supervisor PASID is now supported in VT-d implementation. This patch is only for RID2PASID. We need a separated patch to remove the superisor pasid code once we reach a consensus. Does this work for you? 
Best regards, baolu if (domain->domain.type == IOMMU_DOMAIN_UNMANAGED) flags |= PASID_FLAG_PAGE_SNOOP; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 72646bafc52f..72dc84821dad 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -699,7 +699,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu, * Since it is a second level only translation setup, we should * set SRE bit as well (addresses are expected to be GPAs). */ - pasid_set_sre(pte); + if (pasid != PASID_RID2PASID) + pasid_set_sre(pte); pasid_set_present(pte); pasid_flush_caches(iommu, pte, pasid, did); -- 2.25.1 ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [PATCH 1/1] iommu/vt-d: Tweak the description of a DMA fault
On 5/13/21 12:56 AM, Raj, Ashok wrote: On Wed, May 12, 2021 at 02:50:12PM +0800, Lu Baolu wrote: The Intel IOMMU driver reports the DMA fault reason in a decimal number while the VT-d specification uses a hexadecimal one. It's inconvenient that users need to covert them everytime before consulting the spec. Let's use hexadecimal number for a DMA fault reason. The fault message uses 0x as PASID for DMA requests w/o PASID. This is confusing. Tweak this by adding "w/o PASID" explicitly. Signed-off-by: Lu Baolu Maybe simpler to call it NO_PASID, and just PASID 0x instead? Yeah, it's okay for me. with the minor suggestions below Reviewed-by: Ashok Raj Thanks! --- drivers/iommu/intel/dmar.c | 22 ++ 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 1757ac1e1623..11e37d2c2af2 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1911,15 +1911,21 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type, reason = dmar_get_fault_reason(fault_reason, _type); if (fault_type == INTR_REMAP) - pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n", - source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), addr >> 48, - fault_reason, reason); - else - pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n", + pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02xh] %s\n", + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr >> 48, + fault_reason, reason); + else if (pasid == INVALID_IOASID) + pr_err("[%s w/o PASID] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02xh] %s\n", type ? 
"DMA Read" : "DMA Write", source_id >> 8, PCI_SLOT(source_id & 0xFF), - PCI_FUNC(source_id & 0xFF), pasid, addr, + PCI_FUNC(source_id & 0xFF), addr, + fault_reason, reason); + else + pr_err("[%s w/ PASID %x] Request device [%02x:%02x.%d] fault addr %llx [fault reason %02xh] %s\n", Can you always lead hex values with 0x? Yes. + type ? "DMA Read" : "DMA Write", pasid, + source_id >> 8, PCI_SLOT(source_id & 0xFF), + PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); return 0; } @@ -1987,7 +1993,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id) if (!ratelimited) /* Using pasid -1 if pasid is not present */ dmar_fault_do_one(iommu, type, fault_reason, - pasid_present ? pasid : -1, + pasid_present ? pasid : INVALID_IOASID, source_id, guest_addr); fault_index++; -- 2.25.1 Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [Resend RFC PATCH V2 10/12] HV/IOMMU: Add Hyper-V dma ops support
On 5/13/21 12:01 AM, Tianyu Lan wrote: Hi Christoph and Konrad: Current Swiotlb bounce buffer uses a pool for all devices. There is a high overhead to get or free bounce buffer during performance test. Swiotlb code now use a global spin lock to protect bounce buffer data. Several device queues try to acquire the spin lock and this introduce additional overhead. For performance and security perspective, each devices should have a separate swiotlb bounce buffer pool and so this part needs to rework. I want to check this is right way to resolve performance issues with swiotlb bounce buffer. If you have some other suggestions,welcome. Is this what you want? https://lore.kernel.org/linux-iommu/20210510095026.3477496-1-tien...@chromium.org/ Best regards, baolu ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu
Re: [RESEND PATCH v2] iommu/amd: Fix extended features logging
Am 04.05.21 um 12:22 schrieb Alexander Monakov: print_iommu_info prints the EFR register and then the decoded list of features on a separate line: pci :00:00.2: AMD-Vi: Extended features (0x206d73ef22254ade): PPR X2APIC NX GT IA GA PC GA_vAPIC The second line is emitted via 'pr_cont', which causes it to have a different ('warn') loglevel compared to the previous line ('info'). Commit 9a295ff0ffc9 attempted to rectify this by removing the newline from the pci_info format string, but this doesn't work, as pci_info calls implicitly append a newline anyway. Printing the decoded features on the same line would make it quite long. Instead, change pci_info() to pr_info() to omit PCI bus location info, which is also shown in the preceding message. This results in: pci :00:00.2: AMD-Vi: Found IOMMU cap 0x40 AMD-Vi: Extended features (0x206d73ef22254ade): PPR X2APIC NX GT IA GA PC GA_vAPIC AMD-Vi: Interrupt remapping enabled Fixes: 9a295ff0ffc9 ("iommu/amd: Print extended features in one line to fix divergent log levels") Link: https://lore.kernel.org/lkml/alpine.lnx.2.20.13.2104112326460.11...@monopod.intra.ispras.ru Signed-off-by: Alexander Monakov Cc: Paul Menzel Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: iommu@lists.linux-foundation.org --- drivers/iommu/amd/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 429a4baa3aee..8f0eb865119a 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1954,8 +1954,8 @@ static void print_iommu_info(void) pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pci_info(pdev, "Extended features (%#llx):", -iommu->features); + pr_info("Extended features (%#llx):", iommu->features); + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); base-commit: 9f4ad9e425a1d3b6a34617b8ea226d56a119a717 Reviewed-by: Paul Menzel Kind regards, 
Paul ___ iommu mailing list iommu@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/iommu