Hi Daniel,

Reviving this thread.
On Thu, 2020-02-20 at 18:06 +0800, Daniel Drake wrote: > > On Wed, Feb 19, 2020 at 11:40 AM Lu Baolu <[email protected]> wrote: > > > With respect, this is problematical. The parent and all subdevices share > > > a single translation entry. The DMA mask should be consistent. > > > > > > Otherwise, for example, subdevice A has 64-bit DMA capability and uses > > > an identity domain for DMA translation. While subdevice B has 32-bit DMA > > > capability and is forced to switch to DMA domain. Subdevice A will be > > > impacted without any notification. > > Looking closer, this problematic codepath may already be happening for VMD, > under intel_iommu_add_device(). Consider this function running for a VMD > subdevice, we hit: > > domain = iommu_get_domain_for_dev(dev); > > I can't quite grasp the code flow here, but domain->type now always seems > to return the domain type of the real DMA device, which seems like pretty > reasonable behaviour. > > if (domain->type == IOMMU_DOMAIN_DMA) { > > and as detailed in previous mails, the real VMD device seems to be in a DMA > domain by default, so we continue. > > if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { > > Now we checked the default domain type of the subdevice. This seems rather > likely to return IDENTITY because that's effectively the default type... > > ret = iommu_request_dm_for_dev(dev); > if (ret) { > dmar_remove_one_dev_info(dev); > dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; > domain_add_dev_info(si_domain, dev); > dev_info(dev, > "Device uses a private identity domain.\n"); > } > } > > and now we're doing the bad stuff that Lu pointed out: we only have one > mapping shared for all the subdevices, so if we end up changing it for one > subdevice, we're likely to be breaking another. 
> In this case iommu_request_dm_for_dev() returns -EBUSY without doing anything > and the following private identity code fortunately seems to have no > consequential effects - the real DMA device continues to operate in the DMA > domain, and all subdevice DMA requests go through the DMA mapping codepath. > That's probably why VMD appears to be working fine anyway, but this seems > fragile. > > The following changes enforce that the real DMA device is in the DMA domain, > and avoid the intel_iommu_add_device() codepaths that would try to change > it to a different domain type. Let me know if I'm on the right lines... > --- > drivers/iommu/intel-iommu.c | 16 ++++++++++++++++ > drivers/pci/controller/intel-nvme-remap.c | 6 ++++++ > 2 files changed, 22 insertions(+) > > diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c > index 9644a5b3e0ae..8872b8d1780d 100644 > --- a/drivers/iommu/intel-iommu.c > +++ b/drivers/iommu/intel-iommu.c > @@ -2911,6 +2911,9 @@ static int device_def_domain_type(struct device *dev) > if (dev_is_pci(dev)) { > struct pci_dev *pdev = to_pci_dev(dev); > > + if (pci_real_dma_dev(pdev) != pdev) > + return IOMMU_DOMAIN_DMA; > + > if (device_is_rmrr_locked(dev)) > return IOMMU_DOMAIN_DMA; > > @@ -5580,6 +5583,7 @@ static bool intel_iommu_capable(enum iommu_cap cap) > > static int intel_iommu_add_device(struct device *dev) > { > + struct device *real_dev = dev; > struct dmar_domain *dmar_domain; > struct iommu_domain *domain; > struct intel_iommu *iommu; > @@ -5591,6 +5595,17 @@ static int intel_iommu_add_device(struct device *dev) > if (!iommu) > return -ENODEV; > > + if (dev_is_pci(dev)) > + real_dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; > + > + if (real_dev != dev) { > + domain = iommu_get_domain_for_dev(real_dev); > + if (domain->type != IOMMU_DOMAIN_DMA) { > + dev_err(dev, "Real DMA device not in DMA domain; can't > handle DMA\n"); > + return -ENODEV; > + } > + } > + > iommu_device_link(&iommu->iommu, dev); > > if 
(translation_pre_enabled(iommu)) > We need one additional change to enforce IOMMU_DOMAIN_DMA on the real dma dev, otherwise it could be put into Identity and the subdevices as DMA leading to this WARN: struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) { int iommu_id; /* si_domain and vm domain should not get here. */ if (WARN_ON(domain->domain.type != IOMMU_DOMAIN_DMA)) return NULL; We could probably define and enforce it in device_def_domain_type. We could also try moving real dma dev to DMA on the first subdevice, like below. Though there might be a few cases we can't do that. ndex 3851204f6ac0..6c80c6c9d808 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5783,13 +5783,32 @@ static bool intel_iommu_capable(enum iommu_cap cap) return false; } +static int intel_iommu_request_dma_domain_for_dev(struct device *dev, + struct dmar_domain *domain) +{ + int ret; + + ret = iommu_request_dma_domain_for_dev(dev); + if (ret) { + dmar_remove_one_dev_info(dev); + domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; + if (!get_private_domain_for_dev(dev)) { + dev_warn(dev, + "Failed to get a private domain.\n"); + return -ENOMEM; + } + } + + return 0; +} + static int intel_iommu_add_device(struct device *dev) { - struct device *real_dev = dev; struct dmar_domain *dmar_domain; struct iommu_domain *domain; struct intel_iommu *iommu; struct iommu_group *group; + struct device *real_dev = dev; u8 bus, devfn; int ret; @@ -5797,18 +5816,6 @@ static int intel_iommu_add_device(struct device *dev) if (!iommu) return -ENODEV; - if (dev_is_pci(dev)) - real_dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; - - if (real_dev != dev) { - domain = iommu_get_domain_for_dev(real_dev); - if (domain->type != IOMMU_DOMAIN_DMA) { - dmar_remove_one_dev_info(dev) - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - domain_add_dev_info(IOMMU_DOMAIN_DMA, dev); - } - } - iommu_device_link(&iommu->iommu, dev); if (translation_pre_enabled(iommu)) @@ -5825,6 +5832,21 @@ 
static int intel_iommu_add_device(struct device *dev) domain = iommu_get_domain_for_dev(dev); dmar_domain = to_dmar_domain(domain); + + if (dev_is_pci(dev)) + real_dev = &pci_real_dma_dev(to_pci_dev(dev))->dev; + + if (real_dev != dev) { + domain = iommu_get_domain_for_dev(real_dev); + if (domain->type != IOMMU_DOMAIN_DMA) { + dmar_remove_one_dev_info(real_dev); + + ret = intel_iommu_request_dma_domain_for_dev(real_dev, dmar_domain); + if (ret) + goto unlink; + } + } + if (domain->type == IOMMU_DOMAIN_DMA) { if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) { ret = iommu_request_dm_for_dev(dev); @@ -5838,20 +5860,12 @@ static int intel_iommu_add_device(struct device *dev) } } else { if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) { - ret = iommu_request_dma_domain_for_dev(dev); - if (ret) { - dmar_remove_one_dev_info(dev); - dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN; - if (!get_private_domain_for_dev(dev)) { - dev_warn(dev, - "Failed to get a private domain.\n"); - ret = -ENOMEM; - goto unlink; - } + ret = intel_iommu_request_dma_domain_for_dev(dev, dmar_domain); + if (ret) + goto unlink; - dev_info(dev, - "Device uses a private dma domain.\n"); - } + dev_info(dev, + "Device uses a private dma domain.\n"); } } ~ ~ _______________________________________________ iommu mailing list [email protected] https://lists.linuxfoundation.org/mailman/listinfo/iommu
