> From: Jason Gunthorpe <[email protected]>
> Sent: Wednesday, September 22, 2021 8:40 PM
> 
> > > Ie the basic flow would see the driver core doing some:
> >
> > Just double confirm. Is there concern on having the driver core to
> > call iommu functions?
> 
> It is always an interesting question, but I'd say iommu is
> foundantional to Linux and if it needs driver core help it shouldn't
> be any different from PM, pinctl, or other subsystems that have
> inserted themselves into the driver core.
> 
> Something kind of like the below.
> 
> If I recall, once it is done like this then the entire iommu notifier
> infrastructure can be ripped out which is a lot of code.

Currently vfio is the only user of this notifier mechanism. Now 
three events are handled in vfio_iommu_group_notifier():

NOTIFY_ADD_DEVICE: this is basically for some sanity check. suppose
not required once we handle it cleanly in the iommu/driver core.

NOTIFY_BOUND_DRIVER: the BUG_ON() logic to be fixed by this change.

NOTIFY_UNBOUND_DRIVER: still needs some thoughts. Based on
the comments the group->unbound_list is used to avoid breaking
group viability check between vfio_unregister_group_dev() and 
final dev/drv teardown. within that small window the device is
not tracked by vfio group but is still bound to a driver (e.g. vfio-pci
itself), while an external group user may hold a reference to the
group. Possibly it's not required now with the new mechanism as 
we rely on init/exit_user_dma() as the single switch to claim/
withdraw the group ownership. As long as exit_user_dma() is not 
called until vfio_group_release(), above small window is covered
thus no need to maintain a unbound_list.

But anyway since this corner case is tricky, will think more in case
of any oversight.

> 
> 
> diff --git a/drivers/base/dd.c b/drivers/base/dd.c
> index 68ea1f949daa90..e39612c99c6123 100644
> --- a/drivers/base/dd.c
> +++ b/drivers/base/dd.c
> @@ -566,6 +566,10 @@ static int really_probe(struct device *dev, struct
> device_driver *drv)
>                 goto done;
>         }
> 
> +       ret = iommu_set_kernel_ownership(dev);
> +       if (ret)
> +               return ret;
> +
>  re_probe:
>         dev->driver = drv;
> 
> @@ -673,6 +677,7 @@ static int really_probe(struct device *dev, struct
> device_driver *drv)
>                 dev->pm_domain->dismiss(dev);
>         pm_runtime_reinit(dev);
>         dev_pm_set_driver_flags(dev, 0);
> +       iommu_release_kernel_ownership(dev);
>  done:
>         return ret;
>  }
> @@ -1214,6 +1219,7 @@ static void __device_release_driver(struct device
> *dev, struct device *parent)
>                         dev->pm_domain->dismiss(dev);
>                 pm_runtime_reinit(dev);
>                 dev_pm_set_driver_flags(dev, 0);
> +               iommu_release_kernel_ownership(dev);
> 
>                 klist_remove(&dev->p->knode_driver);
>                 device_pm_check_callbacks(dev);

I expanded above into below conceptual draft. Please help check whether
it matches your thought:

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 68ea1f9..826a651 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -566,6 +566,10 @@ static int really_probe(struct device *dev, struct 
device_driver *drv)
                goto done;
        }
 
+       ret = iommu_device_set_dma_hint(dev, drv->dma_hint);
+       if (ret)
+               return ret;
+
 re_probe:
        dev->driver = drv;
 
@@ -673,6 +677,7 @@ static int really_probe(struct device *dev, struct 
device_driver *drv)
                dev->pm_domain->dismiss(dev);
        pm_runtime_reinit(dev);
        dev_pm_set_driver_flags(dev, 0);
+       iommu_device_clear_dma_hint(dev);
 done:
        return ret;
 }
@@ -1214,6 +1219,7 @@ static void __device_release_driver(struct device *dev, 
struct device *parent)
                        dev->pm_domain->dismiss(dev);
                pm_runtime_reinit(dev);
                dev_pm_set_driver_flags(dev, 0);
+               iommu_device_clear_dma_hint(dev);
 
                klist_remove(&dev->p->knode_driver);
                device_pm_check_callbacks(dev);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 3303d70..b12f335 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1064,6 +1064,104 @@ void iommu_group_put(struct iommu_group *group)
 }
 EXPORT_SYMBOL_GPL(iommu_group_put);
 
+static int iommu_dev_viable(struct device *dev, void *data)
+{
+       enum dma_hint hint = *data;
+       struct device_driver *drv = READ_ONCE(dev->driver);
+
+       /* no conflict if the new device doesn't do DMA */
+       if (hint == DMA_FOR_NONE)
+               return 0;
+
+       /* no conflict if this device is driver-less, or doesn't do DMA */
+       if (!drv || (drv->dma_hint == DMA_FOR_NONE))
+               return 0;
+
+       /* kernel dma and user dma are exclusive */
+       if (hint != drv->dma_hint)
+               return -EINVAL;
+
+       /*
+        * devices in the group could be bound to different user-dma
+        * drivers (e.g. vfio-pci, vdpa, etc.), or even bound to the
+        * same driver but eventually opened via different mechanisms
+        * (e.g. vfio group vs. nongroup interfaces). We rely on 
+        * iommu_{group/device}_init_user_dma() to ensure exclusive
+        * user-dma ownership (iommufd ctx, vfio container ctx, etc.)
+        * in such scenario.
+        */
+       return 0;
+}
+
+static int __iommu_group_viable(struct iommu_group *group, enum dma_hint hint)
+{
+       return (__iommu_group_for_each_dev(group, &hint,
+                                          iommu_dev_viable) == 0);
+}
+
+int iommu_device_set_dma_hint(struct device *dev, enum dma_hint hint)
+{
+       struct iommu_group *group;
+       int ret;
+
+       group = iommu_group_get(dev);
+       /* not an iommu-probed device */
+       if (!group)
+               return 0;
+
+       mutex_lock(&group->mutex);
+       ret = __iommu_group_viable(group, hint);
+       mutex_unlock(&group->mutex);
+
+       iommu_group_put(group);
+       return ret;
+}
+
+/* any housekeeping? */
+void iommu_device_clear_dma_hint(struct device *dev) {}
+
+/* claim group ownership for user-dma */
+int __iommu_group_init_user_dma(struct iommu_group *group,
+                               unsigned long owner)
+{
+       int ret;
+
+       ret = __iommu_group_viable(group, DMA_FOR_USER);
+       if (ret)
+               goto out;
+
+       /* other logic for exclusive user_dma ownership and refcounting */
+out:
+       return ret;
+}
+
+int iommu_group_init_user_dma(struct iommu_group *group,
+                             unsigned long owner)
+{
+       int ret;
+
+       mutex_lock(&group->mutex);
+       ret = __iommu_group_init_user_dma(group, owner);
+       mutex_unlock(&group->mutex);
+       return ret;
+}
+
+int iommu_device_init_user_dma(struct device *dev,
+                             unsigned long owner)
+{
+       struct iommu_group *group = iommu_group_get(dev);
+       int ret;
+
+       if (!group)
+               return -ENODEV;
+
+       mutex_lock(&group->mutex);
+       ret = __iommu_group_init_user_dma(group, owner);
+       mutex_unlock(&group->mutex);
+       iommu_grou_put(group);
+       return ret;
+}
+
 /**
  * iommu_group_register_notifier - Register a notifier for group changes
  * @group: the group to watch
diff --git a/drivers/pci/pci-stub.c b/drivers/pci/pci-stub.c
index e408099..4568811 100644
--- a/drivers/pci/pci-stub.c
+++ b/drivers/pci/pci-stub.c
@@ -36,6 +36,9 @@ static int pci_stub_probe(struct pci_dev *dev, const struct 
pci_device_id *id)
        .name           = "pci-stub",
        .id_table       = NULL, /* only dynamic id's */
        .probe          = pci_stub_probe,
+       .driver = {
+               .dma_hint       = DMA_FOR_NONE,
+       },
 };
 
 static int __init pci_stub_init(void)
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
index dcd648e..a613b78 100644
--- a/drivers/vdpa/ifcvf/ifcvf_main.c
+++ b/drivers/vdpa/ifcvf/ifcvf_main.c
@@ -678,6 +678,9 @@ static void ifcvf_remove(struct pci_dev *pdev)
        .id_table = ifcvf_pci_ids,
        .probe    = ifcvf_probe,
        .remove   = ifcvf_remove,
+       .driver = {
+               .dma_hint       = DMA_FOR_USER,
+       },
 };
 
 module_pci_driver(ifcvf_driver);
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a5ce92b..61c422d 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -193,6 +193,9 @@ static int vfio_pci_sriov_configure(struct pci_dev *pdev, 
int nr_virtfn)
        .remove                 = vfio_pci_remove,
        .sriov_configure        = vfio_pci_sriov_configure,
        .err_handler            = &vfio_pci_core_err_handlers,
+       .driver = {
+               .dma_hint       = DMA_FOR_USER,
+       },
 };
 
 static void __init vfio_pci_fill_ids(void)
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index a498ebc..6bddfd2 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -48,6 +48,17 @@ enum probe_type {
 };
 
 /**
+ * enum dma_hint - device driver dma hint
+ *     Device drivers may provide hints for whether dma is
+ *     intended for kernel driver, user driver, not not required.
+ */
+enum dma_hint {
+       DMA_FOR_KERNEL,
+       DMA_FOR_USER,
+       DMA_FOR_NONE,
+};
+
+/**
  * struct device_driver - The basic device driver structure
  * @name:      Name of the device driver.
  * @bus:       The bus which the device of this driver belongs to.
@@ -101,6 +112,7 @@ struct device_driver {
 
        bool suppress_bind_attrs;       /* disables bind/unbind via sysfs */
        enum probe_type probe_type;
+       enum dma_type dma_type;
 
        const struct of_device_id       *of_match_table;
        const struct acpi_device_id     *acpi_match_table;

Thanks
Kevin


_______________________________________________
iommu mailing list
[email protected]
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Reply via email to