Re: [PATCH v6 04/25] iommu: Add a page fault handler

2020-05-04 Thread Jean-Philippe Brucker
On Sun, May 03, 2020 at 01:49:01PM +0800, Lu Baolu wrote:
> > +static void iopf_handle_group(struct work_struct *work)
> > +{
> > +   struct iopf_group *group;
> > +   struct iopf_fault *iopf, *next;
> > +   enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
> > +
> > +   group = container_of(work, struct iopf_group, work);
> > +
> > +   list_for_each_entry_safe(iopf, next, >faults, head) {
> > +   /*
> > +* For the moment, errors are sticky: don't handle subsequent
> > +* faults in the group if there is an error.
> > +*/
> > +   if (status == IOMMU_PAGE_RESP_SUCCESS)
> > +   status = iopf_handle_single(iopf);
> > +
> > +   if (!(iopf->fault.prm.flags &
> > + IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
> > +   kfree(iopf);
> 
> The iopf is freed,but not removed from the list. This will cause wild
> pointer in code.

We free the list with the group below, so this one is fine.

> 
> > +   }
> > +
> > +   iopf_complete_group(group->dev, >last_fault, status);
> > +   kfree(group);
> > +}
> > +
> 
> [...]
> 
> > +/**
> > + * iopf_queue_flush_dev - Ensure that all queued faults have been processed
> > + * @dev: the endpoint whose faults need to be flushed.
> > + * @pasid: the PASID affected by this flush
> > + *
> > + * The IOMMU driver calls this before releasing a PASID, to ensure that all
> > + * pending faults for this PASID have been handled, and won't hit the 
> > address
> > + * space of the next process that uses this PASID. The driver must make 
> > sure
> > + * that no new fault is added to the queue. In particular it must flush its
> > + * low-level queue before calling this function.
> > + *
> > + * Return: 0 on success and <0 on error.
> > + */
> > +int iopf_queue_flush_dev(struct device *dev, int pasid)
> > +{
> > +   int ret = 0;
> > +   struct iopf_device_param *iopf_param;
> > +   struct dev_iommu *param = dev->iommu;
> > +
> > +   if (!param)
> > +   return -ENODEV;
> > +
> > +   mutex_lock(>lock);
> > +   iopf_param = param->iopf_param;
> > +   if (iopf_param)
> > +   flush_workqueue(iopf_param->queue->wq);
> 
> There may be other pasid iopf in the workqueue. Flush all tasks in
> the workqueue will hurt other pasids. I might lose any context.

Granted this isn't optimal because we don't take the PASID argument into
account (I think I'll remove it, don't know how to use it). But I don't
think it affects other PASIDs, because all flush_workqueue() does is wait
until all faults currently in the worqueue are processed. So it only
blocks the current thread, but nothing is lost.

> 
> > +   else
> > +   ret = -ENODEV;
> > +   mutex_unlock(>lock);
> > +
> > +   return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
> > +
> > +/**
> > + * iopf_queue_discard_partial - Remove all pending partial fault
> > + * @queue: the queue whose partial faults need to be discarded
> > + *
> > + * When the hardware queue overflows, last page faults in a group may have 
> > been
> > + * lost and the IOMMU driver calls this to discard all partial faults. The
> > + * driver shouldn't be adding new faults to this queue concurrently.
> > + *
> > + * Return: 0 on success and <0 on error.
> > + */
> > +int iopf_queue_discard_partial(struct iopf_queue *queue)
> > +{
> > +   struct iopf_fault *iopf, *next;
> > +   struct iopf_device_param *iopf_param;
> > +
> > +   if (!queue)
> > +   return -EINVAL;
> > +
> > +   mutex_lock(>lock);
> > +   list_for_each_entry(iopf_param, >devices, queue_list) {
> > +   list_for_each_entry_safe(iopf, next, _param->partial, head)
> > +   kfree(iopf);
> 
> iopf is freed but not removed from the list.

Ouch yes this is wrong, will fix.

> 
> > +   }
> > +   mutex_unlock(>lock);
> > +   return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
> > +
> > +/**
> > + * iopf_queue_add_device - Add producer to the fault queue
> > + * @queue: IOPF queue
> > + * @dev: device to add
> > + *
> > + * Return: 0 on success and <0 on error.
> > + */
> > +int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
> > +{
> > +   int ret = -EBUSY;
> > +   struct iopf_device_param *iopf_param;
> > +   struct dev_iommu *param = dev->iommu;
> > +
> > +   if (!param)
> > +   return -ENODEV;
> > +
> > +   iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
> > +   if (!iopf_param)
> > +   return -ENOMEM;
> > +
> > +   INIT_LIST_HEAD(_param->partial);
> > +   iopf_param->queue = queue;
> > +   iopf_param->dev = dev;
> > +
> > +   mutex_lock(>lock);
> > +   mutex_lock(>lock);
> > +   if (!param->iopf_param) {
> > +   list_add(_param->queue_list, >devices);
> > +   param->iopf_param = iopf_param;
> > +   ret = 0;
> > +   }
> > +   mutex_unlock(>lock);
> > +   mutex_unlock(>lock);
> > +
> > +   if (ret)
> > +   kfree(iopf_param);
> > +
> > +   return ret;
> > +}
> > 

Re: [PATCH v6 04/25] iommu: Add a page fault handler

2020-05-02 Thread Lu Baolu

Hi Jean,

On 2020/4/30 22:34, Jean-Philippe Brucker wrote:

Some systems allow devices to handle I/O Page Faults in the core mm. For
example systems implementing the PCIe PRI extension or Arm SMMU stall
model. Infrastructure for reporting these recoverable page faults was
added to the IOMMU core by commit 0c830e6b3282 ("iommu: Introduce device
fault report API"). Add a page fault handler for host SVA.

IOMMU driver can now instantiate several fault workqueues and link them
to IOPF-capable devices. Drivers can choose between a single global
workqueue, one per IOMMU device, one per low-level fault queue, one per
domain, etc.

When it receives a fault event, supposedly in an IRQ handler, the IOMMU
driver reports the fault using iommu_report_device_fault(), which calls
the registered handler. The page fault handler then calls the mm fault
handler, and reports either success or failure with iommu_page_response().
When the handler succeeded, the IOMMU retries the access.

The iopf_param pointer could be embedded into iommu_fault_param. But
putting iopf_param into the iommu_param structure allows us not to care
about ordering between calls to iopf_queue_add_device() and
iommu_register_device_fault_handler().

Signed-off-by: Jean-Philippe Brucker 
---
v5->v6: Simplify flush. As we're not flushing in the mm exit path
   anymore, we can mandate that IOMMU drivers flush their low-level queue
   themselves before calling iopf_queue_flush_dev(). No need to register
   a flush callback anymore.
---
  drivers/iommu/Kconfig  |   3 +
  drivers/iommu/Makefile |   1 +
  include/linux/iommu.h  |  51 +
  drivers/iommu/io-pgfault.c | 383 +
  4 files changed, 438 insertions(+)
  create mode 100644 drivers/iommu/io-pgfault.c



[...]


+
+static void iopf_handle_group(struct work_struct *work)
+{
+   struct iopf_group *group;
+   struct iopf_fault *iopf, *next;
+   enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
+
+   group = container_of(work, struct iopf_group, work);
+
+   list_for_each_entry_safe(iopf, next, >faults, head) {
+   /*
+* For the moment, errors are sticky: don't handle subsequent
+* faults in the group if there is an error.
+*/
+   if (status == IOMMU_PAGE_RESP_SUCCESS)
+   status = iopf_handle_single(iopf);
+
+   if (!(iopf->fault.prm.flags &
+ IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
+   kfree(iopf);


The iopf is freed,but not removed from the list. This will cause wild
pointer in code.


+   }
+
+   iopf_complete_group(group->dev, >last_fault, status);
+   kfree(group);
+}
+


[...]


+/**
+ * iopf_queue_flush_dev - Ensure that all queued faults have been processed
+ * @dev: the endpoint whose faults need to be flushed.
+ * @pasid: the PASID affected by this flush
+ *
+ * The IOMMU driver calls this before releasing a PASID, to ensure that all
+ * pending faults for this PASID have been handled, and won't hit the address
+ * space of the next process that uses this PASID. The driver must make sure
+ * that no new fault is added to the queue. In particular it must flush its
+ * low-level queue before calling this function.
+ *
+ * Return: 0 on success and <0 on error.
+ */
+int iopf_queue_flush_dev(struct device *dev, int pasid)
+{
+   int ret = 0;
+   struct iopf_device_param *iopf_param;
+   struct dev_iommu *param = dev->iommu;
+
+   if (!param)
+   return -ENODEV;
+
+   mutex_lock(>lock);
+   iopf_param = param->iopf_param;
+   if (iopf_param)
+   flush_workqueue(iopf_param->queue->wq);


There may be other pasid iopf in the workqueue. Flush all tasks in
the workqueue will hurt other pasids. I might lose any context.


+   else
+   ret = -ENODEV;
+   mutex_unlock(>lock);
+
+   return ret;
+}
+EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
+
+/**
+ * iopf_queue_discard_partial - Remove all pending partial fault
+ * @queue: the queue whose partial faults need to be discarded
+ *
+ * When the hardware queue overflows, last page faults in a group may have been
+ * lost and the IOMMU driver calls this to discard all partial faults. The
+ * driver shouldn't be adding new faults to this queue concurrently.
+ *
+ * Return: 0 on success and <0 on error.
+ */
+int iopf_queue_discard_partial(struct iopf_queue *queue)
+{
+   struct iopf_fault *iopf, *next;
+   struct iopf_device_param *iopf_param;
+
+   if (!queue)
+   return -EINVAL;
+
+   mutex_lock(>lock);
+   list_for_each_entry(iopf_param, >devices, queue_list) {
+   list_for_each_entry_safe(iopf, next, _param->partial, head)
+   kfree(iopf);


iopf is freed but not removed from the list.


+   }
+   mutex_unlock(>lock);
+   return 0;
+}

[PATCH v6 04/25] iommu: Add a page fault handler

2020-04-30 Thread Jean-Philippe Brucker
Some systems allow devices to handle I/O Page Faults in the core mm. For
example systems implementing the PCIe PRI extension or Arm SMMU stall
model. Infrastructure for reporting these recoverable page faults was
added to the IOMMU core by commit 0c830e6b3282 ("iommu: Introduce device
fault report API"). Add a page fault handler for host SVA.

IOMMU driver can now instantiate several fault workqueues and link them
to IOPF-capable devices. Drivers can choose between a single global
workqueue, one per IOMMU device, one per low-level fault queue, one per
domain, etc.

When it receives a fault event, supposedly in an IRQ handler, the IOMMU
driver reports the fault using iommu_report_device_fault(), which calls
the registered handler. The page fault handler then calls the mm fault
handler, and reports either success or failure with iommu_page_response().
When the handler succeeded, the IOMMU retries the access.

The iopf_param pointer could be embedded into iommu_fault_param. But
putting iopf_param into the iommu_param structure allows us not to care
about ordering between calls to iopf_queue_add_device() and
iommu_register_device_fault_handler().

Signed-off-by: Jean-Philippe Brucker 
---
v5->v6: Simplify flush. As we're not flushing in the mm exit path
  anymore, we can mandate that IOMMU drivers flush their low-level queue
  themselves before calling iopf_queue_flush_dev(). No need to register
  a flush callback anymore.
---
 drivers/iommu/Kconfig  |   3 +
 drivers/iommu/Makefile |   1 +
 include/linux/iommu.h  |  51 +
 drivers/iommu/io-pgfault.c | 383 +
 4 files changed, 438 insertions(+)
 create mode 100644 drivers/iommu/io-pgfault.c

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 5327ec663dea1..4f33e489f0726 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -107,6 +107,9 @@ config IOMMU_SVA
bool
select IOASID
 
+config IOMMU_PAGE_FAULT
+   bool
+
 config FSL_PAMU
bool "Freescale IOMMU support"
depends on PCI
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 40c800dd4e3ef..bf5cb4ee84093 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
 obj-$(CONFIG_IOMMU_DEBUGFS) += iommu-debugfs.o
 obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
+obj-$(CONFIG_IOMMU_PAGE_FAULT) += io-pgfault.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) += io-pgtable-arm-v7s.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b62525747bd91..a1201c94f6ace 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -46,6 +46,7 @@ struct iommu_domain;
 struct notifier_block;
 struct iommu_sva;
 struct iommu_fault_event;
+struct iopf_queue;
 
 /* iommu fault flags */
 #define IOMMU_FAULT_READ   0x0
@@ -347,6 +348,7 @@ struct iommu_fault_param {
  * struct dev_iommu - Collection of per-device IOMMU data
  *
  * @fault_param: IOMMU detected device fault reporting data
+ * @iopf_param: I/O Page Fault queue and data
  * @fwspec: IOMMU fwspec data
  * @priv:   IOMMU Driver private data
  *
@@ -356,6 +358,7 @@ struct iommu_fault_param {
 struct dev_iommu {
struct mutex lock;
struct iommu_fault_param*fault_param;
+   struct iopf_device_param*iopf_param;
struct iommu_fwspec *fwspec;
void*priv;
 };
@@ -1067,4 +1070,52 @@ void iommu_debugfs_setup(void);
 static inline void iommu_debugfs_setup(void) {}
 #endif
 
+#ifdef CONFIG_IOMMU_PAGE_FAULT
+extern int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);
+
+extern int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);
+extern int iopf_queue_remove_device(struct iopf_queue *queue,
+   struct device *dev);
+extern int iopf_queue_flush_dev(struct device *dev, int pasid);
+extern struct iopf_queue *iopf_queue_alloc(const char *name);
+extern void iopf_queue_free(struct iopf_queue *queue);
+extern int iopf_queue_discard_partial(struct iopf_queue *queue);
+#else /* CONFIG_IOMMU_PAGE_FAULT */
+static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
+{
+   return -ENODEV;
+}
+
+static inline int iopf_queue_add_device(struct iopf_queue *queue,
+   struct device *dev)
+{
+   return -ENODEV;
+}
+
+static inline int iopf_queue_remove_device(struct iopf_queue *queue,
+  struct device *dev)
+{
+   return -ENODEV;
+}
+
+static inline int iopf_queue_flush_dev(struct device *dev, int pasid)
+{
+   return -ENODEV;
+}
+
+static inline struct iopf_queue *iopf_queue_alloc(const char *name)
+{
+   return NULL;
+}
+
+static inline void iopf_queue_free(struct iopf_queue *queue)
+{
+}
+