Re: [RFC PATCH v3 7/8] vfio/type1: Add selective DMA faulting support

2021-05-20 Thread Shenming Lu
On 2021/5/19 2:58, Alex Williamson wrote:
> On Fri, 9 Apr 2021 11:44:19 +0800
> Shenming Lu  wrote:
> 
>> Some devices only allow selective DMA faulting. Similar to the selective
>> dirty page tracking, the vendor driver can call vfio_pin_pages() to
>> indicate the non-faultable scope; we add a new struct vfio_range to
>> record it, and when the IOPF handler receives a page request outside
>> that scope, we can directly return an invalid response.
> 
> Seems like this highlights a deficiency in the design, that the user
> can't specify mappings as iopf enabled or disabled.  Also, if the
> vendor driver has pinned pages within the range, shouldn't that prevent
> them from faulting in the first place?  Why do we need yet more
> tracking structures?  Pages pinned by the vendor driver need to count
> against the user's locked memory limits regardless of iopf.  Thanks,

Currently we only have the vfio_pfn struct to track externally pinned pages
(at single-page granularity), so I added a vfio_range struct for efficient lookup.

Yeah, with this patch we can directly return INVALID for the non-pinned scope,
but for the pinned (non-faultable) scope, tracking the pinned ranges doesn't
seem to help much more...
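
As a rough sketch of the intended fast path (illustrative only, not part of
this series; the helper name below is made up, and it assumes the struct
vfio_iopf_group and vfio_find_range() definitions added by this patch):

/*
 * Illustrative sketch only: check whether a faulting IOVA lies within
 * the scope recorded via vfio_pin_pages().  One rb-tree lookup over
 * pinned ranges replaces walking single-page vfio_pfn entries.
 */
static bool vfio_iova_in_pinned_scope(struct vfio_iopf_group *iopf_group,
				      dma_addr_t iova)
{
	return vfio_find_range(&iopf_group->pinned_range_list,
			       iova & PAGE_MASK) != NULL;
}

/*
 * In the IOPF handler, for a group with selective_faulting set, a request
 * that fails this check can be completed immediately with
 * IOMMU_PAGE_RESP_INVALID instead of being mapped.
 */

The point is that the lookup stays O(log n) in the number of pinned ranges
rather than O(n) in the number of pinned pages.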

Thanks,
Shenming

> 
> Alex
> 
> 


Re: [RFC PATCH v3 7/8] vfio/type1: Add selective DMA faulting support

2021-05-18 Thread Alex Williamson
On Fri, 9 Apr 2021 11:44:19 +0800
Shenming Lu  wrote:

> Some devices only allow selective DMA faulting. Similar to the selective
> dirty page tracking, the vendor driver can call vfio_pin_pages() to
> indicate the non-faultable scope; we add a new struct vfio_range to
> record it, and when the IOPF handler receives a page request outside
> that scope, we can directly return an invalid response.

Seems like this highlights a deficiency in the design, that the user
can't specify mappings as iopf enabled or disabled.  Also, if the
vendor driver has pinned pages within the range, shouldn't that prevent
them from faulting in the first place?  Why do we need yet more
tracking structures?  Pages pinned by the vendor driver need to count
against the user's locked memory limits regardless of iopf.  Thanks,

Alex



[RFC PATCH v3 7/8] vfio/type1: Add selective DMA faulting support

2021-04-08 Thread Shenming Lu
Some devices only allow selective DMA faulting. Similar to the selective
dirty page tracking, the vendor driver can call vfio_pin_pages() to
indicate the non-faultable scope; we add a new struct vfio_range to
record it, and when the IOPF handler receives a page request outside
that scope, we can directly return an invalid response.

Suggested-by: Kevin Tian 
Signed-off-by: Shenming Lu 
---
 drivers/vfio/vfio.c             |   4 +-
 drivers/vfio/vfio_iommu_type1.c | 357 +++-
 include/linux/vfio.h            |   1 +
 3 files changed, 358 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 38779e6fd80c..44c8dfabf7de 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -2013,7 +2013,8 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage)
 	container = group->container;
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unpin_pages))
-		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
+		ret = driver->ops->unpin_pages(container->iommu_data,
+					       group->iommu_group, user_pfn,
 					       npage);
 	else
 		ret = -ENOTTY;
@@ -2112,6 +2113,7 @@ int vfio_group_unpin_pages(struct vfio_group *group,
 	driver = container->iommu_driver;
 	if (likely(driver && driver->ops->unpin_pages))
 		ret = driver->ops->unpin_pages(container->iommu_data,
+					       group->iommu_group,
 					       user_iova_pfn, npage);
 	else
 		ret = -ENOTTY;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index dcc93c3b258c..ba2b5a1cf6e9 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -150,10 +150,19 @@ struct vfio_regions {
 static struct rb_root iopf_group_list = RB_ROOT;
 static DEFINE_MUTEX(iopf_group_list_lock);
 
+struct vfio_range {
+   struct rb_node  node;
+   dma_addr_t  base_iova;
+   size_t  span;
+   unsigned intref_count;
+};
+
 struct vfio_iopf_group {
struct rb_node  node;
struct iommu_group  *iommu_group;
struct vfio_iommu   *iommu;
+   struct rb_root  pinned_range_list;
+   boolselective_faulting;
 };
 
 #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)\
@@ -496,6 +505,255 @@ static void vfio_unlink_iopf_group(struct vfio_iopf_group *old)
mutex_unlock(&iopf_group_list_lock);
 }
 
+/*
+ * Helper functions for range list, handle one page at a time.
+ */
+static struct vfio_range *vfio_find_range(struct rb_root *range_list,
+ dma_addr_t iova)
+{
+   struct rb_node *node = range_list->rb_node;
+   struct vfio_range *range;
+
+   while (node) {
+   range = rb_entry(node, struct vfio_range, node);
+
+   if (iova + PAGE_SIZE <= range->base_iova)
+   node = node->rb_left;
+   else if (iova >= range->base_iova + range->span)
+   node = node->rb_right;
+   else
+   return range;
+   }
+
+   return NULL;
+}
+
+/* Do the possible merge adjacent to the input range. */
+static void vfio_merge_range_list(struct rb_root *range_list,
+ struct vfio_range *range)
+{
+   struct rb_node *node_prev = rb_prev(&range->node);
+   struct rb_node *node_next = rb_next(&range->node);
+
+   if (node_next) {
+   struct vfio_range *range_next = rb_entry(node_next,
+struct vfio_range,
+node);
+
+   if (range_next->base_iova == (range->base_iova + range->span) &&
+   range_next->ref_count == range->ref_count) {
+   rb_erase(node_next, range_list);
+   range->span += range_next->span;
+   kfree(range_next);
+   }
+   }
+
+   if (node_prev) {
+   struct vfio_range *range_prev = rb_entry(node_prev,
+struct vfio_range,
+node);
+
+   if (range->base_iova == (range_prev->base_iova + range_prev->span)
+   && range->ref_count == range_prev->ref_count) {
+   rb_erase(&range->node, range_list);
+   range_prev->span += range->span;
+   kfree(range);
+   }
+   }
+}
+
+static void vfio_link_range(struct rb_root *range_list, struct vfio_range *new)
+{
+   struct rb_node **link, *parent = NULL;
+