Hi Shameer,

On 18/01/18 01:04, Alex Williamson wrote:
> On Fri, 12 Jan 2018 16:45:27 +0000
> Shameer Kolothum <[email protected]> wrote:
> 
>> This introduces an iova list that is valid for dma mappings. Make
>> sure the new iommu aperture window is valid and doesn't conflict
>> with any existing dma mappings during attach. Also update the iova
>> list with new aperture window during attach/detach.
>>
>> Signed-off-by: Shameer Kolothum <[email protected]>
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 177 
>> ++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 177 insertions(+)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c 
>> b/drivers/vfio/vfio_iommu_type1.c
>> index e30e29a..11cbd49 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -60,6 +60,7 @@
>>  
>>  struct vfio_iommu {
>>      struct list_head        domain_list;
>> +    struct list_head        iova_list;
>>      struct vfio_domain      *external_domain; /* domain for external user */
>>      struct mutex            lock;
>>      struct rb_root          dma_list;
>> @@ -92,6 +93,12 @@ struct vfio_group {
>>      struct list_head        next;
>>  };
>>  
>> +struct vfio_iova {
>> +    struct list_head        list;
>> +    phys_addr_t             start;
>> +    phys_addr_t             end;
>> +};
> 
> dma_list uses dma_addr_t for the iova.  IOVAs are naturally DMA
> addresses, why are we using phys_addr_t?
> 
>> +
>>  /*
>>   * Guest RAM pinning working set or DMA target
>>   */
>> @@ -1192,6 +1199,123 @@ static bool vfio_iommu_has_sw_msi(struct iommu_group 
>> *group, phys_addr_t *base)
>>      return ret;
>>  }
>>  
>> +static int vfio_insert_iova(phys_addr_t start, phys_addr_t end,
>> +                            struct list_head *head)
>> +{
>> +    struct vfio_iova *region;
>> +
>> +    region = kmalloc(sizeof(*region), GFP_KERNEL);
>> +    if (!region)
>> +            return -ENOMEM;
>> +
>> +    INIT_LIST_HEAD(&region->list);
>> +    region->start = start;
>> +    region->end = end;
>> +
>> +    list_add_tail(&region->list, head);
>> +    return 0;
>> +}
> 
> As I'm reading through this series, I'm learning that there are a lot
> of assumptions and subtle details that should be documented.  For
> instance, the IOMMU API only provides a single geometry and we build
> upon that here as this patch creates a list, but there's only a single
> entry for now.  The following patches carve that single iova range into
> pieces and somewhat subtly use the list_head passed to keep the list
> sorted, allowing the first/last_entry tricks used throughout.  Subtle
> interfaces are prone to bugs.
> 
>> +
>> +/*
>> + * Find whether a mem region overlaps with existing dma mappings
>> + */
>> +static bool vfio_find_dma_overlap(struct vfio_iommu *iommu,
>> +                              phys_addr_t start, phys_addr_t end)
>> +{
>> +    struct rb_node *n = rb_first(&iommu->dma_list);
>> +
>> +    for (; n; n = rb_next(n)) {
>> +            struct vfio_dma *dma;
>> +
>> +            dma = rb_entry(n, struct vfio_dma, node);
>> +
>> +            if (end < dma->iova)
>> +                    break;
>> +            if (start >= dma->iova + dma->size)
>> +                    continue;
>> +            return true;
>> +    }
>> +
>> +    return false;
>> +}
> 
> Why do we need this in addition to the existing vfio_find_dma()?  Why
> doesn't this use the tree structure of the dma_list?
> 
>> +
>> +/*
>> + * Check the new iommu aperture is a valid one
>> + */
>> +static int vfio_iommu_valid_aperture(struct vfio_iommu *iommu,
>> +                                 phys_addr_t start,
>> +                                 phys_addr_t end)
>> +{
>> +    struct vfio_iova *first, *last;
>> +    struct list_head *iova = &iommu->iova_list;
>> +
>> +    if (list_empty(iova))
>> +            return 0;
>> +
>> +    /* Check if new one is outside the current aperture */
> 
> "Disjoint sets"
> 
>> +    first = list_first_entry(iova, struct vfio_iova, list);
>> +    last = list_last_entry(iova, struct vfio_iova, list);
>> +    if ((start > last->end) || (end < first->start))
>> +            return -EINVAL;
>> +
>> +    /* Check for any existing dma mappings outside the new start */
>> +    if (start > first->start) {
>> +            if (vfio_find_dma_overlap(iommu, first->start, start - 1))
>> +                    return -EINVAL;
>> +    }
>> +
>> +    /* Check for any existing dma mappings outside the new end */
>> +    if (end < last->end) {
>> +            if (vfio_find_dma_overlap(iommu, end + 1, last->end))
>> +                    return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
> 
> I think this returns an int because you want to use it for the return
> value below, but it really seems like a bool question, ie. does this
> aperture conflict with existing mappings.  Additionally, the aperture
> is valid, it was provided to us by the IOMMU API, the question is
> whether it conflicts.  Please also name consistently to the other
> functions in this patch, vfio_iommu_aper_xxxx().
> 
>> +
>> +/*
>> + * Adjust the iommu aperture window if new aperture is a valid one
>> + */
>> +static int vfio_iommu_iova_aper_adjust(struct vfio_iommu *iommu,
>> +                                  phys_addr_t start,
>> +                                  phys_addr_t end)
> 
> Perhaps "resize", "prune", or "shrink" to make it more clear what is
> being adjusted?
> 
>> +{
>> +    struct vfio_iova *node, *next;
>> +    struct list_head *iova = &iommu->iova_list;
>> +
>> +    if (list_empty(iova))
>> +            return vfio_insert_iova(start, end, iova);
>> +
>> +    /* Adjust iova list start */
>> +    list_for_each_entry_safe(node, next, iova, list) {
>> +            if (start < node->start)
>> +                    break;
>> +            if ((start >= node->start) && (start <= node->end)) {
> 
> start == node->end results in a zero sized node.  s/<=/</
> 
>> +                    node->start = start;
>> +                    break;
>> +            }
>> +            /* Delete nodes before new start */
>> +            list_del(&node->list);
>> +            kfree(node);
>> +    }
>> +
>> +    /* Adjust iova list end */
>> +    list_for_each_entry_safe(node, next, iova, list) {
>> +            if (end > node->end)
>> +                    continue;
>> +
>> +            if ((end >= node->start) && (end <= node->end)) {
> 
> end == node->start results in a zero sized node.  s/>=/>/
> 
>> +                    node->end = end;
>> +                    continue;
>> +            }
>> +            /* Delete nodes after new end */
>> +            list_del(&node->list);
>> +            kfree(node);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static int vfio_iommu_type1_attach_group(void *iommu_data,
>>                                       struct iommu_group *iommu_group)
>>  {
>> @@ -1202,6 +1326,7 @@ static int vfio_iommu_type1_attach_group(void 
>> *iommu_data,
>>      int ret;
>>      bool resv_msi, msi_remap;
>>      phys_addr_t resv_msi_base;
>> +    struct iommu_domain_geometry geo;
>>  
>>      mutex_lock(&iommu->lock);
>>  
>> @@ -1271,6 +1396,14 @@ static int vfio_iommu_type1_attach_group(void 
>> *iommu_data,
>>      if (ret)
>>              goto out_domain;
>>  
>> +    /* Get aperture info */
>> +    iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY, &geo);
>> +
>> +    ret = vfio_iommu_valid_aperture(iommu, geo.aperture_start,
>> +                                    geo.aperture_end);
>> +    if (ret)
>> +            goto out_detach;
>> +
>>      resv_msi = vfio_iommu_has_sw_msi(iommu_group, &resv_msi_base);
>>  
>>      INIT_LIST_HEAD(&domain->group_list);
>> @@ -1327,6 +1460,11 @@ static int vfio_iommu_type1_attach_group(void 
>> *iommu_data,
>>                      goto out_detach;
>>      }
>>  
>> +    ret = vfio_iommu_iova_aper_adjust(iommu, geo.aperture_start,
>> +                                      geo.aperture_end);
>> +    if (ret)
>> +            goto out_detach;
>> +
>>      list_add(&domain->next, &iommu->domain_list);
>>  
>>      mutex_unlock(&iommu->lock);
>> @@ -1392,6 +1530,35 @@ static void vfio_sanity_check_pfn_list(struct 
>> vfio_iommu *iommu)
>>      WARN_ON(iommu->notifier.head);
>>  }
>>  
>> +/*
>> + * Called when a domain is removed in detach. It is possible that
>> + * the removed domain decided the iova aperture window. Modify the
>> + * iova aperture with the smallest window among existing domains.
>> + */
>> +static void vfio_iommu_iova_aper_refresh(struct vfio_iommu *iommu)
>> +{
>> +    struct vfio_domain *domain;
>> +    struct iommu_domain_geometry geo;
>> +    struct vfio_iova *node;
>> +    phys_addr_t start = 0;
>> +    phys_addr_t end = (phys_addr_t)~0;
>> +
>> +    list_for_each_entry(domain, &iommu->domain_list, next) {
>> +            iommu_domain_get_attr(domain->domain, DOMAIN_ATTR_GEOMETRY,
>> +                                  &geo);
>> +                    if (geo.aperture_start > start)
>> +                            start = geo.aperture_start;
>> +                    if (geo.aperture_end < end)
>> +                            end = geo.aperture_end;
>> +    }
>> +
>> +    /* modify iova aperture limits */
>> +    node = list_first_entry(&iommu->iova_list, struct vfio_iova, list);
>> +    node->start = start;
>> +    node = list_last_entry(&iommu->iova_list, struct vfio_iova, list);
>> +    node->end = end;
> 
> We can do this because the new aperture is the same or bigger than the
> current aperture, never smaller.  That's not fully obvious and should
> be noted in the comment.  Perhaps this function should be "expand"
> rather than "refresh".
This one is not obvious to me either:
assuming you have 2 domains, resp with aperture 1 and 2, resulting into
aperture 3. Holes are created by resv regions for instance. If you
remove domain 1, don't you get 4) instead of 2)?

1)   |------------|
 +
2) |---|    |--|       |-----|
=
3)   |-|    |--|


4) |---|    |----------------|

Thanks

Eric
> 
>> +}
>> +
>>  static void vfio_iommu_type1_detach_group(void *iommu_data,
>>                                        struct iommu_group *iommu_group)
>>  {
>> @@ -1445,6 +1612,7 @@ static void vfio_iommu_type1_detach_group(void 
>> *iommu_data,
>>                      iommu_domain_free(domain->domain);
>>                      list_del(&domain->next);
>>                      kfree(domain);
>> +                    vfio_iommu_iova_aper_refresh(iommu);
>>              }
>>              break;
>>      }
>> @@ -1475,6 +1643,7 @@ static void *vfio_iommu_type1_open(unsigned long arg)
>>      }
>>  
>>      INIT_LIST_HEAD(&iommu->domain_list);
>> +    INIT_LIST_HEAD(&iommu->iova_list);
>>      iommu->dma_list = RB_ROOT;
>>      mutex_init(&iommu->lock);
>>      BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
>> @@ -1502,6 +1671,7 @@ static void vfio_iommu_type1_release(void *iommu_data)
>>  {
>>      struct vfio_iommu *iommu = iommu_data;
>>      struct vfio_domain *domain, *domain_tmp;
>> +    struct vfio_iova *iova, *iova_tmp;
>>  
>>      if (iommu->external_domain) {
>>              vfio_release_domain(iommu->external_domain, true);
>> @@ -1517,6 +1687,13 @@ static void vfio_iommu_type1_release(void *iommu_data)
>>              list_del(&domain->next);
>>              kfree(domain);
>>      }
>> +
>> +    list_for_each_entry_safe(iova, iova_tmp,
>> +                             &iommu->iova_list, list) {
>> +            list_del(&iova->list);
>> +            kfree(iova);
>> +    }
>> +
>>      kfree(iommu);
>>  }
>>  
> 

Reply via email to