From: Honglei Huang <[email protected]>

Implement the attribute set path:

- Attribute application: apply UAPI attributes to the internal attrs.
- Attribute tree handling: set, split, and remove attribute ranges.
- amdgpu_svm_attr_set(): set attributes with retry on -EAGAIN.
- amdgpu_svm_attr_clear_pages(): remove attribute ranges for unmap
  operations.
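
As a usage sketch (only the attribute encoding is defined by this patch;
the ioctl wrapper name below is hypothetical and shown for illustration),
userspace passes an array of drm_amdgpu_svm_attribute entries which
amdgpu_svm_attr_set() folds into the internal attrs:

    struct drm_amdgpu_svm_attribute attrs[] = {
        /* make the range read-only for the GPU */
        { .type = AMDGPU_SVM_ATTR_SET_FLAGS,
          .value = AMDGPU_SVM_FLAG_GPU_RO },
        /* migration granularity; the kernel clamps this to 0x3f */
        { .type = AMDGPU_SVM_ATTR_GRANULARITY, .value = 9 },
    };

    /* hypothetical wrapper that ends up in amdgpu_svm_attr_set() */
    r = amdgpu_svm_set_attr(fd, start, size,
                            sizeof(attrs) / sizeof(attrs[0]), attrs);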

Signed-off-by: Honglei Huang <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c | 548 +++++++++++++++++++
 1 file changed, 548 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
index 137dfcb58..cd972026f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_svm_attr.c
@@ -33,8 +33,23 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 
+#define AMDGPU_SVM_VALID_FLAG_MASK \
+       (AMDGPU_SVM_FLAG_HOST_ACCESS | AMDGPU_SVM_FLAG_COHERENT | \
+        AMDGPU_SVM_FLAG_HIVE_LOCAL | AMDGPU_SVM_FLAG_GPU_RO | \
+        AMDGPU_SVM_FLAG_GPU_EXEC | AMDGPU_SVM_FLAG_GPU_READ_MOSTLY | \
+        AMDGPU_SVM_FLAG_GPU_ALWAYS_MAPPED | AMDGPU_SVM_FLAG_EXT_COHERENT)
+
 static struct kmem_cache *amdgpu_svm_attr_range_cache;
 
+struct attr_set_ctx {
+       unsigned long start;
+       unsigned long last;
+       uint32_t trigger;
+       struct amdgpu_svm_attrs prev_attrs;
+       struct amdgpu_svm_attrs new_attrs;
+};
+
 struct attr_get_ctx {
        int32_t preferred_loc;
        int32_t prefetch_loc;
@@ -130,6 +145,48 @@ static bool amdgpu_svm_attr_equal(const struct amdgpu_svm_attrs *a,
                       a->access == b->access;
 }
 
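+/* Translate the prev->new attrs delta into trigger bits for the range
+ * layer. When nothing mapping-relevant changed this degrades to
+ * ATTR_ONLY, which amdgpu_svm_attr_apply_change() treats as a no-op.
+ */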
+static uint32_t
+attr_change_ctx_trigger(const struct amdgpu_svm_attrs *prev_attrs,
+			const struct amdgpu_svm_attrs *new_attrs)
+{
+       uint32_t trigger = 0;
+       uint32_t changed_flags = prev_attrs->flags ^ new_attrs->flags;
+
+       if (prev_attrs->access != new_attrs->access)
+               trigger |= AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE;
+
+       if (changed_flags & AMDGPU_SVM_PTE_FLAG_MASK)
+               trigger |= AMDGPU_SVM_ATTR_TRIGGER_PTE_FLAG_CHANGE;
+       if (changed_flags & AMDGPU_SVM_MAPPING_FLAG_MASK)
+               trigger |= AMDGPU_SVM_ATTR_TRIGGER_MAPPING_FLAG_CHANGE;
+       if (prev_attrs->preferred_loc != new_attrs->preferred_loc ||
+           prev_attrs->prefetch_loc != new_attrs->prefetch_loc)
+               trigger |= AMDGPU_SVM_ATTR_TRIGGER_LOCATION_CHANGE;
+       if (prev_attrs->granularity != new_attrs->granularity)
+               trigger |= AMDGPU_SVM_ATTR_TRIGGER_GRANULARITY_CHANGE;
+
+       if (!trigger)
+               trigger = AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY;
+
+       return trigger;
+}
+
+static bool attr_has_access(uint32_t nattr,
+                           const struct drm_amdgpu_svm_attribute *attrs)
+{
+       uint32_t i;
+
+       for (i = 0; i < nattr; i++) {
+               switch (attrs[i].type) {
+               case AMDGPU_SVM_ATTR_ACCESS:
+               case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+                       return true;
+               }
+       }
+
+       return false;
+}
+
 static struct amdgpu_svm_attr_range *
 attr_alloc_range(unsigned long start,
                           unsigned long last,
@@ -179,6 +236,388 @@ static void attr_remove_range_locked(struct amdgpu_svm_attr_tree *attr_tree,
                kmem_cache_free(amdgpu_svm_attr_range_cache, range);
 }
 
+static void amdgpu_svm_attr_change_ctx_set(
+               struct attr_set_ctx *change,
+               unsigned long start,
+               unsigned long last,
+               uint32_t trigger,
+               const struct amdgpu_svm_attrs *prev_attrs,
+               const struct amdgpu_svm_attrs *new_attrs)
+{
+       change->start = start;
+       change->last = last;
+       change->trigger = trigger;
+       change->prev_attrs = *prev_attrs;
+       change->new_attrs = *new_attrs;
+}
+
+static int amdgpu_svm_attr_apply_change(
+                               struct amdgpu_svm *svm,
+                               const struct attr_set_ctx *change)
+{
+       int ret;
+
+       lockdep_assert_held_write(&svm->svm_lock);
+
+       if (!change->trigger ||
+           change->trigger == AMDGPU_SVM_ATTR_TRIGGER_ATTR_ONLY)
+               return 0;
+
+       ret = amdgpu_svm_range_apply_attr_change(svm, change->start, change->last,
+                                                change->trigger, &change->prev_attrs,
+                                                &change->new_attrs);
+       if (ret)
+               AMDGPU_SVM_TRACE("mapping apply failed ret=%d [0x%lx-0x%lx] size 0x%lx trigger=0x%x\n",
+                                ret, change->start, change->last,
+                                change->last - change->start + 1,
+                                change->trigger);
+
+       return ret;
+}
+
+static inline int attr_check_preferred_loc(uint32_t value)
+{
+       /* With one SVM instance per GPU, any value > 0 means the preferred
+        * location is this GPU; SYSMEM and UNDEFINED are also valid, so
+        * every value is accepted.
+        */
+       return 0;
+}
+
+static inline int attr_check_prefetch_loc(uint32_t value)
+{
+       /* With one SVM instance per GPU, any value > 0 means the prefetch
+        * location is this GPU. The prefetch location is kept to stay
+        * compatible with the KFD API.
+        */
+       if (value == AMDGPU_SVM_LOCATION_SYSMEM)
+               return 0;
+
+       if (value == AMDGPU_SVM_LOCATION_UNDEFINED)
+               return -EINVAL;
+
+       return 0;
+}
+
+static inline int attr_check_access(uint32_t value)
+{
+       if (!value || value == AMDGPU_SVM_LOCATION_UNDEFINED)
+               return -EINVAL;
+
+       return 0;
+}
+
+static inline int attr_check_flags(uint32_t value)
+{
+       if (value & ~AMDGPU_SVM_VALID_FLAG_MASK)
+               return -EINVAL;
+
+       return 0;
+}
+
+static inline int attr_check_granularity(uint32_t value)
+{
+       return 0;
+}
+
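+/* Reject a request whose pages are not fully covered by ordinary VMAs;
+ * device mappings (VM_IO/VM_PFNMAP/VM_MIXEDMAP) cannot be handled by SVM.
+ */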
+static int
+amdgpu_svm_attr_validate_range_vma(struct amdgpu_svm_attr_tree *attr_tree,
+                                  unsigned long start_page,
+                                  unsigned long last_page)
+{
+       const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
+       struct mm_struct *mm;
+       unsigned long start, end;
+       int ret = 0;
+
+       if (start_page > last_page)
+               return -EINVAL;
+
+       if (last_page == ULONG_MAX)
+               return -EINVAL;
+
+       start = start_page << PAGE_SHIFT;
+       end = (last_page + 1) << PAGE_SHIFT;
+       mm = attr_tree->svm->gpusvm.mm;
+       if (!mm)
+               return -EFAULT;
+
+       mmap_read_lock(mm);
+       while (start < end) {
+               struct vm_area_struct *vma = vma_lookup(mm, start);
+
+               if (!vma || (vma->vm_flags & device_vma)) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               start = min(end, vma->vm_end);
+       }
+       mmap_read_unlock(mm);
+
+       return ret;
+}
+
+static int
+amdgpu_svm_attr_set_validate(const struct drm_amdgpu_svm_attribute *attr)
+{
+       switch (attr->type) {
+       case AMDGPU_SVM_ATTR_PREFERRED_LOC:
+               return attr_check_preferred_loc(attr->value);
+       case AMDGPU_SVM_ATTR_PREFETCH_LOC:
+               return attr_check_prefetch_loc(attr->value);
+       case AMDGPU_SVM_ATTR_ACCESS:
+       case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+       case AMDGPU_SVM_ATTR_NO_ACCESS:
+               return attr_check_access(attr->value);
+       case AMDGPU_SVM_ATTR_SET_FLAGS:
+       case AMDGPU_SVM_ATTR_CLR_FLAGS:
+               return attr_check_flags(attr->value);
+       case AMDGPU_SVM_ATTR_GRANULARITY:
+               return attr_check_granularity(attr->value);
+       default:
+               return -EINVAL;
+       }
+}
+
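+/* Fold a UAPI attribute list into the internal representation; entries
+ * are applied in order, so later entries win on conflict.
+ */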
+static void amdgpu_svm_attr_apply(struct amdgpu_svm_attrs *attrs,
+                                 uint32_t nattr,
+                                 const struct drm_amdgpu_svm_attribute *pattrs)
+{
+       const struct drm_amdgpu_svm_attribute *attr;
+
+       for (attr = pattrs; nattr--; attr++) {
+               switch (attr->type) {
+               case AMDGPU_SVM_ATTR_PREFERRED_LOC:
+                       attrs->preferred_loc = (int32_t)attr->value;
+                       break;
+               case AMDGPU_SVM_ATTR_PREFETCH_LOC:
+                       attrs->prefetch_loc = (int32_t)attr->value;
+                       break;
+               case AMDGPU_SVM_ATTR_ACCESS:
+                       attrs->access = AMDGPU_SVM_ACCESS_ENABLE;
+                       break;
+               case AMDGPU_SVM_ATTR_ACCESS_IN_PLACE:
+                       attrs->access = AMDGPU_SVM_ACCESS_IN_PLACE;
+                       break;
+               case AMDGPU_SVM_ATTR_NO_ACCESS:
+                       attrs->access = AMDGPU_SVM_ACCESS_NONE;
+                       break;
+               case AMDGPU_SVM_ATTR_SET_FLAGS:
+                       attrs->flags |= attr->value;
+                       break;
+               case AMDGPU_SVM_ATTR_CLR_FLAGS:
+                       attrs->flags &= ~attr->value;
+                       break;
+               case AMDGPU_SVM_ATTR_GRANULARITY:
+                       attrs->granularity = min_t(uint32_t, attr->value, 0x3f);
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
+static bool attr_same_attrs(const struct amdgpu_svm_attr_range *range,
+                           uint32_t nattr,
+                           const struct drm_amdgpu_svm_attribute *attrs)
+{
+       struct amdgpu_svm_attrs target;
+
+       target = range->attrs;
+       amdgpu_svm_attr_apply(&target, nattr, attrs);
+       return amdgpu_svm_attr_equal(&range->attrs, &target);
+}
+
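+/* Fill a gap in the interval tree: start from the defaults, apply the
+ * requested attributes, and only insert a range (and raise a trigger)
+ * if the result differs from the defaults.
+ */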
+static int
+amdgpu_svm_attr_set_hole(struct amdgpu_svm_attr_tree *attr_tree,
+                         const struct amdgpu_svm_attrs *default_attrs,
+                         unsigned long start, unsigned long last,
+                         uint32_t nattr,
+                         const struct drm_amdgpu_svm_attribute *attrs,
+                         struct attr_set_ctx *change)
+{
+       struct amdgpu_svm_attrs new_attrs;
+       struct amdgpu_svm_attr_range *range;
+       uint32_t trigger;
+
+       lockdep_assert_held(&attr_tree->lock);
+
+       if (start > last)
+               return 0;
+
+       /* no action if default attr */
+       new_attrs = *default_attrs;
+       amdgpu_svm_attr_apply(&new_attrs, nattr, attrs);
+       if (amdgpu_svm_attr_equal(default_attrs, &new_attrs))
+               return 0;
+
+       range = attr_alloc_range(start, last, &new_attrs);
+       if (!range)
+               return -ENOMEM;
+
+       attr_insert_range_locked(attr_tree, range);
+
+       trigger = attr_change_ctx_trigger(default_attrs, &new_attrs);
+       amdgpu_svm_attr_change_ctx_set(change, start, last, trigger,
+                                      default_attrs, &new_attrs);
+       return 0;
+}
+
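+/* Apply the requested attributes to an existing range, splitting off an
+ * untouched head and/or tail first. Both splits are allocated before the
+ * tree is modified, so an allocation failure leaves the tree intact.
+ */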
+static int
+amdgpu_svm_attr_set_existing(struct amdgpu_svm_attr_tree *attr_tree,
+                            struct amdgpu_svm_attr_range *range,
+                            unsigned long start, unsigned long last,
+                            uint32_t nattr,
+                            const struct drm_amdgpu_svm_attribute *attrs,
+                            struct attr_set_ctx *change)
+{
+       unsigned long range_start = attr_start_page(range);
+       unsigned long range_last = attr_last_page(range);
+       struct amdgpu_svm_attr_range *left = NULL;
+       struct amdgpu_svm_attr_range *right = NULL;
+       struct amdgpu_svm_attrs old_attrs;
+       struct amdgpu_svm_attrs new_attrs;
+       uint32_t trigger;
+       bool force_trigger;
+
+       lockdep_assert_held(&attr_tree->lock);
+
+       old_attrs = range->attrs;
+
+       /* The attr layer doesn't store the GPU mapped state; to stay aligned
+        * with KFD, force the range layer to check whether the range is GPU
+        * mapped.
+        */
+       force_trigger = !attr_tree->svm->xnack_enabled &&
+                       attr_has_access(nattr, attrs);
+
+       if (attr_same_attrs(range, nattr, attrs)) {
+               if (!force_trigger)
+                       return 0;
+
+               amdgpu_svm_attr_change_ctx_set(change, start, last,
+                                              AMDGPU_SVM_ATTR_TRIGGER_ACCESS_CHANGE,
+                                              &old_attrs, &old_attrs);
+               return 0;
+       }
+
+       new_attrs = old_attrs;
+       amdgpu_svm_attr_apply(&new_attrs, nattr, attrs);
+       trigger = attr_change_ctx_trigger(&old_attrs, &new_attrs);
+
+       /* only need to update attr */
+       if (start == range_start && last == range_last) {
+               range->attrs = new_attrs;
+               amdgpu_svm_attr_change_ctx_set(change, start, last, trigger,
+                                              &old_attrs, &new_attrs);
+               return 0;
+       }
+
+       /* split head */
+       if (start > range_start) {
+               left = attr_alloc_range(range_start, start - 1, &old_attrs);
+               if (!left)
+                       return -ENOMEM;
+       }
+
+       /* split tail */
+       if (last < range_last) {
+               right = attr_alloc_range(last + 1, range_last, &old_attrs);
+               if (!right) {
+                       if (left)
+                               kmem_cache_free(amdgpu_svm_attr_range_cache, left);
+                       return -ENOMEM;
+               }
+       }
+
+       attr_remove_range_locked(attr_tree, range, false);
+       if (left)
+               attr_insert_range_locked(attr_tree, left);
+       attr_set_interval(range, start, last);
+       range->attrs = new_attrs;
+       attr_insert_range_locked(attr_tree, range);
+       if (right)
+               attr_insert_range_locked(attr_tree, right);
+
+       amdgpu_svm_attr_change_ctx_set(change, start, last, trigger,
+                                      &old_attrs, &new_attrs);
+       return 0;
+}
+
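+/* Walk [start, last] one segment at a time: each iteration covers either
+ * an existing range (clamped to the request) or the hole up to the next
+ * range. The tree is updated under attr_tree->lock and the mapping change
+ * is applied under svm_lock; -EAGAIN is remembered and returned at the
+ * end so the caller can retry the whole interval.
+ */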
+static int
+amdgpu_svm_attr_set_range(struct amdgpu_svm_attr_tree *attr_tree,
+                         const struct amdgpu_svm_attrs *default_attrs,
+                         unsigned long start, unsigned long last,
+                         uint32_t nattr,
+                         const struct drm_amdgpu_svm_attribute *attrs)
+{
+       struct amdgpu_svm *svm = attr_tree->svm;
+       unsigned long cursor = start;
+       bool need_retry = false;
+
+       while (cursor <= last) {
+               struct interval_tree_node *node;
+               unsigned long seg_last;
+               struct attr_set_ctx change = { 0 };
+               int ret;
+
+               mutex_lock(&attr_tree->lock);
+               node = interval_tree_iter_first(&attr_tree->tree, cursor, cursor);
+               if (node) {
+                       struct amdgpu_svm_attr_range *range;
+
+                       range = container_of(node, struct amdgpu_svm_attr_range,
+                                            it_node);
+                       seg_last = min(last, attr_last_page(range));
+                       ret = amdgpu_svm_attr_set_existing(attr_tree, range,
+                                                          cursor, seg_last,
+                                                          nattr, attrs, &change);
+               } else {
+                       struct interval_tree_node *next;
+
+                       seg_last = last;
+                       if (cursor != ULONG_MAX) {
+                               next = interval_tree_iter_first(&attr_tree->tree,
+                                                               cursor + 1,
+                                                               ULONG_MAX);
+                               if (next) {
+                                       struct amdgpu_svm_attr_range *next_range;
+
+                                       next_range = container_of(next,
+                                               struct amdgpu_svm_attr_range,
+                                               it_node);
+                                       seg_last = min(last,
+                                                      attr_start_page(next_range) - 1);
+                               }
+                       }
+                       ret = amdgpu_svm_attr_set_hole(attr_tree, default_attrs,
+                                                      cursor, seg_last,
+                                                      nattr, attrs, &change);
+               }
+               mutex_unlock(&attr_tree->lock);
+
+               if (ret)
+                       return ret;
+
+               down_write(&svm->svm_lock);
+               ret = amdgpu_svm_attr_apply_change(svm, &change);
+               up_write(&svm->svm_lock);
+
+               if (ret == -EAGAIN) {
+                       need_retry = true;
+                       ret = 0;
+               }
+
+               if (ret)
+                       return ret;
+
+               if (seg_last == ULONG_MAX || seg_last == last)
+                       break;
+
+               cursor = seg_last + 1;
+       }
+
+       return need_retry ? -EAGAIN : 0;
+}
+
 struct amdgpu_svm_attr_tree *
 amdgpu_svm_attr_tree_create(struct amdgpu_svm *svm)
 {
@@ -214,6 +653,115 @@ void amdgpu_svm_attr_tree_destroy(struct amdgpu_svm_attr_tree *attr_tree)
        kfree(attr_tree);
 }
 
+int amdgpu_svm_attr_set(struct amdgpu_svm_attr_tree *attr_tree,
+                       uint64_t start,
+                       uint64_t size,
+                       uint32_t nattr,
+                       const struct drm_amdgpu_svm_attribute *attrs)
+{
+       struct amdgpu_svm *svm = attr_tree->svm;
+       struct amdgpu_svm_attrs default_attrs;
+       unsigned long start_page, last_page;
+       uint32_t i;
+       int r;
+
+       start_page = start >> PAGE_SHIFT;
+       last_page = (start + size - 1) >> PAGE_SHIFT;
+
+       for (i = 0; i < nattr; i++) {
+               AMDGPU_SVM_TRACE("set attr type %u value 0x%08x for page range 
[%lx, %lx] xnack:%d", 
+                       attrs[i].type, attrs[i].value, start_page, last_page, 
svm->xnack_enabled ? 1 : 0);
+               r = amdgpu_svm_attr_set_validate(&attrs[i]);
+               if (r) {
+                       AMDGPU_SVM_TRACE("invalid attribute %u value 0x%08x", 
attrs[i].type, attrs[i].value);
+                       return r;
+               }
+       }
+
+       r = amdgpu_svm_attr_validate_range_vma(attr_tree, start_page, last_page);
+       if (r)
+               return r;
+
+       attr_set_default(attr_tree->svm, &default_attrs);
+
+       /*
+        * POC/WA:
+        * We cannot take the mmap lock here because of the DRM GPU SVM
+        * framework design (drm_gpusvm_range_find_or_insert); the hmm
+        * operations and the GPU mapping may therefore fail, so add a
+        * retry mechanism.
+        *
+        * TODO: add a mmap-locked flag to drm_gpusvm_ctx so the mmap lock
+        * can be held for the entire ioctl.
+        */
+retry:
+       r = amdgpu_svm_attr_set_range(attr_tree, &default_attrs,
+                                     start_page, last_page,
+                                     nattr, attrs);
+       if (r == -EAGAIN) {
+               AMDGPU_SVM_TRACE("attr_set retry [0x%lx-0x%lx]\n",
+                                start_page, last_page);
+               amdgpu_svm_range_flush(svm);
+               cond_resched();
+               goto retry;
+       }
+
+       return r;
+}
+
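+/* Drop attribute state for pages that have been unmapped. Ranges that
+ * straddle a boundary are trimmed (or split in two when they fully
+ * enclose the cleared interval); ranges fully inside it are freed.
+ */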
+int amdgpu_svm_attr_clear_pages(struct amdgpu_svm_attr_tree *attr_tree,
+                               unsigned long start_page,
+                               unsigned long last_page)
+{
+       struct interval_tree_node *node;
+       int r = 0;
+
+       if (start_page > last_page)
+               return -EINVAL;
+
+       mutex_lock(&attr_tree->lock);
+
+       node = interval_tree_iter_first(&attr_tree->tree, start_page, last_page);
+       while (node) {
+               struct interval_tree_node *next;
+               struct amdgpu_svm_attr_range *range;
+               unsigned long range_start;
+               unsigned long range_last;
+
+               range = container_of(node, struct amdgpu_svm_attr_range, it_node);
+               next = interval_tree_iter_next(node, start_page, last_page);
+               range_start = attr_start_page(range);
+               range_last = attr_last_page(range);
+
+               if (range_start < start_page && range_last > last_page) {
+                       struct amdgpu_svm_attr_range *tail;
+
+                       tail = attr_alloc_range(last_page + 1, range_last,
+                                               &range->attrs);
+                       if (!tail) {
+                               r = -ENOMEM;
+                               break;
+                       }
+
+                       attr_remove_range_locked(attr_tree, range, false);
+                       attr_set_interval(range, range_start, start_page - 1);
+                       attr_insert_range_locked(attr_tree, range);
+                       attr_insert_range_locked(attr_tree, tail);
+               } else if (range_start < start_page) {
+                       attr_remove_range_locked(attr_tree, range, false);
+                       attr_set_interval(range, range_start, start_page - 1);
+                       attr_insert_range_locked(attr_tree, range);
+               } else if (range_last > last_page) {
+                       attr_remove_range_locked(attr_tree, range, false);
+                       attr_set_interval(range, last_page + 1, range_last);
+                       attr_insert_range_locked(attr_tree, range);
+               } else {
+                       attr_remove_range_locked(attr_tree, range, true);
+               }
+
+               node = next;
+       }
+
+       mutex_unlock(&attr_tree->lock);
+       return r;
+}
+
 static void attr_get_ctx_add(struct attr_get_ctx *ctx,
                               const struct amdgpu_svm_attrs *attrs)
 {
-- 
2.34.1
