On 2023-10-10 10:40, Philip Yang wrote:
Replace prange->mapped_to_gpu with prange->bitmap_mapped[], an array of
per-GPU bitmaps with one bit per granularity-sized block, updated when
mapping to GPUs or unmapping from GPUs, to optimize multi-GPU map,
unmap and retry-fault recovery.

svm_range_partial_mapped returns false only if no part of the range is
mapped on any GPU.
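
As a side note on the bookkeeping, here is a minimal userspace sketch
(not part of the patch; the helper mirrors svm_range_mapped_nbits from
the kfd_svm.h hunk below, and granularity 9, i.e. 512 4KB pages per
bit, is an assumed example value):

    #include <stdint.h>
    #include <stdio.h>

    /* One bit covers 2^granularity pages, so a range [start, last]
     * (in page numbers) needs this many bits per GPU. */
    static int mapped_nbits(uint64_t start, uint64_t last, uint8_t granularity)
    {
            return (int)((last >> granularity) - (start >> granularity)) + 1;
    }

    int main(void)
    {
            /* pages [0x200, 0x9ff] with 512-page buckets => 4 bits */
            printf("nbits = %d\n", mapped_nbits(0x200, 0x9ff, 9));
            return 0;
    }

Each set bit means "this granularity-sized chunk of the range is mapped
on that GPU", which is what lets map, unmap and retry-fault handling
skip work per chunk instead of per whole range.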

Signed-off-by: Philip Yang <[email protected]>
---
  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 256 +++++++++++++++++++++------
  drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   8 +-
  2 files changed, 213 insertions(+), 51 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index fb22b857adbc..4e1af4b181ea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -296,12 +296,12 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
                                        KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
        }
-       /* free dma_addr array for each gpu */
+       /* free dma_addr array, bitmap_mapped for each gpu */
        for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
-               if (prange->dma_addr[gpuidx]) {
+               if (prange->dma_addr[gpuidx])
                        kvfree(prange->dma_addr[gpuidx]);
-                               prange->dma_addr[gpuidx] = NULL;
-               }
+               if (prange->bitmap_mapped[gpuidx])
+                       bitmap_free(prange->bitmap_mapped[gpuidx]);
        }
        mutex_destroy(&prange->lock);
@@ -327,19 +327,38 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
        uint64_t size = last - start + 1;
        struct svm_range *prange;
        struct kfd_process *p;
-
-       prange = kzalloc(sizeof(*prange), GFP_KERNEL);
-       if (!prange)
-               return NULL;
+       unsigned int nbits;
+       uint32_t gpuidx;
        p = container_of(svms, struct kfd_process, svms);
        if (!p->xnack_enabled && update_mem_usage &&
            amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT,
                                    KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) {
                pr_info("SVM mapping failed, exceeds resident system memory 
limit\n");
-               kfree(prange);
                return NULL;
        }
+
+       prange = kzalloc(sizeof(*prange), GFP_KERNEL);
+       if (!prange)
+               return NULL;
+
+       svm_range_set_default_attributes(&prange->preferred_loc,
+                                        &prange->prefetch_loc,
+                                        &prange->granularity, &prange->flags);
+
+       nbits = svm_range_mapped_nbits(start, last, prange->granularity);
+       pr_debug("prange 0x%p [0x%llx 0x%llx] bitmap_mapped nbits %d\n", prange,
+                start, last, nbits);
+       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+               prange->bitmap_mapped[gpuidx] = bitmap_zalloc(nbits, GFP_KERNEL);
+               if (!prange->bitmap_mapped[gpuidx]) {
+                       while (gpuidx--)
+                               bitmap_free(prange->bitmap_mapped[gpuidx]);
+                       kfree(prange);
+                       return NULL;
+               }
+       }
+
        prange->npages = size;
        prange->svms = svms;
        prange->start = start;
@@ -359,10 +378,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
                bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
                            MAX_GPU_INSTANCE);
-       svm_range_set_default_attributes(&prange->preferred_loc,
-                                        &prange->prefetch_loc,
-                                        &prange->granularity, &prange->flags);
-
        pr_debug("svms 0x%p [0x%llx 0x%llx]\n", svms, start, last);
        return prange;
@@ -984,6 +999,47 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
        return 0;
  }
+static int
+svm_range_split_bitmap_mapped(struct svm_range *new, struct svm_range *old,
+                             uint64_t start, uint64_t last)
+{
+       struct kfd_process *p = container_of(new->svms, struct kfd_process, svms);
+       unsigned int nbits, old_nbits, old_nbits2;
+       unsigned long *bits;
+       uint32_t gpuidx;
+
+       nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity);
+       old_nbits = svm_range_mapped_nbits(old->start, old->last, old->granularity);
+       old_nbits2 = svm_range_mapped_nbits(start, last, old->granularity);
+
+       pr_debug("old 0x%p [0x%lx 0x%lx] => [0x%llx 0x%llx] nbits %d => %d\n",
+                old, old->start, old->last, start, last, old_nbits, old_nbits2);
+       pr_debug("new 0x%p [0x%lx 0x%lx] nbits %d\n", new, new->start, new->last,
+                nbits);
+
+       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+               bits = bitmap_alloc(old_nbits2, GFP_KERNEL);
+               if (!bits)
+                       return -ENOMEM;
+
+               if (start == old->start) {
+                       bitmap_shift_right(new->bitmap_mapped[gpuidx],
+                                          old->bitmap_mapped[gpuidx],
+                                          old_nbits2 - 1, old_nbits);
+                       bitmap_copy(bits, old->bitmap_mapped[gpuidx], old_nbits2);
+               } else {
+                       bitmap_copy(new->bitmap_mapped[gpuidx],
+                                   old->bitmap_mapped[gpuidx], nbits);
+                       bitmap_shift_right(bits, old->bitmap_mapped[gpuidx],
+                                          nbits - 1, old_nbits);
+               }
+               bitmap_free(old->bitmap_mapped[gpuidx]);
+               old->bitmap_mapped[gpuidx] = bits;
+       }
+
+       return 0;
+}
+
  /**
   * svm_range_split_adjust - split range and adjust
   *
@@ -1024,6 +1080,10 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
                        return r;
        }
+       r = svm_range_split_bitmap_mapped(new, old, start, last);
+       if (r)
+               return r;
+
        old->npages = last - start + 1;
        old->start = start;
        old->last = last;
@@ -1032,7 +1092,6 @@ svm_range_split_adjust(struct svm_range *new, struct svm_range *old,
        new->prefetch_loc = old->prefetch_loc;
        new->actual_loc = old->actual_loc;
        new->granularity = old->granularity;
-       new->mapped_to_gpu = old->mapped_to_gpu;
        bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
        bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
@@ -1346,6 +1405,108 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                                      fence);
  }
+/**
+ * svm_range_partial_mapped_dev - check if prange mapped on the specific GPU
+ *
+ * @gpuidx: the GPU index to check
+ * @prange: prange to check
+ * @start: the start address in pages
+ * @last: the last address in pages
+ *
+ * Return:
+ * true: if any part of the range within [start, last] is mapped on the GPU
+ * false: if no part of the range [start, last] is mapped on the GPU
+ */
+static bool
+svm_range_partial_mapped_dev(uint32_t gpuidx, struct svm_range *prange,
+                            unsigned long start, unsigned long last)
+{
+       unsigned long start_index, last_index;
+
+       start = max(start, prange->start);
+       last = min(last, prange->last);
+       if (last < start)
+               return false;
+
+       start_index = (start - prange->start) >> prange->granularity;
+       last_index = (last - prange->start) >> prange->granularity;
+       return find_next_bit(prange->bitmap_mapped[gpuidx], last_index + 1,
+                            start_index) <= last_index;
+}
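
For readers unfamiliar with the bitmap API: the check above is true iff
any bit in the bucket window [start_index, last_index] is set;
find_next_bit() returns its size argument (last_index + 1) when nothing
is found, hence the <= comparison. A hypothetical userspace equivalent
using a plain byte array instead of the kernel bitmap (any_bucket_mapped
is an illustrative name):

    #include <stdbool.h>

    /* Scan buckets start_index..last_index for any set bit. */
    static bool any_bucket_mapped(const unsigned char *bitmap,
                                  unsigned long start_index,
                                  unsigned long last_index)
    {
            for (unsigned long i = start_index; i <= last_index; i++)
                    if (bitmap[i / 8] & (1u << (i % 8)))
                            return true;
            return false;
    }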
+
+/**
+ * svm_range_partial_mapped - check if prange mapped on any GPU
+ *
+ * @prange: prange to check
+ * @start: the start address in pages
+ * @last: the last address in pages
+ *
+ * Return:
+ * true: if any part of prange is currently mapped on any GPU
+ * false: if the entire range is not mapped on any GPU
+ */
+static bool
+svm_range_partial_mapped(struct svm_range *prange, unsigned long start,
+                        unsigned long last)
+{
+       struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+       struct svm_range *pchild;
+       uint32_t gpuidx;
+
+       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+               list_for_each_entry(pchild, &prange->child_list, child_list) {
+                       if (svm_range_partial_mapped_dev(gpuidx, pchild, start, last))
+                               return true;
+               }
+
+               if (svm_range_partial_mapped_dev(gpuidx, prange, start, last))
+                       return true;
+       }
+       return false;
+}
+
+/**
+ * svm_range_complete_mapped - check if prange mapped on all GPUs completely
+ *
+ * @prange: prange to check
+ *
+ * Return:
+ * true: if the entire prange is mapped completely on all GPUs that need access
+ * false: otherwise
+ */
+static bool svm_range_complete_mapped(struct svm_range *prange)
+{
+       int nbits = svm_range_mapped_nbits(prange->start, prange->last, prange->granularity);
+       DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+       uint32_t gpuidx;
+       int r;
+
+       r = svm_range_need_access_gpus(bitmap, prange);
+       if (r)
+               return false;
+
+       for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE)
+               if (!bitmap_full(prange->bitmap_mapped[gpuidx], nbits))
+                       return false;
+       return true;
+}
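
In the same hypothetical userspace model, the completeness test above
reduces to checking that every bucket bit is set for every GPU that
needs access (all_buckets_set stands in for the kernel's bitmap_full()):

    #include <stdbool.h>

    /* True iff bits 0..nbits-1 are all set. */
    static bool all_buckets_set(const unsigned char *bitmap, int nbits)
    {
            for (int i = 0; i < nbits; i++)
                    if (!(bitmap[i / 8] & (1u << (i % 8))))
                            return false;
            return true;
    }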
+
+static void
+svm_range_update_mapped(uint32_t gpuidx, struct svm_range *prange,
+                       unsigned long start, unsigned long last, bool mapped)
+{
+       unsigned long index, nbits;
+
+       index = (start - prange->start) >> prange->granularity;

This aligns the address "buckets" represented by bits in the bitmap with the start of the range. To effectively protect page table fragments and PDE-PTEs, I think you need to align the buckets with the range granularity. That means that both the first and the last bucket may be smaller than the granularity size.

With that, I think the index calculation here should be:

    index = (start >> prange->granularity) - (prange->start >> prange->granularity);
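
For a concrete example of where the two formulas diverge (hypothetical
values: granularity = 2, i.e. 4-page buckets, prange->start = 3, fault
start = 5):

    #include <stdio.h>

    int main(void)
    {
            unsigned long pstart = 3, start = 5, g = 2;

            /* range-relative index, as in the patch */
            printf("relative: %lu\n", (start - pstart) >> g);        /* 0 */
            /* granularity-aligned index, as suggested above */
            printf("aligned:  %lu\n", (start >> g) - (pstart >> g)); /* 1 */
            return 0;
    }

Page 5 falls in the second granularity-aligned bucket ([4, 7]) that the
range overlaps, so the two formulas disagree whenever prange->start is
not granularity-aligned.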

This probably has more consequences throughout the rest of this patch, including calculating the size of the bitmap correctly.

Regards,
  Felix


+       nbits = svm_range_mapped_nbits(start, last, prange->granularity);
+       if (mapped)
+               bitmap_set(prange->bitmap_mapped[gpuidx], index, nbits);
+       else
+               bitmap_clear(prange->bitmap_mapped[gpuidx], index, nbits);
+       pr_debug("prange 0x%p [0x%lx 0x%lx] update mapped %d nbits %ld gpu %d\n",
+                prange, start, last, mapped, nbits, gpuidx);
+}
+
  static int
  svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
                          unsigned long last, uint32_t trigger)
@@ -1357,29 +1518,28 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,
        uint32_t gpuidx;
        int r = 0;
-       if (!prange->mapped_to_gpu) {
-               pr_debug("prange 0x%p [0x%lx 0x%lx] not mapped to GPU\n",
-                        prange, prange->start, prange->last);
-               return 0;
-       }
-
-       if (prange->start == start && prange->last == last) {
-               pr_debug("unmap svms 0x%p prange 0x%p\n", prange->svms, prange);
-               prange->mapped_to_gpu = false;
-       }
-
        bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
                  MAX_GPU_INSTANCE);
        p = container_of(prange->svms, struct kfd_process, svms);
        for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
-               pr_debug("unmap from gpu idx 0x%x\n", gpuidx);
                pdd = kfd_process_device_from_gpuidx(p, gpuidx);
                if (!pdd) {
                        pr_debug("failed to find device idx %d\n", gpuidx);
-                       return -EINVAL;
+                       continue;
                }
+               if (!svm_range_partial_mapped_dev(gpuidx, prange, start, last)) {
+                       pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] not mapped on gpu %d\n",
+                                prange->svms, prange, start, last, gpuidx);
+                       continue;
+               }
+
+               svm_range_update_mapped(gpuidx, prange, start, last, false);
+
+               pr_debug("unmap svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu %d\n",
+                        prange->svms, prange, start, last, gpuidx);
+
                kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid,
                                             start, last, trigger);
@@ -1519,6 +1679,10 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,
                if (r)
                        break;
+               if (!r)
+                       svm_range_update_mapped(gpuidx, prange, prange->start + offset,
+                                               prange->start + offset + npages - 1, true);
+
                if (fence) {
                        r = dma_fence_wait(fence, false);
                        dma_fence_put(fence);
@@ -1670,7 +1834,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
        if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
                bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
-               if (!prange->mapped_to_gpu ||
+               if (!svm_range_partial_mapped(prange, prange->start, prange->last) ||
                    bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
                        r = 0;
                        goto free_ctx;
@@ -1755,9 +1919,6 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
                        r = svm_range_map_to_gpus(prange, offset, npages, readonly,
                                                  ctx->bitmap, flush_tlb);
- if (!r && next == end)
-                       prange->mapped_to_gpu = true;
-
                svm_range_unlock(prange);
                addr = next;
@@ -1939,22 +2100,8 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
        if (!p->xnack_enabled ||
            (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
                int evicted_ranges;
-               bool mapped = prange->mapped_to_gpu;
-
-               list_for_each_entry(pchild, &prange->child_list, child_list) {
-                       if (!pchild->mapped_to_gpu)
-                               continue;
-                       mapped = true;
-                       mutex_lock_nested(&pchild->lock, 1);
-                       if (pchild->start <= last && pchild->last >= start) {
-                               pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
-                                        pchild->start, pchild->last);
-                               atomic_inc(&pchild->invalid);
-                       }
-                       mutex_unlock(&pchild->lock);
-               }
-               if (!mapped)
+               if (!svm_range_partial_mapped(prange, start, last))
                        return r;
                if (prange->start <= last && prange->last >= start)
@@ -2005,7 +2152,10 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
static struct svm_range *svm_range_clone(struct svm_range *old)
  {
+       struct kfd_process *p = container_of(old->svms, struct kfd_process, svms);
        struct svm_range *new;
+       uint32_t gpuidx;
+       int nbits;
        new = svm_range_new(old->svms, old->start, old->last, false);
        if (!new)
@@ -2027,8 +2177,13 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
        new->prefetch_loc = old->prefetch_loc;
        new->actual_loc = old->actual_loc;
        new->granularity = old->granularity;
-       new->mapped_to_gpu = old->mapped_to_gpu;
        new->vram_pages = old->vram_pages;
+       nbits = svm_range_mapped_nbits(new->start, new->last, new->granularity);
+       for_each_set_bit(gpuidx, p->svms.bitmap_supported, p->n_pdds) {
+               bitmap_copy(new->bitmap_mapped[gpuidx],
+                           old->bitmap_mapped[gpuidx],
+                           nbits);
+       };
        bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
        bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
@@ -2147,7 +2302,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
                next_start = min(node->last, last) + 1;
                if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
-                   prange->mapped_to_gpu) {
+                   svm_range_complete_mapped(prange)) {
                        /* nothing to do */
                } else if (node->start < start || node->last > last) {
                        /* node intersects the update range and its attributes
@@ -3641,7 +3796,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
                if (migrated && (!p->xnack_enabled ||
                    (prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
-                   prange->mapped_to_gpu) {
+                   svm_range_partial_mapped(prange, prange->start, prange->last)) {
                        pr_debug("restore_work will update mappings of GPUs\n");
                        mutex_unlock(&prange->migrate_mutex);
                        continue;
@@ -3652,7 +3807,8 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
                        continue;
                }
-               flush_tlb = !migrated && update_mapping && prange->mapped_to_gpu;
+               flush_tlb = !migrated && update_mapping &&
+                           svm_range_partial_mapped(prange, prange->start, prange->last);
                r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
                                               true, flush_tlb);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index bf2fde008115..7e165854bc0e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -136,7 +136,7 @@ struct svm_range {
        struct list_head                child_list;
        DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
        DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
-       bool                            mapped_to_gpu;
+       unsigned long                   *bitmap_mapped[MAX_GPU_INSTANCE];
  };
  static inline void svm_range_lock(struct svm_range *prange)
@@ -167,6 +167,12 @@ static inline struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
                return NULL;
  }
+static inline int svm_range_mapped_nbits(uint64_t start, uint64_t last,
+                                        uint8_t granularity)
+{
+       return (last >> granularity) - (start >> granularity) + 1;
+}
+
  int svm_range_list_init(struct kfd_process *p);
  void svm_range_list_fini(struct kfd_process *p);
  int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start,
