With multi-device support we are much more likely to have multiple
drm_gpusvm ranges pointing to the same struct mm range.

To avoid calling into drm_pagemap_populate_mm(), which is always
very costly, introduce a much less costly drm_gpusvm function,
drm_gpusvm_scan_mm(), that scans the current migration state.
The device fault handler and the prefetcher can use this function to
determine whether migration is really necessary, as sketched below.
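
For example (an illustrative sketch only; the owner, pagemap, dpagemap,
start, end, mm and timeslice_ms variables are placeholders for whatever
the driver's fault handler already has at hand):

	enum drm_gpusvm_scan_result res;

	/* Cheap advisory snapshot of the current migration state. */
	res = drm_gpusvm_scan_mm(range, owner, pagemap);
	if (res == DRM_GPUSVM_SCAN_EQUAL)
		return 0; /* Already fully resident in our pagemap. */

	/* Otherwise fall back to the costly migration path. */
	return drm_pagemap_populate_mm(dpagemap, start, end, mm,
				       timeslice_ms);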

There are a couple of performance improvements that can be made to
this function if it turns out to be too costly. Those are
documented in the code.

v3:
- New patch.

Signed-off-by: Thomas Hellström <[email protected]>
---
 drivers/gpu/drm/drm_gpusvm.c | 121 +++++++++++++++++++++++++++++++++++
 include/drm/drm_gpusvm.h     |  29 +++++++++
 2 files changed, 150 insertions(+)

diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 4c7474a331bc..aa9a0b60e727 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -743,6 +743,127 @@ static bool drm_gpusvm_check_pages(struct drm_gpusvm *gpusvm,
        return err ? false : true;
 }
 
+/**
+ * drm_gpusvm_scan_mm() - Check the migration state of a drm_gpusvm_range
+ * @range: Pointer to the struct drm_gpusvm_range to check.
+ * @dev_private_owner: The device-private page owner used to determine
+ * compatible device-private pages.
+ * @pagemap: The struct dev_pagemap pointer to use for pagemap-specific
+ * checks.
+ *
+ * Scan the CPU address space corresponding to @range and return the
+ * current migration state. Note that the result may be invalid as
+ * soon as the function returns. It's an advisory check.
+ *
+ * TODO: Bail early and call hmm_range_fault() for subranges.
+ *
+ * Return: See &enum drm_gpusvm_scan_result.
+ */
+enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range,
+                                              void *dev_private_owner,
+                                              const struct dev_pagemap *pagemap)
+{
+       struct mmu_interval_notifier *notifier = &range->notifier->notifier;
+       unsigned long start = drm_gpusvm_range_start(range);
+       unsigned long end = drm_gpusvm_range_end(range);
+       struct hmm_range hmm_range = {
+               .default_flags = 0,
+               .notifier = notifier,
+               .start = start,
+               .end = end,
+               .dev_private_owner = dev_private_owner,
+       };
+       unsigned long timeout =
+               jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+       enum drm_gpusvm_scan_result state = DRM_GPUSVM_SCAN_UNPOPULATED, new_state;
+       unsigned long *pfns;
+       unsigned long npages = npages_in_range(start, end);
+       const struct dev_pagemap *other = NULL;
+       int err, i;
+
+       pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+       if (!pfns)
+               return DRM_GPUSVM_SCAN_UNPOPULATED;
+
+       hmm_range.hmm_pfns = pfns;
+
+retry:
+       hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
+       mmap_read_lock(range->gpusvm->mm);
+
+       while (true) {
+               err = hmm_range_fault(&hmm_range);
+               if (err == -EBUSY) {
+                       if (time_after(jiffies, timeout))
+                               break;
+
+                       hmm_range.notifier_seq =
+                               mmu_interval_read_begin(notifier);
+                       continue;
+               }
+               break;
+       }
+       mmap_read_unlock(range->gpusvm->mm);
+       if (err)
+               goto err_free;
+
+       drm_gpusvm_notifier_lock(range->gpusvm);
+       if (mmu_interval_read_retry(notifier, hmm_range.notifier_seq)) {
+               drm_gpusvm_notifier_unlock(range->gpusvm);
+               goto retry;
+       }
+
+       for (i = 0; i < npages;) {
+               struct page *page;
+               const struct dev_pagemap *cur = NULL;
+
+               if (!(pfns[i] & HMM_PFN_VALID)) {
+                       state = DRM_GPUSVM_SCAN_UNPOPULATED;
+                       goto err_unlock;
+               }
+
+               page = hmm_pfn_to_page(pfns[i]);
+               if (is_device_private_page(page) ||
+                   is_device_coherent_page(page))
+                       cur = page_pgmap(page);
+
+               if (cur == pagemap) {
+                       new_state = DRM_GPUSVM_SCAN_EQUAL;
+               } else if (cur && (cur == other || !other)) {
+                       new_state = DRM_GPUSVM_SCAN_OTHER;
+                       other = cur;
+               } else if (cur) {
+                       new_state = DRM_GPUSVM_SCAN_MIXED_DEVICE;
+               } else {
+                       new_state = DRM_GPUSVM_SCAN_SYSTEM;
+               }
+
+               /*
+                * TODO: Could use an array for state
+                * transitions, and caller might want it
+                * to bail early for some results.
+                */
+               if (state == DRM_GPUSVM_SCAN_UNPOPULATED) {
+                       state = new_state;
+               } else if (state != new_state) {
+                       if (new_state == DRM_GPUSVM_SCAN_SYSTEM ||
+                           state == DRM_GPUSVM_SCAN_SYSTEM)
+                               state = DRM_GPUSVM_SCAN_MIXED;
+                       else if (state != DRM_GPUSVM_SCAN_MIXED)
+                               state = DRM_GPUSVM_SCAN_MIXED_DEVICE;
+               }
+
+               i += 1ul << drm_gpusvm_hmm_pfn_to_order(pfns[i], i, npages);
+       }
+
+err_unlock:
+       drm_gpusvm_notifier_unlock(range->gpusvm);
+err_free:
+       kvfree(pfns);
+       return state;
+}
+EXPORT_SYMBOL(drm_gpusvm_scan_mm);
+
 /**
  * drm_gpusvm_range_chunk_size() - Determine chunk size for GPU SVM range
  * @gpusvm: Pointer to the GPU SVM structure
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 632e100e6efb..2578ac92a8d4 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -328,6 +328,35 @@ void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
                           struct drm_gpusvm_pages *svm_pages,
                           unsigned long npages);
 
+/**
+ * enum drm_gpusvm_scan_result - Scan result from the drm_gpusvm_scan_mm() function.
+ * @DRM_GPUSVM_SCAN_UNPOPULATED: At least one page was not present or was
+ * inaccessible. Also returned if the scan itself failed.
+ * @DRM_GPUSVM_SCAN_EQUAL: All pages belong to the struct dev_pagemap indicated
+ * by the @pagemap argument to the drm_gpusvm_scan_mm() function.
+ * @DRM_GPUSVM_SCAN_OTHER: All pages belong to exactly one dev_pagemap, which is
+ * *NOT* the @pagemap argument to drm_gpusvm_scan_mm(). All pages belong to
+ * the same device-private owner.
+ * @DRM_GPUSVM_SCAN_SYSTEM: All pages are present and are system pages.
+ * @DRM_GPUSVM_SCAN_MIXED_DEVICE: All pages are device pages and belong to at
+ * least two different struct dev_pagemaps. All pages belong to the same
+ * device-private owner.
+ * @DRM_GPUSVM_SCAN_MIXED: Pages are present and are a mix of system pages
+ * and device pages. All device pages belong to the same device-private owner.
+ */
+enum drm_gpusvm_scan_result {
+       DRM_GPUSVM_SCAN_UNPOPULATED,
+       DRM_GPUSVM_SCAN_EQUAL,
+       DRM_GPUSVM_SCAN_OTHER,
+       DRM_GPUSVM_SCAN_SYSTEM,
+       DRM_GPUSVM_SCAN_MIXED_DEVICE,
+       DRM_GPUSVM_SCAN_MIXED,
+};
+
+enum drm_gpusvm_scan_result drm_gpusvm_scan_mm(struct drm_gpusvm_range *range,
+                                              void *dev_private_owner,
+                                              const struct dev_pagemap *pagemap);
+
 #ifdef CONFIG_LOCKDEP
 /**
  * drm_gpusvm_driver_set_lock() - Set the lock protecting accesses to GPU SVM
-- 
2.51.1
