The device-to-device IOVA alloc, link, and sync APIs perform
significantly better than individual map/unmap operations, as they
batch the costly synchronization into a single call instead of paying
it per mapping. The improvement is especially noticeable when mapping
a 2MB region using 4KB pages. In addition, IOVAs may be required for
future high-speed fabric connections between devices.

Switch GPU SVM over to the IOVA alloc, link, and sync APIs to create
DMA mappings between devices.

Signed-off-by: Matthew Brost <[email protected]>
---
 drivers/gpu/drm/drm_gpusvm.c | 100 ++++++++++++++++++++++++++++++++---
 include/drm/drm_gpusvm.h     |   2 +
 2 files changed, 95 insertions(+), 7 deletions(-)
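
For reference, below is a rough sketch of the call sequence this moves GPU
SVM onto: reserve one IOVA region for the whole range, link each page into
it, and issue a single sync, with unlink/free on teardown. It mirrors the
drm_pagemap op calls in the diff, but the helper itself and the argument
meanings (in particular the size and offset passed to device_iova_link)
are illustrative assumptions, not the authoritative API or implementation.

/*
 * Illustrative sketch only -- assumes order-0 pages and that
 * device_iova_link() takes a byte size and a byte offset into the
 * reservation identified by the cookie.
 */
static int gpusvm_iova_map_sketch(struct drm_pagemap *dpagemap,
                                  struct device *dev,
                                  struct drm_pagemap_addr *addr,
                                  struct page **pages, unsigned long npages,
                                  enum dma_data_direction dir)
{
        void *cookie;
        unsigned long i;
        int err;

        /* One IOVA reservation covering the whole range. */
        cookie = dpagemap->ops->device_iova_alloc(dpagemap, dev,
                                                  npages * PAGE_SIZE, dir);
        if (IS_ERR(cookie))
                return PTR_ERR(cookie);

        /* Link each page at its byte offset within the reservation. */
        for (i = 0; i < npages; i++) {
                addr[i] = dpagemap->ops->device_iova_link(dpagemap, dev,
                                                          pages[i], PAGE_SIZE,
                                                          i * PAGE_SIZE,
                                                          cookie, dir);
                if (dma_mapping_error(dev, addr[i].addr)) {
                        err = -EFAULT;
                        goto err_unlink;
                }
        }

        /* A single sync for the whole range, instead of one per mapping. */
        err = dpagemap->ops->device_iova_sync(dpagemap, dev,
                                              npages * PAGE_SIZE, cookie);
        if (!err)
                return 0;

err_unlink:
        /* Teardown mirrors __drm_gpusvm_unmap_pages(): unlink, then free. */
        dpagemap->ops->device_iova_unlink(dpagemap, dev, npages * PAGE_SIZE,
                                          cookie, dir);
        dpagemap->ops->device_iova_free(dpagemap, dev, npages * PAGE_SIZE,
                                        cookie);
        return err;
}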

diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 084e78fa0f32..59de50d030b3 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1142,9 +1142,19 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
                struct dma_iova_state __state = {};
 
                if (dma_use_iova(&svm_pages->state)) {
+                       drm_WARN_ON(gpusvm->drm, svm_pages->iova_cookie);
+
                        dma_iova_destroy(dev, &svm_pages->state,
                                         npages * PAGE_SIZE,
                                         svm_pages->dma_addr[0].dir, 0);
+               } else if (svm_pages->iova_cookie) {
+                       struct drm_pagemap_addr *addr = &svm_pages->dma_addr[0];
+
+                       dpagemap->ops->device_iova_unlink(dpagemap, dev,
+                                                         npages *
+                                                         PAGE_SIZE,
+                                                         svm_pages->iova_cookie,
+                                                         addr->dir);
                } else {
                        for (i = 0, j = 0; i < npages; j++) {
                                struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
@@ -1166,8 +1176,10 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
                flags.has_dma_mapping = false;
                WRITE_ONCE(svm_pages->flags.__flags, flags.__flags);
 
-               drm_pagemap_put(svm_pages->dpagemap);
-               svm_pages->dpagemap = NULL;
+               if (!svm_pages->iova_cookie) {
+                       drm_pagemap_put(svm_pages->dpagemap);
+                       svm_pages->dpagemap = NULL;
+               }
                svm_pages->state = __state;
        }
 }
@@ -1190,6 +1202,28 @@ static void __drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
        }
 }
 
+/**
+ * drm_gpusvm_pages_iova_free() - Free IOVA associated with GPU SVM pages
+ * @gpusvm: Pointer to the GPU SVM structure
+ * @svm_pages: Pointer to the GPU SVM pages structure
+ * @size: Size of IOVA to free
+ *
+ * This function frees the IOVA associated with a GPU SVM pages struct.
+ */
+static void drm_gpusvm_pages_iova_free(struct drm_gpusvm *gpusvm,
+                                      struct drm_gpusvm_pages *svm_pages,
+                                      size_t size)
+{
+       if (svm_pages->iova_cookie) {
+               struct drm_pagemap *dpagemap = svm_pages->dpagemap;
+
+               dpagemap->ops->device_iova_free(dpagemap, gpusvm->drm->dev,
+                                               size, svm_pages->iova_cookie);
+               drm_pagemap_put(dpagemap);
+               svm_pages->dpagemap = NULL;
+       }
+}
+
 /**
  * drm_gpusvm_free_pages() - Free dma-mapping associated with GPU SVM pages
  * struct
@@ -1208,6 +1242,8 @@ void drm_gpusvm_free_pages(struct drm_gpusvm *gpusvm,
        __drm_gpusvm_unmap_pages(gpusvm, svm_pages, npages);
        __drm_gpusvm_free_pages(gpusvm, svm_pages);
        drm_gpusvm_notifier_unlock(gpusvm);
+
+       drm_gpusvm_pages_iova_free(gpusvm, svm_pages, npages * PAGE_SIZE);
 }
 EXPORT_SYMBOL_GPL(drm_gpusvm_free_pages);
 
@@ -1241,6 +1277,8 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
        __drm_gpusvm_range_remove(notifier, range);
        drm_gpusvm_notifier_unlock(gpusvm);
 
+       drm_gpusvm_pages_iova_free(gpusvm, &range->pages,
+                                  drm_gpusvm_range_size(range));
        drm_gpusvm_range_put(range);
 
        if (RB_EMPTY_ROOT(&notifier->root.rb_root)) {
@@ -1418,6 +1456,7 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
        enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
                                                           DMA_BIDIRECTIONAL;
        struct dma_iova_state *state = &svm_pages->state;
+       bool try_alloc;
 
 retry:
        if (time_after(jiffies, timeout))
@@ -1427,6 +1466,9 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
        if (drm_gpusvm_pages_valid_unlocked(gpusvm, svm_pages))
                goto set_seqno;
 
+       drm_gpusvm_pages_iova_free(gpusvm, svm_pages, npages * PAGE_SIZE);
+       try_alloc = false;
+
        pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
        if (!pfns)
                return -ENOMEM;
@@ -1535,12 +1577,47 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
                                        err = -EAGAIN;
                                        goto err_unmap;
                                }
+
+                               if (!try_alloc) {
+                                       void *iova_cookie;
+
+                                       /* Unlock and restart mapping to allocate IOVA. */
+                                       drm_gpusvm_notifier_unlock(gpusvm);
+
+                                       drm_WARN_ON(gpusvm->drm,
+                                                   svm_pages->iova_cookie);
+
+                                       iova_cookie =
+                                               dpagemap->ops->device_iova_alloc(dpagemap,
+                                                                                gpusvm->drm->dev,
+                                                                                npages * PAGE_SIZE,
+                                                                                dma_dir);
+                                       if (IS_ERR(iova_cookie)) {
+                                               err = PTR_ERR(iova_cookie);
+                                               goto err_unmap;
+                                       }
+
+                                       svm_pages->iova_cookie = iova_cookie;
+                                       try_alloc = true;
+                                       goto map_pages;
+                               }
                        }
-                       svm_pages->dma_addr[j] =
-                               dpagemap->ops->device_map(dpagemap,
-                                                         gpusvm->drm->dev,
-                                                         page, order,
-                                                         dma_dir);
+
+                       if (svm_pages->iova_cookie)
+                               svm_pages->dma_addr[j] =
+                                       dpagemap->ops->device_iova_link(dpagemap,
+                                                                       gpusvm->drm->dev,
+                                                                       page,
+                                                                       PAGE_SHIFT << order,
+                                                                       j * PAGE_SIZE,
+                                                                       svm_pages->iova_cookie,
+                                                                       dma_dir);
+                       else
+                               svm_pages->dma_addr[j] =
+                                       dpagemap->ops->device_map(dpagemap,
+                                                                 gpusvm->drm->dev,
+                                                                 page, order,
+                                                                 dma_dir);
                        if (dma_mapping_error(gpusvm->drm->dev,
                                              svm_pages->dma_addr[j].addr)) {
                                err = -EFAULT;
@@ -1600,10 +1677,19 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
        }
 
        if (dma_use_iova(state)) {
+               drm_WARN_ON(gpusvm->drm, svm_pages->iova_cookie);
+
                err = dma_iova_sync(gpusvm->drm->dev, state, 0,
                                    npages * PAGE_SIZE);
                if (err)
                        goto err_unmap;
+       } else if (svm_pages->iova_cookie) {
+               err = dpagemap->ops->device_iova_sync(dpagemap,
+                                                     gpusvm->drm->dev,
+                                                     npages * PAGE_SIZE,
+                                                     svm_pages->iova_cookie);
+               if (err)
+                       goto err_unmap;
        }
 
        if (pagemap) {
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 6772d8a92788..8c7640625cb1 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -138,6 +138,7 @@ struct drm_gpusvm_pages_flags {
  * @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
  *            Note this is assuming only one drm_pagemap per range is allowed.
  * @state: DMA IOVA state for mapping.
+ * @iova_cookie: IOVA cookie for mapping.
  * @notifier_seq: Notifier sequence number of the range's pages
  * @flags: Flags for range
  * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
@@ -150,6 +151,7 @@ struct drm_gpusvm_pages {
        struct drm_pagemap_addr *dma_addr;
        struct drm_pagemap *dpagemap;
        struct dma_iova_state state;
+       void *iova_cookie;
        unsigned long notifier_seq;
        struct drm_gpusvm_pages_flags flags;
 };
-- 
2.34.1
