On Tue, Jan 27, 2026 at 04:48:32PM -0800, Matthew Brost wrote:
> The dma-map IOVA alloc, link, and sync APIs perform significantly better
> than dma-map / dma-unmap, as they avoid costly IOMMU synchronizations.
> This difference is especially noticeable when mapping a 2MB region in
> 4KB pages.
>
> Use the IOVA alloc, link, and sync APIs for GPU SVM, which create DMA
> mappings between the CPU and GPU.
>
> v3:
>  - Always link IOVA in mixed mappings
>  - Sync IOVA

The same comment as before.
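
As an aside, for anyone who has not used the two-step API yet, the flow
the commit message describes (one IOVA allocation up front, per-chunk
links, a single sync, one teardown) looks roughly like the sketch below.
It is only an illustration of the dma_iova_* helpers from
<linux/dma-mapping.h>; example_map_range(), dev, pages and npages are
made-up names, not code from this patch.

#include <linux/dma-mapping.h>
#include <linux/mm.h>

/* Sketch only: map npages of system memory through one IOVA range. */
static int example_map_range(struct device *dev, struct page **pages,
			     unsigned long npages,
			     struct dma_iova_state *state)
{
	size_t size = npages * PAGE_SIZE;
	unsigned long i;
	int err;

	/* One IOVA allocation for the whole range */
	if (!dma_iova_try_alloc(dev, state, 0, size))
		return -EOPNOTSUPP;	/* caller falls back to dma_map_page() */

	/* Link each page; no per-page IOMMU flush here */
	for (i = 0; i < npages; i++) {
		err = dma_iova_link(dev, state, page_to_phys(pages[i]),
				    i * PAGE_SIZE, PAGE_SIZE,
				    DMA_BIDIRECTIONAL, 0);
		if (err)
			goto err_destroy;
	}

	/* A single sync publishes the whole mapping to the IOMMU */
	err = dma_iova_sync(dev, state, 0, size);
	if (err)
		goto err_destroy;

	/* Device addresses are state->addr plus the byte offset linked above */
	return 0;

err_destroy:
	dma_iova_destroy(dev, state, i * PAGE_SIZE, DMA_BIDIRECTIONAL, 0);
	return err;
}

The unmap side is then a single dma_iova_destroy() for the whole range,
which is what the __drm_gpusvm_unmap_pages() hunk below switches to.
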
>
> Signed-off-by: Matthew Brost <[email protected]>
> ---
>  drivers/gpu/drm/drm_gpusvm.c | 87 ++++++++++++++++++++++++++++--------
>  include/drm/drm_gpusvm.h     |  3 ++
>  2 files changed, 71 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
> index 585d913d3d19..084e78fa0f32 100644
> --- a/drivers/gpu/drm/drm_gpusvm.c
> +++ b/drivers/gpu/drm/drm_gpusvm.c
> @@ -1139,19 +1139,26 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
>  		struct drm_gpusvm_pages_flags flags = {
>  			.__flags = svm_pages->flags.__flags,
>  		};
> +		struct dma_iova_state __state = {};

Why don't you use svm_pages->state directly?

>
> -		for (i = 0, j = 0; i < npages; j++) {
> -			struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
> -
> -			if (addr->proto == DRM_INTERCONNECT_SYSTEM)
> -				dma_unmap_page(dev,
> -					       addr->addr,
> -					       PAGE_SIZE << addr->order,
> -					       addr->dir);
> -			else if (dpagemap && dpagemap->ops->device_unmap)
> -				dpagemap->ops->device_unmap(dpagemap,
> -							    dev, *addr);
> -			i += 1 << addr->order;
> +		if (dma_use_iova(&svm_pages->state)) {
> +			dma_iova_destroy(dev, &svm_pages->state,
> +					 npages * PAGE_SIZE,
> +					 svm_pages->dma_addr[0].dir, 0);
> +		} else {
> +			for (i = 0, j = 0; i < npages; j++) {
> +				struct drm_pagemap_addr *addr = &svm_pages->dma_addr[j];
> +
> +				if (addr->proto == DRM_INTERCONNECT_SYSTEM)
> +					dma_unmap_page(dev,
> +						       addr->addr,
> +						       PAGE_SIZE << addr->order,
> +						       addr->dir);
> +				else if (dpagemap && dpagemap->ops->device_unmap)
> +					dpagemap->ops->device_unmap(dpagemap,
> +								    dev, *addr);
> +				i += 1 << addr->order;
> +			}
>  		}
>
>  		/* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
> @@ -1161,6 +1168,7 @@ static void __drm_gpusvm_unmap_pages(struct drm_gpusvm *gpusvm,
>
>  		drm_pagemap_put(svm_pages->dpagemap);
>  		svm_pages->dpagemap = NULL;
> +		svm_pages->state = __state;
>  	}
>  }
>
> @@ -1402,12 +1410,14 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
>  	unsigned long num_dma_mapped;
>  	unsigned int order = 0;
>  	unsigned long *pfns;
> +	phys_addr_t last_phys;
>  	int err = 0;
>  	struct dev_pagemap *pagemap;
>  	struct drm_pagemap *dpagemap;
>  	struct drm_gpusvm_pages_flags flags;
>  	enum dma_data_direction dma_dir = ctx->read_only ? DMA_TO_DEVICE :
>  							    DMA_BIDIRECTIONAL;
> +	struct dma_iova_state *state = &svm_pages->state;
>
>  retry:
>  	if (time_after(jiffies, timeout))
> @@ -1496,6 +1506,17 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
>  				err = -EOPNOTSUPP;
>  				goto err_unmap;
>  			}
> +
> +			if (dma_use_iova(state)) {
> +				err = dma_iova_link(gpusvm->drm->dev, state,
> +						    last_phys,
> +						    i * PAGE_SIZE,
> +						    PAGE_SIZE << order,
> +						    dma_dir, 0);
> +				if (err)
> +					goto err_unmap;
> +			}
> +
>  			zdd = __zdd;
>  			if (pagemap != page_pgmap(page)) {
>  				if (i > 0) {
> @@ -1539,13 +1560,34 @@ int drm_gpusvm_get_pages(struct drm_gpusvm *gpusvm,
>  				goto err_unmap;
>  			}
>
> -			addr = dma_map_page(gpusvm->drm->dev,
> -					    page, 0,
> -					    PAGE_SIZE << order,
> -					    dma_dir);
> -			if (dma_mapping_error(gpusvm->drm->dev, addr)) {
> -				err = -EFAULT;
> -				goto err_unmap;
> +			if (!i)
> +				dma_iova_try_alloc(gpusvm->drm->dev, state,
> +						   npages * PAGE_SIZE >=
> +						   HPAGE_PMD_SIZE ?
> +						   HPAGE_PMD_SIZE : 0,
> +						   npages * PAGE_SIZE);
> +
> +			if (dma_use_iova(state)) {
> +				last_phys = page_to_phys(page);

hmm_pfn_to_phys().

> +
> +				err = dma_iova_link(gpusvm->drm->dev, state,
> +						    page_to_phys(page),

you have last_phys here.

> +						    i * PAGE_SIZE,
> +						    PAGE_SIZE << order,
> +						    dma_dir, 0);
> +				if (err)
> +					goto err_unmap;
> +

Thanks
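
FWIW, folding the two comments on that last hunk together, the link step
would collapse to something like the sketch below. It is just an
illustration of the suggested shape; example_link_hmm_pfn() and its
arguments are made up for this mail, not taken from the patch.

#include <linux/dma-mapping.h>
#include <linux/hmm.h>

/*
 * Sketch only: link one HMM pfn into an already-allocated IOVA range.
 * hmm_pfn_to_phys() converts the HMM pfn straight to a physical address,
 * and the result is handed back through *last_phys so the caller can
 * reuse it instead of doing a second page_to_phys() lookup.
 */
static int example_link_hmm_pfn(struct device *dev,
				struct dma_iova_state *state,
				unsigned long hmm_pfn, unsigned long i,
				unsigned int order,
				enum dma_data_direction dir,
				phys_addr_t *last_phys)
{
	*last_phys = hmm_pfn_to_phys(hmm_pfn);

	return dma_iova_link(dev, state, *last_phys,
			     i * PAGE_SIZE, PAGE_SIZE << order,
			     dir, 0);
}

The cached last_phys then also serves the earlier @@ -1496 hunk, which
already links last_phys, so each page needs only one pfn-to-phys
conversion.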
