The DMA IOVA alloc, link, and sync APIs perform significantly better than
dma_map_page() / dma_unmap_page(), as they avoid costly IOMMU
synchronizations. The difference is especially noticeable when mapping a
2MB region in 4KB pages.
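
For reference, the alloc/link/sync pattern being adopted looks roughly like
the sketch below. This is illustrative only: the helper name map_pages_iova(),
its parameters, and the fixed PAGE_SIZE granularity are assumptions for the
example, not code from this patch (the patch itself handles folio orders and
falls back to dma_map_page() when the IOVA allocation is not usable), and
error unwinding is elided.

  #include <linux/dma-mapping.h>
  #include <linux/mm.h>

  /*
   * Illustrative helper (not from this patch): map @npages system pages
   * into one contiguous IOVA range for @dev, then flush the IOTLB once.
   */
  static int map_pages_iova(struct device *dev, struct page **pages,
  			  unsigned long npages,
  			  enum dma_data_direction dir,
  			  struct dma_iova_state *state)
  {
  	size_t offset = 0;
  	unsigned long i;
  	int err;

  	/* Reserve one IOVA range covering the whole transfer. */
  	if (!dma_iova_try_alloc(dev, state, 0, npages * PAGE_SIZE))
  		return -EOPNOTSUPP;	/* caller falls back to dma_map_page() */

  	for (i = 0; i < npages; i++) {
  		/* Link each page into the range; no sync yet. */
  		err = dma_iova_link(dev, state, page_to_phys(pages[i]),
  				    offset, PAGE_SIZE, dir, 0);
  		if (err)
  			return err;	/* unwinding elided in this sketch */
  		offset += PAGE_SIZE;
  	}

  	/* One sync for the whole range replaces per-page IOMMU syncs. */
  	return dma_iova_sync(dev, state, 0, offset);
  }

The point is that dma_iova_sync() is called once for the whole range, whereas
the dma_map_page() path pays the IOMMU synchronization cost per mapping.
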
Use the IOVA alloc, link, and sync APIs for DRM pagemap, which create
DMA mappings between the CPU and GPU for copying data.

Signed-off-by: Matthew Brost <[email protected]>
---
v4:
- Pack IOVA and drop dummy page (Jason)

 drivers/gpu/drm/drm_pagemap.c | 84 +++++++++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/drm_pagemap.c b/drivers/gpu/drm/drm_pagemap.c
index 29677b19bb69..52a196bc8459 100644
--- a/drivers/gpu/drm/drm_pagemap.c
+++ b/drivers/gpu/drm/drm_pagemap.c
@@ -280,6 +280,20 @@ drm_pagemap_migrate_map_device_pages(struct device *dev,
 	return 0;
 }
 
+/**
+ * struct drm_pagemap_iova_state - DRM pagemap IOVA state
+ *
+ * @dma_state: DMA IOVA state.
+ * @offset: Current offset in IOVA.
+ *
+ * This structure acts as an iterator for packing all IOVA addresses within a
+ * contiguous range.
+ */
+struct drm_pagemap_iova_state {
+	struct dma_iova_state dma_state;
+	unsigned long offset;
+};
+
 /**
  * drm_pagemap_migrate_map_system_pages() - Map system migration pages for GPU SVM migration
  * @dev: The device performing the migration.
@@ -287,6 +301,7 @@ drm_pagemap_migrate_map_device_pages(struct device *dev,
  * @migrate_pfn: Array of page frame numbers of system pages or peer pages to map.
  * @npages: Number of system pages or peer pages to map.
  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
+ * @state: DMA IOVA state for mapping.
  *
  * This function maps pages of memory for migration usage in GPU SVM. It
  * iterates over each page frame number provided in @migrate_pfn, maps the
@@ -300,9 +315,11 @@ drm_pagemap_migrate_map_system_pages(struct device *dev,
 				     struct drm_pagemap_addr *pagemap_addr,
 				     unsigned long *migrate_pfn,
 				     unsigned long npages,
-				     enum dma_data_direction dir)
+				     enum dma_data_direction dir,
+				     struct drm_pagemap_iova_state *state)
 {
 	unsigned long i;
+	bool try_alloc = false;
 
 	for (i = 0; i < npages;) {
 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
@@ -317,9 +334,31 @@ drm_pagemap_migrate_map_system_pages(struct device *dev,
 		folio = page_folio(page);
 		order = folio_order(folio);
 
-		dma_addr = dma_map_page(dev, page, 0, page_size(page), dir);
-		if (dma_mapping_error(dev, dma_addr))
-			return -EFAULT;
+		if (!try_alloc) {
+			dma_iova_try_alloc(dev, &state->dma_state,
+					   npages * PAGE_SIZE >=
+					   HPAGE_PMD_SIZE ?
+					   HPAGE_PMD_SIZE : 0,
+					   npages * PAGE_SIZE);
+			try_alloc = true;
+		}
+
+		if (dma_use_iova(&state->dma_state)) {
+			int err = dma_iova_link(dev, &state->dma_state,
+						page_to_phys(page),
+						state->offset, page_size(page),
+						dir, 0);
+			if (err)
+				return err;
+
+			dma_addr = state->dma_state.addr + state->offset;
+			state->offset += page_size(page);
+		} else {
+			dma_addr = dma_map_page(dev, page, 0, page_size(page),
+						dir);
+			if (dma_mapping_error(dev, dma_addr))
+				return -EFAULT;
+		}
 
 		pagemap_addr[i] =
 			drm_pagemap_addr_encode(dma_addr,
@@ -330,6 +369,9 @@ drm_pagemap_migrate_map_system_pages(struct device *dev,
 		i += NR_PAGES(order);
 	}
 
+	if (dma_use_iova(&state->dma_state))
+		return dma_iova_sync(dev, &state->dma_state, 0, state->offset);
+
 	return 0;
 }
 
@@ -341,6 +383,7 @@ drm_pagemap_migrate_map_system_pages(struct device *dev,
  * @pagemap_addr: Array of DMA information corresponding to mapped pages
  * @npages: Number of pages to unmap
  * @dir: Direction of data transfer (e.g., DMA_BIDIRECTIONAL)
+ * @state: DMA IOVA state for mapping.
  *
  * This function unmaps previously mapped pages of memory for GPU Shared Virtual
  * Memory (SVM). It iterates over each DMA address provided in @dma_addr, checks
@@ -350,10 +393,17 @@ static void drm_pagemap_migrate_unmap_pages(struct device *dev,
 					    struct drm_pagemap_addr *pagemap_addr,
 					    unsigned long *migrate_pfn,
 					    unsigned long npages,
-					    enum dma_data_direction dir)
+					    enum dma_data_direction dir,
+					    struct drm_pagemap_iova_state *state)
 {
 	unsigned long i;
 
+	if (state && dma_use_iova(&state->dma_state)) {
+		dma_iova_unlink(dev, &state->dma_state, 0, state->offset, dir, 0);
+		dma_iova_free(dev, &state->dma_state);
+		return;
+	}
+
 	for (i = 0; i < npages;) {
 		struct page *page = migrate_pfn_to_page(migrate_pfn[i]);
 
@@ -406,7 +456,7 @@ drm_pagemap_migrate_remote_to_local(struct drm_pagemap_devmem *devmem,
 					       devmem->pre_migrate_fence);
 out:
 	drm_pagemap_migrate_unmap_pages(remote_device, pagemap_addr, local_pfns,
-					npages, DMA_FROM_DEVICE);
+					npages, DMA_FROM_DEVICE, NULL);
 
 	return err;
 }
@@ -416,11 +466,13 @@ drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
 			       struct page *local_pages[],
 			       struct drm_pagemap_addr pagemap_addr[],
 			       unsigned long npages,
-			       const struct drm_pagemap_devmem_ops *ops)
+			       const struct drm_pagemap_devmem_ops *ops,
+			       struct drm_pagemap_iova_state *state)
 {
 	int err = drm_pagemap_migrate_map_system_pages(devmem->dev,
 						       pagemap_addr, sys_pfns,
-						       npages, DMA_TO_DEVICE);
+						       npages, DMA_TO_DEVICE,
+						       state);
 
 	if (err)
 		goto out;
@@ -429,7 +481,7 @@ drm_pagemap_migrate_sys_to_dev(struct drm_pagemap_devmem *devmem,
 					       devmem->pre_migrate_fence);
 out:
 	drm_pagemap_migrate_unmap_pages(devmem->dev, pagemap_addr, sys_pfns, npages,
-					DMA_TO_DEVICE);
+					DMA_TO_DEVICE, state);
 
 	return err;
 }
@@ -457,6 +509,7 @@ static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
 				     const struct migrate_range_loc *cur,
 				     const struct drm_pagemap_migrate_details *mdetails)
 {
+	struct drm_pagemap_iova_state state = {};
 	int ret = 0;
 
 	if (cur->start == 0)
@@ -484,7 +537,7 @@ static int drm_pagemap_migrate_range(struct drm_pagemap_devmem *devmem,
 						    &pages[last->start],
 						    &pagemap_addr[last->start],
 						    cur->start - last->start,
-						    last->ops);
+						    last->ops, &state);
 
 out:
 	*last = *cur;
@@ -1001,6 +1054,7 @@ EXPORT_SYMBOL(drm_pagemap_put);
 int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
 {
 	const struct drm_pagemap_devmem_ops *ops = devmem_allocation->ops;
+	struct drm_pagemap_iova_state state = {};
 	unsigned long npages, mpages = 0;
 	struct page **pages;
 	unsigned long *src, *dst;
@@ -1042,7 +1096,7 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
 
 	err = drm_pagemap_migrate_map_system_pages(devmem_allocation->dev,
 						   pagemap_addr, dst, npages,
-						   DMA_FROM_DEVICE);
+						   DMA_FROM_DEVICE, &state);
 	if (err)
 		goto err_finalize;
 
@@ -1059,7 +1113,7 @@ int drm_pagemap_evict_to_ram(struct drm_pagemap_devmem *devmem_allocation)
 	migrate_device_pages(src, dst, npages);
 	migrate_device_finalize(src, dst, npages);
 	drm_pagemap_migrate_unmap_pages(devmem_allocation->dev, pagemap_addr, dst, npages,
-					DMA_FROM_DEVICE);
+					DMA_FROM_DEVICE, &state);
 
 err_free:
 	kvfree(buf);
@@ -1103,6 +1157,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
 				MIGRATE_VMA_SELECT_DEVICE_COHERENT,
 		.fault_page = page,
 	};
+	struct drm_pagemap_iova_state state = {};
 	struct drm_pagemap_zdd *zdd;
 	const struct drm_pagemap_devmem_ops *ops;
 	struct device *dev = NULL;
@@ -1162,7 +1217,7 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
 
 	err = drm_pagemap_migrate_map_system_pages(dev, pagemap_addr,
 						   migrate.dst, npages,
-						   DMA_FROM_DEVICE);
+						   DMA_FROM_DEVICE, &state);
 	if (err)
 		goto err_finalize;
 
@@ -1180,7 +1235,8 @@ static int __drm_pagemap_migrate_to_ram(struct vm_area_struct *vas,
 	migrate_vma_finalize(&migrate);
 	if (dev)
 		drm_pagemap_migrate_unmap_pages(dev, pagemap_addr, migrate.dst,
-						npages, DMA_FROM_DEVICE);
+						npages, DMA_FROM_DEVICE,
+						&state);
 err_free:
 	kvfree(buf);
 err_out:
-- 
2.34.1
