Re: [PATCH 3/5] drm/amdgpu: handle multiple MM nodes in the VMs
I looked really hard and couldn't find anything obviously broken. It makes me a bit nervous that there is no bounds checking on the nodes array, though. Just one minor nitpick. With that fixed, Reviewed-by: Felix Kuehling

On 16-08-29 05:20 AM, Christian König wrote:
> From: Christian König
>
> This allows us to map scattered VRAM BOs to the VMs.
>
> Signed-off-by: Christian König
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++---
>  1 file changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea1bd67..df6506d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1048,8 +1048,8 @@ error_free:
>   * @pages_addr: DMA addresses to use for mapping
>   * @vm: requested vm
>   * @mapping: mapped range and flags to use for the update
> - * @addr: addr to set the area to
>   * @flags: HW flags for the mapping
> + * @nodes: array of drm_mm_nodes with the MC addresses
>   * @fence: optional resulting fence
>   *
>   * Split the mapping into smaller chunks so that each update fits
> @@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>  				      dma_addr_t *pages_addr,
>  				      struct amdgpu_vm *vm,
>  				      struct amdgpu_bo_va_mapping *mapping,
> -				      uint32_t flags, uint64_t addr,
> +				      uint32_t flags,
> +				      struct drm_mm_node *nodes,
>  				      struct fence **fence)
>  {
> -	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
> -
> -	uint64_t src = 0, start = mapping->it.start;
> +	uint64_t offset, src = 0, start = mapping->it.start;
>  	int r;
>
>  	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
> @@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>
>  	trace_amdgpu_vm_bo_update(mapping);
>
> -	if (pages_addr) {
> -		if (flags == gtt_flags)
> -			src = adev->gart.table_addr + (addr >> 12) * 8;
> -		addr = 0;
> +	offset = mapping->offset;
> +	if (nodes) {
> +		while (offset > (nodes->size << PAGE_SHIFT)) {
> +			offset -= (nodes->size << PAGE_SHIFT);
> +			++nodes;
> +		}
>  	}
> -	addr += mapping->offset;
>
> -	if (!pages_addr || src)
> -		return amdgpu_vm_bo_update_mapping(adev, exclusive,
> -						   src, pages_addr, vm,
> -						   start, mapping->it.last,
> -						   flags, addr, fence);
> +	do {
> +		uint64_t max_entries;
> +		uint64_t addr, last;
>
> -	while (start != mapping->it.last + 1) {
> -		uint64_t last;
> +		if (nodes) {
> +			addr = nodes->start << PAGE_SHIFT;
> +			max_entries = nodes->size - (offset >> PAGE_SHIFT);

[FK] I think max_entries is the number of GPU page table entries. So you should convert from the system page size to the GPU page size:

    max_entries = ((nodes->size << PAGE_SHIFT) - offset) >> AMDGPU_GPU_PAGE_SHIFT;

> +		} else {
> +			addr = 0;
> +			max_entries = S64_MAX;
> +		}
> +		addr += offset;
> +
> +		if (pages_addr) {
> +			if (flags == gtt_flags)
> +				src = adev->gart.table_addr + (addr >> 12) * 8;
> +			else
> +				max_entries = min(max_entries, 16ull * 1024ull);
> +			addr = 0;
> +		} else if (flags & AMDGPU_PTE_VALID) {
> +			addr += adev->vm_manager.vram_base_offset;
> +		}
>
> -		last = min((uint64_t)mapping->it.last, start + max_size - 1);
> +		last = min((uint64_t)mapping->it.last, start + max_entries - 1);
>  		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
>  						src, pages_addr, vm,
>  						start, last, flags, addr,
> @@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
>  		if (r)
>  			return r;
>
> +		offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
> +		if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
> +			offset = 0;
> +			++nodes;
> +		}
>  		start = last + 1;
> -		addr +=
[PATCH 3/5] drm/amdgpu: handle multiple MM nodes in the VMs
From: Christian König

This allows us to map scattered VRAM BOs to the VMs.

Signed-off-by: Christian König
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++---
 1 file changed, 44 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ea1bd67..df6506d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1048,8 +1048,8 @@ error_free:
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
- * @addr: addr to set the area to
  * @flags: HW flags for the mapping
+ * @nodes: array of drm_mm_nodes with the MC addresses
  * @fence: optional resulting fence
  *
  * Split the mapping into smaller chunks so that each update fits
@@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
-				      uint32_t flags, uint64_t addr,
+				      uint32_t flags,
+				      struct drm_mm_node *nodes,
 				      struct fence **fence)
 {
-	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
-
-	uint64_t src = 0, start = mapping->it.start;
+	uint64_t offset, src = 0, start = mapping->it.start;
 	int r;

 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,

 	trace_amdgpu_vm_bo_update(mapping);

-	if (pages_addr) {
-		if (flags == gtt_flags)
-			src = adev->gart.table_addr + (addr >> 12) * 8;
-		addr = 0;
+	offset = mapping->offset;
+	if (nodes) {
+		while (offset > (nodes->size << PAGE_SHIFT)) {
+			offset -= (nodes->size << PAGE_SHIFT);
+			++nodes;
+		}
 	}
-	addr += mapping->offset;

-	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, exclusive,
-						   src, pages_addr, vm,
-						   start, mapping->it.last,
-						   flags, addr, fence);
+	do {
+		uint64_t max_entries;
+		uint64_t addr, last;

-	while (start != mapping->it.last + 1) {
-		uint64_t last;
+		if (nodes) {
+			addr = nodes->start << PAGE_SHIFT;
+			max_entries = nodes->size - (offset >> PAGE_SHIFT);
+		} else {
+			addr = 0;
+			max_entries = S64_MAX;
+		}
+		addr += offset;
+
+		if (pages_addr) {
+			if (flags == gtt_flags)
+				src = adev->gart.table_addr + (addr >> 12) * 8;
+			else
+				max_entries = min(max_entries, 16ull * 1024ull);
+			addr = 0;
+		} else if (flags & AMDGPU_PTE_VALID) {
+			addr += adev->vm_manager.vram_base_offset;
+		}

-		last = min((uint64_t)mapping->it.last, start + max_size - 1);
+		last = min((uint64_t)mapping->it.last, start + max_entries - 1);
 		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
 						src, pages_addr, vm,
 						start, last, flags, addr,
@@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		if (r)
 			return r;

+		offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
+		if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
+			offset = 0;
+			++nodes;
+		}
 		start = last + 1;
-		addr += max_size * AMDGPU_GPU_PAGE_SIZE;
-	}
+
+	} while (likely(start != mapping->it.last + 1));

 	return 0;
 }
@@ -1130,34 +1149,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
 	struct ttm_mem_reg *mem;
+	struct drm_mm_node *nodes;
 	struct fence *exclusive;
-	uint64_t addr;
 	int r;

 	if (clear) {
 		mem = NULL;
-		addr = 0;
+		nodes = NULL;
 		exclusive = NULL;
 	} else {
 		struct ttm_dma_tt