Re: [PATCH 3/5] drm/amdgpu: handle multiple MM nodes in the VMs

2016-08-29 Thread Felix Kuehling
I looked really hard and couldn't find anything obviously broken. It
makes me a bit nervous that there is no bounds checking on the nodes
array, though.

Just one minor nit pick.

With that fixed, Reviewed-by: Felix Kuehling 


On 16-08-29 05:20 AM, Christian König wrote:
> From: Christian König 
>
> This allows us to map scattered VRAM BOs to the VMs.
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 
> +++---
>  1 file changed, 44 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index ea1bd67..df6506d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1048,8 +1048,8 @@ error_free:
>   * @pages_addr: DMA addresses to use for mapping
>   * @vm: requested vm
>   * @mapping: mapped range and flags to use for the update
> - * @addr: addr to set the area to
>   * @flags: HW flags for the mapping
> + * @nodes: array of drm_mm_nodes with the MC addresses
>   * @fence: optional resulting fence
>   *
>   * Split the mapping into smaller chunks so that each update fits
> @@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct 
> amdgpu_device *adev,
> dma_addr_t *pages_addr,
> struct amdgpu_vm *vm,
> struct amdgpu_bo_va_mapping *mapping,
> -   uint32_t flags, uint64_t addr,
> +   uint32_t flags,
> +   struct drm_mm_node *nodes,
> struct fence **fence)
>  {
> - const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / 
> AMDGPU_GPU_PAGE_SIZE;
> -
> - uint64_t src = 0, start = mapping->it.start;
> + uint64_t offset, src = 0, start = mapping->it.start;
>   int r;
>  
>   /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go 
> here
> @@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct 
> amdgpu_device *adev,
>  
>   trace_amdgpu_vm_bo_update(mapping);
>  
> - if (pages_addr) {
> - if (flags == gtt_flags)
> - src = adev->gart.table_addr + (addr >> 12) * 8;
> - addr = 0;
> + offset = mapping->offset;
> + if (nodes) {
> + while (offset > (nodes->size << PAGE_SHIFT)) {
> + offset -= (nodes->size << PAGE_SHIFT);
> + ++nodes;
> + }
>   }
> - addr += mapping->offset;
>  
> - if (!pages_addr || src)
> - return amdgpu_vm_bo_update_mapping(adev, exclusive,
> -src, pages_addr, vm,
> -start, mapping->it.last,
> -flags, addr, fence);
> + do {
> + uint64_t max_entries;
> + uint64_t addr, last;
>  
> - while (start != mapping->it.last + 1) {
> - uint64_t last;
> + if (nodes) {
> + addr = nodes->start << PAGE_SHIFT;
> + max_entries = nodes->size - (offset >> PAGE_SHIFT);

[FK] I think max_entries is the number of GPU page table entries. So you
should convert from the system page size to the GPU page size:
max_entries = ((nodes->size << PAGE_SHIFT) - offset) >>
AMDGPU_GPU_PAGE_SHIFT;

> + } else {
> + addr = 0;
> + max_entries = S64_MAX;
> + }
> + addr += offset;
> +
> + if (pages_addr) {
> + if (flags == gtt_flags)
> + src = adev->gart.table_addr + (addr >> 12) * 8;
> + else
> + max_entries = min(max_entries, 16ull * 1024ull);
> + addr = 0;
> + } else if (flags & AMDGPU_PTE_VALID) {
> + addr += adev->vm_manager.vram_base_offset;
> + }
>  
> - last = min((uint64_t)mapping->it.last, start + max_size - 1);
> + last = min((uint64_t)mapping->it.last, start + max_entries - 1);
>   r = amdgpu_vm_bo_update_mapping(adev, exclusive,
>   src, pages_addr, vm,
>   start, last, flags, addr,
> @@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct 
> amdgpu_device *adev,
>   if (r)
>   return r;
>  
> + offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
> + if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
> + offset = 0;
> + ++nodes;
> + }
>   start = last + 1;
> - addr += 

[PATCH 3/5] drm/amdgpu: handle multiple MM nodes in the VMs

2016-08-29 Thread Christian König
From: Christian König 

This allows us to map scattered VRAM BOs to the VMs.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 +++---
 1 file changed, 44 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ea1bd67..df6506d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1048,8 +1048,8 @@ error_free:
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
- * @addr: addr to set the area to
  * @flags: HW flags for the mapping
+ * @nodes: array of drm_mm_nodes with the MC addresses
  * @fence: optional resulting fence
  *
  * Split the mapping into smaller chunks so that each update fits
@@ -1062,12 +1062,11 @@ static int amdgpu_vm_bo_split_mapping(struct 
amdgpu_device *adev,
  dma_addr_t *pages_addr,
  struct amdgpu_vm *vm,
  struct amdgpu_bo_va_mapping *mapping,
- uint32_t flags, uint64_t addr,
+ uint32_t flags,
+ struct drm_mm_node *nodes,
  struct fence **fence)
 {
-   const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / 
AMDGPU_GPU_PAGE_SIZE;
-
-   uint64_t src = 0, start = mapping->it.start;
+   uint64_t offset, src = 0, start = mapping->it.start;
int r;
 
/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go 
here
@@ -1080,23 +1079,38 @@ static int amdgpu_vm_bo_split_mapping(struct 
amdgpu_device *adev,
 
trace_amdgpu_vm_bo_update(mapping);
 
-   if (pages_addr) {
-   if (flags == gtt_flags)
-   src = adev->gart.table_addr + (addr >> 12) * 8;
-   addr = 0;
+   offset = mapping->offset;
+   if (nodes) {
+   while (offset > (nodes->size << PAGE_SHIFT)) {
+   offset -= (nodes->size << PAGE_SHIFT);
+   ++nodes;
+   }
}
-   addr += mapping->offset;
 
-   if (!pages_addr || src)
-   return amdgpu_vm_bo_update_mapping(adev, exclusive,
-  src, pages_addr, vm,
-  start, mapping->it.last,
-  flags, addr, fence);
+   do {
+   uint64_t max_entries;
+   uint64_t addr, last;
 
-   while (start != mapping->it.last + 1) {
-   uint64_t last;
+   if (nodes) {
+   addr = nodes->start << PAGE_SHIFT;
+   max_entries = nodes->size - (offset >> PAGE_SHIFT);
+   } else {
+   addr = 0;
+   max_entries = S64_MAX;
+   }
+   addr += offset;
+
+   if (pages_addr) {
+   if (flags == gtt_flags)
+   src = adev->gart.table_addr + (addr >> 12) * 8;
+   else
+   max_entries = min(max_entries, 16ull * 1024ull);
+   addr = 0;
+   } else if (flags & AMDGPU_PTE_VALID) {
+   addr += adev->vm_manager.vram_base_offset;
+   }
 
-   last = min((uint64_t)mapping->it.last, start + max_size - 1);
+   last = min((uint64_t)mapping->it.last, start + max_entries - 1);
r = amdgpu_vm_bo_update_mapping(adev, exclusive,
src, pages_addr, vm,
start, last, flags, addr,
@@ -1104,9 +1118,14 @@ static int amdgpu_vm_bo_split_mapping(struct 
amdgpu_device *adev,
if (r)
return r;
 
+   offset += (last - start + 1) * AMDGPU_GPU_PAGE_SIZE;
+   if (nodes && nodes->size == (offset >> PAGE_SHIFT)) {
+   offset = 0;
+   ++nodes;
+   }
start = last + 1;
-   addr += max_size * AMDGPU_GPU_PAGE_SIZE;
-   }
+
+   } while (likely(start != mapping->it.last + 1));
 
return 0;
 }
@@ -1130,34 +1149,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
dma_addr_t *pages_addr = NULL;
uint32_t gtt_flags, flags;
struct ttm_mem_reg *mem;
+   struct drm_mm_node *nodes;
struct fence *exclusive;
-   uint64_t addr;
int r;
 
if (clear) {
mem = NULL;
-   addr = 0;
+   nodes = NULL;
exclusive = NULL;
} else {
struct ttm_dma_tt