Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory v2

2020-02-06 Thread Felix Kuehling

On 2020-02-06 9:30, Christian König wrote:

Make use of the better performance here as well.

This patch is only compile tested!

v2: fix calculation bug pointed out by Felix

Signed-off-by: Christian König 
Acked-by: Jonathan Kim 


The series is

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
  1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58d143b24ba0..2c1d1eb1a7e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
  
  	while (len && pos < adev->gmc.mc_vram_size) {

uint64_t aligned_pos = pos & ~(uint64_t)3;
-   uint32_t bytes = 4 - (pos & 3);
+   uint64_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
uint32_t mask = 0x << shift;
  
@@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,

bytes = len;
}
  
-		spin_lock_irqsave(>mmio_idx_lock, flags);

-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   if (!write || mask != 0x)
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, , bytes);
+   if (mask != 0x) {
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x8000);
+   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+   if (!write || mask != 0x)
+   value = RREG32_NO_KIQ(mmMM_DATA);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   WREG32_NO_KIQ(mmMM_DATA, value);
+   }
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   if (!write) {
+   value = (value & mask) >> shift;
+   memcpy(buf, , bytes);
+   }
+   } else {
+   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+   bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
+
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+ bytes, write);
}
  
  		ret += bytes;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory v2

2020-02-06 Thread Christian König
Make use of the better performance here as well.

This patch is only compile tested!

v2: fix calculation bug pointed out by Felix

Signed-off-by: Christian König 
Acked-by: Jonathan Kim 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58d143b24ba0..2c1d1eb1a7e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
 
while (len && pos < adev->gmc.mc_vram_size) {
uint64_t aligned_pos = pos & ~(uint64_t)3;
-   uint32_t bytes = 4 - (pos & 3);
+   uint64_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
uint32_t mask = 0x << shift;
 
@@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
bytes = len;
}
 
-   spin_lock_irqsave(>mmio_idx_lock, flags);
-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   if (!write || mask != 0x)
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, , bytes);
+   if (mask != 0x) {
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x8000);
+   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+   if (!write || mask != 0x)
+   value = RREG32_NO_KIQ(mmMM_DATA);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   WREG32_NO_KIQ(mmMM_DATA, value);
+   }
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   if (!write) {
+   value = (value & mask) >> shift;
+   memcpy(buf, , bytes);
+   }
+   } else {
+   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+   bytes = min(bytes - pos, (uint64_t)len & ~0x3ull);
+
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+ bytes, write);
}
 
ret += bytes;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory

2020-02-05 Thread Felix Kuehling

On 2020-02-05 10:22 a.m., Christian König wrote:

Make use of the better performance here as well.

This patch is only compile tested!

Signed-off-by: Christian König 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
  1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58d143b24ba0..538c3b52b712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
  
  	while (len && pos < adev->gmc.mc_vram_size) {

uint64_t aligned_pos = pos & ~(uint64_t)3;
-   uint32_t bytes = 4 - (pos & 3);
+   uint64_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
uint32_t mask = 0x << shift;
  
@@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,

bytes = len;
}
  
-		spin_lock_irqsave(>mmio_idx_lock, flags);

-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   if (!write || mask != 0x)
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, , bytes);
+   if (mask != 0x) {
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x8000);
+   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+   if (!write || mask != 0x)
+   value = RREG32_NO_KIQ(mmMM_DATA);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   WREG32_NO_KIQ(mmMM_DATA, value);
+   }
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   if (!write) {
+   value = (value & mask) >> shift;
+   memcpy(buf, , bytes);
+   }
+   } else {
+   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+   bytes = min(pos - bytes, (uint64_t)len & ~0x3ull);


I think this is incorrect. The following should be true: pos < 
((nodes->start + nodes->size) << PAGE_SHIFT). Consequently pos - bytes 
is always negative here. But because you're doing unsigned math it will 
underflow to a big positive number, which is never the minimum. 
Therefore the min will always be len & ~0x3ull.


I believe this should be min(bytes - pos, (uint64_t)len & ~0x3ull).

Jon, to catch this bug, you'd need a test that first fragments VRAM 
(allocates lots of 2MB buffers and frees every other buffer), then 
allocates a large non-contiguous buffer. Then you need one 4KB or 
smaller access that crosses a boundary between 2MB VRAM buffer chunks.


Christian, your optimized VRAM allocator that tries to get large 
contiguous chunks is nice for performance, but it probably has a 
tendency to hide this kind of bug. I wonder if we should have a debug 
mode that forces non-contiguous buffers to be actually non-contiguous.


Regards,
  Felix


+
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+ bytes, write);
}
  
  		ret += bytes;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory

2020-02-05 Thread Alex Deucher
Series is:
Reviewed-by: Alex Deucher 

On Wed, Feb 5, 2020 at 10:22 AM Christian König
 wrote:
>
> Make use of the better performance here as well.
>
> This patch is only compile tested!
>
> Signed-off-by: Christian König 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
>  1 file changed, 23 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 58d143b24ba0..538c3b52b712 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
>
> while (len && pos < adev->gmc.mc_vram_size) {
> uint64_t aligned_pos = pos & ~(uint64_t)3;
> -   uint32_t bytes = 4 - (pos & 3);
> +   uint64_t bytes = 4 - (pos & 3);
> uint32_t shift = (pos & 3) * 8;
> uint32_t mask = 0x << shift;
>
> @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct 
> ttm_buffer_object *bo,
> bytes = len;
> }
>
> -   spin_lock_irqsave(>mmio_idx_lock, flags);
> -   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
> 0x8000);
> -   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> -   if (!write || mask != 0x)
> -   value = RREG32_NO_KIQ(mmMM_DATA);
> -   if (write) {
> -   value &= ~mask;
> -   value |= (*(uint32_t *)buf << shift) & mask;
> -   WREG32_NO_KIQ(mmMM_DATA, value);
> -   }
> -   spin_unlock_irqrestore(>mmio_idx_lock, flags);
> -   if (!write) {
> -   value = (value & mask) >> shift;
> -   memcpy(buf, , bytes);
> +   if (mask != 0x) {
> +   spin_lock_irqsave(>mmio_idx_lock, flags);
> +   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
> 0x8000);
> +   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
> +   if (!write || mask != 0x)
> +   value = RREG32_NO_KIQ(mmMM_DATA);
> +   if (write) {
> +   value &= ~mask;
> +   value |= (*(uint32_t *)buf << shift) & mask;
> +   WREG32_NO_KIQ(mmMM_DATA, value);
> +   }
> +   spin_unlock_irqrestore(>mmio_idx_lock, flags);
> +   if (!write) {
> +   value = (value & mask) >> shift;
> +   memcpy(buf, , bytes);
> +   }
> +   } else {
> +   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
> +   bytes = min(pos - bytes, (uint64_t)len & ~0x3ull);
> +
> +   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
> + bytes, write);
> }
>
> ret += bytes;
> --
> 2.17.1
>
> ___
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory

2020-02-05 Thread Kim, Jonathan
[AMD Public Use]

Tested on Vega20 via proc mem op reads.

Old MMIO ~2.7MB/s, Improved MMIO ~3.2MB/s, BAR ~44MB/s

Acked-by: Jonathan Kim 

-Original Message-
From: Christian König  
Sent: Wednesday, February 5, 2020 10:23 AM
To: amd-gfx@lists.freedesktop.org; Kuehling, Felix ; 
Kim, Jonathan 
Subject: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in 
amdgpu_ttm_access_memory

[CAUTION: External Email]

Make use of the better performance here as well.

This patch is only compile tested!

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58d143b24ba0..538c3b52b712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,

while (len && pos < adev->gmc.mc_vram_size) {
uint64_t aligned_pos = pos & ~(uint64_t)3;
-   uint32_t bytes = 4 - (pos & 3);
+   uint64_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
uint32_t mask = 0x << shift;

@@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
bytes = len;
}

-   spin_lock_irqsave(>mmio_idx_lock, flags);
-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   if (!write || mask != 0x)
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, , bytes);
+   if (mask != 0x) {
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x8000);
+   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+   if (!write || mask != 0x)
+   value = RREG32_NO_KIQ(mmMM_DATA);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   WREG32_NO_KIQ(mmMM_DATA, value);
+   }
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   if (!write) {
+   value = (value & mask) >> shift;
+   memcpy(buf, , bytes);
+   }
+   } else {
+   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+   bytes = min(pos - bytes, (uint64_t)len & ~0x3ull);
+
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+ bytes, write);
}

ret += bytes;
--
2.17.1
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory

2020-02-05 Thread Christian König
Make use of the better performance here as well.

This patch is only compile tested!

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++--
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 58d143b24ba0..538c3b52b712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
 
while (len && pos < adev->gmc.mc_vram_size) {
uint64_t aligned_pos = pos & ~(uint64_t)3;
-   uint32_t bytes = 4 - (pos & 3);
+   uint64_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
uint32_t mask = 0x << shift;
 
@@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
bytes = len;
}
 
-   spin_lock_irqsave(>mmio_idx_lock, flags);
-   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000);
-   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-   if (!write || mask != 0x)
-   value = RREG32_NO_KIQ(mmMM_DATA);
-   if (write) {
-   value &= ~mask;
-   value |= (*(uint32_t *)buf << shift) & mask;
-   WREG32_NO_KIQ(mmMM_DATA, value);
-   }
-   spin_unlock_irqrestore(>mmio_idx_lock, flags);
-   if (!write) {
-   value = (value & mask) >> shift;
-   memcpy(buf, , bytes);
+   if (mask != 0x) {
+   spin_lock_irqsave(>mmio_idx_lock, flags);
+   WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 
0x8000);
+   WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
+   if (!write || mask != 0x)
+   value = RREG32_NO_KIQ(mmMM_DATA);
+   if (write) {
+   value &= ~mask;
+   value |= (*(uint32_t *)buf << shift) & mask;
+   WREG32_NO_KIQ(mmMM_DATA, value);
+   }
+   spin_unlock_irqrestore(>mmio_idx_lock, flags);
+   if (!write) {
+   value = (value & mask) >> shift;
+   memcpy(buf, , bytes);
+   }
+   } else {
+   bytes = (nodes->start + nodes->size) << PAGE_SHIFT;
+   bytes = min(pos - bytes, (uint64_t)len & ~0x3ull);
+
+   amdgpu_device_vram_access(adev, pos, (uint32_t *)buf,
+ bytes, write);
}
 
ret += bytes;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx