Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory v2
On 2020-02-06 9:30, Christian König wrote: Make use of the better performance here as well. This patch is only compile tested! v2: fix calculation bug pointed out by Felix Signed-off-by: Christian König Acked-by: Jonathan Kim The series is Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58d143b24ba0..2c1d1eb1a7e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; - uint32_t bytes = 4 - (pos & 3); + uint64_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; uint32_t mask = 0x << shift; @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, bytes = len; } - spin_lock_irqsave(>mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - if (!write || mask != 0x) - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(>mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, , bytes); + if (mask != 0x) { + spin_lock_irqsave(>mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0x) + value = RREG32_NO_KIQ(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32_NO_KIQ(mmMM_DATA, value); + } + spin_unlock_irqrestore(>mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, , bytes); + } + } else { + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; + bytes = min(bytes - pos, (uint64_t)len & ~0x3ull); + + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, + bytes, write); } ret += bytes; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory v2
Make use of the better performance here as well. This patch is only compile tested! v2: fix calculation bug pointed out by Felix Signed-off-by: Christian König Acked-by: Jonathan Kim --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58d143b24ba0..2c1d1eb1a7e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; - uint32_t bytes = 4 - (pos & 3); + uint64_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; uint32_t mask = 0x << shift; @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, bytes = len; } - spin_lock_irqsave(>mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - if (!write || mask != 0x) - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(>mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, , bytes); + if (mask != 0x) { + spin_lock_irqsave(>mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0x) + value = RREG32_NO_KIQ(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32_NO_KIQ(mmMM_DATA, value); + } + spin_unlock_irqrestore(>mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, , bytes); + } + } else { + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; + bytes = min(bytes - pos, (uint64_t)len & ~0x3ull); + + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, + bytes, write); } ret += bytes; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory
On 2020-02-05 10:22 a.m., Christian König wrote: Make use of the better performance here as well. This patch is only compile tested! Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58d143b24ba0..538c3b52b712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; - uint32_t bytes = 4 - (pos & 3); + uint64_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; uint32_t mask = 0x << shift; @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, bytes = len; } - spin_lock_irqsave(>mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - if (!write || mask != 0x) - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(>mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, , bytes); + if (mask != 0x) { + spin_lock_irqsave(>mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0x) + value = RREG32_NO_KIQ(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32_NO_KIQ(mmMM_DATA, value); + } + spin_unlock_irqrestore(>mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, , bytes); + } + } else { + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; + bytes = min(pos - bytes, (uint64_t)len & ~0x3ull); I think this is incorrect. The following should be true: pos < ((nodes->start + nodes->size) << PAGE_SHIFT). Consequently pos - bytes is always negative here. But because you're doing unsigned math it will underflow to a big positive number, which is never the minimum. Therefore the min will always be len & ~0x3ull. I believe this should be min(bytes - pos, (uint64_t)len & ~0x3ull). Jon, to catch this bug, you'd need a test that first fragments VRAM (allocates lots of 2MB buffers and frees every other buffer), then allocates a large non-contiguous buffer. Then you need one 4KB or smaller access that crosses a boundary between 2MB VRAM buffer chunks. Christian, your optimized VRAM allocator that tries to get large contiguous chunks is nice for performance, but it probably has a tendency to hide this kind of bug. I wonder if we should have a debug mode that forces non-contiguous buffers to be actually non-contiguous. Regards, Felix + + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, + bytes, write); } ret += bytes; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory
Series is: Reviewed-by: Alex Deucher On Wed, Feb 5, 2020 at 10:22 AM Christian König wrote: > > Make use of the better performance here as well. > > This patch is only compile tested! > > Signed-off-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- > 1 file changed, 23 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > index 58d143b24ba0..538c3b52b712 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c > @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct > ttm_buffer_object *bo, > > while (len && pos < adev->gmc.mc_vram_size) { > uint64_t aligned_pos = pos & ~(uint64_t)3; > - uint32_t bytes = 4 - (pos & 3); > + uint64_t bytes = 4 - (pos & 3); > uint32_t shift = (pos & 3) * 8; > uint32_t mask = 0x << shift; > > @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct > ttm_buffer_object *bo, > bytes = len; > } > > - spin_lock_irqsave(>mmio_idx_lock, flags); > - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | > 0x8000); > - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); > - if (!write || mask != 0x) > - value = RREG32_NO_KIQ(mmMM_DATA); > - if (write) { > - value &= ~mask; > - value |= (*(uint32_t *)buf << shift) & mask; > - WREG32_NO_KIQ(mmMM_DATA, value); > - } > - spin_unlock_irqrestore(>mmio_idx_lock, flags); > - if (!write) { > - value = (value & mask) >> shift; > - memcpy(buf, , bytes); > + if (mask != 0x) { > + spin_lock_irqsave(>mmio_idx_lock, flags); > + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | > 0x8000); > + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); > + if (!write || mask != 0x) > + value = RREG32_NO_KIQ(mmMM_DATA); > + if (write) { > + value &= ~mask; > + value |= (*(uint32_t *)buf << shift) & mask; > + WREG32_NO_KIQ(mmMM_DATA, value); > + } > + spin_unlock_irqrestore(>mmio_idx_lock, flags); > + if (!write) { > + value = (value & mask) >> shift; > + memcpy(buf, , bytes); > + } > + } else { > + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; > + bytes = min(pos - bytes, (uint64_t)len & ~0x3ull); > + > + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, > + bytes, write); > } > > ret += bytes; > -- > 2.17.1 > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory
[AMD Public Use] Tested on Vega20 via proc mem op reads. Old MMIO ~2.7MB/s, Improved MMIO ~3.2MB/s, BAR ~44MB/s Acked-by: Jonathan Kim -Original Message- From: Christian König Sent: Wednesday, February 5, 2020 10:23 AM To: amd-gfx@lists.freedesktop.org; Kuehling, Felix ; Kim, Jonathan Subject: [PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory [CAUTION: External Email] Make use of the better performance here as well. This patch is only compile tested! Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58d143b24ba0..538c3b52b712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; - uint32_t bytes = 4 - (pos & 3); + uint64_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; uint32_t mask = 0x << shift; @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, bytes = len; } - spin_lock_irqsave(>mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - if (!write || mask != 0x) - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(>mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, , bytes); + if (mask != 0x) { + spin_lock_irqsave(>mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0x) + value = RREG32_NO_KIQ(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32_NO_KIQ(mmMM_DATA, value); + } + spin_unlock_irqrestore(>mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, , bytes); + } + } else { + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; + bytes = min(pos - bytes, (uint64_t)len & ~0x3ull); + + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, + bytes, write); } ret += bytes; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/4] drm/amdgpu: use amdgpu_device_vram_access in amdgpu_ttm_access_memory
Make use of the better performance here as well. This patch is only compile tested! Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38 +++-- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58d143b24ba0..538c3b52b712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1565,7 +1565,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, while (len && pos < adev->gmc.mc_vram_size) { uint64_t aligned_pos = pos & ~(uint64_t)3; - uint32_t bytes = 4 - (pos & 3); + uint64_t bytes = 4 - (pos & 3); uint32_t shift = (pos & 3) * 8; uint32_t mask = 0x << shift; @@ -1574,20 +1574,28 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, bytes = len; } - spin_lock_irqsave(>mmio_idx_lock, flags); - WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); - WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); - if (!write || mask != 0x) - value = RREG32_NO_KIQ(mmMM_DATA); - if (write) { - value &= ~mask; - value |= (*(uint32_t *)buf << shift) & mask; - WREG32_NO_KIQ(mmMM_DATA, value); - } - spin_unlock_irqrestore(>mmio_idx_lock, flags); - if (!write) { - value = (value & mask) >> shift; - memcpy(buf, , bytes); + if (mask != 0x) { + spin_lock_irqsave(>mmio_idx_lock, flags); + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x8000); + WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31); + if (!write || mask != 0x) + value = RREG32_NO_KIQ(mmMM_DATA); + if (write) { + value &= ~mask; + value |= (*(uint32_t *)buf << shift) & mask; + WREG32_NO_KIQ(mmMM_DATA, value); + } + spin_unlock_irqrestore(>mmio_idx_lock, flags); + if (!write) { + value = (value & mask) >> shift; + memcpy(buf, , bytes); + } + } else { + bytes = (nodes->start + nodes->size) << PAGE_SHIFT; + bytes = min(pos - bytes, (uint64_t)len & ~0x3ull); + + amdgpu_device_vram_access(adev, pos, (uint32_t *)buf, + bytes, write); } ret += bytes; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx