When running e.g. many deqp-gles31 tests in parallel, the kernel warning
"radeon: writing more dwords to the ring than expected!" is issued. On
evergreen this happens because evergreen_copy_dma does not reserve enough
space in the ring: the reservation accounts for only one possible wait
semaphore (3 dwords, on top of the 8 dwords for the fence) and does not
take the padding of the ring buffer into account, while up to
RADEON_NUM_SYNCS semaphore waits may be emitted. The code suggests that
r600_copy_dma and rv770_copy_dma have the same problem.
Fix this by assuming the worst case, i.e. that RADEON_NUM_SYNCS wait
semaphores need to be emitted, and also take the padding into account.
This fixes the issue in evergreen_copy_dma, r600_copy_dma, and
rv770_copy_dma.

Signed-off-by: Gert Wollny <[email protected]>
---
 drivers/gpu/drm/radeon/evergreen_dma.c | 13 +++++++++++--
 drivers/gpu/drm/radeon/r600_dma.c      | 13 +++++++++++--
 drivers/gpu/drm/radeon/rv770_dma.c     | 13 +++++++++++--
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index 52c79da1ecf5..1f07ee6f3f6a 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -113,7 +113,7 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
 	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
+	u32 size_in_dw, cur_size_in_dw, ring_size_reserve;
 	int i, num_loops;
 	int r = 0;
 
@@ -121,7 +121,16 @@ struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+
+	/* Worst case needed dwords:
+	 * - 3 * RADEON_NUM_SYNCS for semaphore waits
+	 * - 5 * num_loops for copy packets
+	 * - 8 for the fence
+	 * Finally, align the block size to account for padding.
+	 */
+	ring_size_reserve = __ALIGN_MASK(RADEON_NUM_SYNCS * 3 + num_loops * 5 + 8,
+					 ring->align_mask);
+	r = radeon_ring_lock(rdev, ring, ring_size_reserve);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_sync_free(rdev, &sync, NULL);
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 89ca2738c5d4..d55b0d721fb0 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -449,7 +449,7 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
 	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
+	u32 size_in_dw, cur_size_in_dw, ring_size_reserve;
 	int i, num_loops;
 	int r = 0;
 
@@ -457,7 +457,16 @@ struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
-	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+
+	/* Worst case needed dwords:
+	 * - 3 * RADEON_NUM_SYNCS for semaphore waits
+	 * - 4 * num_loops for copy packets
+	 * - 5 for the fence
+	 * Finally, align the block size to account for padding.
+	 */
+	ring_size_reserve = __ALIGN_MASK(RADEON_NUM_SYNCS * 3 + num_loops * 4 + 5,
+					 ring->align_mask);
+	r = radeon_ring_lock(rdev, ring, ring_size_reserve);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_sync_free(rdev, &sync, NULL);
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index 4c91614b5e70..c28c11a91e07 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -48,7 +48,7 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
 	struct radeon_sync sync;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
+	u32 size_in_dw, cur_size_in_dw, ring_size_reserve;
 	int i, num_loops;
 	int r = 0;
 
@@ -56,7 +56,16 @@ struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
 	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+
+	/* Worst case needed dwords:
+	 * - 3 * RADEON_NUM_SYNCS for semaphore waits
+	 * - 5 * num_loops for copy packets
+	 * - 5 for the fence
+	 * Finally, align the block size to account for padding.
+	 */
+	ring_size_reserve = __ALIGN_MASK(RADEON_NUM_SYNCS * 3 + num_loops * 5 + 5,
+					 ring->align_mask);
+	r = radeon_ring_lock(rdev, ring, ring_size_reserve);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_sync_free(rdev, &sync, NULL);
-- 
2.49.1
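For reference, here is a minimal user-space sketch of the reservation
arithmetic. ALIGN_MASK() below reproduces the semantics of the kernel's
__ALIGN_MASK() (round x up to the next multiple of mask + 1); the
RADEON_NUM_SYNCS value of 4 and the 16-dword ring alignment are
assumptions for illustration only, the authoritative values live in
radeon.h and radeon_ring.c.

	#include <stdio.h>

	/* Same semantics as the kernel's __ALIGN_MASK(): round x up to
	 * the next multiple of (mask + 1); mask must be one less than
	 * a power of two. */
	#define ALIGN_MASK(x, mask)	(((x) + (mask)) & ~(mask))

	/* Assumed for illustration only; see RADEON_NUM_SYNCS in
	 * radeon.h and ring->align_mask in radeon_ring.c. */
	#define NUM_SYNCS		4
	#define RING_ALIGN_MASK		0xf

	int main(void)
	{
		unsigned int num_loops = 1;

		/* Old evergreen reservation: 5 dwords per copy packet
		 * plus 11, i.e. room for the 8 fence dwords and a
		 * single 3-dword semaphore wait, with no headroom for
		 * padding. */
		unsigned int old_reserve = num_loops * 5 + 11;

		/* New worst case: NUM_SYNCS semaphore waits of 3 dwords
		 * each, 5 dwords per copy packet, 8 for the fence,
		 * rounded up to the ring alignment to cover padding. */
		unsigned int new_reserve =
			ALIGN_MASK(NUM_SYNCS * 3 + num_loops * 5 + 8,
				   RING_ALIGN_MASK);

		printf("old=%u dwords, new=%u dwords\n",
		       old_reserve, new_reserve);
		return 0;
	}

With these assumed numbers, a single-loop copy that has to wait on four
semaphores already needs 12 + 5 + 8 = 25 dwords before padding, while
the old code reserved only 16 (printed as "old=16 dwords, new=32
dwords"), which matches the observed overflow warning.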
