[PATCH 2/3] drm/radeon: implement bo copy callback using CP DMA (v2)
I have just discovered that WAIT_UNTIL=WAIT_3D_IDLE must be set before using CP DMA.

Marek

On Thu, Jul 11, 2013 at 9:35 PM, wrote:
> From: Alex Deucher <alexander.deucher@amd.com>
>
> Lighter weight than using the 3D engine.
>
> v2: fix ring count
>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/radeon/r600.c        |   81 ++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/radeon/r600d.h       |    1 +
>  drivers/gpu/drm/radeon/radeon_asic.h |    3 +
>  3 files changed, 85 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
> index 2d3655f..f7d494f 100644
> --- a/drivers/gpu/drm/radeon/r600.c
> +++ b/drivers/gpu/drm/radeon/r600.c
> @@ -3145,6 +3145,87 @@ int r600_copy_blit(struct radeon_device *rdev,
>  }
>
>  /**
> + * r600_copy_cpdma - copy pages using the CP DMA engine
> + *
> + * @rdev: radeon_device pointer
> + * @src_offset: src GPU address
> + * @dst_offset: dst GPU address
> + * @num_gpu_pages: number of GPU pages to xfer
> + * @fence: radeon fence object
> + *
> + * Copy GPU paging using the CP DMA engine (r6xx+).
> + * Used by the radeon ttm implementation to move pages if
> + * registered as the asic copy callback.
> + */
> +int r600_copy_cpdma(struct radeon_device *rdev,
> +		    uint64_t src_offset, uint64_t dst_offset,
> +		    unsigned num_gpu_pages,
> +		    struct radeon_fence **fence)
> +{
> +	struct radeon_semaphore *sem = NULL;
> +	int ring_index = rdev->asic->copy.blit_ring_index;
> +	struct radeon_ring *ring = &rdev->ring[ring_index];
> +	u32 size_in_bytes, cur_size_in_bytes, tmp;
> +	int i, num_loops;
> +	int r = 0;
> +
> +	r = radeon_semaphore_create(rdev, &sem);
> +	if (r) {
> +		DRM_ERROR("radeon: moving bo (%d).\n", r);
> +		return r;
> +	}
> +
> +	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
> +	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
> +	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21);
> +	if (r) {
> +		DRM_ERROR("radeon: moving bo (%d).\n", r);
> +		radeon_semaphore_free(rdev, &sem, NULL);
> +		return r;
> +	}
> +
> +	if (radeon_fence_need_sync(*fence, ring->idx)) {
> +		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
> +					    ring->idx);
> +		radeon_fence_note_sync(*fence, ring->idx);
> +	} else {
> +		radeon_semaphore_free(rdev, &sem, NULL);
> +	}
> +
> +	for (i = 0; i < num_loops; i++) {
> +		cur_size_in_bytes = size_in_bytes;
> +		if (cur_size_in_bytes > 0x1fffff)
> +			cur_size_in_bytes = 0x1fffff;
> +		size_in_bytes -= cur_size_in_bytes;
> +		tmp = upper_32_bits(src_offset) & 0xff;
> +		if (size_in_bytes == 0)
> +			tmp |= PACKET3_CP_DMA_CP_SYNC;
> +		radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
> +		radeon_ring_write(ring, src_offset & 0xffffffff);
> +		radeon_ring_write(ring, tmp);
> +		radeon_ring_write(ring, dst_offset & 0xffffffff);
> +		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
> +		radeon_ring_write(ring, cur_size_in_bytes);
> +		src_offset += cur_size_in_bytes;
> +		dst_offset += cur_size_in_bytes;
> +	}
> +	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
> +	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
> +	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
> +
> +	r = radeon_fence_emit(rdev, fence, ring->idx);
> +	if (r) {
> +		radeon_ring_unlock_undo(rdev, ring);
> +		return r;
> +	}
> +
> +	radeon_ring_unlock_commit(rdev, ring);
> +	radeon_semaphore_free(rdev, &sem, *fence);
> +
> +	return r;
> +}
> +
> +/**
>  * r600_copy_dma - copy pages using the DMA engine
>  *
>  * @rdev: radeon_device pointer
> diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
> index f1b3084..8e3fe81 100644
> --- a/drivers/gpu/drm/radeon/r600d.h
> +++ b/drivers/gpu/drm/radeon/r600d.h
> @@ -602,6 +602,7 @@
>  #define	L2_BUSY						(1 << 0)
>
>  #define	WAIT_UNTIL					0x8040
> +#define	WAIT_CP_DMA_IDLE_bit				(1 << 8)
>  #define	WAIT_2D_IDLE_bit				(1 << 14)
>  #define	WAIT_3D_IDLE_bit				(1 << 15)
>  #define	WAIT_2D_IDLECLEAN_bit				(1 << 16)
> diff --git a/drivers/gpu/drm/radeon/radeon_asic.h
> b/drivers/gpu/drm/radeon/radeon_asic.h
> index 45d0693..b04b578 100644
> --- a/drivers/gpu/drm/radeon/radeon_asic.h
> +++ b/drivers/gpu/drm/radeon/radeon_asic.h
[PATCH 2/3] drm/radeon: implement bo copy callback using CP DMA (v2)
From: Alex Deucher <alexander.deucher@amd.com>

Lighter weight than using the 3D engine.

v2: fix ring count

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r600.c        |   81 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/r600d.h       |    1 +
 drivers/gpu/drm/radeon/radeon_asic.h |    3 +
 3 files changed, 85 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2d3655f..f7d494f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3145,6 +3145,87 @@ int r600_copy_blit(struct radeon_device *rdev,
 }
 
 /**
+ * r600_copy_cpdma - copy pages using the CP DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the CP DMA engine (r6xx+).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
+		    unsigned num_gpu_pages,
+		    struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.blit_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes, tmp;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0x1fffff)
+			cur_size_in_bytes = 0x1fffff;
+		size_in_bytes -= cur_size_in_bytes;
+		tmp = upper_32_bits(src_offset) & 0xff;
+		if (size_in_bytes == 0)
+			tmp |= PACKET3_CP_DMA_CP_SYNC;
+		radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, tmp);
+		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, cur_size_in_bytes);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
+/**
  * r600_copy_dma - copy pages using the DMA engine
  *
  * @rdev: radeon_device pointer
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index f1b3084..8e3fe81 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -602,6 +602,7 @@
 #define	L2_BUSY						(1 << 0)
 
 #define	WAIT_UNTIL					0x8040
+#define	WAIT_CP_DMA_IDLE_bit				(1 << 8)
 #define	WAIT_2D_IDLE_bit				(1 << 14)
 #define	WAIT_3D_IDLE_bit				(1 << 15)
 #define	WAIT_2D_IDLECLEAN_bit				(1 << 16)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 45d0693..b04b578 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -340,6 +340,9 @@ int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
[PATCH 2/3] drm/radeon: implement bo copy callback using CP DMA (v2)
From: Alex Deucher <alexander.deucher@amd.com>

Lighter weight than using the 3D engine.

v2: fix ring count

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/radeon/r600.c        |   81 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/r600d.h       |    1 +
 drivers/gpu/drm/radeon/radeon_asic.h |    3 +
 3 files changed, 85 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2d3655f..f7d494f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3145,6 +3145,87 @@ int r600_copy_blit(struct radeon_device *rdev,
 }
 
 /**
+ * r600_copy_cpdma - copy pages using the CP DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the CP DMA engine (r6xx+).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
+		    unsigned num_gpu_pages,
+		    struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.blit_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes, tmp;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0x1fffff)
+			cur_size_in_bytes = 0x1fffff;
+		size_in_bytes -= cur_size_in_bytes;
+		tmp = upper_32_bits(src_offset) & 0xff;
+		if (size_in_bytes == 0)
+			tmp |= PACKET3_CP_DMA_CP_SYNC;
+		radeon_ring_write(ring, PACKET3(PACKET3_CP_DMA, 4));
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, tmp);
+		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, cur_size_in_bytes);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
+/**
  * r600_copy_dma - copy pages using the DMA engine
  *
  * @rdev: radeon_device pointer
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index f1b3084..8e3fe81 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -602,6 +602,7 @@
 #define	L2_BUSY						(1 << 0)
 
 #define	WAIT_UNTIL					0x8040
+#define	WAIT_CP_DMA_IDLE_bit				(1 << 8)
 #define	WAIT_2D_IDLE_bit				(1 << 14)
 #define	WAIT_3D_IDLE_bit				(1 << 15)
 #define	WAIT_2D_IDLECLEAN_bit				(1 << 16)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 45d0693..b04b578 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -340,6 +340,9 @@ int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
+int r600_copy_cpdma(struct radeon_device *rdev,
+		    uint64_t src_offset, uint64_t dst_offset,
+		    unsigned num_gpu_pages, struct radeon_fence **fence);