Module: Mesa Branch: main Commit: b6f435888b718506e8b806eae2a17d6ecf027dfc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b6f435888b718506e8b806eae2a17d6ecf027dfc
Author: Marek Olšák <[email protected]> Date: Sun Aug 6 23:17:48 2023 -0400 ac/gpu_info: replace ib_alignment with per-IP IB base and size alignments Reviewed-by: Timur Kristóf <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25043> --- src/amd/common/ac_gpu_info.c | 18 +++++++----------- src/amd/common/ac_gpu_info.h | 3 ++- src/amd/vulkan/radv_device_generated_commands.c | 9 ++++++--- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 9 +++++---- src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 10 ++++++---- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 +++++- 6 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c index 4706edf7ece..f3d7ae08760 100644 --- a/src/amd/common/ac_gpu_info.c +++ b/src/amd/common/ac_gpu_info.c @@ -677,8 +677,8 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, info->ip[AMD_IP_GFX].ver_minor = info->ip[AMD_IP_COMPUTE].ver_minor = 3; } info->ip[ip_type].num_queues = util_bitcount(ip_info.available_rings); - info->ib_alignment = MAX3(info->ib_alignment, ip_info.ib_start_alignment, - ip_info.ib_size_alignment); + info->ip[ip_type].ib_base_alignment = ip_info.ib_start_alignment; + info->ip[ip_type].ib_size_alignment = ip_info.ib_size_alignment; } /* Only require gfx or compute. */ @@ -690,12 +690,6 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info, assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_COMPUTE].num_queues)); assert(util_is_power_of_two_or_zero(info->ip[AMD_IP_SDMA].num_queues)); - /* The kernel pads gfx and compute IBs to 256 dwords since: - * 66f3b2d527154bd258a57c8815004b5964aa1cf5 - * Do the same. - */ - info->ib_alignment = MAX2(info->ib_alignment, 1024); - r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0, &info->me_fw_version, &info->me_fw_feature); if (r) { @@ -1681,8 +1675,11 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f) for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) { if (info->ip[i].num_queues) { - fprintf(f, " IP %-7s %2u.%u \tqueues:%u\n", ip_string[i], - info->ip[i].ver_major, info->ip[i].ver_minor, info->ip[i].num_queues); + fprintf(f, " IP %-7s %2u.%u queues:%u " + "align(base:%u, size:%u)\n", + ip_string[i], info->ip[i].ver_major, info->ip[i].ver_minor, + info->ip[i].num_queues, info->ip[i].ib_base_alignment, + info->ip[i].ib_size_alignment); } } @@ -1756,7 +1753,6 @@ void ac_print_gpu_info(const struct radeon_info *info, FILE *f) fprintf(f, "CP info:\n"); fprintf(f, " gfx_ib_pad_with_type2 = %i\n", info->gfx_ib_pad_with_type2); - fprintf(f, " ib_alignment = %u\n", info->ib_alignment); fprintf(f, " me_fw_version = %i\n", info->me_fw_version); fprintf(f, " me_fw_feature = %i\n", info->me_fw_feature); fprintf(f, " mec_fw_version = %i\n", info->mec_fw_version); diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h index 1e9e8717c9f..30f4bdd823c 100644 --- a/src/amd/common/ac_gpu_info.h +++ b/src/amd/common/ac_gpu_info.h @@ -26,6 +26,8 @@ struct amd_ip_info { uint8_t ver_minor; uint8_t ver_rev; uint8_t num_queues; + uint16_t ib_base_alignment; + uint16_t ib_size_alignment; }; struct radeon_info { @@ -160,7 +162,6 @@ struct radeon_info { /* CP info. */ bool gfx_ib_pad_with_type2; - unsigned ib_alignment; /* both start and size alignment */ uint32_t me_fw_version; uint32_t me_fw_feature; uint32_t mec_fw_version; diff --git a/src/amd/vulkan/radv_device_generated_commands.c b/src/amd/vulkan/radv_device_generated_commands.c index 18c0c6ab574..e085a63a59a 100644 --- a/src/amd/vulkan/radv_device_generated_commands.c +++ b/src/amd/vulkan/radv_device_generated_commands.c @@ -1371,11 +1371,14 @@ radv_GetGeneratedCommandsMemoryRequirementsNV(VkDevice _device, VkDeviceSize cmd_buf_size = radv_align_cmdbuf_size(device, cmd_stride * pInfo->maxSequencesCount) + radv_dgc_preamble_cmdbuf_size(device); VkDeviceSize upload_buf_size = upload_stride * pInfo->maxSequencesCount; + unsigned ib_base_alignment = MAX2(device->physical_device->rad_info.ip[AMD_IP_GFX].ib_base_alignment, + device->physical_device->rad_info.ip[AMD_IP_COMPUTE].ib_base_alignment); + unsigned ib_size_alignment = MAX2(device->physical_device->rad_info.ip[AMD_IP_GFX].ib_size_alignment, + device->physical_device->rad_info.ip[AMD_IP_COMPUTE].ib_size_alignment); pMemoryRequirements->memoryRequirements.memoryTypeBits = device->physical_device->memory_types_32bit; - pMemoryRequirements->memoryRequirements.alignment = device->physical_device->rad_info.ib_alignment; - pMemoryRequirements->memoryRequirements.size = - align(cmd_buf_size + upload_buf_size, pMemoryRequirements->memoryRequirements.alignment); + pMemoryRequirements->memoryRequirements.alignment = ib_base_alignment; + pMemoryRequirements->memoryRequirements.size = align(cmd_buf_size + upload_buf_size, ib_size_alignment); } VKAPI_ATTR void VKAPI_CALL diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index a98b538a39d..0420c85f4a3 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -248,8 +248,9 @@ radv_amdgpu_cs_bo_create(struct radv_amdgpu_cs *cs, uint32_t ib_size) const enum radeon_bo_flag flags = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | gtt_wc_flag; - return ws->buffer_create(ws, ib_size, cs->ws->info.ib_alignment, domain, flags, RADV_BO_PRIORITY_CS, 0, - &cs->ib_buffer); + ib_size = align(ib_size, cs->ws->info.ip[cs->ib.ip_type].ib_size_alignment); + return ws->buffer_create(ws, ib_size, cs->ws->info.ip[cs->ib.ip_type].ib_base_alignment, domain, flags, + RADV_BO_PRIORITY_CS, 0, &cs->ib_buffer); } static VkResult @@ -1670,8 +1671,8 @@ radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i]; ib = &request->ibs[i]; - assert(ib->ib_mc_address && ib->ib_mc_address % ctx->ws->info.ib_alignment == 0); - assert(ib->size); + assert(ib->ib_mc_address && ib->ib_mc_address % ctx->ws->info.ip[ib->ip_type].ib_base_alignment == 0); + assert(ib->size && (ib->size * 4) % ctx->ws->info.ip[ib->ip_type].ib_size_alignment == 0); chunk_data[i].ib_data._pad = 0; chunk_data[i].ib_data.va_start = ib->ib_mc_address; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index c70c691843c..fd48e6c8eba 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -953,9 +953,11 @@ static void amdgpu_set_ib_size(struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) static void amdgpu_ib_finalize(struct amdgpu_winsys *ws, struct radeon_cmdbuf *rcs, struct amdgpu_ib *ib) { + struct amdgpu_cs *cs = (struct amdgpu_cs*)ib; + amdgpu_set_ib_size(rcs, ib); ib->used_ib_space += rcs->current.cdw * 4; - ib->used_ib_space = align(ib->used_ib_space, ws->info.ib_alignment); + ib->used_ib_space = align(ib->used_ib_space, ws->info.ip[cs->ip_type].ib_base_alignment); ib->max_ib_size = MAX2(ib->max_ib_size, rcs->prev_dw + rcs->current.cdw); } @@ -1145,12 +1147,12 @@ amdgpu_cs_setup_preemption(struct radeon_cmdbuf *rcs, const uint32_t *preamble_i struct amdgpu_cs *cs = amdgpu_cs(rcs); struct amdgpu_winsys *ws = cs->ws; struct amdgpu_cs_context *csc[2] = {&cs->csc1, &cs->csc2}; - unsigned size = align(preamble_num_dw * 4, ws->info.ib_alignment); + unsigned size = align(preamble_num_dw * 4, ws->info.ip[cs->ip_type].ib_size_alignment); struct pb_buffer *preamble_bo; uint32_t *map; /* Create the preamble IB buffer. */ - preamble_bo = amdgpu_bo_create(ws, size, ws->info.ib_alignment, + preamble_bo = amdgpu_bo_create(ws, size, ws->info.ip[cs->ip_type].ib_base_alignment, RADEON_DOMAIN_VRAM, RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_GTT_WC | @@ -1708,7 +1710,7 @@ static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) if (noop && acs->ip_type == AMD_IP_GFX) { /* Reduce the IB size and fill it with NOP to make it like an empty IB. */ - unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ib_alignment); + unsigned noop_size = MIN2(cs->ib[IB_MAIN].ib_bytes, ws->info.ip[AMD_IP_GFX].ib_size_alignment); cs->ib_main_addr[0] = PKT3(PKT3_NOP, noop_size / 4 - 2, 0); cs->ib[IB_MAIN].ib_bytes = noop_size; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 18eb60b6ac2..990b8e9d6a1 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -551,7 +551,11 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws) (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 3); ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ - ws->info.ib_alignment = 4096; + for (unsigned i = 0; i < AMD_NUM_IP_TYPES; i++) { + /* This is probably too large. */ + ws->info.ip[i].ib_base_alignment = 4096; + ws->info.ip[i].ib_size_alignment = 4096; + } ws->info.has_bo_metadata = false; ws->info.has_eqaa_surface_allocator = false; ws->info.has_sparse_vm_mappings = false;
