Module: Mesa Branch: 18.0 Commit: 3b9b66560a6f5e6a349e144be20c32cd2ed76aa4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b9b66560a6f5e6a349e144be20c32cd2ed76aa4
Author: Samuel Pitoiset <samuel.pitoi...@gmail.com> Date: Tue Apr 24 10:33:33 2018 +0200 radv/winsys: allow to submit up to 4 IBs for chips without chaining The SI family doesn't support chaining which means the maximum size in dwords per CS is limited. When that limit was reached we failed to submit the CS and the application crashed. This patch allows to submit up to 4 IBs which is currently the limit, but recent amdgpu supports more than that. Please note that we can reach the limit of 4 IBs per submit but currently we can't improve that. The only solution is to upgrade libdrm. That will be improved later but for now this should fix crashes on SI or when using RADV_DEBUG=noibs. Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> --- bin/.cherry-ignore | 3 + src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 219 ++++++++++++++++++++------ 2 files changed, 172 insertions(+), 50 deletions(-) diff --git a/bin/.cherry-ignore b/bin/.cherry-ignore index 0b9765ad3c..06b1d8e459 100644 --- a/bin/.cherry-ignore +++ b/bin/.cherry-ignore @@ -29,3 +29,6 @@ d15fb766aa3c98ffbe16d050b2af4804e4b12c57 radeonsi/gfx9: fix a hang with an empty 0e945fdf23bac5a62c15edfcbfd9d6ac4eee592f nir: Do not use progress for unreachable code in return lowering. 84fef802fb16cef68ec358cbfed1cac9c3bfa410 ac/nir: add missing round_slice for 1D arrays d136a5fad9c7e67c1362453388914ecc60420883 ac: fix the number of coordinates for ac_image_get_lod and arrays + +# stable: There is a specific port for this patch for stable branch. +fedd0a4215bcd387525000d76b77993ca38916ae radv/winsys: allow to submit up to 4 IBs for chips without chaining diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index 5632b1d4ee..4195865a85 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -66,6 +66,10 @@ struct radv_amdgpu_cs { struct radeon_winsys_bo **virtual_buffers; uint8_t *virtual_buffer_priorities; int *virtual_buffer_hash_table; + + /* For chips that don't support chaining. */ + struct radeon_winsys_cs *old_cs_buffers; + unsigned num_old_cs_buffers; }; static inline struct radv_amdgpu_cs * @@ -166,6 +170,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i]; + free(rcs->buf); + } + + free(cs->old_cs_buffers); free(cs->old_ib_buffers); free(cs->virtual_buffers); free(cs->virtual_buffer_priorities); @@ -251,9 +261,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size) /* The total ib size cannot exceed limit_dws dwords. */ if (ib_dws > limit_dws) { - cs->failed = true; + /* The maximum size in dwords has been reached, + * try to allocate a new one. + */ + if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) { + /* TODO: Allow to submit more than 4 IBs. */ + fprintf(stderr, "amdgpu: Maximum number of IBs " + "per submit reached.\n"); + cs->failed = true; + cs->base.cdw = 0; + return; + } + + cs->old_cs_buffers = + realloc(cs->old_cs_buffers, + (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers)); + if (!cs->old_cs_buffers) { + cs->failed = true; + cs->base.cdw = 0; + return; + } + + /* Store the current one for submitting it later. */ + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw; + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw; + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf; + cs->num_old_cs_buffers++; + + /* Reset the cs, it will be re-allocated below. */ cs->base.cdw = 0; - return; + cs->base.buf = NULL; + + /* Re-compute the number of dwords to allocate. */ + ib_dws = MAX2(cs->base.cdw + min_size, + MIN2(cs->base.max_dw * 2, limit_dws)); + if (ib_dws > limit_dws) { + fprintf(stderr, "amdgpu: Too high number of " + "dwords to allocate\n"); + cs->failed = true; + return; + } } uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4); @@ -365,6 +412,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs) cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va; cs->ib_size_ptr = &cs->ib.size; cs->ib.size = 0; + } else { + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { + struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i]; + free(rcs->buf); + } + + free(cs->old_cs_buffers); + cs->old_cs_buffers = NULL; + cs->num_old_cs_buffers = 0; } } @@ -515,7 +571,8 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent, static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_winsys_cs **cs_array, unsigned count, - struct radv_amdgpu_winsys_bo *extra_bo, + struct radv_amdgpu_winsys_bo **extra_bo_array, + unsigned num_extra_bo, struct radeon_winsys_cs *extra_cs, amdgpu_bo_list_handle *bo_list) { @@ -544,7 +601,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, bo_list); free(handles); pthread_mutex_unlock(&ws->global_bo_list_lock); - } else if (count == 1 && !extra_bo && !extra_cs && + } else if (count == 1 && !num_extra_bo && !extra_cs && !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0]; if (cs->num_buffers == 0) { @@ -554,8 +611,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles, cs->priorities, bo_list); } else { - unsigned total_buffer_count = !!extra_bo; - unsigned unique_bo_count = !!extra_bo; + unsigned total_buffer_count = num_extra_bo; + unsigned unique_bo_count = num_extra_bo; for (unsigned i = 0; i < count; ++i) { struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i]; total_buffer_count += cs->num_buffers; @@ -578,9 +635,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws, return -ENOMEM; } - if (extra_bo) { - handles[0] = extra_bo->bo; - priorities[0] = 8; + for (unsigned i = 0; i < num_extra_bo; i++) { + handles[i] = extra_bo_array[i]->bo; + priorities[i] = 8; } for (unsigned i = 0; i < count + !!extra_cs; ++i) { @@ -710,7 +767,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, } } - r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list); + r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs, + &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed for the " "chained submission(%d)\n", r); @@ -777,7 +835,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, memset(&request, 0, sizeof(request)); - r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, + r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0, preamble_cs, &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed " @@ -857,68 +915,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, assert(cs_count); for (unsigned i = 0; i < cs_count;) { - struct amdgpu_cs_ib_info ib = {0}; - struct radeon_winsys_bo *bo = NULL; + struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; + unsigned number_of_ibs = 1; + struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0}; struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs; + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]); uint32_t *ptr; unsigned cnt = 0; unsigned size = 0; unsigned pad_words = 0; - if (preamble_cs) - size += preamble_cs->cdw; - while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { - size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; - ++cnt; - } + if (cs->num_old_cs_buffers > 0) { + /* Special path when the maximum size in dwords has + * been reached because we need to handle more than one + * IB per submit. + */ + unsigned new_cs_count = cs->num_old_cs_buffers + 1; + struct radeon_winsys_cs *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT]; + unsigned idx = 0; + + for (unsigned j = 0; j < cs->num_old_cs_buffers; j++) + new_cs_array[idx++] = &cs->old_cs_buffers[j]; + new_cs_array[idx++] = cs_array[i]; + + for (unsigned j = 0; j < new_cs_count; j++) { + struct radeon_winsys_cs *rcs = new_cs_array[j]; + bool needs_preamble = preamble_cs && j == 0; + unsigned size = 0; + + if (needs_preamble) + size += preamble_cs->cdw; + size += rcs->cdw; + + assert(size < 0xffff8); + + while (!size || (size & 7)) { + size++; + pad_words++; + } - while(!size || (size & 7)) { - size++; - pad_words++; - } - assert(cnt); + bos[j] = ws->buffer_create(ws, 4 * size, 4096, + RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_READ_ONLY); + ptr = ws->buffer_map(bos[j]); - bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT, - RADEON_FLAG_CPU_ACCESS | - RADEON_FLAG_NO_INTERPROCESS_SHARING | - RADEON_FLAG_READ_ONLY); - ptr = ws->buffer_map(bo); + if (needs_preamble) { + memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4); + ptr += preamble_cs->cdw; + } - if (preamble_cs) { - memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4); - ptr += preamble_cs->cdw; - } + memcpy(ptr, rcs->buf, 4 * rcs->cdw); + ptr += rcs->cdw; - for (unsigned j = 0; j < cnt; ++j) { - struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); - memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); - ptr += cs->base.cdw; + for (unsigned k = 0; k < pad_words; ++k) + *ptr++ = pad_word; - } + ibs[j].size = size; + ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]); + } - for (unsigned j = 0; j < pad_words; ++j) - *ptr++ = pad_word; + number_of_ibs = new_cs_count; + cnt++; + } else { + if (preamble_cs) + size += preamble_cs->cdw; - memset(&request, 0, sizeof(request)); + while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) { + size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw; + ++cnt; + } + + while (!size || (size & 7)) { + size++; + pad_words++; + } + assert(cnt); + bos[0] = ws->buffer_create(ws, 4 * size, 4096, + RADEON_DOMAIN_GTT, + RADEON_FLAG_CPU_ACCESS | + RADEON_FLAG_NO_INTERPROCESS_SHARING | + RADEON_FLAG_READ_ONLY); + ptr = ws->buffer_map(bos[0]); + + if (preamble_cs) { + memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4); + ptr += preamble_cs->cdw; + } + + for (unsigned j = 0; j < cnt; ++j) { + struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]); + memcpy(ptr, cs->base.buf, 4 * cs->base.cdw); + ptr += cs->base.cdw; + + } + + for (unsigned j = 0; j < pad_words; ++j) + *ptr++ = pad_word; + + ibs[0].size = size; + ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]); + } r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, - (struct radv_amdgpu_winsys_bo*)bo, - preamble_cs, &bo_list); + (struct radv_amdgpu_winsys_bo **)bos, + number_of_ibs, preamble_cs, + &bo_list); if (r) { fprintf(stderr, "amdgpu: buffer list creation failed " "for the sysmem submission (%d)\n", r); return r; } - ib.size = size; - ib.ib_mc_address = radv_buffer_get_va(bo); + memset(&request, 0, sizeof(request)); request.ip_type = cs0->hw_ip; request.ring = queue_idx; request.resources = bo_list; - request.number_of_ibs = 1; - request.ibs = &ib; + request.number_of_ibs = number_of_ibs; + request.ibs = ibs; request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx); sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false; @@ -934,9 +1051,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, if (bo_list) amdgpu_bo_list_destroy(bo_list); - ws->buffer_destroy(bo); - if (r) - return r; + for (unsigned j = 0; j < number_of_ibs; j++) { + ws->buffer_destroy(bos[j]); + if (r) + return r; + } i += cnt; } _______________________________________________ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit