From: Marek Olšák <marek.ol...@amd.com>

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 18 ++++++++++++++++--
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h |  7 +++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
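As a side note for review, here is a small standalone sketch of the sizing
arithmetic the patch introduces (this is not part of the patch; the MAX2/MIN2
macros are local stand-ins and the dw/epilog numbers are made up for
illustration):

  /* Illustrative sketch only, not driver code. */
  #include <stdio.h>

  #define MAX2(A, B) ((A) > (B) ? (A) : (B))
  #define MIN2(A, B) ((A) < (B) ? (A) : (B))

  int main(void)
  {
     unsigned max_check_space_size = 0;

     /* Hypothetical cs_check_space request: 7000 dwords, 20-dword epilog. */
     unsigned dw = 7000, cs_epilog_dw = 20;
     unsigned need_byte_size = (dw + cs_epilog_dw) * 4;
     /* 125% of the request, as in the patch. */
     unsigned safe_byte_size = need_byte_size + need_byte_size / 4;
     max_check_space_size = MAX2(max_check_space_size, safe_byte_size);

     /* amdgpu_ib_new_buffer never allocates less than this afterwards. */
     unsigned min_size = MAX2(max_check_space_size, 8 * 1024 * 4);
     unsigned max_size = 512 * 1024 * 4;
     unsigned buffer_size = 4 * 1024 * 4;       /* hypothetical computed size */
     buffer_size = MIN2(buffer_size, max_size);
     buffer_size = MAX2(buffer_size, min_size); /* min_size wins */

     printf("need=%u safe=%u min=%u buffer=%u\n",
            need_byte_size, safe_byte_size, min_size, buffer_size);
     return 0;
  }

With these example inputs, the 125% padding (35100 bytes) exceeds the old
32 KiB floor, so the next IB allocation is grown to fit the largest
cs_check_space request.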
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index b3dedef3d73..dd5193c003d 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -675,21 +675,21 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
     * size, aligned to a power of two (and multiplied by 4 to reduce internal
     * fragmentation if chaining is not available). Limit to 512k dwords, which
     * is the largest power of two that fits into the size field of the
     * INDIRECT_BUFFER packet.
     */
    if (amdgpu_cs_has_chaining(amdgpu_cs_from_ib(ib)))
       buffer_size = 4 *util_next_power_of_two(ib->max_ib_size);
    else
       buffer_size = 4 *util_next_power_of_two(4 * ib->max_ib_size);

-   const unsigned min_size = 8 * 1024 * 4;
+   const unsigned min_size = MAX2(ib->max_check_space_size, 8 * 1024 * 4);
    const unsigned max_size = 512 * 1024 * 4;

    buffer_size = MIN2(buffer_size, max_size);
    buffer_size = MAX2(buffer_size, min_size); /* min_size is more important */

    pb = ws->base.buffer_create(&ws->base, buffer_size,
                                ws->info.gart_page_size,
                                RADEON_DOMAIN_GTT,
                                RADEON_FLAG_NO_INTERPROCESS_SHARING |
                                (ring_type == RING_GFX ||
@@ -742,20 +742,25 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
    switch (ib_type) {
    case IB_MAIN:
       ib = &cs->main;
       ib_size = 4 * 1024 * 4;
       break;
    default:
       unreachable("unhandled IB type");
    }

+   /* Always allocate at least the size of the biggest cs_check_space call,
+    * because precisely the last call might have requested this size.
+    */
+   ib_size = MAX2(ib_size, ib->max_check_space_size);
+
    if (!amdgpu_cs_has_chaining(cs)) {
       ib_size = MAX2(ib_size,
                      4 * MIN2(util_next_power_of_two(ib->max_ib_size),
                               amdgpu_ib_max_submit_dwords(ib_type)));
    }

    ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32;

    ib->base.prev_dw = 0;
    ib->base.num_prev = 0;
@@ -776,20 +781,21 @@ static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
    ib->ptr_ib_size = &info->ib_bytes;
    ib->ptr_ib_size_inside_ib = false;

    amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                         RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

    ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);

    ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
    ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);
+   assert(ib->base.current.max_dw >= ib->max_check_space_size / 4);
    return true;
 }

 static void amdgpu_set_ib_size(struct amdgpu_ib *ib)
 {
    if (ib->ptr_ib_size_inside_ib) {
       *ib->ptr_ib_size = ib->base.current.cdw |
                          S_3F2_CHAIN(1) | S_3F2_VALID(1);
    } else {
       *ib->ptr_ib_size = ib->base.current.cdw;
@@ -971,25 +977,32 @@ amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
 static bool amdgpu_cs_validate(struct radeon_cmdbuf *rcs)
 {
    return true;
 }

 static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw)
 {
    struct amdgpu_ib *ib = amdgpu_ib(rcs);
    struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
    unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
+   unsigned cs_epilog_dw = amdgpu_cs_epilog_dws(cs->ring_type);
+   unsigned need_byte_size = (dw + cs_epilog_dw) * 4;
    uint64_t va;
    uint32_t *new_ptr_ib_size;

    assert(rcs->current.cdw <= rcs->current.max_dw);

+   /* 125% of the size for IB epilog. */
+   unsigned safe_byte_size = need_byte_size + need_byte_size / 4;
+   ib->max_check_space_size = MAX2(ib->max_check_space_size,
+                                   safe_byte_size);
+
    if (requested_size > amdgpu_ib_max_submit_dwords(ib->ib_type))
       return false;

    ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);

    if (rcs->current.max_dw - rcs->current.cdw >= dw)
       return true;

    if (!amdgpu_cs_has_chaining(cs))
       return false;
@@ -1038,21 +1051,22 @@ static bool amdgpu_cs_check_space(struct radeon_cmdbuf *rcs, unsigned dw)
    /* Hook up the new chunk */
    rcs->prev[rcs->num_prev].buf = rcs->current.buf;
    rcs->prev[rcs->num_prev].cdw = rcs->current.cdw;
    rcs->prev[rcs->num_prev].max_dw = rcs->current.cdw; /* no modifications */
    rcs->num_prev++;

    ib->base.prev_dw += ib->base.current.cdw;
    ib->base.current.cdw = 0;

    ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
-   ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);
+   ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - cs_epilog_dw;
+   assert(ib->base.current.max_dw >= ib->max_check_space_size / 4);

    amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                         RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

    return true;
 }

 static unsigned amdgpu_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
                                           struct radeon_bo_list_item *list)
 {
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 17c0581b6f4..4f49a9065c6 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -60,20 +60,27 @@ enum ib_type {
    IB_NUM,
 };

 struct amdgpu_ib {
    struct radeon_cmdbuf base;

    /* A buffer out of which new IBs are allocated. */
    struct pb_buffer *big_ib_buffer;
    uint8_t *ib_mapped;
    unsigned used_ib_space;
+
+   /* The maximum seen size from cs_check_space. If the driver does
+    * cs_check_space and flush, the newly allocated IB should have at least
+    * this size.
+    */
+   unsigned max_check_space_size;
+
    unsigned max_ib_size;
    uint32_t *ptr_ib_size;
    bool ptr_ib_size_inside_ib;
    enum ib_type ib_type;
 };

 struct amdgpu_fence_list {
    struct pipe_fence_handle **list;
    unsigned num;
    unsigned max;
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev