Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining
On 04/20/2018 03:30 PM, Grazvydas Ignotas wrote: On Fri, Apr 20, 2018 at 3:21 PM, Samuel Pitoisetwrote: The SI family doesn't support chaining which means the maximum size in dwords per CS is limited. When that limit was reached we failed to submit the CS and the application crashed. This patch allows to submit up to 4 IBs which is currently the limit, but recent amdgpu supports more than that. Please note that we can reach the limit of 4 IBs per submit but currently we can't improve that. The only solution is to upgrade libdrm. That will be improved later but for now this should fix crashes on SI or when using RADV_DEBUG=noibs. Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 Signed-off-by: Samuel Pitoiset --- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++ 1 file changed, 168 insertions(+), 50 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index c4b2232ce9e..17e6d8ba2b6 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -68,6 +68,10 @@ struct radv_amdgpu_cs { struct radeon_winsys_bo **virtual_buffers; uint8_t *virtual_buffer_priorities; int *virtual_buffer_hash_table; + + /* For chips that don't support chaining. */ + struct radeon_winsys_cs *old_cs_buffers; + unsignednum_old_cs_buffers; }; static inline struct radv_amdgpu_cs * @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { + struct radeon_winsys_cs *rcs = >old_cs_buffers[i]; + free(rcs->buf); + } + + free(cs->old_cs_buffers); free(cs->old_ib_buffers); free(cs->virtual_buffers); free(cs->virtual_buffer_priorities); @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size) /* The total ib size cannot exceed limit_dws dwords. */ if (ib_dws > limit_dws) { - cs->failed = true; + /* The maximum size in dwords has been reached, +* try to allocate a new one. +*/ + if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) { + /* TODO: Allow to submit more than 4 IBs. */ + fprintf(stderr, "amdgpu: Maximum number of IBs " + "per submit reached.\n"); + cs->failed = true; + cs->base.cdw = 0; + return; + } + + cs->old_cs_buffers = + realloc(cs->old_cs_buffers, + (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers)); + if (!cs->old_cs_buffers) { leaking previous cs->old_cs_buffers memory here. I don't think that matters much because if we fail here, the whole application will crash. I have pushed the patch with the two cosmetic changes. Thanks! + cs->failed = true; + cs->base.cdw = 0; + return; + } + + /* Store the current one for submitting it later. */ + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw; + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw; + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf; + cs->num_old_cs_buffers++; + + /* Reset the cs, it will be re-allocated below. */ cs->base.cdw = 0; - return; + cs->base.buf = NULL; + + /* Re-compute the number of dwords to allocate. */ + ib_dws = MAX2(cs->base.cdw + min_size, + MIN2(cs->base.max_dw * 2, limit_dws)); + if (ib_dws > limit_dws) { + fprintf(stderr, "amdgpu: Too high number of " + "dwords to allocate\n"); + cs->failed = true; + return; + } } uint32_t *new_buf = realloc(cs->base.buf,
Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining
On Fri, Apr 20, 2018 at 3:21 PM, Samuel Pitoisetwrote: > The SI family doesn't support chaining which means the maximum > size in dwords per CS is limited. When that limit was reached > we failed to submit the CS and the application crashed. > > This patch allows to submit up to 4 IBs which is currently the > limit, but recent amdgpu supports more than that. > > Please note that we can reach the limit of 4 IBs per submit > but currently we can't improve that. The only solution is to > upgrade libdrm. That will be improved later but for now this > should fix crashes on SI or when using RADV_DEBUG=noibs. > > Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++ > 1 file changed, 168 insertions(+), 50 deletions(-) > > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > index c4b2232ce9e..17e6d8ba2b6 100644 > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > @@ -68,6 +68,10 @@ struct radv_amdgpu_cs { > struct radeon_winsys_bo **virtual_buffers; > uint8_t *virtual_buffer_priorities; > int *virtual_buffer_hash_table; > + > + /* For chips that don't support chaining. */ > + struct radeon_winsys_cs *old_cs_buffers; > + unsignednum_old_cs_buffers; > }; > > static inline struct radv_amdgpu_cs * > @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct > radeon_winsys_cs *rcs) > for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) > cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); > > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { > + struct radeon_winsys_cs *rcs = >old_cs_buffers[i]; > + free(rcs->buf); > + } > + > + free(cs->old_cs_buffers); > free(cs->old_ib_buffers); > free(cs->virtual_buffers); > free(cs->virtual_buffer_priorities); > @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs > *_cs, size_t min_size) > /* The total ib size cannot exceed limit_dws dwords. */ > if (ib_dws > limit_dws) > { > - cs->failed = true; > + /* The maximum size in dwords has been reached, > +* try to allocate a new one. > +*/ > + if (cs->num_old_cs_buffers + 1 >= > AMDGPU_CS_MAX_IBS_PER_SUBMIT) { > + /* TODO: Allow to submit more than 4 IBs. */ > + fprintf(stderr, "amdgpu: Maximum number of > IBs " > + "per submit reached.\n"); > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + cs->old_cs_buffers = > + realloc(cs->old_cs_buffers, > + (cs->num_old_cs_buffers + 1) * > sizeof(*cs->old_cs_buffers)); > + if (!cs->old_cs_buffers) { leaking previous cs->old_cs_buffers memory here. > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + /* Store the current one for submitting it later. */ > + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = > cs->base.cdw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = > cs->base.max_dw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = > cs->base.buf; > + cs->num_old_cs_buffers++; > + > + /* Reset the cs, it will be re-allocated below. */ > cs->base.cdw = 0; > - return; > + cs->base.buf = NULL; > + > + /* Re-compute the number of dwords to allocate. */ > + ib_dws = MAX2(cs->base.cdw + min_size, > + MIN2(cs->base.max_dw * 2, limit_dws)); > + if (ib_dws > limit_dws) { > + fprintf(stderr, "amdgpu: Too high number of " > + "dwords to allocate\n"); > + cs->failed = true; > + return; > + } > } > > uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4); > @@ -400,6 +447,15
Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining
Reviewed-by: Bas NieuwenhuizenOn Fri, Apr 20, 2018 at 2:21 PM, Samuel Pitoiset wrote: > The SI family doesn't support chaining which means the maximum > size in dwords per CS is limited. When that limit was reached > we failed to submit the CS and the application crashed. > > This patch allows to submit up to 4 IBs which is currently the > limit, but recent amdgpu supports more than that. > > Please note that we can reach the limit of 4 IBs per submit > but currently we can't improve that. The only solution is to > upgrade libdrm. That will be improved later but for now this > should fix crashes on SI or when using RADV_DEBUG=noibs. > > Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 > Signed-off-by: Samuel Pitoiset > --- > src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++ > 1 file changed, 168 insertions(+), 50 deletions(-) > > diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > index c4b2232ce9e..17e6d8ba2b6 100644 > --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c > @@ -68,6 +68,10 @@ struct radv_amdgpu_cs { > struct radeon_winsys_bo **virtual_buffers; > uint8_t *virtual_buffer_priorities; > int *virtual_buffer_hash_table; > + > + /* For chips that don't support chaining. */ > + struct radeon_winsys_cs *old_cs_buffers; > + unsignednum_old_cs_buffers; > }; > > static inline struct radv_amdgpu_cs * > @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct > radeon_winsys_cs *rcs) > for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) > cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); > > + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { > + struct radeon_winsys_cs *rcs = >old_cs_buffers[i]; > + free(rcs->buf); > + } > + > + free(cs->old_cs_buffers); > free(cs->old_ib_buffers); > free(cs->virtual_buffers); > free(cs->virtual_buffer_priorities); > @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs > *_cs, size_t min_size) > /* The total ib size cannot exceed limit_dws dwords. */ > if (ib_dws > limit_dws) > { > - cs->failed = true; > + /* The maximum size in dwords has been reached, > +* try to allocate a new one. > +*/ > + if (cs->num_old_cs_buffers + 1 >= > AMDGPU_CS_MAX_IBS_PER_SUBMIT) { > + /* TODO: Allow to submit more than 4 IBs. */ > + fprintf(stderr, "amdgpu: Maximum number of > IBs " > + "per submit reached.\n"); > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + cs->old_cs_buffers = > + realloc(cs->old_cs_buffers, > + (cs->num_old_cs_buffers + 1) * > sizeof(*cs->old_cs_buffers)); > + if (!cs->old_cs_buffers) { > + cs->failed = true; > + cs->base.cdw = 0; > + return; > + } > + > + /* Store the current one for submitting it later. */ > + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = > cs->base.cdw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = > cs->base.max_dw; > + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = > cs->base.buf; > + cs->num_old_cs_buffers++; > + > + /* Reset the cs, it will be re-allocated below. */ > cs->base.cdw = 0; > - return; > + cs->base.buf = NULL; > + > + /* Re-compute the number of dwords to allocate. */ > + ib_dws = MAX2(cs->base.cdw + min_size, > + MIN2(cs->base.max_dw * 2, limit_dws)); > + if (ib_dws > limit_dws) { > + fprintf(stderr, "amdgpu: Too high number of " > + "dwords to allocate\n"); > + cs->failed = true; > + return; > + } > } > > uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4); > @@ -400,6
[Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining
The SI family doesn't support chaining which means the maximum size in dwords per CS is limited. When that limit was reached we failed to submit the CS and the application crashed. This patch allows to submit up to 4 IBs which is currently the limit, but recent amdgpu supports more than that. Please note that we can reach the limit of 4 IBs per submit but currently we can't improve that. The only solution is to upgrade libdrm. That will be improved later but for now this should fix crashes on SI or when using RADV_DEBUG=noibs. Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775 Signed-off-by: Samuel Pitoiset--- src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++ 1 file changed, 168 insertions(+), 50 deletions(-) diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c index c4b2232ce9e..17e6d8ba2b6 100644 --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c @@ -68,6 +68,10 @@ struct radv_amdgpu_cs { struct radeon_winsys_bo **virtual_buffers; uint8_t *virtual_buffer_priorities; int *virtual_buffer_hash_table; + + /* For chips that don't support chaining. */ + struct radeon_winsys_cs *old_cs_buffers; + unsignednum_old_cs_buffers; }; static inline struct radv_amdgpu_cs * @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs) for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i) cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]); + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { + struct radeon_winsys_cs *rcs = >old_cs_buffers[i]; + free(rcs->buf); + } + + free(cs->old_cs_buffers); free(cs->old_ib_buffers); free(cs->virtual_buffers); free(cs->virtual_buffer_priorities); @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size) /* The total ib size cannot exceed limit_dws dwords. */ if (ib_dws > limit_dws) { - cs->failed = true; + /* The maximum size in dwords has been reached, +* try to allocate a new one. +*/ + if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) { + /* TODO: Allow to submit more than 4 IBs. */ + fprintf(stderr, "amdgpu: Maximum number of IBs " + "per submit reached.\n"); + cs->failed = true; + cs->base.cdw = 0; + return; + } + + cs->old_cs_buffers = + realloc(cs->old_cs_buffers, + (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers)); + if (!cs->old_cs_buffers) { + cs->failed = true; + cs->base.cdw = 0; + return; + } + + /* Store the current one for submitting it later. */ + cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw; + cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw; + cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf; + cs->num_old_cs_buffers++; + + /* Reset the cs, it will be re-allocated below. */ cs->base.cdw = 0; - return; + cs->base.buf = NULL; + + /* Re-compute the number of dwords to allocate. */ + ib_dws = MAX2(cs->base.cdw + min_size, + MIN2(cs->base.max_dw * 2, limit_dws)); + if (ib_dws > limit_dws) { + fprintf(stderr, "amdgpu: Too high number of " + "dwords to allocate\n"); + cs->failed = true; + return; + } } uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4); @@ -400,6 +447,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs) cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va; cs->ib_size_ptr = >ib.size; cs->ib.size = 0; + } else { + for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) { +