Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining

2018-04-20 Thread Samuel Pitoiset



On 04/20/2018 03:30 PM, Grazvydas Ignotas wrote:

On Fri, Apr 20, 2018 at 3:21 PM, Samuel Pitoiset
 wrote:

The SI family doesn't support chaining which means the maximum
size in dwords per CS is limited. When that limit was reached
we failed to submit the CS and the application crashed.

This patch allows to submit up to 4 IBs which is currently the
limit, but recent amdgpu supports more than that.

Please note that we can reach the limit of 4 IBs per submit
but currently we can't improve that. The only solution is to
upgrade libdrm. That will be improved later but for now this
should fix crashes on SI or when using RADV_DEBUG=noibs.

Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
Signed-off-by: Samuel Pitoiset 
---
  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++
  1 file changed, 168 insertions(+), 50 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index c4b2232ce9e..17e6d8ba2b6 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
 struct radeon_winsys_bo **virtual_buffers;
 uint8_t *virtual_buffer_priorities;
 int *virtual_buffer_hash_table;
+
+   /* For chips that don't support chaining. */
+   struct radeon_winsys_cs *old_cs_buffers;
+   unsignednum_old_cs_buffers;
  };

  static inline struct radv_amdgpu_cs *
@@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs 
*rcs)
 for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
 cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);

+   for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+   struct radeon_winsys_cs *rcs = >old_cs_buffers[i];
+   free(rcs->buf);
+   }
+
+   free(cs->old_cs_buffers);
 free(cs->old_ib_buffers);
 free(cs->virtual_buffers);
 free(cs->virtual_buffer_priorities);
@@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs 
*_cs, size_t min_size)
 /* The total ib size cannot exceed limit_dws dwords. */
 if (ib_dws > limit_dws)
 {
-   cs->failed = true;
+   /* The maximum size in dwords has been reached,
+* try to allocate a new one.
+*/
+   if (cs->num_old_cs_buffers + 1 >= 
AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
+   /* TODO: Allow to submit more than 4 IBs. */
+   fprintf(stderr, "amdgpu: Maximum number of IBs "
+   "per submit reached.\n");
+   cs->failed = true;
+   cs->base.cdw = 0;
+   return;
+   }
+
+   cs->old_cs_buffers =
+   realloc(cs->old_cs_buffers,
+   (cs->num_old_cs_buffers + 1) * 
sizeof(*cs->old_cs_buffers));
+   if (!cs->old_cs_buffers) {


leaking previous cs->old_cs_buffers memory here.


I don't think that matters much because if we fail here, the whole 
application will crash.


I have pushed the patch with the two cosmetic changes.

Thanks!




+   cs->failed = true;
+   cs->base.cdw = 0;
+   return;
+   }
+
+   /* Store the current one for submitting it later. */
+   cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = 
cs->base.cdw;
+   cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = 
cs->base.max_dw;
+   cs->old_cs_buffers[cs->num_old_cs_buffers].buf = 
cs->base.buf;
+   cs->num_old_cs_buffers++;
+
+   /* Reset the cs, it will be re-allocated below. */
 cs->base.cdw = 0;
-   return;
+   cs->base.buf = NULL;
+
+   /* Re-compute the number of dwords to allocate. */
+   ib_dws = MAX2(cs->base.cdw + min_size,
+ MIN2(cs->base.max_dw * 2, limit_dws));
+   if (ib_dws > limit_dws) {
+   fprintf(stderr, "amdgpu: Too high number of "
+   "dwords to allocate\n");
+   cs->failed = true;
+   return;
+   }
 }

 uint32_t *new_buf = realloc(cs->base.buf, 

Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining

2018-04-20 Thread Grazvydas Ignotas
On Fri, Apr 20, 2018 at 3:21 PM, Samuel Pitoiset
 wrote:
> The SI family doesn't support chaining which means the maximum
> size in dwords per CS is limited. When that limit was reached
> we failed to submit the CS and the application crashed.
>
> This patch allows to submit up to 4 IBs which is currently the
> limit, but recent amdgpu supports more than that.
>
> Please note that we can reach the limit of 4 IBs per submit
> but currently we can't improve that. The only solution is to
> upgrade libdrm. That will be improved later but for now this
> should fix crashes on SI or when using RADV_DEBUG=noibs.
>
> Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++
>  1 file changed, 168 insertions(+), 50 deletions(-)
>
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index c4b2232ce9e..17e6d8ba2b6 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
> struct radeon_winsys_bo **virtual_buffers;
> uint8_t *virtual_buffer_priorities;
> int *virtual_buffer_hash_table;
> +
> +   /* For chips that don't support chaining. */
> +   struct radeon_winsys_cs *old_cs_buffers;
> +   unsignednum_old_cs_buffers;
>  };
>
>  static inline struct radv_amdgpu_cs *
> @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct 
> radeon_winsys_cs *rcs)
> for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
> cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
>
> +   for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
> +   struct radeon_winsys_cs *rcs = >old_cs_buffers[i];
> +   free(rcs->buf);
> +   }
> +
> +   free(cs->old_cs_buffers);
> free(cs->old_ib_buffers);
> free(cs->virtual_buffers);
> free(cs->virtual_buffer_priorities);
> @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs 
> *_cs, size_t min_size)
> /* The total ib size cannot exceed limit_dws dwords. */
> if (ib_dws > limit_dws)
> {
> -   cs->failed = true;
> +   /* The maximum size in dwords has been reached,
> +* try to allocate a new one.
> +*/
> +   if (cs->num_old_cs_buffers + 1 >= 
> AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
> +   /* TODO: Allow to submit more than 4 IBs. */
> +   fprintf(stderr, "amdgpu: Maximum number of 
> IBs "
> +   "per submit reached.\n");
> +   cs->failed = true;
> +   cs->base.cdw = 0;
> +   return;
> +   }
> +
> +   cs->old_cs_buffers =
> +   realloc(cs->old_cs_buffers,
> +   (cs->num_old_cs_buffers + 1) * 
> sizeof(*cs->old_cs_buffers));
> +   if (!cs->old_cs_buffers) {

leaking previous cs->old_cs_buffers memory here.

> +   cs->failed = true;
> +   cs->base.cdw = 0;
> +   return;
> +   }
> +
> +   /* Store the current one for submitting it later. */
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = 
> cs->base.cdw;
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = 
> cs->base.max_dw;
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].buf = 
> cs->base.buf;
> +   cs->num_old_cs_buffers++;
> +
> +   /* Reset the cs, it will be re-allocated below. */
> cs->base.cdw = 0;
> -   return;
> +   cs->base.buf = NULL;
> +
> +   /* Re-compute the number of dwords to allocate. */
> +   ib_dws = MAX2(cs->base.cdw + min_size,
> + MIN2(cs->base.max_dw * 2, limit_dws));
> +   if (ib_dws > limit_dws) {
> +   fprintf(stderr, "amdgpu: Too high number of "
> +   "dwords to allocate\n");
> +   cs->failed = true;
> +   return;
> +   }
> }
>
> uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
> @@ -400,6 +447,15 

Re: [Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining

2018-04-20 Thread Bas Nieuwenhuizen
Reviewed-by: Bas Nieuwenhuizen 

On Fri, Apr 20, 2018 at 2:21 PM, Samuel Pitoiset
 wrote:
> The SI family doesn't support chaining which means the maximum
> size in dwords per CS is limited. When that limit was reached
> we failed to submit the CS and the application crashed.
>
> This patch allows to submit up to 4 IBs which is currently the
> limit, but recent amdgpu supports more than that.
>
> Please note that we can reach the limit of 4 IBs per submit
> but currently we can't improve that. The only solution is to
> upgrade libdrm. That will be improved later but for now this
> should fix crashes on SI or when using RADV_DEBUG=noibs.
>
> Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++
>  1 file changed, 168 insertions(+), 50 deletions(-)
>
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
> b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index c4b2232ce9e..17e6d8ba2b6 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
> struct radeon_winsys_bo **virtual_buffers;
> uint8_t *virtual_buffer_priorities;
> int *virtual_buffer_hash_table;
> +
> +   /* For chips that don't support chaining. */
> +   struct radeon_winsys_cs *old_cs_buffers;
> +   unsignednum_old_cs_buffers;
>  };
>
>  static inline struct radv_amdgpu_cs *
> @@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct 
> radeon_winsys_cs *rcs)
> for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
> cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
>
> +   for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
> +   struct radeon_winsys_cs *rcs = >old_cs_buffers[i];
> +   free(rcs->buf);
> +   }
> +
> +   free(cs->old_cs_buffers);
> free(cs->old_ib_buffers);
> free(cs->virtual_buffers);
> free(cs->virtual_buffer_priorities);
> @@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs 
> *_cs, size_t min_size)
> /* The total ib size cannot exceed limit_dws dwords. */
> if (ib_dws > limit_dws)
> {
> -   cs->failed = true;
> +   /* The maximum size in dwords has been reached,
> +* try to allocate a new one.
> +*/
> +   if (cs->num_old_cs_buffers + 1 >= 
> AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
> +   /* TODO: Allow to submit more than 4 IBs. */
> +   fprintf(stderr, "amdgpu: Maximum number of 
> IBs "
> +   "per submit reached.\n");
> +   cs->failed = true;
> +   cs->base.cdw = 0;
> +   return;
> +   }
> +
> +   cs->old_cs_buffers =
> +   realloc(cs->old_cs_buffers,
> +   (cs->num_old_cs_buffers + 1) * 
> sizeof(*cs->old_cs_buffers));
> +   if (!cs->old_cs_buffers) {
> +   cs->failed = true;
> +   cs->base.cdw = 0;
> +   return;
> +   }
> +
> +   /* Store the current one for submitting it later. */
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = 
> cs->base.cdw;
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = 
> cs->base.max_dw;
> +   cs->old_cs_buffers[cs->num_old_cs_buffers].buf = 
> cs->base.buf;
> +   cs->num_old_cs_buffers++;
> +
> +   /* Reset the cs, it will be re-allocated below. */
> cs->base.cdw = 0;
> -   return;
> +   cs->base.buf = NULL;
> +
> +   /* Re-compute the number of dwords to allocate. */
> +   ib_dws = MAX2(cs->base.cdw + min_size,
> + MIN2(cs->base.max_dw * 2, limit_dws));
> +   if (ib_dws > limit_dws) {
> +   fprintf(stderr, "amdgpu: Too high number of "
> +   "dwords to allocate\n");
> +   cs->failed = true;
> +   return;
> +   }
> }
>
> uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
> @@ -400,6 

[Mesa-dev] [PATCH] radv/winsys: allow to submit up to 4 IBs for chips without chaining

2018-04-20 Thread Samuel Pitoiset
The SI family doesn't support chaining which means the maximum
size in dwords per CS is limited. When that limit was reached
we failed to submit the CS and the application crashed.

This patch allows to submit up to 4 IBs which is currently the
limit, but recent amdgpu supports more than that.

Please note that we can reach the limit of 4 IBs per submit
but currently we can't improve that. The only solution is to
upgrade libdrm. That will be improved later but for now this
should fix crashes on SI or when using RADV_DEBUG=noibs.

Fixes: 36cb5508e89 ("radv/winsys: Fail early on overgrown cs.")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105775
Signed-off-by: Samuel Pitoiset 
---
 src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 218 ++
 1 file changed, 168 insertions(+), 50 deletions(-)

diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c 
b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index c4b2232ce9e..17e6d8ba2b6 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -68,6 +68,10 @@ struct radv_amdgpu_cs {
struct radeon_winsys_bo **virtual_buffers;
uint8_t *virtual_buffer_priorities;
int *virtual_buffer_hash_table;
+
+   /* For chips that don't support chaining. */
+   struct radeon_winsys_cs *old_cs_buffers;
+   unsignednum_old_cs_buffers;
 };
 
 static inline struct radv_amdgpu_cs *
@@ -201,6 +205,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs 
*rcs)
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
 
+   for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+   struct radeon_winsys_cs *rcs = >old_cs_buffers[i];
+   free(rcs->buf);
+   }
+
+   free(cs->old_cs_buffers);
free(cs->old_ib_buffers);
free(cs->virtual_buffers);
free(cs->virtual_buffer_priorities);
@@ -286,9 +296,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs 
*_cs, size_t min_size)
/* The total ib size cannot exceed limit_dws dwords. */
if (ib_dws > limit_dws)
{
-   cs->failed = true;
+   /* The maximum size in dwords has been reached,
+* try to allocate a new one.
+*/
+   if (cs->num_old_cs_buffers + 1 >= 
AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
+   /* TODO: Allow to submit more than 4 IBs. */
+   fprintf(stderr, "amdgpu: Maximum number of IBs "
+   "per submit reached.\n");
+   cs->failed = true;
+   cs->base.cdw = 0;
+   return;
+   }
+
+   cs->old_cs_buffers =
+   realloc(cs->old_cs_buffers,
+   (cs->num_old_cs_buffers + 1) * 
sizeof(*cs->old_cs_buffers));
+   if (!cs->old_cs_buffers) {
+   cs->failed = true;
+   cs->base.cdw = 0;
+   return;
+   }
+
+   /* Store the current one for submitting it later. */
+   cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = 
cs->base.cdw;
+   cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = 
cs->base.max_dw;
+   cs->old_cs_buffers[cs->num_old_cs_buffers].buf = 
cs->base.buf;
+   cs->num_old_cs_buffers++;
+
+   /* Reset the cs, it will be re-allocated below. */
cs->base.cdw = 0;
-   return;
+   cs->base.buf = NULL;
+
+   /* Re-compute the number of dwords to allocate. */
+   ib_dws = MAX2(cs->base.cdw + min_size,
+ MIN2(cs->base.max_dw * 2, limit_dws));
+   if (ib_dws > limit_dws) {
+   fprintf(stderr, "amdgpu: Too high number of "
+   "dwords to allocate\n");
+   cs->failed = true;
+   return;
+   }
}
 
uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
@@ -400,6 +447,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs 
*_cs)
cs->ib.ib_mc_address = 
radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
cs->ib_size_ptr = >ib.size;
cs->ib.size = 0;
+   } else {
+   for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+